All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Snitzer <snitzer@redhat.com>
To: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>,
	linux-raid@vger.kernel.org, Hans de Goede <hdegoede@redhat.com>,
	Minchan Kim <minchan@kernel.org>,
	Richard Weinberger <richard@nod.at>,
	linux-kernel@vger.kernel.org, linux-block@vger.kernel.org,
	Song Liu <song@kernel.org>,
	dm-devel@redhat.com, linux-mtd@lists.infradead.org,
	cgroups@vger.kernel.org, drbd-dev@tron.linbit.com,
	linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
	martin.petersen@oracle.com
Subject: Re: [PATCH 06/14] block: lift setting the readahead size into the block layer
Date: Wed, 26 Aug 2020 18:07:38 -0400	[thread overview]
Message-ID: <20200826220737.GA25613@redhat.com> (raw)
In-Reply-To: <20200726150333.305527-7-hch@lst.de>

On Sun, Jul 26 2020 at 11:03am -0400,
Christoph Hellwig <hch@lst.de> wrote:

> Drivers shouldn't really mess with the readahead size, as that is a VM
> concept.  Instead set it based on the optimal I/O size by lifting the
> algorithm from the md driver when registering the disk.  Also set
> bdi->io_pages there as well by applying the same scheme based on
> max_sectors.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/blk-settings.c         |  5 ++---
>  block/blk-sysfs.c            |  1 -
>  block/genhd.c                | 13 +++++++++++--
>  drivers/block/aoe/aoeblk.c   |  2 --
>  drivers/block/drbd/drbd_nl.c | 12 +-----------
>  drivers/md/bcache/super.c    |  4 ----
>  drivers/md/dm-table.c        |  3 ---
>  drivers/md/raid0.c           | 16 ----------------
>  drivers/md/raid10.c          | 24 +-----------------------
>  drivers/md/raid5.c           | 13 +------------
>  10 files changed, 16 insertions(+), 77 deletions(-)


In general these changes need a solid audit with respect to stacking
drivers: specifically, how the limits stacking methods (blk_stack_limits)
interact with the lower-level allocation methods (__device_add_disk).

You optimized for the low-level __device_add_disk establishing the bdi's
ra_pages and io_pages.  That happens at the beginning of disk allocation,
well before a stacking driver's queue_io_opt() has been built up.
Previously, setting these values was done in disk_stack_limits or in
driver-specific methods (e.g. dm_table_set_restrictions) that are called
_after_ all the limits stacking occurs.

By moving the setting of the bdi's ra_pages and io_pages so early, into
__device_add_disk, this change will, as far as I can tell, break setting
these values properly for at least DM.

Mike


> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 76a7e03bcd6cac..01049e9b998f1d 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -452,6 +452,8 @@ EXPORT_SYMBOL(blk_limits_io_opt);
>  void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
>  {
>  	blk_limits_io_opt(&q->limits, opt);
> +	q->backing_dev_info->ra_pages =
> +		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
>  }
>  EXPORT_SYMBOL(blk_queue_io_opt);
>  
> @@ -628,9 +630,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
>  		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
>  		       top, bottom);
>  	}
> -
> -	t->backing_dev_info->io_pages =
> -		t->limits.max_sectors >> (PAGE_SHIFT - 9);
>  }
>  EXPORT_SYMBOL(disk_stack_limits);
>  
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 7dda709f3ccb6f..ce418d9128a0b2 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -245,7 +245,6 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
>  
>  	spin_lock_irq(&q->queue_lock);
>  	q->limits.max_sectors = max_sectors_kb << 1;
> -	q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
>  	spin_unlock_irq(&q->queue_lock);
>  
>  	return ret;
> diff --git a/block/genhd.c b/block/genhd.c
> index 8b1e9f48957cb5..097d4e4bc0b8a2 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -775,6 +775,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  			      const struct attribute_group **groups,
>  			      bool register_queue)
>  {
> +	struct request_queue *q = disk->queue;
>  	dev_t devt;
>  	int retval;
>  
> @@ -785,7 +786,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  	 * registration.
>  	 */
>  	if (register_queue)
> -		elevator_init_mq(disk->queue);
> +		elevator_init_mq(q);
>  
>  	/* minors == 0 indicates to use ext devt from part0 and should
>  	 * be accompanied with EXT_DEVT flag.  Make sure all
> @@ -815,10 +816,18 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
>  		disk->flags |= GENHD_FL_NO_PART_SCAN;
>  	} else {
> -		struct backing_dev_info *bdi = disk->queue->backing_dev_info;
> +		struct backing_dev_info *bdi = q->backing_dev_info;
>  		struct device *dev = disk_to_dev(disk);
>  		int ret;
>  
> +		/*
> +		 * For read-ahead of large files to be effective, we need to
> +		 * readahead at least twice the optimal I/O size.
> +		 */
> +		bdi->ra_pages = max(queue_io_opt(q) * 2 / PAGE_SIZE,
> +				    VM_READAHEAD_PAGES);
> +		bdi->io_pages = queue_max_sectors(q) >> (PAGE_SHIFT - 9);
> +
>  		/* Register BDI before referencing it from bdev */
>  		dev->devt = devt;
>  		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
> diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
> index 5ca7216e9e01f3..89b33b402b4e52 100644
> --- a/drivers/block/aoe/aoeblk.c
> +++ b/drivers/block/aoe/aoeblk.c
> @@ -347,7 +347,6 @@ aoeblk_gdalloc(void *vp)
>  	mempool_t *mp;
>  	struct request_queue *q;
>  	struct blk_mq_tag_set *set;
> -	enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
>  	ulong flags;
>  	int late = 0;
>  	int err;
> @@ -407,7 +406,6 @@ aoeblk_gdalloc(void *vp)
>  	WARN_ON(d->gd);
>  	WARN_ON(d->flags & DEVFL_UP);
>  	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
> -	q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE;
>  	d->bufpool = mp;
>  	d->blkq = gd->queue = q;
>  	q->queuedata = d;
> diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
> index 650372ee2c7822..212bf711fb6b41 100644
> --- a/drivers/block/drbd/drbd_nl.c
> +++ b/drivers/block/drbd/drbd_nl.c
> @@ -1360,18 +1360,8 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
>  	decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
>  	decide_on_write_same_support(device, q, b, o, disable_write_same);
>  
> -	if (b) {
> +	if (b)
>  		blk_stack_limits(&q->limits, &b->limits, 0);
> -
> -		if (q->backing_dev_info->ra_pages !=
> -		    b->backing_dev_info->ra_pages) {
> -			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
> -				 q->backing_dev_info->ra_pages,
> -				 b->backing_dev_info->ra_pages);
> -			q->backing_dev_info->ra_pages =
> -						b->backing_dev_info->ra_pages;
> -		}
> -	}
>  	fixup_discard_if_not_supported(q);
>  	fixup_write_zeroes(device, q);
>  }
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 9e45faa054b6f4..9d3f0711be030f 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -1367,10 +1367,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
>  	if (ret)
>  		return ret;
>  
> -	dc->disk.disk->queue->backing_dev_info->ra_pages =
> -		max(dc->disk.disk->queue->backing_dev_info->ra_pages,
> -		    q->backing_dev_info->ra_pages);
> -
>  	atomic_set(&dc->io_errors, 0);
>  	dc->io_disable = false;
>  	dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> index aac4c31cfc8498..324a42ed2f8894 100644
> --- a/drivers/md/dm-table.c
> +++ b/drivers/md/dm-table.c
> @@ -1924,9 +1924,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
>  		q->nr_zones = blkdev_nr_zones(t->md->disk);
>  	}
>  #endif
> -
> -	/* Allow reads to exceed readahead limits */
> -	q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
>  }
>  
>  unsigned int dm_table_get_num_targets(struct dm_table *t)
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index f54a449f97aa79..aa2d7279176880 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -410,22 +410,6 @@ static int raid0_run(struct mddev *mddev)
>  		 mdname(mddev),
>  		 (unsigned long long)mddev->array_sectors);
>  
> -	if (mddev->queue) {
> -		/* calculate the max read-ahead size.
> -		 * For read-ahead of large files to be effective, we need to
> -		 * readahead at least twice a whole stripe. i.e. number of devices
> -		 * multiplied by chunk size times 2.
> -		 * If an individual device has an ra_pages greater than the
> -		 * chunk size, then we will not drive that device as hard as it
> -		 * wants.  We consider this a configuration error: a larger
> -		 * chunksize should be used in that case.
> -		 */
> -		int stripe = mddev->raid_disks *
> -			(mddev->chunk_sectors << 9) / PAGE_SIZE;
> -		if (mddev->queue->backing_dev_info->ra_pages < 2* stripe)
> -			mddev->queue->backing_dev_info->ra_pages = 2* stripe;
> -	}
> -
>  	dump_zones(mddev);
>  
>  	ret = md_integrity_register(mddev);
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 9f88ff9bdee437..23d15acbf457d4 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -3865,19 +3865,6 @@ static int raid10_run(struct mddev *mddev)
>  	mddev->resync_max_sectors = size;
>  	set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
>  
> -	if (mddev->queue) {
> -		int stripe = conf->geo.raid_disks *
> -			((mddev->chunk_sectors << 9) / PAGE_SIZE);
> -
> -		/* Calculate max read-ahead size.
> -		 * We need to readahead at least twice a whole stripe....
> -		 * maybe...
> -		 */
> -		stripe /= conf->geo.near_copies;
> -		if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
> -			mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
> -	}
> -
>  	if (md_integrity_register(mddev))
>  		goto out_free_conf;
>  
> @@ -4715,17 +4702,8 @@ static void end_reshape(struct r10conf *conf)
>  	conf->reshape_safe = MaxSector;
>  	spin_unlock_irq(&conf->device_lock);
>  
> -	/* read-ahead size must cover two whole stripes, which is
> -	 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
> -	 */
> -	if (conf->mddev->queue) {
> -		int stripe = conf->geo.raid_disks *
> -			((conf->mddev->chunk_sectors << 9) / PAGE_SIZE);
> -		stripe /= conf->geo.near_copies;
> -		if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
> -			conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
> +	if (conf->mddev->queue)
>  		raid10_set_io_opt(conf);
> -	}
>  	conf->fullsync = 0;
>  }
>  
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 68e41ce3ca75cc..415ce3cc155698 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -7413,8 +7413,6 @@ static int raid5_run(struct mddev *mddev)
>  		int data_disks = conf->previous_raid_disks - conf->max_degraded;
>  		int stripe = data_disks *
>  			((mddev->chunk_sectors << 9) / PAGE_SIZE);
> -		if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
> -			mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
>  
>  		chunk_size = mddev->chunk_sectors << 9;
>  		blk_queue_io_min(mddev->queue, chunk_size);
> @@ -8002,17 +8000,8 @@ static void end_reshape(struct r5conf *conf)
>  		spin_unlock_irq(&conf->device_lock);
>  		wake_up(&conf->wait_for_overlap);
>  
> -		/* read-ahead size must cover two whole stripes, which is
> -		 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
> -		 */
> -		if (conf->mddev->queue) {
> -			int data_disks = conf->raid_disks - conf->max_degraded;
> -			int stripe = data_disks * ((conf->chunk_sectors << 9)
> -						   / PAGE_SIZE);
> -			if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
> -				conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
> +		if (conf->mddev->queue)
>  			raid5_set_io_opt(conf);
> -		}
>  	}
>  }
>  
> -- 
> 2.27.0
> 
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel


WARNING: multiple messages have this Message-ID (diff)
From: Mike Snitzer <snitzer@redhat.com>
To: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org, martin.petersen@oracle.com,
	Hans de Goede <hdegoede@redhat.com>, Song Liu <song@kernel.org>,
	Richard Weinberger <richard@nod.at>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-raid@vger.kernel.org, Minchan Kim <minchan@kernel.org>,
	dm-devel@redhat.com, linux-mtd@lists.infradead.org,
	drbd-dev@tron.linbit.com, cgroups@vger.kernel.org,
	linux-mm@kvack.org
Subject: Re: [PATCH 06/14] block: lift setting the readahead size into the block layer
Date: Wed, 26 Aug 2020 18:07:38 -0400	[thread overview]
Message-ID: <20200826220737.GA25613@redhat.com> (raw)
In-Reply-To: <20200726150333.305527-7-hch@lst.de>

On Sun, Jul 26 2020 at 11:03am -0400,
Christoph Hellwig <hch@lst.de> wrote:

> Drivers shouldn't really mess with the readahead size, as that is a VM
> concept.  Instead set it based on the optimal I/O size by lifting the
> algorithm from the md driver when registering the disk.  Also set
> bdi->io_pages there as well by applying the same scheme based on
> max_sectors.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/blk-settings.c         |  5 ++---
>  block/blk-sysfs.c            |  1 -
>  block/genhd.c                | 13 +++++++++++--
>  drivers/block/aoe/aoeblk.c   |  2 --
>  drivers/block/drbd/drbd_nl.c | 12 +-----------
>  drivers/md/bcache/super.c    |  4 ----
>  drivers/md/dm-table.c        |  3 ---
>  drivers/md/raid0.c           | 16 ----------------
>  drivers/md/raid10.c          | 24 +-----------------------
>  drivers/md/raid5.c           | 13 +------------
>  10 files changed, 16 insertions(+), 77 deletions(-)


In general these changes need a solid audit with respect to stacking
drivers: specifically, how the limits stacking methods (blk_stack_limits)
interact with the lower-level allocation methods (__device_add_disk).

You optimized for the low-level __device_add_disk establishing the bdi's
ra_pages and io_pages.  That happens at the beginning of disk allocation,
well before a stacking driver's queue_io_opt() has been built up.
Previously, setting these values was done in disk_stack_limits or in
driver-specific methods (e.g. dm_table_set_restrictions) that are called
_after_ all the limits stacking occurs.

By moving the setting of the bdi's ra_pages and io_pages so early, into
__device_add_disk, this change will, as far as I can tell, break setting
these values properly for at least DM.

Mike


> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 76a7e03bcd6cac..01049e9b998f1d 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -452,6 +452,8 @@ EXPORT_SYMBOL(blk_limits_io_opt);
>  void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
>  {
>  	blk_limits_io_opt(&q->limits, opt);
> +	q->backing_dev_info->ra_pages =
> +		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
>  }
>  EXPORT_SYMBOL(blk_queue_io_opt);
>  
> @@ -628,9 +630,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
>  		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
>  		       top, bottom);
>  	}
> -
> -	t->backing_dev_info->io_pages =
> -		t->limits.max_sectors >> (PAGE_SHIFT - 9);
>  }
>  EXPORT_SYMBOL(disk_stack_limits);
>  
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 7dda709f3ccb6f..ce418d9128a0b2 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -245,7 +245,6 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
>  
>  	spin_lock_irq(&q->queue_lock);
>  	q->limits.max_sectors = max_sectors_kb << 1;
> -	q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
>  	spin_unlock_irq(&q->queue_lock);
>  
>  	return ret;
> diff --git a/block/genhd.c b/block/genhd.c
> index 8b1e9f48957cb5..097d4e4bc0b8a2 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -775,6 +775,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  			      const struct attribute_group **groups,
>  			      bool register_queue)
>  {
> +	struct request_queue *q = disk->queue;
>  	dev_t devt;
>  	int retval;
>  
> @@ -785,7 +786,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  	 * registration.
>  	 */
>  	if (register_queue)
> -		elevator_init_mq(disk->queue);
> +		elevator_init_mq(q);
>  
>  	/* minors == 0 indicates to use ext devt from part0 and should
>  	 * be accompanied with EXT_DEVT flag.  Make sure all
> @@ -815,10 +816,18 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
>  		disk->flags |= GENHD_FL_NO_PART_SCAN;
>  	} else {
> -		struct backing_dev_info *bdi = disk->queue->backing_dev_info;
> +		struct backing_dev_info *bdi = q->backing_dev_info;
>  		struct device *dev = disk_to_dev(disk);
>  		int ret;
>  
> +		/*
> +		 * For read-ahead of large files to be effective, we need to
> +		 * readahead at least twice the optimal I/O size.
> +		 */
> +		bdi->ra_pages = max(queue_io_opt(q) * 2 / PAGE_SIZE,
> +				    VM_READAHEAD_PAGES);
> +		bdi->io_pages = queue_max_sectors(q) >> (PAGE_SHIFT - 9);
> +
>  		/* Register BDI before referencing it from bdev */
>  		dev->devt = devt;
>  		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
> diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
> index 5ca7216e9e01f3..89b33b402b4e52 100644
> --- a/drivers/block/aoe/aoeblk.c
> +++ b/drivers/block/aoe/aoeblk.c
> @@ -347,7 +347,6 @@ aoeblk_gdalloc(void *vp)
>  	mempool_t *mp;
>  	struct request_queue *q;
>  	struct blk_mq_tag_set *set;
> -	enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
>  	ulong flags;
>  	int late = 0;
>  	int err;
> @@ -407,7 +406,6 @@ aoeblk_gdalloc(void *vp)
>  	WARN_ON(d->gd);
>  	WARN_ON(d->flags & DEVFL_UP);
>  	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
> -	q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE;
>  	d->bufpool = mp;
>  	d->blkq = gd->queue = q;
>  	q->queuedata = d;
> diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
> index 650372ee2c7822..212bf711fb6b41 100644
> --- a/drivers/block/drbd/drbd_nl.c
> +++ b/drivers/block/drbd/drbd_nl.c
> @@ -1360,18 +1360,8 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
>  	decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
>  	decide_on_write_same_support(device, q, b, o, disable_write_same);
>  
> -	if (b) {
> +	if (b)
>  		blk_stack_limits(&q->limits, &b->limits, 0);
> -
> -		if (q->backing_dev_info->ra_pages !=
> -		    b->backing_dev_info->ra_pages) {
> -			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
> -				 q->backing_dev_info->ra_pages,
> -				 b->backing_dev_info->ra_pages);
> -			q->backing_dev_info->ra_pages =
> -						b->backing_dev_info->ra_pages;
> -		}
> -	}
>  	fixup_discard_if_not_supported(q);
>  	fixup_write_zeroes(device, q);
>  }
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 9e45faa054b6f4..9d3f0711be030f 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -1367,10 +1367,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
>  	if (ret)
>  		return ret;
>  
> -	dc->disk.disk->queue->backing_dev_info->ra_pages =
> -		max(dc->disk.disk->queue->backing_dev_info->ra_pages,
> -		    q->backing_dev_info->ra_pages);
> -
>  	atomic_set(&dc->io_errors, 0);
>  	dc->io_disable = false;
>  	dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> index aac4c31cfc8498..324a42ed2f8894 100644
> --- a/drivers/md/dm-table.c
> +++ b/drivers/md/dm-table.c
> @@ -1924,9 +1924,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
>  		q->nr_zones = blkdev_nr_zones(t->md->disk);
>  	}
>  #endif
> -
> -	/* Allow reads to exceed readahead limits */
> -	q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
>  }
>  
>  unsigned int dm_table_get_num_targets(struct dm_table *t)
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index f54a449f97aa79..aa2d7279176880 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -410,22 +410,6 @@ static int raid0_run(struct mddev *mddev)
>  		 mdname(mddev),
>  		 (unsigned long long)mddev->array_sectors);
>  
> -	if (mddev->queue) {
> -		/* calculate the max read-ahead size.
> -		 * For read-ahead of large files to be effective, we need to
> -		 * readahead at least twice a whole stripe. i.e. number of devices
> -		 * multiplied by chunk size times 2.
> -		 * If an individual device has an ra_pages greater than the
> -		 * chunk size, then we will not drive that device as hard as it
> -		 * wants.  We consider this a configuration error: a larger
> -		 * chunksize should be used in that case.
> -		 */
> -		int stripe = mddev->raid_disks *
> -			(mddev->chunk_sectors << 9) / PAGE_SIZE;
> -		if (mddev->queue->backing_dev_info->ra_pages < 2* stripe)
> -			mddev->queue->backing_dev_info->ra_pages = 2* stripe;
> -	}
> -
>  	dump_zones(mddev);
>  
>  	ret = md_integrity_register(mddev);
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 9f88ff9bdee437..23d15acbf457d4 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -3865,19 +3865,6 @@ static int raid10_run(struct mddev *mddev)
>  	mddev->resync_max_sectors = size;
>  	set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
>  
> -	if (mddev->queue) {
> -		int stripe = conf->geo.raid_disks *
> -			((mddev->chunk_sectors << 9) / PAGE_SIZE);
> -
> -		/* Calculate max read-ahead size.
> -		 * We need to readahead at least twice a whole stripe....
> -		 * maybe...
> -		 */
> -		stripe /= conf->geo.near_copies;
> -		if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
> -			mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
> -	}
> -
>  	if (md_integrity_register(mddev))
>  		goto out_free_conf;
>  
> @@ -4715,17 +4702,8 @@ static void end_reshape(struct r10conf *conf)
>  	conf->reshape_safe = MaxSector;
>  	spin_unlock_irq(&conf->device_lock);
>  
> -	/* read-ahead size must cover two whole stripes, which is
> -	 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
> -	 */
> -	if (conf->mddev->queue) {
> -		int stripe = conf->geo.raid_disks *
> -			((conf->mddev->chunk_sectors << 9) / PAGE_SIZE);
> -		stripe /= conf->geo.near_copies;
> -		if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
> -			conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
> +	if (conf->mddev->queue)
>  		raid10_set_io_opt(conf);
> -	}
>  	conf->fullsync = 0;
>  }
>  
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 68e41ce3ca75cc..415ce3cc155698 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -7413,8 +7413,6 @@ static int raid5_run(struct mddev *mddev)
>  		int data_disks = conf->previous_raid_disks - conf->max_degraded;
>  		int stripe = data_disks *
>  			((mddev->chunk_sectors << 9) / PAGE_SIZE);
> -		if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
> -			mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
>  
>  		chunk_size = mddev->chunk_sectors << 9;
>  		blk_queue_io_min(mddev->queue, chunk_size);
> @@ -8002,17 +8000,8 @@ static void end_reshape(struct r5conf *conf)
>  		spin_unlock_irq(&conf->device_lock);
>  		wake_up(&conf->wait_for_overlap);
>  
> -		/* read-ahead size must cover two whole stripes, which is
> -		 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
> -		 */
> -		if (conf->mddev->queue) {
> -			int data_disks = conf->raid_disks - conf->max_degraded;
> -			int stripe = data_disks * ((conf->chunk_sectors << 9)
> -						   / PAGE_SIZE);
> -			if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
> -				conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
> +		if (conf->mddev->queue)
>  			raid5_set_io_opt(conf);
> -		}
>  	}
>  }
>  
> -- 
> 2.27.0
> 
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel


______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/

  reply	other threads:[~2020-08-26 22:07 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-26 15:03 bdi cleanups v3 Christoph Hellwig
2020-07-26 15:03 ` Christoph Hellwig
2020-07-26 15:03 ` [PATCH 01/14] fs: remove the unused SB_I_MULTIROOT flag Christoph Hellwig
2020-07-26 15:03   ` Christoph Hellwig
2020-07-26 15:03 ` [PATCH 03/14] drbd: remove RB_CONGESTED_REMOTE Christoph Hellwig
2020-07-26 15:03   ` Christoph Hellwig
     [not found] ` <20200726150333.305527-1-hch-jcswGhMUV9g@public.gmane.org>
2020-07-26 15:03   ` [PATCH 02/14] drbd: remove dead code in device_to_statistics Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03   ` [PATCH 04/14] bdi: initialize ->ra_pages in bdi_init Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03   ` [PATCH 05/14] md: update the optimal I/O size on reshape Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03   ` [PATCH 06/14] block: lift setting the readahead size into the block layer Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-08-26 22:07     ` Mike Snitzer [this message]
2020-08-26 22:07       ` Mike Snitzer
2020-09-02 15:11       ` Christoph Hellwig
2020-09-02 15:11         ` Christoph Hellwig
2020-09-02 15:11         ` Christoph Hellwig
2020-09-02 16:20         ` Mike Snitzer
2020-09-02 16:20           ` Mike Snitzer
2020-09-10  9:28           ` Christoph Hellwig
2020-09-10  9:28             ` Christoph Hellwig
2020-09-10  9:28             ` Christoph Hellwig
2020-09-10 17:15             ` Mike Snitzer
2020-09-10 17:15               ` Mike Snitzer
2020-09-15  7:05               ` Christoph Hellwig
2020-09-15  7:05                 ` Christoph Hellwig
2020-09-15  7:05                 ` Christoph Hellwig
2020-07-26 15:03   ` [PATCH 07/14] block: make QUEUE_SYSFS_BIT_FNS a little more useful Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-29  8:57     ` [block] 54529aac98: fsmark.files_per_sec -13.6% regression kernel test robot
2020-07-29  8:57       ` kernel test robot
2020-07-30  2:14     ` [block] 54529aac98: blktests.block.005.fail kernel test robot
2020-07-30  2:14       ` kernel test robot
2020-07-26 15:03   ` [PATCH 08/14] block: add helper macros for queue sysfs entries Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03   ` [PATCH 13/14] bdi: invert BDI_CAP_NO_ACCT_WB Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03     ` Christoph Hellwig
2020-07-26 15:03 ` [PATCH 09/14] bdi: remove BDI_CAP_CGROUP_WRITEBACK Christoph Hellwig
2020-07-26 15:03   ` Christoph Hellwig
2020-07-26 15:03 ` [PATCH 10/14] bdi: remove BDI_CAP_SYNCHRONOUS_IO Christoph Hellwig
2020-07-26 15:03   ` Christoph Hellwig
2020-07-26 19:06   ` Minchan Kim
2020-07-26 19:06     ` Minchan Kim
     [not found]     ` <20200726190639.GA560221-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
2020-07-27  7:58       ` Christoph Hellwig
2020-07-27  7:58         ` Christoph Hellwig
2020-07-27  7:58         ` Christoph Hellwig
2020-07-26 15:03 ` [PATCH 11/14] mm: use SWP_SYNCHRONOUS_IO more intelligently Christoph Hellwig
2020-07-26 15:03   ` Christoph Hellwig
2020-07-26 15:03 ` [PATCH 12/14] bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag Christoph Hellwig
2020-07-26 15:03   ` Christoph Hellwig
2020-07-26 15:03 ` [PATCH 14/14] bdi: replace BDI_CAP_NO_{WRITEBACK,ACCT_DIRTY} with a single flag Christoph Hellwig
2020-07-26 15:03   ` [PATCH 14/14] bdi: replace BDI_CAP_NO_{WRITEBACK, ACCT_DIRTY} " Christoph Hellwig
2020-07-26 15:12 ` bdi cleanups v3 Christoph Hellwig
2020-07-26 15:12   ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2020-07-24  7:32 Christoph Hellwig
     [not found] ` <20200724073313.138789-1-hch-jcswGhMUV9g@public.gmane.org>
2020-07-24  7:33   ` [PATCH 06/14] block: lift setting the readahead size into the block layer Christoph Hellwig
2020-07-24  7:33     ` Christoph Hellwig
2020-07-24  7:33     ` Christoph Hellwig
2020-07-28 12:23     ` Johannes Thumshirn
2020-07-28 12:23       ` Johannes Thumshirn
2020-07-28 12:23       ` Johannes Thumshirn
2020-07-22  6:25 bdi cleanups v2 Christoph Hellwig
2020-07-22  6:25 ` [PATCH 06/14] block: lift setting the readahead size into the block layer Christoph Hellwig
2020-07-22  6:25   ` Christoph Hellwig
2020-07-22  7:13   ` Johannes Thumshirn
2020-07-22  7:13     ` Johannes Thumshirn
2020-07-22  7:13     ` Johannes Thumshirn
     [not found]     ` <SN4PR0401MB3598470B14C754768A2D8F389B790-OZENnpeWoeG1GMmP3NTBp/nBZW37Ciw+nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2020-07-22  7:18       ` Christoph Hellwig
2020-07-22  7:18         ` Christoph Hellwig
2020-07-22  7:18         ` Christoph Hellwig
2020-07-20  7:51 bdi cleanups Christoph Hellwig
2020-07-20  7:51 ` [PATCH 06/14] block: lift setting the readahead size into the block layer Christoph Hellwig
2020-07-20  7:51   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200826220737.GA25613@redhat.com \
    --to=snitzer@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=cgroups@vger.kernel.org \
    --cc=dm-devel@redhat.com \
    --cc=drbd-dev@tron.linbit.com \
    --cc=hch@lst.de \
    --cc=hdegoede@redhat.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-mtd@lists.infradead.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=minchan@kernel.org \
    --cc=richard@nod.at \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.