All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/3] sd_zbc: Avoid that resetting a zone fails sporadically
       [not found] <20180417010441.28903-1-bart.vanassche@wdc.com>
@ 2018-04-17  1:04 ` Bart Van Assche
  2018-04-17 22:42   ` Damien Le Moal
  0 siblings, 1 reply; 2+ messages in thread
From: Bart Van Assche @ 2018-04-17  1:04 UTC (permalink / raw)
  To: Martin K . Petersen, James E . J . Bottomley
  Cc: linux-scsi, Bart Van Assche, Jens Axboe, Damien Le Moal,
	Christoph Hellwig, Hannes Reinecke, stable

Since SCSI scanning occurs asynchronously, since sd_revalidate_disk()
is called from sd_probe_async() and since sd_revalidate_disk() calls
sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called
concurrently with blkdev_report_zones() and/or blkdev_reset_zones().
That can cause these functions to fail with -EIO because
sd_zbc_read_zones() e.g. sets q->nr_zones to zero before restoring it
to the actual value, even if no drive characteristics have changed.
Prevent this from happening by making the following changes:
- Protect the code that updates zone information with blk_queue_enter()
  and blk_queue_exit().
- Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that
  these functions do not modify struct scsi_disk before all zone
  information has been obtained.

Note: since commit 055f6e18e08f ("block: Make q_usage_counter also
track legacy requests"; kernel v4.15) the request queue freezing
mechanism also affects legacy request queues.

Fixes: 89d947561077 ("sd: Implement support for ZBC devices")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: stable@vger.kernel.org # v4.10
---
 drivers/scsi/sd_zbc.c  | 140 +++++++++++++++++++++++++++++--------------------
 include/linux/blkdev.h |   5 ++
 2 files changed, 87 insertions(+), 58 deletions(-)

diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 2d0c06f7db3e..323e3dc4bc59 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -390,8 +390,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
  *
  * Check that all zones of the device are equal. The last zone can however
  * be smaller. The zone size must also be a power of two number of LBAs.
+ *
+ * Returns the zone size in number of blocks upon success or an error code upon failure.
  */
-static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
+static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 {
 	u64 zone_blocks = 0;
 	sector_t block = 0;
@@ -402,8 +404,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 	int ret;
 	u8 same;
 
-	sdkp->zone_blocks = 0;
-
 	/* Get a buffer */
 	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
 	if (!buf)
@@ -435,16 +435,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
 		/* Parse zone descriptors */
 		while (rec < buf + buf_len) {
-			zone_blocks = get_unaligned_be64(&rec[8]);
-			if (sdkp->zone_blocks == 0) {
-				sdkp->zone_blocks = zone_blocks;
-			} else if (zone_blocks != sdkp->zone_blocks &&
-				   (block + zone_blocks < sdkp->capacity
-				    || zone_blocks > sdkp->zone_blocks)) {
-				zone_blocks = 0;
+			u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
+
+			if (zone_blocks == 0) {
+				zone_blocks = this_zone_blocks;
+			} else if (this_zone_blocks != zone_blocks &&
+				   (block + this_zone_blocks < sdkp->capacity
+				    || this_zone_blocks > zone_blocks)) {
+				this_zone_blocks = 0;
 				goto out;
 			}
-			block += zone_blocks;
+			block += this_zone_blocks;
 			rec += 64;
 		}
 
@@ -457,8 +458,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
 	} while (block < sdkp->capacity);
 
-	zone_blocks = sdkp->zone_blocks;
-
 out:
 	if (!zone_blocks) {
 		if (sdkp->first_scan)
@@ -478,8 +477,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 				  "Zone size too large\n");
 		ret = -ENODEV;
 	} else {
-		sdkp->zone_blocks = zone_blocks;
-		sdkp->zone_shift = ilog2(zone_blocks);
+		ret = zone_blocks;
 	}
 
 out_free:
@@ -490,15 +488,14 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
 /**
  * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
- * @sdkp: The disk of the bitmap
+ * @nr_zones: Number of zones to allocate space for.
+ * @numa_node: NUMA node to allocate the memory from.
  */
-static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
+static inline unsigned long *
+sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
 {
-	struct request_queue *q = sdkp->disk->queue;
-
-	return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
-			    * sizeof(unsigned long),
-			    GFP_KERNEL, q->node);
+	return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long),
+			    GFP_KERNEL, numa_node);
 }
 
 /**
@@ -506,6 +503,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
  * @sdkp: disk used
  * @buf: report reply buffer
  * @buflen: length of @buf
+ * @zone_shift: logarithm base 2 of the number of blocks in a zone
  * @seq_zones_bitmap: bitmap of sequential zones to set
  *
  * Parse reported zone descriptors in @buf to identify sequential zones and
@@ -515,7 +513,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
  * Return the LBA after the last zone reported.
  */
 static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
-				     unsigned int buflen,
+				     unsigned int buflen, u32 zone_shift,
 				     unsigned long *seq_zones_bitmap)
 {
 	sector_t lba, next_lba = sdkp->capacity;
@@ -534,7 +532,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
 		if (type != ZBC_ZONE_TYPE_CONV &&
 		    cond != ZBC_ZONE_COND_READONLY &&
 		    cond != ZBC_ZONE_COND_OFFLINE)
-			set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap);
+			set_bit(lba >> zone_shift, seq_zones_bitmap);
 		next_lba = lba + get_unaligned_be64(&rec[8]);
 		rec += 64;
 	}
@@ -543,12 +541,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
 }
 
 /**
- * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap.
+ * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
  * @sdkp: target disk
+ * @zone_shift: logarithm base 2 of the number of blocks in a zone
+ * @nr_zones: number of zones to set up a seq zone bitmap for
  *
  * Allocate a zone bitmap and initialize it by identifying sequential zones.
  */
-static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
+static unsigned long *
+sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
+			      u32 nr_zones)
 {
 	struct request_queue *q = sdkp->disk->queue;
 	unsigned long *seq_zones_bitmap;
@@ -556,9 +558,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
 	unsigned char *buf;
 	int ret = -ENOMEM;
 
-	seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp);
+	seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
 	if (!seq_zones_bitmap)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
 	if (!buf)
@@ -569,7 +571,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
 		if (ret)
 			goto out;
 		lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
-					   seq_zones_bitmap);
+					   zone_shift, seq_zones_bitmap);
 	}
 
 	if (lba != sdkp->capacity) {
@@ -581,12 +583,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
 	kfree(buf);
 	if (ret) {
 		kfree(seq_zones_bitmap);
-		return ret;
+		return ERR_PTR(ret);
 	}
-
-	q->seq_zones_bitmap = seq_zones_bitmap;
-
-	return 0;
+	return seq_zones_bitmap;
 }
 
 static void sd_zbc_cleanup(struct scsi_disk *sdkp)
@@ -602,44 +601,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp)
 	q->nr_zones = 0;
 }
 
-static int sd_zbc_setup(struct scsi_disk *sdkp)
+static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
 {
 	struct request_queue *q = sdkp->disk->queue;
+	u32 zone_shift = ilog2(zone_blocks);
+	u32 nr_zones;
 	int ret;
 
-	/* READ16/WRITE16 is mandatory for ZBC disks */
-	sdkp->device->use_16_for_rw = 1;
-	sdkp->device->use_10_for_rw = 0;
-
 	/* chunk_sectors indicates the zone size */
-	blk_queue_chunk_sectors(sdkp->disk->queue,
-			logical_to_sectors(sdkp->device, sdkp->zone_blocks));
-	sdkp->nr_zones =
-		round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift;
+	blk_queue_chunk_sectors(q,
+			logical_to_sectors(sdkp->device, zone_blocks));
+	nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
 
 	/*
 	 * Initialize the device request queue information if the number
 	 * of zones changed.
 	 */
-	if (sdkp->nr_zones != q->nr_zones) {
-
-		sd_zbc_cleanup(sdkp);
-
-		q->nr_zones = sdkp->nr_zones;
-		if (sdkp->nr_zones) {
-			q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
-			if (!q->seq_zones_wlock) {
+	if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
+		unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
+		size_t zone_bitmap_size;
+
+		if (nr_zones) {
+			seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
+								   q->node);
+			if (!seq_zones_wlock) {
 				ret = -ENOMEM;
 				goto err;
 			}
 
-			ret = sd_zbc_setup_seq_zones_bitmap(sdkp);
-			if (ret) {
-				sd_zbc_cleanup(sdkp);
+			seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
+							zone_shift, nr_zones);
+			if (IS_ERR(seq_zones_bitmap)) {
+				ret = PTR_ERR(seq_zones_bitmap);
+				kfree(seq_zones_wlock);
 				goto err;
 			}
 		}
-
+		zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
+			sizeof(unsigned long);
+		blk_mq_freeze_queue(q);
+		if (q->nr_zones != nr_zones) {
+			/* READ16/WRITE16 is mandatory for ZBC disks */
+			sdkp->device->use_16_for_rw = 1;
+			sdkp->device->use_10_for_rw = 0;
+
+			sdkp->zone_blocks = zone_blocks;
+			sdkp->zone_shift = zone_shift;
+			sdkp->nr_zones = nr_zones;
+			q->nr_zones = nr_zones;
+			swap(q->seq_zones_wlock, seq_zones_wlock);
+			swap(q->seq_zones_bitmap, seq_zones_bitmap);
+		} else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
+				  zone_bitmap_size) != 0) {
+			memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
+			       zone_bitmap_size);
+		}
+		blk_mq_unfreeze_queue(q);
+		kfree(seq_zones_wlock);
+		kfree(seq_zones_bitmap);
 	}
 
 	return 0;
@@ -651,6 +670,7 @@ static int sd_zbc_setup(struct scsi_disk *sdkp)
 
 int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 {
+	int64_t zone_blocks;
 	int ret;
 
 	if (!sd_is_zoned(sdkp))
@@ -687,12 +707,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 	 * Check zone size: only devices with a constant zone size (except
 	 * an eventual last runt zone) that is a power of 2 are supported.
 	 */
-	ret = sd_zbc_check_zone_size(sdkp);
-	if (ret)
+	zone_blocks = sd_zbc_check_zone_size(sdkp);
+	ret = -EFBIG;
+	if (zone_blocks != (u32)zone_blocks)
+		goto err;
+	ret = zone_blocks;
+	if (ret < 0)
 		goto err;
 
 	/* The drive satisfies the kernel restrictions: set it up */
-	ret = sd_zbc_setup(sdkp);
+	ret = sd_zbc_setup(sdkp, zone_blocks);
 	if (ret)
 		goto err;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9af3e0f430bc..21e21f273a21 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -605,6 +605,11 @@ struct request_queue {
 	 * initialized by the low level device driver (e.g. scsi/sd.c).
 	 * Stacking drivers (device mappers) may or may not initialize
 	 * these fields.
+	 *
+	 * Reads of this information must be protected with blk_queue_enter() /
+	 * blk_queue_exit(). Modifying this information is only allowed while
+	 * no requests are being processed. See also blk_mq_freeze_queue() and
+	 * blk_mq_unfreeze_queue().
 	 */
 	unsigned int		nr_zones;
 	unsigned long		*seq_zones_bitmap;
-- 
2.16.3

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH 3/3] sd_zbc: Avoid that resetting a zone fails sporadically
  2018-04-17  1:04 ` [PATCH 3/3] sd_zbc: Avoid that resetting a zone fails sporadically Bart Van Assche
@ 2018-04-17 22:42   ` Damien Le Moal
  0 siblings, 0 replies; 2+ messages in thread
From: Damien Le Moal @ 2018-04-17 22:42 UTC (permalink / raw)
  To: Bart Van Assche, Martin K . Petersen, James E . J . Bottomley
  Cc: linux-scsi, Jens Axboe, Christoph Hellwig, Hannes Reinecke, stable

On 2018/04/16 18:04, Bart Van Assche wrote:
> Since SCSI scanning occurs asynchronously, since sd_revalidate_disk()
> is called from sd_probe_async() and since sd_revalidate_disk() calls
> sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called
> concurrently with blkdev_report_zones() and/or blkdev_reset_zones().
> That can cause these functions to fail with -EIO because
> sd_zbc_read_zones() e.g. sets q->nr_zones to zero before restoring it
> to the actual value, even if no drive characteristics have changed.
> Prevent this from happening by making the following changes:
> - Protect the code that updates zone information with blk_queue_enter()
>   and blk_queue_exit().
> - Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that
>   these functions do not modify struct scsi_disk before all zone
>   information has been obtained.
> 
> Note: since commit 055f6e18e08f ("block: Make q_usage_counter also
> track legacy requests"; kernel v4.15) the request queue freezing
> mechanism also affects legacy request queues.
> 
> Fixes: 89d947561077 ("sd: Implement support for ZBC devices")
> Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
> Cc: Jens Axboe <axboe@kernel.dk>
> Cc: Damien Le Moal <damien.lemoal@wdc.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: stable@vger.kernel.org # v4.10
> ---
>  drivers/scsi/sd_zbc.c  | 140 +++++++++++++++++++++++++++++--------------------
>  include/linux/blkdev.h |   5 ++
>  2 files changed, 87 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> index 2d0c06f7db3e..323e3dc4bc59 100644
> --- a/drivers/scsi/sd_zbc.c
> +++ b/drivers/scsi/sd_zbc.c
> @@ -390,8 +390,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
>   *
>   * Check that all zones of the device are equal. The last zone can however
>   * be smaller. The zone size must also be a power of two number of LBAs.
> + *
> + * Returns the zone size in number of blocks upon success or an error code upon failure.
>   */
> -static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
> +static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
>  {
>  	u64 zone_blocks = 0;
>  	sector_t block = 0;
> @@ -402,8 +404,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
>  	int ret;
>  	u8 same;
>  
> -	sdkp->zone_blocks = 0;
> -
>  	/* Get a buffer */
>  	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
>  	if (!buf)
> @@ -435,16 +435,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
>  
>  		/* Parse zone descriptors */
>  		while (rec < buf + buf_len) {
> -			zone_blocks = get_unaligned_be64(&rec[8]);
> -			if (sdkp->zone_blocks == 0) {
> -				sdkp->zone_blocks = zone_blocks;
> -			} else if (zone_blocks != sdkp->zone_blocks &&
> -				   (block + zone_blocks < sdkp->capacity
> -				    || zone_blocks > sdkp->zone_blocks)) {
> -				zone_blocks = 0;
> +			u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
> +
> +			if (zone_blocks == 0) {
> +				zone_blocks = this_zone_blocks;
> +			} else if (this_zone_blocks != zone_blocks &&
> +				   (block + this_zone_blocks < sdkp->capacity
> +				    || this_zone_blocks > zone_blocks)) {
> +				this_zone_blocks = 0;
>  				goto out;
>  			}
> -			block += zone_blocks;
> +			block += this_zone_blocks;
>  			rec += 64;
>  		}
>  
> @@ -457,8 +458,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
>  
>  	} while (block < sdkp->capacity);
>  
> -	zone_blocks = sdkp->zone_blocks;
> -
>  out:
>  	if (!zone_blocks) {
>  		if (sdkp->first_scan)
> @@ -478,8 +477,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
>  				  "Zone size too large\n");
>  		ret = -ENODEV;
>  	} else {
> -		sdkp->zone_blocks = zone_blocks;
> -		sdkp->zone_shift = ilog2(zone_blocks);
> +		ret = zone_blocks;
>  	}
>  
>  out_free:
> @@ -490,15 +488,14 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
>  
>  /**
>   * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
> - * @sdkp: The disk of the bitmap
> + * @nr_zones: Number of zones to allocate space for.
> + * @numa_node: NUMA node to allocate the memory from.
>   */
> -static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
> +static inline unsigned long *
> +sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
>  {
> -	struct request_queue *q = sdkp->disk->queue;
> -
> -	return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
> -			    * sizeof(unsigned long),
> -			    GFP_KERNEL, q->node);
> +	return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long),
> +			    GFP_KERNEL, numa_node);
>  }
>  
>  /**
> @@ -506,6 +503,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
>   * @sdkp: disk used
>   * @buf: report reply buffer
>   * @buflen: length of @buf
> + * @zone_shift: logarithm base 2 of the number of blocks in a zone
>   * @seq_zones_bitmap: bitmap of sequential zones to set
>   *
>   * Parse reported zone descriptors in @buf to identify sequential zones and
> @@ -515,7 +513,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
>   * Return the LBA after the last zone reported.
>   */
>  static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
> -				     unsigned int buflen,
> +				     unsigned int buflen, u32 zone_shift,
>  				     unsigned long *seq_zones_bitmap)
>  {
>  	sector_t lba, next_lba = sdkp->capacity;
> @@ -534,7 +532,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
>  		if (type != ZBC_ZONE_TYPE_CONV &&
>  		    cond != ZBC_ZONE_COND_READONLY &&
>  		    cond != ZBC_ZONE_COND_OFFLINE)
> -			set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap);
> +			set_bit(lba >> zone_shift, seq_zones_bitmap);
>  		next_lba = lba + get_unaligned_be64(&rec[8]);
>  		rec += 64;
>  	}
> @@ -543,12 +541,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
>  }
>  
>  /**
> - * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap.
> + * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
>   * @sdkp: target disk
> + * @zone_shift: logarithm base 2 of the number of blocks in a zone
> + * @nr_zones: number of zones to set up a seq zone bitmap for
>   *
>   * Allocate a zone bitmap and initialize it by identifying sequential zones.
>   */
> -static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
> +static unsigned long *
> +sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
> +			      u32 nr_zones)
>  {
>  	struct request_queue *q = sdkp->disk->queue;
>  	unsigned long *seq_zones_bitmap;
> @@ -556,9 +558,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
>  	unsigned char *buf;
>  	int ret = -ENOMEM;
>  
> -	seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp);
> +	seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
>  	if (!seq_zones_bitmap)
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>  
>  	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
>  	if (!buf)
> @@ -569,7 +571,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
>  		if (ret)
>  			goto out;
>  		lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
> -					   seq_zones_bitmap);
> +					   zone_shift, seq_zones_bitmap);
>  	}
>  
>  	if (lba != sdkp->capacity) {
> @@ -581,12 +583,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
>  	kfree(buf);
>  	if (ret) {
>  		kfree(seq_zones_bitmap);
> -		return ret;
> +		return ERR_PTR(ret);
>  	}
> -
> -	q->seq_zones_bitmap = seq_zones_bitmap;
> -
> -	return 0;
> +	return seq_zones_bitmap;
>  }
>  
>  static void sd_zbc_cleanup(struct scsi_disk *sdkp)
> @@ -602,44 +601,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp)
>  	q->nr_zones = 0;
>  }
>  
> -static int sd_zbc_setup(struct scsi_disk *sdkp)
> +static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
>  {
>  	struct request_queue *q = sdkp->disk->queue;
> +	u32 zone_shift = ilog2(zone_blocks);
> +	u32 nr_zones;
>  	int ret;
>  
> -	/* READ16/WRITE16 is mandatory for ZBC disks */
> -	sdkp->device->use_16_for_rw = 1;
> -	sdkp->device->use_10_for_rw = 0;
> -
>  	/* chunk_sectors indicates the zone size */
> -	blk_queue_chunk_sectors(sdkp->disk->queue,
> -			logical_to_sectors(sdkp->device, sdkp->zone_blocks));
> -	sdkp->nr_zones =
> -		round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift;
> +	blk_queue_chunk_sectors(q,
> +			logical_to_sectors(sdkp->device, zone_blocks));
> +	nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
>  
>  	/*
>  	 * Initialize the device request queue information if the number
>  	 * of zones changed.
>  	 */
> -	if (sdkp->nr_zones != q->nr_zones) {
> -
> -		sd_zbc_cleanup(sdkp);
> -
> -		q->nr_zones = sdkp->nr_zones;
> -		if (sdkp->nr_zones) {
> -			q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
> -			if (!q->seq_zones_wlock) {
> +	if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
> +		unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
> +		size_t zone_bitmap_size;
> +
> +		if (nr_zones) {
> +			seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
> +								   q->node);
> +			if (!seq_zones_wlock) {
>  				ret = -ENOMEM;
>  				goto err;
>  			}
>  
> -			ret = sd_zbc_setup_seq_zones_bitmap(sdkp);
> -			if (ret) {
> -				sd_zbc_cleanup(sdkp);
> +			seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
> +							zone_shift, nr_zones);
> +			if (IS_ERR(seq_zones_bitmap)) {
> +				ret = PTR_ERR(seq_zones_bitmap);
> +				kfree(seq_zones_wlock);
>  				goto err;
>  			}
>  		}
> -
> +		zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
> +			sizeof(unsigned long);
> +		blk_mq_freeze_queue(q);
> +		if (q->nr_zones != nr_zones) {
> +			/* READ16/WRITE16 is mandatory for ZBC disks */
> +			sdkp->device->use_16_for_rw = 1;
> +			sdkp->device->use_10_for_rw = 0;
> +
> +			sdkp->zone_blocks = zone_blocks;
> +			sdkp->zone_shift = zone_shift;
> +			sdkp->nr_zones = nr_zones;
> +			q->nr_zones = nr_zones;
> +			swap(q->seq_zones_wlock, seq_zones_wlock);
> +			swap(q->seq_zones_bitmap, seq_zones_bitmap);
> +		} else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
> +				  zone_bitmap_size) != 0) {
> +			memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
> +			       zone_bitmap_size);
> +		}
> +		blk_mq_unfreeze_queue(q);
> +		kfree(seq_zones_wlock);
> +		kfree(seq_zones_bitmap);
>  	}
>  
>  	return 0;
> @@ -651,6 +670,7 @@ static int sd_zbc_setup(struct scsi_disk *sdkp)
>  
>  int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
>  {
> +	int64_t zone_blocks;
>  	int ret;
>  
>  	if (!sd_is_zoned(sdkp))
> @@ -687,12 +707,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
>  	 * Check zone size: only devices with a constant zone size (except
>  	 * an eventual last runt zone) that is a power of 2 are supported.
>  	 */
> -	ret = sd_zbc_check_zone_size(sdkp);
> -	if (ret)
> +	zone_blocks = sd_zbc_check_zone_size(sdkp);
> +	ret = -EFBIG;
> +	if (zone_blocks != (u32)zone_blocks)
> +		goto err;
> +	ret = zone_blocks;
> +	if (ret < 0)
>  		goto err;
>  
>  	/* The drive satisfies the kernel restrictions: set it up */
> -	ret = sd_zbc_setup(sdkp);
> +	ret = sd_zbc_setup(sdkp, zone_blocks);
>  	if (ret)
>  		goto err;
>  
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 9af3e0f430bc..21e21f273a21 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -605,6 +605,11 @@ struct request_queue {
>  	 * initialized by the low level device driver (e.g. scsi/sd.c).
>  	 * Stacking drivers (device mappers) may or may not initialize
>  	 * these fields.
> +	 *
> +	 * Reads of this information must be protected with blk_queue_enter() /
> +	 * blk_queue_exit(). Modifying this information is only allowed while
> +	 * no requests are being processed. See also blk_mq_freeze_queue() and
> +	 * blk_mq_unfreeze_queue().
>  	 */
>  	unsigned int		nr_zones;
>  	unsigned long		*seq_zones_bitmap;
> 

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>

Note: This will not apply as is for stable kernels 4.10 to 4.15 since these do
not have zone information attached to the request queue.

-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2018-04-17 22:42 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20180417010441.28903-1-bart.vanassche@wdc.com>
2018-04-17  1:04 ` [PATCH 3/3] sd_zbc: Avoid that resetting a zone fails sporadically Bart Van Assche
2018-04-17 22:42   ` Damien Le Moal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.