All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shaun Tancheff <shaun.tancheff@seagate.com>
To: Hannes Reinecke <hare@suse.de>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>,
	James Bottomley <james.bottomley@hansenpartnership.com>,
	linux-scsi@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
	Damien Le Moal <damien.lemoal@hgst.com>
Subject: Re: [PATCH 3/5] sd: Implement support for ZBC devices
Date: Fri, 12 Aug 2016 01:00:20 -0500	[thread overview]
Message-ID: <CAJVOszCecNaGm45avqfhUOT04+yMnFMTZKnXui=2p8ndWGE7VQ@mail.gmail.com> (raw)
In-Reply-To: <1468934710-93876-4-git-send-email-hare@suse.de>

On Tue, Jul 19, 2016 at 8:25 AM, Hannes Reinecke <hare@suse.de> wrote:
> Implement ZBC support functions to read in the zone information
> and setup the zone tree.
>
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/scsi/Kconfig  |   8 +
>  drivers/scsi/Makefile |   1 +
>  drivers/scsi/sd.c     | 129 ++++++------
>  drivers/scsi/sd.h     |  54 +++++
>  drivers/scsi/sd_zbc.c | 538 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 670 insertions(+), 60 deletions(-)
>  create mode 100644 drivers/scsi/sd_zbc.c
>
> diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
> index 98e5d51..4b9a882 100644
> --- a/drivers/scsi/Kconfig
> +++ b/drivers/scsi/Kconfig
> @@ -202,6 +202,14 @@ config SCSI_ENCLOSURE
>           it has an enclosure device.  Selecting this option will just allow
>           certain enclosure conditions to be reported and is not required.
>
> +config SCSI_ZBC
> +       bool "SCSI ZBC (zoned block commands) Support"
> +       depends on SCSI && BLK_DEV_ZONED
> +       help
> +         Enable support for ZBC (zoned block commands) devices.
> +
> +         If unsure say N.
> +
>  config SCSI_CONSTANTS
>         bool "Verbose SCSI error reporting (kernel size += 36K)"
>         depends on SCSI
> diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
> index 862ab4e..49bde97 100644
> --- a/drivers/scsi/Makefile
> +++ b/drivers/scsi/Makefile
> @@ -178,6 +178,7 @@ hv_storvsc-y                        := storvsc_drv.o
>
>  sd_mod-objs    := sd.o
>  sd_mod-$(CONFIG_BLK_DEV_INTEGRITY) += sd_dif.o
> +sd_mod-$(CONFIG_SCSI_ZBC) += sd_zbc.o
>
>  sr_mod-objs    := sr.o sr_ioctl.o sr_vendor.o
>  ncr53c8xx-flags-$(CONFIG_SCSI_ZALON) \
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 52dda83..f7b6132 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -92,6 +92,7 @@ MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR);
>  MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK);
>  MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD);
>  MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
> +MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC);
>
>  #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
>  #define SD_MINORS      16
> @@ -162,7 +163,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
>         static const char temp[] = "temporary ";
>         int len;
>
> -       if (sdp->type != TYPE_DISK)
> +       if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
>                 /* no cache control on RBC devices; theoretically they
>                  * can do it, but there's probably so many exceptions
>                  * it's not worth the risk */
> @@ -261,7 +262,7 @@ allow_restart_store(struct device *dev, struct device_attribute *attr,
>         if (!capable(CAP_SYS_ADMIN))
>                 return -EACCES;
>
> -       if (sdp->type != TYPE_DISK)
> +       if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
>                 return -EINVAL;
>
>         sdp->allow_restart = simple_strtoul(buf, NULL, 10);
> @@ -392,7 +393,7 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
>         if (!capable(CAP_SYS_ADMIN))
>                 return -EACCES;
>
> -       if (sdkp->zoned == 1) {
> +       if (sdkp->zoned == 1 || sdp->type == TYPE_ZBC) {
>                 if (!strncmp(buf, lbp_mode[SD_ZBC_RESET_WP], 20)) {
>                         sd_config_discard(sdkp, SD_ZBC_RESET_WP);
>                         return count;
> @@ -466,7 +467,7 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr,
>         if (!capable(CAP_SYS_ADMIN))
>                 return -EACCES;
>
> -       if (sdp->type != TYPE_DISK)
> +       if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
>                 return -EINVAL;
>
>         err = kstrtoul(buf, 10, &max);
> @@ -778,6 +779,11 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
>                 break;
>
>         case SD_ZBC_RESET_WP:
> +               /* sd_zbc_setup_discard uses block layer sector units */
> +               ret = sd_zbc_setup_discard(sdkp, rq, blk_rq_pos(rq),
> +                                          blk_rq_sectors(rq));
> +               if (ret != BLKPREP_OK)
> +                       goto out;
>                 cmd->cmd_len = 16;
>                 cmd->cmnd[0] = ZBC_OUT;
>                 cmd->cmnd[1] = ZO_RESET_WRITE_POINTER;
> @@ -873,6 +879,13 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
>
>         BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size);
>
> +       if (sdkp->zoned == 1 || sdp->type == TYPE_ZBC) {
> +               /* sd_zbc_setup_read_write uses block layer sector units */
> +               ret = sd_zbc_setup_read_write(sdkp, rq, sector, nr_sectors);
> +               if (ret != BLKPREP_OK)
> +                       return ret;
> +       }
> +
>         sector >>= ilog2(sdp->sector_size) - 9;
>         nr_sectors >>= ilog2(sdp->sector_size) - 9;
>
> @@ -992,6 +1005,13 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>         SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
>                                         (unsigned long long)block));
>
> +       if (sdkp->zoned == 1 || sdp->type == TYPE_ZBC) {
> +               /* sd_zbc_setup_read_write uses block layer sector units */
> +               ret = sd_zbc_setup_read_write(sdkp, rq, block, this_count);
> +               if (ret != BLKPREP_OK)
> +                       goto out;
> +       }
> +
>         /*
>          * If we have a 1K hardware sectorsize, prevent access to single
>          * 512 byte sectors.  In theory we could handle this - in fact
> @@ -1806,6 +1826,13 @@ static int sd_done(struct scsi_cmnd *SCpnt)
>                         good_bytes = blk_rq_bytes(req);
>                         scsi_set_resid(SCpnt, 0);
>                 } else {
> +#ifdef CONFIG_SCSI_ZBC
> +                       if (op == ZBC_OUT)
> +                               /* RESET WRITE POINTER failed */
> +                               sd_zbc_update_zones(sdkp,
> +                                                   blk_rq_pos(req),
> +                                                   512, true);
> +#endif
>                         good_bytes = 0;
>                         scsi_set_resid(SCpnt, blk_rq_bytes(req));
>                 }
> @@ -1869,6 +1896,26 @@ static int sd_done(struct scsi_cmnd *SCpnt)
>                                 }
>                         }
>                 }
> +               if (sshdr.asc == 0x21) {
> +                       /*
> +                        * ZBC: read beyond the write pointer position.
> +                        * Clear out error and return the buffer as-is.
> +                        */
> +                       if (sshdr.ascq == 0x06) {
> +                               good_bytes = blk_rq_bytes(req);
> +                               scsi_set_resid(SCpnt, 0);
> +                       }
> +#ifdef CONFIG_SCSI_ZBC
> +                       /*
> +                        * ZBC: Unaligned write command.
> +                        * Write did not start at the write pointer position.
> +                        */
> +                       if (sshdr.ascq == 0x04)
> +                               sd_zbc_update_zones(sdkp,
> +                                                   blk_rq_pos(req),
> +                                                   512, true);
> +#endif
> +               }
>                 break;
>         default:
>                 break;
> @@ -2008,58 +2055,6 @@ sd_spinup_disk(struct scsi_disk *sdkp)
>         }
>  }
>
> -/**
> - * sd_zbc_report_zones - Issue a REPORT ZONES scsi command
> - * @sdkp: SCSI disk to which the command should be send
> - * @buffer: response buffer
> - * @bufflen: length of @buffer
> - * @start_sector: logical sector for the zone information should be reported
> - * @option: option for report zones command
> - * @partial: flag to set 'partial' bit for report zones command
> - */
> -static int
> -sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buffer,
> -                   int bufflen, sector_t start_sector,
> -                   enum zbc_zone_reporting_options option, bool partial)
> -{
> -       struct scsi_device *sdp = sdkp->device;
> -       const int timeout = sdp->request_queue->rq_timeout
> -               * SD_FLUSH_TIMEOUT_MULTIPLIER;
> -       struct scsi_sense_hdr sshdr;
> -       sector_t start_lba = sectors_to_logical(sdkp->device, start_sector);
> -       unsigned char cmd[16];
> -       int result;
> -
> -       if (!scsi_device_online(sdp)) {
> -               sd_printk(KERN_INFO, sdkp, "device not online\n");
> -               return -ENODEV;
> -       }
> -
> -       sd_printk(KERN_INFO, sdkp, "REPORT ZONES lba %zu len %d\n",
> -                 start_lba, bufflen);
> -
> -       memset(cmd, 0, 16);
> -       cmd[0] = ZBC_IN;
> -       cmd[1] = ZI_REPORT_ZONES;
> -       put_unaligned_be64(start_lba, &cmd[2]);
> -       put_unaligned_be32(bufflen, &cmd[10]);
> -       cmd[14] = (partial ? ZBC_REPORT_ZONE_PARTIAL : 0) | option;
> -       memset(buffer, 0, bufflen);
> -
> -       result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
> -                                 buffer, bufflen, &sshdr,
> -                                 timeout, SD_MAX_RETRIES, NULL);
> -
> -       if (result) {
> -               sd_printk(KERN_NOTICE, sdkp,
> -                         "REPORT ZONES lba %zu failed with %d/%d\n",
> -                         start_lba, host_byte(result), driver_byte(result));
> -
> -               return -EIO;
> -       }
> -       return 0;
> -}
> -
>  /*
>   * Determine whether disk supports Data Integrity Field.
>   */
> @@ -2109,8 +2104,11 @@ static void sd_read_zones(struct scsi_disk *sdkp, unsigned char *buffer)
>         u8 same;
>         u64 zone_len, lba;
>
> -       if (sdkp->zoned != 1)
> -               /* Device managed, no special handling required */
> +       if (sdkp->zoned != 1 && sdkp->device->type != TYPE_ZBC)
> +               /*
> +                * Device managed or normal SCSI disk,
> +                * no special handling required
> +                */
>                 return;
>
>         retval = sd_zbc_report_zones(sdkp, buffer, SD_BUF_SIZE,
> @@ -2155,6 +2153,8 @@ static void sd_read_zones(struct scsi_disk *sdkp, unsigned char *buffer)
>         blk_queue_chunk_sectors(sdkp->disk->queue,
>                                 logical_to_sectors(sdkp->device, zone_len));
>         sd_config_discard(sdkp, SD_ZBC_RESET_WP);
> +
> +       sd_zbc_setup(sdkp, buffer, SD_BUF_SIZE);
>  }
>
>  static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
> @@ -2750,7 +2750,7 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
>         struct scsi_mode_data data;
>         struct scsi_sense_hdr sshdr;
>
> -       if (sdp->type != TYPE_DISK)
> +       if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
>                 return;
>
>         if (sdkp->protection_type == 0)
> @@ -3180,9 +3180,16 @@ static int sd_probe(struct device *dev)
>
>         scsi_autopm_get_device(sdp);
>         error = -ENODEV;
> -       if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
> +       if (sdp->type != TYPE_DISK &&
> +           sdp->type != TYPE_ZBC &&
> +           sdp->type != TYPE_MOD &&
> +           sdp->type != TYPE_RBC)
>                 goto out;
>
> +#ifndef CONFIG_SCSI_ZBC
> +       if (sdp->type == TYPE_ZBC)
> +               goto out;
> +#endif
>         SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
>                                         "sd_probe\n"));
>
> @@ -3286,6 +3293,8 @@ static int sd_remove(struct device *dev)
>         del_gendisk(sdkp->disk);
>         sd_shutdown(dev);
>
> +       sd_zbc_remove(sdkp);
> +
>         blk_register_region(devt, SD_MINORS, NULL,
>                             sd_default_probe, NULL, NULL);
>
> diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
> index 4439693..5827b62 100644
> --- a/drivers/scsi/sd.h
> +++ b/drivers/scsi/sd.h
> @@ -65,6 +65,12 @@ struct scsi_disk {
>         struct scsi_device *device;
>         struct device   dev;
>         struct gendisk  *disk;
> +#ifdef CONFIG_SCSI_ZBC
> +       struct workqueue_struct *zone_work_q;
> +       unsigned long   zone_flags;
> +#define SD_ZBC_ZONE_RESET 1
> +#define SD_ZBC_ZONE_INIT  2
> +#endif
>         atomic_t        openers;
>         sector_t        capacity;       /* size in logical blocks */
>         u32             max_xfer_blocks;
> @@ -277,4 +283,52 @@ static inline void sd_dif_complete(struct scsi_cmnd *cmd, unsigned int a)
>
>  #endif /* CONFIG_BLK_DEV_INTEGRITY */
>
> +#ifdef CONFIG_SCSI_ZBC
> +
> +extern int sd_zbc_report_zones(struct scsi_disk *, unsigned char *, int,
> +                              sector_t, enum zbc_zone_reporting_options, bool);
> +extern int sd_zbc_setup(struct scsi_disk *, char *, int);
> +extern void sd_zbc_remove(struct scsi_disk *);
> +extern void sd_zbc_reset_zones(struct scsi_disk *);
> +extern int sd_zbc_setup_discard(struct scsi_disk *, struct request *,
> +                               sector_t, unsigned int);
> +extern int sd_zbc_setup_read_write(struct scsi_disk *, struct request *,
> +                                  sector_t, unsigned int);
> +extern void sd_zbc_update_zones(struct scsi_disk *, sector_t, int, bool);
> +extern void sd_zbc_refresh_zone_work(struct work_struct *);
> +
> +#else /* CONFIG_SCSI_ZBC */
> +
> +static inline int sd_zbc_report_zones(struct scsi_disk *sdkp,
> +                                     unsigned char *buf, int buf_len,
> +                                     sector_t start_sector,
> +                                     enum zbc_zone_reporting_options option,
> +                                     bool partial)
> +{
> +       return -EOPNOTSUPP;
> +}
> +
> +static inline int sd_zbc_setup(struct scsi_disk *sdkp,
> +                              unsigned char *buf, int buf_len)
> +{
> +       return 0;
> +}
> +
> +static inline int sd_zbc_setup_discard(struct scsi_disk *sdkp,
> +                                      struct request *rq, sector_t sector,
> +                                      unsigned int num_sectors)
> +{
> +       return BLKPREP_OK;
> +}
> +
> +static inline int sd_zbc_setup_read_write(struct scsi_disk *sdkp,
> +                                         struct request *rq, sector_t sector,
> +                                         unsigned int num_sectors)
> +{
> +       return BLKPREP_OK;
> +}
> +
> +static inline void sd_zbc_remove(struct scsi_disk *sdkp) {}
> +#endif /* CONFIG_SCSI_ZBC */
> +
>  #endif /* _SCSI_DISK_H */
> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> new file mode 100644
> index 0000000..75cef62
> --- /dev/null
> +++ b/drivers/scsi/sd_zbc.c
> @@ -0,0 +1,538 @@
> +/*
> + * sd_zbc.c - SCSI Zoned Block commands
> + *
> + * Copyright (C) 2014-2015 SUSE Linux GmbH
> + * Written by: Hannes Reinecke <hare@suse.de>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; see the file COPYING.  If not, write to
> + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
> + * USA.
> + *
> + */
> +
> +#include <linux/blkdev.h>
> +#include <linux/rbtree.h>
> +
> +#include <asm/unaligned.h>
> +
> +#include <scsi/scsi.h>
> +#include <scsi/scsi_cmnd.h>
> +#include <scsi/scsi_dbg.h>
> +#include <scsi/scsi_device.h>
> +#include <scsi/scsi_driver.h>
> +#include <scsi/scsi_host.h>
> +#include <scsi/scsi_eh.h>
> +
> +#include "sd.h"
> +#include "scsi_priv.h"
> +
> +enum zbc_zone_cond {
> +       ZBC_ZONE_COND_NO_WP,
> +       ZBC_ZONE_COND_EMPTY,
> +       ZBC_ZONE_COND_IMPLICIT_OPEN,
> +       ZBC_ZONE_COND_EXPLICIT_OPEN,
> +       ZBC_ZONE_COND_CLOSED,
> +       ZBC_ZONE_COND_READONLY = 0xd,
> +       ZBC_ZONE_COND_FULL,
> +       ZBC_ZONE_COND_OFFLINE,
> +};
> +
> +#define SD_ZBC_BUF_SIZE 524288
> +
> +#define sd_zbc_debug(sdkp, fmt, args...)                               \
> +       pr_debug("%s %s [%s]: " fmt,                                    \
> +                dev_driver_string(&(sdkp)->device->sdev_gendev),       \
> +                dev_name(&(sdkp)->device->sdev_gendev),                \
> +                (sdkp)->disk->disk_name, ## args)
> +
> +#define sd_zbc_debug_ratelimit(sdkp, fmt, args...)             \
> +       do {                                                    \
> +               if (printk_ratelimit())                         \
> +                       sd_zbc_debug(sdkp, fmt, ## args);       \
> +       } while( 0 )
> +
> +struct zbc_update_work {
> +       struct work_struct zone_work;
> +       struct scsi_disk *sdkp;
> +       sector_t        zone_sector;
> +       int             zone_buflen;
> +       char            zone_buf[0];
> +};
> +
> +struct blk_zone *zbc_desc_to_zone(struct scsi_disk *sdkp, unsigned char *rec)
> +{
> +       struct blk_zone *zone;
> +       enum zbc_zone_cond zone_cond;
> +       sector_t wp = (sector_t)-1;
> +
> +       zone = kzalloc(sizeof(struct blk_zone), GFP_KERNEL);
> +       if (!zone)
> +               return NULL;
> +
> +       spin_lock_init(&zone->lock);
> +       zone->type = rec[0] & 0xf;
> +       zone_cond = (rec[1] >> 4) & 0xf;
> +       zone->len = logical_to_sectors(sdkp->device,
> +                                      get_unaligned_be64(&rec[8]));
> +       zone->start = logical_to_sectors(sdkp->device,
> +                                        get_unaligned_be64(&rec[16]));
> +
> +       if (blk_zone_is_smr(zone)) {
> +               wp = logical_to_sectors(sdkp->device,
> +                                       get_unaligned_be64(&rec[24]));
> +               if (zone_cond == ZBC_ZONE_COND_READONLY) {
> +                       zone->state = BLK_ZONE_READONLY;
> +               } else if (zone_cond == ZBC_ZONE_COND_OFFLINE) {
> +                       zone->state = BLK_ZONE_OFFLINE;
> +               } else {
> +                       zone->state = BLK_ZONE_OPEN;
> +               }
> +       } else
> +               zone->state = BLK_ZONE_NO_WP;
> +
> +       zone->wp = wp;
> +       /*
> +        * Fixup block zone state
> +        */
> +       if (zone_cond == ZBC_ZONE_COND_EMPTY &&
> +           zone->wp != zone->start) {
> +               sd_zbc_debug(sdkp,
> +                            "zone %zu state EMPTY wp %zu: adjust wp\n",
> +                            zone->start, zone->wp);
> +               zone->wp = zone->start;
> +       }
> +       if (zone_cond == ZBC_ZONE_COND_FULL &&
> +           zone->wp != zone->start + zone->len) {
> +               sd_zbc_debug(sdkp,
> +                            "zone %zu state FULL wp %zu: adjust wp\n",
> +                            zone->start, zone->wp);
> +               zone->wp = zone->start + zone->len;
> +       }
> +
> +       return zone;
> +}
> +
> +sector_t zbc_parse_zones(struct scsi_disk *sdkp, unsigned char *buf,
> +                        unsigned int buf_len)
> +{
> +       struct request_queue *q = sdkp->disk->queue;
> +       unsigned char *rec = buf;
> +       int rec_no = 0;
> +       unsigned int list_length;
> +       sector_t next_sector = -1;
> +       u8 same;
> +
> +       /* Parse REPORT ZONES header */
> +       list_length = get_unaligned_be32(&buf[0]);
> +       same = buf[4] & 0xf;
> +       rec = buf + 64;
> +       list_length += 64;
> +
> +       if (list_length < buf_len)
> +               buf_len = list_length;
> +
> +       while (rec < buf + buf_len) {
> +               struct blk_zone *this, *old;
> +               unsigned long flags;
> +
> +               this = zbc_desc_to_zone(sdkp, rec);
> +               if (!this)
> +                       break;
> +
> +               next_sector = this->start + this->len;
> +               old = blk_insert_zone(q, this);
> +               if (old) {
> +                       spin_lock_irqsave(&old->lock, flags);
> +                       if (blk_zone_is_smr(old)) {
> +                               old->wp = this->wp;
> +                               old->state = this->state;
> +                       }
> +                       spin_unlock_irqrestore(&old->lock, flags);
> +                       kfree(this);
> +               }
> +               rec += 64;
> +               rec_no++;
> +       }
> +
> +       sd_zbc_debug(sdkp,
> +                    "Inserted %d zones, next sector %zu len %d\n",
> +                    rec_no, next_sector, list_length);
> +
> +       return next_sector;
> +}
> +
> +void sd_zbc_refresh_zone_work(struct work_struct *work)
> +{
> +       struct zbc_update_work *zbc_work =
> +               container_of(work, struct zbc_update_work, zone_work);
> +       struct scsi_disk *sdkp = zbc_work->sdkp;
> +       struct request_queue *q = sdkp->disk->queue;
> +       unsigned int zone_buflen;
> +       int ret;
> +       sector_t last_sector;
> +       sector_t capacity = logical_to_sectors(sdkp->device, sdkp->capacity);
> +
> +       zone_buflen = zbc_work->zone_buflen;
> +       ret = sd_zbc_report_zones(sdkp, zbc_work->zone_buf, zone_buflen,
> +                                 zbc_work->zone_sector,
> +                                 ZBC_ZONE_REPORTING_OPTION_ALL, true);
> +       if (ret)
> +               goto done_free;
> +
> +       last_sector = zbc_parse_zones(sdkp, zbc_work->zone_buf, zone_buflen);
> +       if (last_sector != -1 && last_sector < capacity) {
> +               if (test_bit(SD_ZBC_ZONE_RESET, &sdkp->zone_flags)) {
> +                       sd_zbc_debug(sdkp,
> +                                    "zones in reset, cancelling refresh\n");
> +                       ret = -EAGAIN;
> +                       goto done_free;
> +               }
> +
> +               zbc_work->zone_sector = last_sector;
> +               queue_work(sdkp->zone_work_q, &zbc_work->zone_work);
> +               /* Kick request queue to be on the safe side */
> +               goto done_start_queue;
> +       }
> +done_free:
> +       kfree(zbc_work);
> +       if (test_and_clear_bit(SD_ZBC_ZONE_INIT, &sdkp->zone_flags) && ret) {
> +               sd_zbc_debug(sdkp,
> +                            "Cancelling zone initialisation\n");
> +       }
> +done_start_queue:
> +       if (q->mq_ops)
> +               blk_mq_start_hw_queues(q);
> +       else {
> +               unsigned long flags;
> +
> +               spin_lock_irqsave(q->queue_lock, flags);
> +               blk_start_queue(q);
> +               spin_unlock_irqrestore(q->queue_lock, flags);
> +       }
> +}
> +
> +/**
> + * sd_zbc_update_zones - Update zone information for @sector
> + * @sdkp: SCSI disk for which the zone information needs to be updated
> + * @sector: sector to be updated
> + * @bufsize: buffersize to be allocated
> + * @update: true if existing zones should be updated
> + */
> +void sd_zbc_update_zones(struct scsi_disk *sdkp, sector_t sector, int bufsize,
> +                        bool update)
> +{
> +       struct request_queue *q = sdkp->disk->queue;
> +       struct zbc_update_work *zbc_work;
> +       struct blk_zone *zone;
> +       struct rb_node *node;
> +       int zone_num = 0, zone_busy = 0, num_rec;
> +       sector_t next_sector = sector;
> +
> +       if (test_bit(SD_ZBC_ZONE_RESET, &sdkp->zone_flags)) {
> +               sd_zbc_debug(sdkp,
> +                            "zones in reset, not starting update\n");
> +               return;
> +       }
> +
> +retry:
> +       zbc_work = kzalloc(sizeof(struct zbc_update_work) + bufsize,
> +                          update ? GFP_NOWAIT : GFP_KERNEL);
> +       if (!zbc_work) {
> +               if (bufsize > 512) {
> +                       sd_zbc_debug(sdkp,
> +                                    "retry with buffer size %d\n", bufsize);
> +                       bufsize = bufsize >> 1;
> +                       goto retry;
> +               }
> +               sd_zbc_debug(sdkp,
> +                            "failed to allocate %d bytes\n", bufsize);
> +               if (!update)
> +                       clear_bit(SD_ZBC_ZONE_INIT, &sdkp->zone_flags);
> +               return;
> +       }
> +       zbc_work->zone_sector = sector;
> +       zbc_work->zone_buflen = bufsize;
> +       zbc_work->sdkp = sdkp;
> +       INIT_WORK(&zbc_work->zone_work, sd_zbc_refresh_zone_work);
> +       num_rec = (bufsize / 64) - 1;
> +
> +       /*
> +        * Mark zones under update as BUSY
> +        */
> +       if (update) {
> +               for (node = rb_first(&q->zones); node; node = rb_next(node)) {
> +                       unsigned long flags;
> +
> +                       zone = rb_entry(node, struct blk_zone, node);
> +                       if (num_rec == 0)
> +                               break;
> +                       if (zone->start != next_sector)
> +                               continue;
> +                       next_sector += zone->len;
> +                       num_rec--;
> +
> +                       spin_lock_irqsave(&zone->lock, flags);
> +                       if (blk_zone_is_smr(zone)) {
> +                               if (zone->state == BLK_ZONE_BUSY) {
> +                                       zone_busy++;
> +                               } else {
> +                                       zone->state = BLK_ZONE_BUSY;
> +                                       zone->wp = zone->start;
> +                               }
> +                               zone_num++;
> +                       }
> +                       spin_unlock_irqrestore(&zone->lock, flags);
> +               }
> +               if (zone_num && (zone_num == zone_busy)) {
> +                       sd_zbc_debug(sdkp,
> +                                    "zone update for %zu in progress\n",
> +                                    sector);
> +                       kfree(zbc_work);
> +                       return;
> +               }
> +       }
> +
> +       if (!queue_work(sdkp->zone_work_q, &zbc_work->zone_work)) {
> +               sd_zbc_debug(sdkp,
> +                            "zone update already queued?\n");
> +               kfree(zbc_work);
> +       }
> +}
> +
> +/**
> + * sd_zbc_report_zones - Issue a REPORT ZONES scsi command
> + * @sdkp: SCSI disk to which the command should be sent
> + * @buffer: response buffer
> + * @bufflen: length of @buffer
> + * @start_sector: logical sector for which zone information should be reported
> + * @option: reporting option to be used
> + * @partial: flag to set the 'partial' bit for report zones command
> + */
> +int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buffer,
> +                       int bufflen, sector_t start_sector,
> +                       enum zbc_zone_reporting_options option, bool partial)
> +{
> +       struct scsi_device *sdp = sdkp->device;
> +       const int timeout = sdp->request_queue->rq_timeout
> +                       * SD_FLUSH_TIMEOUT_MULTIPLIER;
> +       struct scsi_sense_hdr sshdr;
> +       sector_t start_lba = sectors_to_logical(sdkp->device, start_sector);
> +       unsigned char cmd[16];
> +       int result;
> +
> +       if (!scsi_device_online(sdp))
> +               return -ENODEV;
> +
> +       sd_zbc_debug(sdkp, "REPORT ZONES lba %zu len %d\n",
> +                    start_lba, bufflen);
> +
> +       memset(cmd, 0, 16);
> +       cmd[0] = ZBC_IN;
> +       cmd[1] = ZI_REPORT_ZONES;
> +       put_unaligned_be64(start_lba, &cmd[2]);
> +       put_unaligned_be32(bufflen, &cmd[10]);
> +       cmd[14] = (partial ? ZBC_REPORT_ZONE_PARTIAL : 0) | option;
> +       memset(buffer, 0, bufflen);
> +
> +       result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
> +                               buffer, bufflen, &sshdr,
> +                               timeout, SD_MAX_RETRIES, NULL);
> +
> +       if (result) {
> +               sd_zbc_debug(sdkp,
> +                            "REPORT ZONES lba %zu failed with %d/%d\n",
> +                            start_lba, host_byte(result), driver_byte(result));
> +               return -EIO;
> +       }
> +
> +       return 0;
> +}
> +
> +int sd_zbc_setup_discard(struct scsi_disk *sdkp, struct request *rq,
> +                        sector_t sector, unsigned int num_sectors)
> +{
> +       struct blk_zone *zone;
> +       int ret = BLKPREP_OK;
> +       unsigned long flags;
> +
> +       zone = blk_lookup_zone(rq->q, sector);
> +       if (!zone)
> +               return BLKPREP_KILL;
> +
> +       spin_lock_irqsave(&zone->lock, flags);
> +
> +       if (zone->state == BLK_ZONE_UNKNOWN ||
> +           zone->state == BLK_ZONE_BUSY) {
> +               sd_zbc_debug_ratelimit(sdkp,
> +                                      "Discarding zone %zu state %x, deferring\n",
> +                                      zone->start, zone->state);
> +               ret = BLKPREP_DEFER;
> +               goto out;
> +       }
> +       if (zone->state == BLK_ZONE_OFFLINE) {
> +               /* let the drive fail the command */
> +               sd_zbc_debug_ratelimit(sdkp,
> +                                      "Discarding offline zone %zu\n",
> +                                      zone->start);
> +               goto out;
> +       }
> +
> +       if (!blk_zone_is_smr(zone)) {
> +               sd_zbc_debug_ratelimit(sdkp,
> +                                      "Discarding %s zone %zu\n",
> +                                      blk_zone_is_cmr(zone) ? "CMR" : "unknown",
> +                                      zone->start);
> +               ret = BLKPREP_DONE;
> +               goto out;
> +       }
> +       if (blk_zone_is_empty(zone)) {
> +               sd_zbc_debug_ratelimit(sdkp,
> +                                      "Discarding empty zone %zu\n",
> +                                      zone->start);
> +               ret = BLKPREP_DONE;
> +               goto out;
> +       }
> +
> +       if (zone->start != sector ||
> +           zone->len < num_sectors) {
> +               sd_printk(KERN_ERR, sdkp,
> +                         "Misaligned RESET WP, start %zu/%zu "
> +                         "len %zu/%u\n",
> +                         zone->start, sector, zone->len, num_sectors);
> +               ret = BLKPREP_KILL;
> +               goto out;
> +       }

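It appears that you allow RESET WP to succeed here when
num_sectors is less than the number of blocks in use, as
indicated by zone->wp (i.e. zone->wp - zone->start).

The check above only rejects requests where num_sectors is
larger than zone->len, so a shorter discard that starts at
zone->start is accepted and the whole zone is reset, including
written data beyond the requested range.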

> +       /*
> +        * Opportunistic setting, will be fixed up with
> +        * zone update if RESET WRITE POINTER fails.
> +        */
> +       zone->wp = zone->start;
> +
> +out:
> +       spin_unlock_irqrestore(&zone->lock, flags);
> +
> +       return ret;
> +}
> +
> +int sd_zbc_setup_read_write(struct scsi_disk *sdkp, struct request *rq,
> +                           sector_t sector, unsigned int num_sectors)
> +{
> +       struct blk_zone *zone;
> +       int ret = BLKPREP_OK;
> +       unsigned long flags;
> +
> +       zone = blk_lookup_zone(sdkp->disk->queue, sector);
> +       if (!zone) {
> +               /* Might happen during zone initialization */
> +               sd_zbc_debug_ratelimit(sdkp,
> +                                      "zone for sector %zu not found, skipping\n",
> +                                      sector);
> +               return BLKPREP_OK;
> +       }
> +
> +       spin_lock_irqsave(&zone->lock, flags);
> +
> +       if (zone->state == BLK_ZONE_UNKNOWN ||
> +           zone->state == BLK_ZONE_BUSY) {
> +               sd_zbc_debug_ratelimit(sdkp,
> +                                      "zone %zu state %x, deferring\n",
> +                                      zone->start, zone->state);
> +               ret = BLKPREP_DEFER;
> +               goto out;
> +       }
> +       if (zone->state == BLK_ZONE_OFFLINE) {
> +               /* let the drive fail the command */
> +               sd_zbc_debug_ratelimit(sdkp,
> +                                      "zone %zu offline\n",
> +                                      zone->start);
> +               goto out;
> +       }
> +
> +       if (rq->cmd_flags & (REQ_WRITE | REQ_WRITE_SAME)) {
> +               if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
> +                       goto out;
> +               if (zone->state == BLK_ZONE_READONLY)
> +                       goto out;
> +               if (blk_zone_is_full(zone)) {
> +                       sd_zbc_debug(sdkp,
> +                                    "Write to full zone %zu/%zu\n",
> +                                    sector, zone->wp);
> +                       ret = BLKPREP_KILL;
> +                       goto out;
> +               }
> +               if (zone->wp != sector) {
> +                       sd_zbc_debug(sdkp,
> +                                    "Misaligned write %zu/%zu\n",
> +                                    sector, zone->wp);
> +                       ret = BLKPREP_KILL;
> +                       goto out;
> +               }
> +               zone->wp += num_sectors;
> +       } else if (blk_zone_is_smr(zone) && (zone->wp <= sector)) {
> +               sd_zbc_debug(sdkp,
> +                            "Read beyond wp %zu/%zu\n",
> +                            sector, zone->wp);
> +               ret = BLKPREP_DONE;
> +       }
> +
> +out:
> +       spin_unlock_irqrestore(&zone->lock, flags);
> +
> +       return ret;
> +}
> +
> +int sd_zbc_setup(struct scsi_disk *sdkp, char *buf, int buf_len)
> +{
> +       sector_t capacity = logical_to_sectors(sdkp->device, sdkp->capacity);
> +       sector_t last_sector;
> +
> +       if (test_and_set_bit(SD_ZBC_ZONE_INIT, &sdkp->zone_flags)) {
> +               sdev_printk(KERN_WARNING, sdkp->device,
> +                           "zone initialisation already running\n");
> +               return 0;
> +       }
> +
> +       if (!sdkp->zone_work_q) {
> +               char wq_name[32];
> +
> +               sprintf(wq_name, "zbc_wq_%s", sdkp->disk->disk_name);
> +               sdkp->zone_work_q = create_singlethread_workqueue(wq_name);
> +               if (!sdkp->zone_work_q) {
> +                       sdev_printk(KERN_WARNING, sdkp->device,
> +                                   "create zoned disk workqueue failed\n");
> +                       return -ENOMEM;
> +               }
> +       } else if (!test_and_set_bit(SD_ZBC_ZONE_RESET, &sdkp->zone_flags)) {
> +               drain_workqueue(sdkp->zone_work_q);
> +               clear_bit(SD_ZBC_ZONE_RESET, &sdkp->zone_flags);
> +       }
> +
> +       last_sector = zbc_parse_zones(sdkp, buf, buf_len);
> +       if (last_sector != -1 && last_sector < capacity) {
> +               sd_zbc_update_zones(sdkp, last_sector, SD_ZBC_BUF_SIZE, false);
> +       } else
> +               clear_bit(SD_ZBC_ZONE_INIT, &sdkp->zone_flags);
> +
> +       return 0;
> +}
> +
> +void sd_zbc_remove(struct scsi_disk *sdkp)
> +{
> +       if (sdkp->zone_work_q) {
> +               if (!test_and_set_bit(SD_ZBC_ZONE_RESET, &sdkp->zone_flags))
> +                       drain_workqueue(sdkp->zone_work_q);
> +               clear_bit(SD_ZBC_ZONE_INIT, &sdkp->zone_flags);
> +               destroy_workqueue(sdkp->zone_work_q);
> +       }
> +}
> --
> 1.8.5.6
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  https://urldefense.proofpoint.com/v2/url?u=http-3A__vger.kernel.org_majordomo-2Dinfo.html&d=CwIBAg&c=IGDlg0lD0b-nebmJJ0Kp8A&r=Wg5NqlNlVTT7Ugl8V50qIHLe856QW0qfG3WVYGOrWzA&m=TECAPpeng5OMyCHPt1hU8vo6KAxzybSw2on8YvGxkFA&s=FuZ8S92fAROISBQ96aUzY73nDV4L0J8ME36u9FCTWK8&e=



-- 
Shaun Tancheff

  parent reply	other threads:[~2016-08-12  6:01 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-19 13:25 [PATCH 0/5] Add support for ZBC host-managed devices Hannes Reinecke
2016-07-19 13:25 ` [PATCH 1/5] sd: configure ZBC devices Hannes Reinecke
2016-07-20  0:46   ` Damien Le Moal
2016-07-22 21:56   ` Ewan D. Milne
2016-07-23 20:31     ` Hannes Reinecke
2016-07-23 22:04       ` Bart Van Assche
2016-07-24  7:07         ` Hannes Reinecke
2016-07-25  6:00         ` Hannes Reinecke
2016-07-25 13:24           ` Ewan D. Milne
2016-08-01 14:24   ` Shaun Tancheff
2016-08-01 14:29     ` Hannes Reinecke
2016-07-19 13:25 ` [PATCH 2/5] sd: Implement new RESET_WP provisioning mode Hannes Reinecke
2016-07-20  0:49   ` Damien Le Moal
2016-07-20 14:52     ` Hannes Reinecke
2016-07-19 13:25 ` [PATCH 3/5] sd: Implement support for ZBC devices Hannes Reinecke
2016-07-20  0:54   ` Damien Le Moal
2016-08-12  6:00   ` Shaun Tancheff [this message]
2016-07-19 13:25 ` [PATCH 4/5] sd: Limit messages for ZBC disks capacity change Hannes Reinecke
2016-07-19 13:25 ` [PATCH 5/5] sd_zbc: Fix handling of ZBC read after write pointer Hannes Reinecke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAJVOszCecNaGm45avqfhUOT04+yMnFMTZKnXui=2p8ndWGE7VQ@mail.gmail.com' \
    --to=shaun.tancheff@seagate.com \
    --cc=damien.lemoal@hgst.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=james.bottomley@hansenpartnership.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.