* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-08 0:14 ` [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support Chaitanya Kulkarni
@ 2021-04-08 7:23 ` Christoph Hellwig
2021-04-08 21:14 ` Chaitanya Kulkarni
2021-04-09 7:09 ` Chaitanya Kulkarni
2021-04-08 8:01 ` Damien Le Moal
2021-04-08 8:15 ` Damien Le Moal
2 siblings, 2 replies; 17+ messages in thread
From: Christoph Hellwig @ 2021-04-08 7:23 UTC (permalink / raw)
To: Chaitanya Kulkarni; +Cc: linux-nvme, hch, damien.lemoal
> +static void nvmet_set_csi_zns_effects(struct nvme_effects_log *log)
Same naming nitpick as for the nvm version.
> switch (req->ns->csi) {
> case NVME_CSI_NVM:
> + return nvmet_copy_ns_identifier(req, NVME_NIDT_CSI,
> + NVME_NIDT_CSI_LEN,
> + &req->ns->csi, o);
> + case NVME_CSI_ZNS:
> + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
> + return NVME_SC_INVALID_IO_CMD_SET;
> +
This goes away with my comment on the previous patch.
> @@ -448,6 +456,15 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
> case nvme_cmd_write_zeroes:
> req->execute = nvmet_bdev_execute_write_zeroes;
> return 0;
> + case nvme_cmd_zone_append:
> + req->execute = nvmet_bdev_execute_zone_append;
> + return 0;
> + case nvme_cmd_zone_mgmt_recv:
> + req->execute = nvmet_bdev_execute_zone_mgmt_recv;
> + return 0;
> + case nvme_cmd_zone_mgmt_send:
> + req->execute = nvmet_bdev_execute_zone_mgmt_send;
I think we need a separate _parse for just ZNS. That way we can
do the ns.csi and IS_ENABLED check in one single place, and we
also don't need stubs for any of these functions as they are all
under the IS_ENABLED check and thus the compiler will never generate
a call to them for !CONFIG_BLK_DEV_ZONED.
> +static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
> +{
> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
> + u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
> +
> + if (!bdev_is_zoned(req->ns->bdev))
> + return NVME_SC_INVALID_NS | NVME_SC_DNR;
I think checking the csi in the nvmet_ns structure here is a lot
cleaner. And as mentioned above I think we should do this once for
all zns-specific commands.
> +void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
> +{
> + u8 zasl = req->sq->ctrl->subsys->zasl;
> + struct nvmet_ctrl *ctrl = req->sq->ctrl;
> + struct nvme_id_ctrl_zns *id;
> + u16 status;
> +
> + if (req->cmd->identify.csi != NVME_CSI_ZNS) {
> + req->error_loc = offsetof(struct nvme_common_command, opcode);
> + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
> + goto out;
> + }
The CSI check or rather a switch on the CSI with a default fail
needs to move into the common code, probably into the main parsing
function.
> +void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
> +{
> + struct nvme_id_ns_zns *id_zns;
> + u64 zsze;
> + u16 status;
> +
> + if (req->cmd->identify.csi != NVME_CSI_ZNS) {
> + req->error_loc = offsetof(struct nvme_common_command, opcode);
> + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
> + goto out;
> + }
Same here.
> +static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d)
> +{
> + struct nvmet_report_zone_data *rz = d;
> + struct nvme_zone_descriptor *entries = rz->rz->entries;
> + struct nvmet_ns *ns = rz->ns;
> + static const unsigned int blk_zcond_to_nvme_zstate[] = {
> + [BLK_ZONE_COND_EMPTY] = NVME_ZRASF_ZONE_STATE_EMPTY,
> + [BLK_ZONE_COND_IMP_OPEN] = NVME_ZRASF_ZONE_STATE_IMP_OPEN,
> + [BLK_ZONE_COND_EXP_OPEN] = NVME_ZRASF_ZONE_STATE_EXP_OPEN,
> + [BLK_ZONE_COND_CLOSED] = NVME_ZRASF_ZONE_STATE_CLOSED,
> + [BLK_ZONE_COND_READONLY] = NVME_ZRASF_ZONE_STATE_READONLY,
> + [BLK_ZONE_COND_FULL] = NVME_ZRASF_ZONE_STATE_FULL,
> + [BLK_ZONE_COND_OFFLINE] = NVME_ZRASF_ZONE_STATE_OFFLINE,
> + };
> +
> + if (rz->zrasf == NVME_ZRASF_ZONE_REPORT_ALL)
> + goto record_zone;
> +
> + /*
> + * Make sure this zone condition's value is mapped to NVMe ZNS zone
> + * condition value.
> + */
> + if (z->cond > ARRAY_SIZE(blk_zcond_to_nvme_zstate) ||
> + !blk_zcond_to_nvme_zstate[z->cond])
> + return -EINVAL;
> +
> + /* filter zone by condition */
> + if (blk_zcond_to_nvme_zstate[z->cond] != rz->zrasf)
> + return 0;
> +
> +record_zone:
While not bad per se I find the structure a little odd. I'd move the
checks into a level of indentation instead.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-08 7:23 ` Christoph Hellwig
@ 2021-04-08 21:14 ` Chaitanya Kulkarni
2021-04-09 5:40 ` Christoph Hellwig
2021-04-09 7:09 ` Chaitanya Kulkarni
1 sibling, 1 reply; 17+ messages in thread
From: Chaitanya Kulkarni @ 2021-04-08 21:14 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nvme, Damien Le Moal
On 4/8/21 00:24, Christoph Hellwig wrote:
>> +static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d)
>> +{
>> + struct nvmet_report_zone_data *rz = d;
>> + struct nvme_zone_descriptor *entries = rz->rz->entries;
>> + struct nvmet_ns *ns = rz->ns;
>> + static const unsigned int blk_zcond_to_nvme_zstate[] = {
>> + [BLK_ZONE_COND_EMPTY] = NVME_ZRASF_ZONE_STATE_EMPTY,
>> + [BLK_ZONE_COND_IMP_OPEN] = NVME_ZRASF_ZONE_STATE_IMP_OPEN,
>> + [BLK_ZONE_COND_EXP_OPEN] = NVME_ZRASF_ZONE_STATE_EXP_OPEN,
>> + [BLK_ZONE_COND_CLOSED] = NVME_ZRASF_ZONE_STATE_CLOSED,
>> + [BLK_ZONE_COND_READONLY] = NVME_ZRASF_ZONE_STATE_READONLY,
>> + [BLK_ZONE_COND_FULL] = NVME_ZRASF_ZONE_STATE_FULL,
>> + [BLK_ZONE_COND_OFFLINE] = NVME_ZRASF_ZONE_STATE_OFFLINE,
>> + };
>> +
>> + if (rz->zrasf == NVME_ZRASF_ZONE_REPORT_ALL)
>> + goto record_zone;
>> +
>> + /*
>> + * Make sure this zone condition's value is mapped to NVMe ZNS zone
>> + * condition value.
>> + */
>> + if (z->cond > ARRAY_SIZE(blk_zcond_to_nvme_zstate) ||
>> + !blk_zcond_to_nvme_zstate[z->cond])
>> + return -EINVAL;
>> +
>> + /* filter zone by condition */
>> + if (blk_zcond_to_nvme_zstate[z->cond] != rz->zrasf)
>> + return 0;
>> +
>> +record_zone:
> While not bad per se I find the structure a little odd. I'd move the
> checks into a level of indentation instead.
>
Can you please elaborate on this ?
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-08 21:14 ` Chaitanya Kulkarni
@ 2021-04-09 5:40 ` Christoph Hellwig
2021-04-09 5:42 ` Chaitanya Kulkarni
0 siblings, 1 reply; 17+ messages in thread
From: Christoph Hellwig @ 2021-04-09 5:40 UTC (permalink / raw)
To: Chaitanya Kulkarni; +Cc: Christoph Hellwig, linux-nvme, Damien Le Moal
On Thu, Apr 08, 2021 at 09:14:24PM +0000, Chaitanya Kulkarni wrote:
> >> + if (rz->zrasf == NVME_ZRASF_ZONE_REPORT_ALL)
> >> + goto record_zone;
> >> +
> >> + /*
> >> + * Make sure this zone condition's value is mapped to NVMe ZNS zone
> >> + * condition value.
> >> + */
> >> + if (z->cond > ARRAY_SIZE(blk_zcond_to_nvme_zstate) ||
> >> + !blk_zcond_to_nvme_zstate[z->cond])
> >> + return -EINVAL;
> >> +
> >> + /* filter zone by condition */
> >> + if (blk_zcond_to_nvme_zstate[z->cond] != rz->zrasf)
> >> + return 0;
> >> +
> >> +record_zone:
> > While not bad per se I find the structure a little odd. I'd move the
> > checks into a level of indentation instead.
> >
>
> Can you please elaborate on this ?
If we have a condition that limits a query having that inside the
conditional just flows much easier than a goto that jumps over the
checks.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-09 5:40 ` Christoph Hellwig
@ 2021-04-09 5:42 ` Chaitanya Kulkarni
0 siblings, 0 replies; 17+ messages in thread
From: Chaitanya Kulkarni @ 2021-04-09 5:42 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nvme, Damien Le Moal
On 4/8/21 22:41, Christoph Hellwig wrote:
>>> While not bad per se I find the structure a little odd. I'd move the
>>> checks into a level of indentation instead.
>>>
>> Can you please elaborate on this ?
> If we have a condition that limits a query having that inside the
> conditional just flows much easier than a goto that jumps over the
> checks.
>
I see, will make that change.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-08 7:23 ` Christoph Hellwig
2021-04-08 21:14 ` Chaitanya Kulkarni
@ 2021-04-09 7:09 ` Chaitanya Kulkarni
2021-04-09 9:53 ` Christoph Hellwig
1 sibling, 1 reply; 17+ messages in thread
From: Chaitanya Kulkarni @ 2021-04-09 7:09 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nvme, Damien Le Moal
On 4/8/21 00:24, Christoph Hellwig wrote:
>> @@ -448,6 +456,15 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
>> case nvme_cmd_write_zeroes:
>> req->execute = nvmet_bdev_execute_write_zeroes;
>> return 0;
>> + case nvme_cmd_zone_append:
>> + req->execute = nvmet_bdev_execute_zone_append;
>> + return 0;
>> + case nvme_cmd_zone_mgmt_recv:
>> + req->execute = nvmet_bdev_execute_zone_mgmt_recv;
>> + return 0;
>> + case nvme_cmd_zone_mgmt_send:
>> + req->execute = nvmet_bdev_execute_zone_mgmt_send;
> I think we need a separate _parse for just ZNS. That way we can
> do the ns.csi and IS_ENABLED check in one single place, and we
> also don't need stubs for any of these functions as they are all
> under the IS_ENABLED check and thus the compiler will never generate
> a call to them for !CONFIG_BLK_DEV_ZONED.
>
Do you prefer something like this ? (partially tested with and without
CONFIG_BLK_DEV_ZONED) :-
# git diff
diff --git a/drivers/nvme/target/admin-cmd.c
b/drivers/nvme/target/admin-cmd.c
index fc5f64a38661..da6d5225f25a 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -685,6 +685,14 @@ static void nvmet_execute_identify(struct nvmet_req
*req)
if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
+ switch (req->cmd->identify.csi) {
+ case NVME_CSI_NVM:
+ case NVME_CSI_ZNS:
+ break;
+ default:
+ goto out;
+ }
+
switch (req->cmd->identify.cns) {
case NVME_ID_CNS_NS:
return nvmet_execute_identify_ns(req);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index d33ba4c3c3f4..a0bdd39de23b 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -892,7 +892,16 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
if (req->ns->file)
return nvmet_file_parse_io_cmd(req);
- return nvmet_bdev_parse_io_cmd(req);
+ switch (req->ns->csi) {
+ case NVME_CSI_NVM:
+ return nvmet_bdev_parse_io_cmd(req);
+ case NVME_CSI_ZNS:
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+ return nvmet_bdev_zns_parse_io_cmd(req);
+ /* fallthrough */
+ default:
+ return NVME_SC_INVALID_IO_CMD_SET;
+ }
}
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 8333701e4154..fcd01e303e19 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -404,6 +404,7 @@ void nvmet_stop_keep_alive_timer(struct nvmet_ctrl
*ctrl);
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns
*id);
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index b62925eb49ab..c65ac53d9b0b 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -445,3 +445,22 @@ void nvmet_bdev_execute_zone_append(struct
nvmet_req *req)
bio_put(bio);
nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL : status);
}
+
+u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req)
+{
+ struct nvme_command *cmd = req->cmd;
+
+ switch (cmd->common.opcode) {
+ case nvme_cmd_zone_append:
+ req->execute = nvmet_bdev_execute_zone_append;
+ return 0;
+ case nvme_cmd_zone_mgmt_recv:
+ req->execute = nvmet_bdev_execute_zone_mgmt_recv;
+ return 0;
+ case nvme_cmd_zone_mgmt_send:
+ req->execute = nvmet_bdev_execute_zone_mgmt_send;
+ return 0;
+ default:
+ return nvmet_bdev_parse_io_cmd(req);
+ }
+}
roo
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-09 7:09 ` Chaitanya Kulkarni
@ 2021-04-09 9:53 ` Christoph Hellwig
0 siblings, 0 replies; 17+ messages in thread
From: Christoph Hellwig @ 2021-04-09 9:53 UTC (permalink / raw)
To: Chaitanya Kulkarni; +Cc: Christoph Hellwig, linux-nvme, Damien Le Moal
On Fri, Apr 09, 2021 at 07:09:12AM +0000, Chaitanya Kulkarni wrote:
> diff --git a/drivers/nvme/target/admin-cmd.c
> b/drivers/nvme/target/admin-cmd.c
> index fc5f64a38661..da6d5225f25a 100644
> --- a/drivers/nvme/target/admin-cmd.c
> +++ b/drivers/nvme/target/admin-cmd.c
> @@ -685,6 +685,14 @@ static void nvmet_execute_identify(struct nvmet_req
> *req)
> if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
> return;
>
> + switch (req->cmd->identify.csi) {
> + case NVME_CSI_NVM:
> + case NVME_CSI_ZNS:
> + break;
> + default:
> + goto out;
> + }
> +
I don't think this is the right thing to do as only a few CNS values
look at the CSI, so we'd need this switch in those switch cases that
need CNS values only.
The rest looks good.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-08 0:14 ` [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support Chaitanya Kulkarni
2021-04-08 7:23 ` Christoph Hellwig
@ 2021-04-08 8:01 ` Damien Le Moal
2021-04-08 22:06 ` Chaitanya Kulkarni
2021-04-08 8:15 ` Damien Le Moal
2 siblings, 1 reply; 17+ messages in thread
From: Damien Le Moal @ 2021-04-08 8:01 UTC (permalink / raw)
To: Chaitanya Kulkarni, linux-nvme; +Cc: hch
On 2021/04/08 9:14, Chaitanya Kulkarni wrote:
> NVMe TP 4053 – Zoned Namespaces (ZNS) allows host software to
> communicate with a non-volatile memory subsystem using zones for NVMe
> protocol-based controllers. NVMeOF already support the ZNS NVMe
> Protocol compliant devices on the target in the passthru mode. There
> are Generic zoned block devices like Shingled Magnetic Recording (SMR)
> HDDs that are not based on the NVMe protocol.
>
> This patch adds ZNS backend to support the ZBDs for NVMeOF target.
>
> This support includes implementing the new command set NVME_CSI_ZNS,
> adding different command handlers for ZNS command set such as NVMe
> Identify Controller, NVMe Identify Namespace, NVMe Zone Append,
> NVMe Zone Management Send and NVMe Zone Management Receive.
>
> With the new command set identifier, we also update the target command
> effects logs to reflect the ZNS compliant commands.
>
> Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
> ---
> drivers/nvme/target/Makefile | 1 +
> drivers/nvme/target/admin-cmd.c | 27 ++
> drivers/nvme/target/io-cmd-bdev.c | 35 ++-
> drivers/nvme/target/nvmet.h | 47 +++
> drivers/nvme/target/zns.c | 477 ++++++++++++++++++++++++++++++
> include/linux/nvme.h | 7 +
> 6 files changed, 585 insertions(+), 9 deletions(-)
> create mode 100644 drivers/nvme/target/zns.c
>
> diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
> index ebf91fc4c72e..9837e580fa7e 100644
> --- a/drivers/nvme/target/Makefile
> +++ b/drivers/nvme/target/Makefile
> @@ -12,6 +12,7 @@ obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
> nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
> discovery.o io-cmd-file.o io-cmd-bdev.o
> nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
> +nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
> nvme-loop-y += loop.o
> nvmet-rdma-y += rdma.o
> nvmet-fc-y += fc.o
> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
> index 176c8593d341..bf4876df624a 100644
> --- a/drivers/nvme/target/admin-cmd.c
> +++ b/drivers/nvme/target/admin-cmd.c
> @@ -179,6 +179,13 @@ static void nvmet_set_csi_nvm_effects(struct nvme_effects_log *log)
> log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0);
> }
>
> +static void nvmet_set_csi_zns_effects(struct nvme_effects_log *log)
> +{
> + log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0);
> + log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0);
> + log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0);
> +}
> +
> static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
> {
> struct nvme_effects_log *log;
> @@ -194,6 +201,15 @@ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
> case NVME_CSI_NVM:
> nvmet_set_csi_nvm_effects(log);
> break;
> + case NVME_CSI_ZNS:
> + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
> + status = NVME_SC_INVALID_IO_CMD_SET;
> + goto free;
> + }
> +
> + nvmet_set_csi_nvm_effects(log);
> + nvmet_set_csi_zns_effects(log);
> + break;
> default:
> status = NVME_SC_INVALID_LOG_PAGE;
> goto free;
> @@ -630,6 +646,13 @@ static u16 nvmet_execute_identify_desclist_csi(struct nvmet_req *req, off_t *o)
> {
> switch (req->ns->csi) {
> case NVME_CSI_NVM:
> + return nvmet_copy_ns_identifier(req, NVME_NIDT_CSI,
> + NVME_NIDT_CSI_LEN,
> + &req->ns->csi, o);
> + case NVME_CSI_ZNS:
> + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
> + return NVME_SC_INVALID_IO_CMD_SET;
> +
> return nvmet_copy_ns_identifier(req, NVME_NIDT_CSI,
> NVME_NIDT_CSI_LEN,
> &req->ns->csi, o);
> @@ -682,8 +705,12 @@ static void nvmet_execute_identify(struct nvmet_req *req)
> switch (req->cmd->identify.cns) {
> case NVME_ID_CNS_NS:
> return nvmet_execute_identify_ns(req);
> + case NVME_ID_CNS_CS_NS:
> + return nvmet_execute_identify_cns_cs_ns(req);
> case NVME_ID_CNS_CTRL:
> return nvmet_execute_identify_ctrl(req);
> + case NVME_ID_CNS_CS_CTRL:
> + return nvmet_execute_identify_cns_cs_ctrl(req);
> case NVME_ID_CNS_NS_ACTIVE_LIST:
> return nvmet_execute_identify_nslist(req);
> case NVME_ID_CNS_NS_DESC_LIST:
> diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
> index 9a8b3726a37c..1e54e7478735 100644
> --- a/drivers/nvme/target/io-cmd-bdev.c
> +++ b/drivers/nvme/target/io-cmd-bdev.c
> @@ -63,6 +63,14 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
> }
> }
>
> +void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
> +{
> + if (ns->bdev) {
> + blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
> + ns->bdev = NULL;
> + }
> +}
> +
> int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
> {
> int ret;
> @@ -86,15 +94,15 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
> if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
> nvmet_bdev_ns_enable_integrity(ns);
>
> - return 0;
> -}
> -
> -void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
> -{
> - if (ns->bdev) {
> - blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
> - ns->bdev = NULL;
> + if (bdev_is_zoned(ns->bdev)) {
> + if (!nvmet_bdev_zns_enable(ns)) {
> + nvmet_bdev_ns_disable(ns);
> + return -EINVAL;
> + }
> + ns->csi = NVME_CSI_ZNS;
> }
> +
> + return 0;
> }
>
> void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
> @@ -102,7 +110,7 @@ void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
> ns->size = i_size_read(ns->bdev->bd_inode);
> }
>
> -static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
> +u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
> {
> u16 status = NVME_SC_SUCCESS;
>
> @@ -448,6 +456,15 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
> case nvme_cmd_write_zeroes:
> req->execute = nvmet_bdev_execute_write_zeroes;
> return 0;
> + case nvme_cmd_zone_append:
> + req->execute = nvmet_bdev_execute_zone_append;
> + return 0;
> + case nvme_cmd_zone_mgmt_recv:
> + req->execute = nvmet_bdev_execute_zone_mgmt_recv;
> + return 0;
> + case nvme_cmd_zone_mgmt_send:
> + req->execute = nvmet_bdev_execute_zone_mgmt_send;
> + return 0;
> default:
> return nvmet_report_invalid_opcode(req);
> }
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index ab878fb96fbd..5e6514565f8c 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -248,6 +248,10 @@ struct nvmet_subsys {
> unsigned int admin_timeout;
> unsigned int io_timeout;
> #endif /* CONFIG_NVME_TARGET_PASSTHRU */
> +
> +#ifdef CONFIG_BLK_DEV_ZONED
> + u8 zasl;
> +#endif /* CONFIG_BLK_DEV_ZONED */
> };
>
> static inline struct nvmet_subsys *to_subsys(struct config_item *item)
> @@ -528,6 +532,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
> void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
> int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
> void nvmet_ns_revalidate(struct nvmet_ns *ns);
> +u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
>
> static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
> {
> @@ -585,6 +590,48 @@ static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
> }
> #endif /* CONFIG_NVME_TARGET_PASSTHRU */
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
> +void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req);
> +void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req);
> +void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req);
> +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req);
> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req);
> +#else /* CONFIG_BLK_DEV_ZONED */
> +static inline bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
> +{
> + return false;
> +}
> +static inline void
> +nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
> +{
> + pr_err("unhandled identify cns %d on qid %d\n",
> + req->cmd->identify.cns, req->sq->qid);
> + req->error_loc = offsetof(struct nvme_identify, cns);
> + nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
> +}
> +static inline void
> +nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
> +{
> + pr_err("unhandled identify cns %d on qid %d\n",
> + req->cmd->identify.cns, req->sq->qid);
> + req->error_loc = offsetof(struct nvme_identify, cns);
> + nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
> +}
> +static inline void
> +nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req)
> +{
> +}
> +static inline void
> +nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
> +{
> +}
> +static inline void
> +nvmet_bdev_execute_zone_append(struct nvmet_req *req)
> +{
> +}
> +#endif /* CONFIG_BLK_DEV_ZONED */
> +
> static inline struct nvme_ctrl *
> nvmet_req_passthru_ctrl(struct nvmet_req *req)
> {
> diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
> new file mode 100644
> index 000000000000..308198dd580b
> --- /dev/null
> +++ b/drivers/nvme/target/zns.c
> @@ -0,0 +1,477 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * NVMe ZNS-ZBD command implementation.
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + */
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +#include <linux/nvme.h>
> +#include <linux/blkdev.h>
> +#include "nvmet.h"
> +
> +/*
> + * We set the Memory Page Size Minimum (MPSMIN) for target controller to 0
> + * which gets added by 12 in the nvme_enable_ctrl() which results in 2^12 = 4k
> + * as page_shift value. When calculating the ZASL use shift by 12.
> + */
> +#define NVMET_MPSMIN_SHIFT 12
> +
> +static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
> +{
> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
> + u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
> +
> + if (!bdev_is_zoned(req->ns->bdev))
> + return NVME_SC_INVALID_NS | NVME_SC_DNR;
> +
> + if (sect > get_capacity(req->ns->bdev->bd_disk)) {
> + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, slba);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + /*
> + * Make sure out buffer size at least matches nvme report zone header.
> + * Reporting partial 64 bit nr_zones value can lead to unwanted side
> + * effects.
> + */
> + if (out_bufsize < sizeof(struct nvme_zone_report)) {
> + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, numd);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + if (req->cmd->zmr.zra != NVME_ZRA_ZONE_REPORT) {
> + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, zra);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + switch (req->cmd->zmr.pr) {
> + case 0:
> + case 1:
> + break;
> + default:
> + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, pr);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + switch (req->cmd->zmr.zrasf) {
> + case NVME_ZRASF_ZONE_REPORT_ALL:
> + case NVME_ZRASF_ZONE_STATE_EMPTY:
> + case NVME_ZRASF_ZONE_STATE_IMP_OPEN:
> + case NVME_ZRASF_ZONE_STATE_EXP_OPEN:
> + case NVME_ZRASF_ZONE_STATE_CLOSED:
> + case NVME_ZRASF_ZONE_STATE_FULL:
> + case NVME_ZRASF_ZONE_STATE_READONLY:
> + case NVME_ZRASF_ZONE_STATE_OFFLINE:
> + break;
> + default:
> + req->error_loc =
> + offsetof(struct nvme_zone_mgmt_recv_cmd, zrasf);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + return NVME_SC_SUCCESS;
> +}
> +
> +static inline u8 nvmet_zasl(unsigned int zone_append_sects)
> +{
> + /*
> + * Zone Append Size Limit is the value expressed in the units of minimum
> + * memory page size (i.e. 12) and is reported power of 2.
> + */
> + return ilog2(zone_append_sects >> (NVMET_MPSMIN_SHIFT - 9));
> +}
> +
> +static inline bool nvmet_zns_update_zasl(struct nvmet_ns *ns)
> +{
> + struct request_queue *q = ns->bdev->bd_disk->queue;
> + u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
> +
> + if (ns->subsys->zasl)
> + return ns->subsys->zasl < zasl;
> +
> + ns->subsys->zasl = zasl;
> + return true;
> +}
> +
> +static int nvmet_bdev_validate_zns_zones_cb(struct blk_zone *z,
> + unsigned int i, void *data)
> +{
> + if (z->type == BLK_ZONE_TYPE_CONVENTIONAL)
> + return -EOPNOTSUPP;
> + return 0;
> +}
> +
> +static bool nvmet_bdev_has_conv_zones(struct block_device *bdev)
> +{
> + int ret;
> +
> + if (bdev->bd_disk->queue->conv_zones_bitmap)
> + return true;
> +
> + ret = blkdev_report_zones(bdev, 0, blkdev_nr_zones(bdev->bd_disk),
> + nvmet_bdev_validate_zns_zones_cb, NULL);
> +
> + return ret <= 0;
> +}
> +
> +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
> +{
> + if (nvmet_bdev_has_conv_zones(ns->bdev))
> + return false;
> +
> + ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
> +
> + if (!nvmet_zns_update_zasl(ns))
> + return false;
> + /*
> + * Generic zoned block devices may have a smaller last zone which is
> + * not supported by ZNS. Excludes zoned drives that have such smaller
> + * last zone.
> + */
> + return !(get_capacity(ns->bdev->bd_disk) &
> + (bdev_zone_sectors(ns->bdev) - 1));
> +}
> +
> +void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
> +{
> + u8 zasl = req->sq->ctrl->subsys->zasl;
> + struct nvmet_ctrl *ctrl = req->sq->ctrl;
> + struct nvme_id_ctrl_zns *id;
> + u16 status;
> +
> + if (req->cmd->identify.csi != NVME_CSI_ZNS) {
> + req->error_loc = offsetof(struct nvme_common_command, opcode);
> + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
> + goto out;
> + }
> +
> + id = kzalloc(sizeof(*id), GFP_KERNEL);
> + if (!id) {
> + status = NVME_SC_INTERNAL;
> + goto out;
> + }
> +
> + if (ctrl->ops->get_mdts)
> + id->zasl = min_t(u8, ctrl->ops->get_mdts(ctrl), zasl);
> + else
> + id->zasl = zasl;
> +
> + status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
> +
> + kfree(id);
> +out:
> + nvmet_req_complete(req, status);
> +}
> +
> +void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
> +{
> + struct nvme_id_ns_zns *id_zns;
> + u64 zsze;
> + u16 status;
> +
> + if (req->cmd->identify.csi != NVME_CSI_ZNS) {
> + req->error_loc = offsetof(struct nvme_common_command, opcode);
> + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
> + goto out;
> + }
> +
> + if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
> + req->error_loc = offsetof(struct nvme_identify, nsid);
> + status = NVME_SC_INVALID_NS | NVME_SC_DNR;
> + goto out;
> + }
> +
> + id_zns = kzalloc(sizeof(*id_zns), GFP_KERNEL);
> + if (!id_zns) {
> + status = NVME_SC_INTERNAL;
> + goto out;
> + }
> +
> + status = nvmet_req_find_ns(req);
> + if (status) {
> + status = NVME_SC_INTERNAL;
> + goto done;
> + }
> +
> + if (!bdev_is_zoned(req->ns->bdev)) {
> + req->error_loc = offsetof(struct nvme_identify, nsid);
> + status = NVME_SC_INVALID_NS | NVME_SC_DNR;
> + goto done;
> + }
> +
> + nvmet_ns_revalidate(req->ns);
> + zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
> + req->ns->blksize_shift;
> + id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
> + id_zns->mor = cpu_to_le32(bdev_max_open_zones(req->ns->bdev));
> + id_zns->mar = cpu_to_le32(bdev_max_active_zones(req->ns->bdev));
> +
> +done:
> + status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns));
> + kfree(id_zns);
> +out:
> + nvmet_req_complete(req, status);
> +}
> +
> +struct nvmet_report_zone_data {
> + struct nvme_zone_report *rz;
> + struct nvmet_ns *ns;
> + u64 nr_zones;
> + u8 zrasf;
> +};
> +
> +static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d)
> +{
> + struct nvmet_report_zone_data *rz = d;
> + struct nvme_zone_descriptor *entries = rz->rz->entries;
> + struct nvmet_ns *ns = rz->ns;
> + static const unsigned int blk_zcond_to_nvme_zstate[] = {
> + [BLK_ZONE_COND_EMPTY] = NVME_ZRASF_ZONE_STATE_EMPTY,
> + [BLK_ZONE_COND_IMP_OPEN] = NVME_ZRASF_ZONE_STATE_IMP_OPEN,
> + [BLK_ZONE_COND_EXP_OPEN] = NVME_ZRASF_ZONE_STATE_EXP_OPEN,
> + [BLK_ZONE_COND_CLOSED] = NVME_ZRASF_ZONE_STATE_CLOSED,
> + [BLK_ZONE_COND_READONLY] = NVME_ZRASF_ZONE_STATE_READONLY,
> + [BLK_ZONE_COND_FULL] = NVME_ZRASF_ZONE_STATE_FULL,
> + [BLK_ZONE_COND_OFFLINE] = NVME_ZRASF_ZONE_STATE_OFFLINE,
> + };
This creates a sparse array bigger than it needs to be. If you reverse here and
use the ZRASF values as indexes (blk_zrasf_to_zcond[]), the array will shrink
and not be sparse, then... See below...
> +
> + if (rz->zrasf == NVME_ZRASF_ZONE_REPORT_ALL)
> + goto record_zone;
> +
> + /*
> + * Make sure this zone condition's value is mapped to NVMe ZNS zone
> + * condition value.
> + */
> + if (z->cond > ARRAY_SIZE(blk_zcond_to_nvme_zstate) ||
> + !blk_zcond_to_nvme_zstate[z->cond])
> + return -EINVAL;
> +
> + /* filter zone by condition */
> + if (blk_zcond_to_nvme_zstate[z->cond] != rz->zrasf)
> + return 0;
...since zrasf is already validated, all of the above becomes:
/* filter zones by condition */
if (rz->zrasf != NVME_ZRASF_ZONE_REPORT_ALL &&
z->cond != blk_zrasf_to_zcond[rz->zrasf])
return 0;
> +
> +record_zone:
This label can go away too.
> +
> + entries[rz->nr_zones].zcap = nvmet_sect_to_lba(ns, z->capacity);
> + entries[rz->nr_zones].zslba = nvmet_sect_to_lba(ns, z->start);
> + entries[rz->nr_zones].wp = nvmet_sect_to_lba(ns, z->wp);
> + entries[rz->nr_zones].za = z->reset ? 1 << 2 : 0;
> + entries[rz->nr_zones].zs = z->cond << 4;
> + entries[rz->nr_zones].zt = z->type;
> +
> + rz->nr_zones++;
> +
> + return 0;
> +}
> +
> +unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req)
> +{
> + sector_t total_sect_from_slba;
> +
> + total_sect_from_slba = get_capacity(req->ns->bdev->bd_disk) -
> + nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
> +
> + return total_sect_from_slba / bdev_zone_sectors(req->ns->bdev);
> +}
> +
> +unsigned long get_nr_zones_from_buf(struct nvmet_req *req, u32 out_bufsize)
> +{
> + if (out_bufsize < sizeof(struct nvme_zone_report))
> + return 0;
> +
> + return (out_bufsize - sizeof(struct nvme_zone_report)) /
> + sizeof(struct nvme_zone_descriptor);
> +}
> +
> +unsigned long bufsize_from_zones(unsigned long nr_zones)
> +{
> + return sizeof(struct nvme_zone_report) +
> + (sizeof(struct nvme_zone_descriptor) * nr_zones);
> +}
> +
> +void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req)
> +{
> + unsigned long req_slba_nr_zones = nvmet_req_nr_zones_from_slba(req);
> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
> + u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
> + unsigned long out_nr_zones = get_nr_zones_from_buf(req, out_bufsize);
> + int reported_zones;
> + u32 bufsize;
> + u16 status;
> + struct nvmet_report_zone_data data = {
> + .ns = req->ns,
> + .zrasf = req->cmd->zmr.zrasf
> + };
> +
> + status = nvmet_bdev_validate_zone_mgmt_recv(req);
> + if (status)
> + goto out;
> +
> + /* nothing to report */
> + if (!req_slba_nr_zones) {
> + status = NVME_SC_SUCCESS;
> + goto out;
> + }
> +
> + /*
> + * Allocate Zone descriptors based on the number of zones that fit from
> + * zmr.slba to disk capacity.
> + */
> + bufsize = bufsize_from_zones(req_slba_nr_zones);
> +
> + data.rz = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY | __GFP_ZERO);
> + if (!data.rz) {
> + status = NVME_SC_INTERNAL;
> + goto out;
> + }
> +
> + reported_zones = blkdev_report_zones(req->ns->bdev, sect,
> + req_slba_nr_zones,
> + nvmet_bdev_report_zone_cb, &data);
> + if (reported_zones < 0) {
> + status = NVME_SC_INTERNAL;
> + goto out_free_report_zones;
> + }
> +
> + if (req->cmd->zmr.pr) {
> + /*
> + * When partial bit is set nr_zones == zone desc transferred. So
> + * if captured zones are less than the nr zones that can fit in
> + * out buf, then trim the out_bufsize to avoid extra copy also
> + * update the number of zones that we can transfer in out buf.
> + */
> + if (data.nr_zones < out_nr_zones) {
> + out_bufsize = bufsize_from_zones(data.nr_zones);
> + out_nr_zones = data.nr_zones;
> + }
> + } else {
> + /*
> + * When partial bit is not set nr_zone == zones for which ZSLBA
> + * value is greater than or equal to the ZSLBA value of the zone
> + * specified by the SLBA value in the command and match the
> + * criteria in the Zone Receive Action Specific field ZRASF.
> + */
> + out_nr_zones = data.nr_zones;
> +
> + /* trim out_bufsize to avoid extra copy */
> + if (data.nr_zones < out_nr_zones)
> + out_bufsize = bufsize_from_zones(data.nr_zones);
> + }
> +
> + data.rz->nr_zones = cpu_to_le64(out_nr_zones);
> +
> + status = nvmet_copy_to_sgl(req, 0, data.rz, out_bufsize);
> +
> +out_free_report_zones:
> + kvfree(data.rz);
> +out:
> + nvmet_req_complete(req, status);
> +}
> +
> +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
> +{
> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
> + u16 status = NVME_SC_SUCCESS;
> + u8 zsa = req->cmd->zms.zsa;
> + sector_t nr_sects;
> + enum req_opf op;
> + int ret;
> + const unsigned int zsa_to_op[] = {
> + [NVME_ZONE_OPEN] = REQ_OP_ZONE_OPEN,
> + [NVME_ZONE_CLOSE] = REQ_OP_ZONE_CLOSE,
> + [NVME_ZONE_FINISH] = REQ_OP_ZONE_FINISH,
> + [NVME_ZONE_RESET] = REQ_OP_ZONE_RESET,
> + };
> +
> + if (zsa > ARRAY_SIZE(zsa_to_op)) {
> + status = NVME_SC_INVALID_FIELD;
> + goto out;
> + }
> +
> + op = zsa_to_op[zsa];
> +
> + if (req->cmd->zms.select_all) {
> + sect = 0;
> + nr_sects = get_capacity(req->ns->bdev->bd_disk);
> + } else {
> + sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
> + nr_sects = bdev_zone_sectors(req->ns->bdev);
> + }
> +
> + ret = blkdev_zone_mgmt(req->ns->bdev, op, sect, nr_sects, GFP_KERNEL);
> + if (ret)
> + status = NVME_SC_INTERNAL;
This one is a little odd with regard to the ALL bit. In the block layer, only
zone reset all is supported, which means that the above will not do
open/close/finish all. Only reset all will work. Open/close/finish all need to
be emulated here: do a full report zone and, based on the zone condition and op,
call blkdev_zone_mgmt() for each zone that needs an operation. Ideally,
blkdev_zone_mgmt() should be called in the report cb, but I am not sure whether
that might create some context problems...
> +out:
> + nvmet_req_complete(req, status);
> +}
> +
> +static void nvmet_bdev_zone_append_bio_done(struct bio *bio)
> +{
> + struct nvmet_req *req = bio->bi_private;
> +
> + req->cqe->result.u64 = nvmet_sect_to_lba(req->ns,
> + bio->bi_iter.bi_sector);
You should do this only if status is success, no ?
> + nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
> + if (bio != &req->b.inline_bio)
> + bio_put(bio);
> +}
> +
> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
> +{
> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
> + u16 status = NVME_SC_SUCCESS;
> + unsigned int total_len = 0;
> + struct scatterlist *sg;
> + int ret = 0, sg_cnt;
> + struct bio *bio;
> +
> + /* Request is completed on len mismatch in nvmet_check_transter_len() */
> + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
> + return;
> +
> + if (!req->sg_cnt) {
> + nvmet_req_complete(req, 0);
isn't this an error ? (not entirely sure)
> + return;
> + }
> +
> + if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
> + bio = &req->b.inline_bio;
> + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
> + } else {
> + bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
> + }
> +
> + bio_set_dev(bio, req->ns->bdev);
> + bio->bi_iter.bi_sector = sect;
> + bio->bi_private = req;
> + bio->bi_end_io = nvmet_bdev_zone_append_bio_done;
> + bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
> + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
> + bio->bi_opf |= REQ_FUA;
> +
> + for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
> + struct page *p = sg_page(sg);
> + unsigned int l = sg->length;
> + unsigned int o = sg->offset;
> +
> + ret = bio_add_zone_append_page(bio, p, l, o);
> + if (ret != sg->length) {
> + status = NVME_SC_INTERNAL;
> + goto out_bio_put;
> + }
> +
> + total_len += sg->length;
> + }
> +
> + if (total_len != nvmet_rw_data_len(req)) {
> + status = NVME_SC_INTERNAL | NVME_SC_DNR;
> + goto out_bio_put;
> + }
> +
> + submit_bio(bio);
> + return;
> +
> +out_bio_put:
> + if (bio != &req->b.inline_bio)
> + bio_put(bio);
> + nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL : status);
> +}
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index c7ba83144d52..cb1197f1cfed 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -944,6 +944,13 @@ struct nvme_zone_mgmt_recv_cmd {
> enum {
> NVME_ZRA_ZONE_REPORT = 0,
> NVME_ZRASF_ZONE_REPORT_ALL = 0,
> + NVME_ZRASF_ZONE_STATE_EMPTY = 0x01,
> + NVME_ZRASF_ZONE_STATE_IMP_OPEN = 0x02,
> + NVME_ZRASF_ZONE_STATE_EXP_OPEN = 0x03,
> + NVME_ZRASF_ZONE_STATE_CLOSED = 0x04,
> + NVME_ZRASF_ZONE_STATE_READONLY = 0x05,
> + NVME_ZRASF_ZONE_STATE_FULL = 0x06,
> + NVME_ZRASF_ZONE_STATE_OFFLINE = 0x07,
> NVME_REPORT_ZONE_PARTIAL = 1,
> };
>
>
--
Damien Le Moal
Western Digital Research
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-08 8:01 ` Damien Le Moal
@ 2021-04-08 22:06 ` Chaitanya Kulkarni
2021-04-08 22:33 ` Damien Le Moal
0 siblings, 1 reply; 17+ messages in thread
From: Chaitanya Kulkarni @ 2021-04-08 22:06 UTC (permalink / raw)
To: Damien Le Moal; +Cc: linux-nvme, hch
On 4/8/21 01:01, Damien Le Moal wrote:
>> +struct nvmet_report_zone_data {
>> + struct nvme_zone_report *rz;
>> + struct nvmet_ns *ns;
>> + u64 nr_zones;
>> + u8 zrasf;
>> +};
>> +
>> +static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d)
>> +{
>> + struct nvmet_report_zone_data *rz = d;
>> + struct nvme_zone_descriptor *entries = rz->rz->entries;
>> + struct nvmet_ns *ns = rz->ns;
>> + static const unsigned int blk_zcond_to_nvme_zstate[] = {
>> + [BLK_ZONE_COND_EMPTY] = NVME_ZRASF_ZONE_STATE_EMPTY,
>> + [BLK_ZONE_COND_IMP_OPEN] = NVME_ZRASF_ZONE_STATE_IMP_OPEN,
>> + [BLK_ZONE_COND_EXP_OPEN] = NVME_ZRASF_ZONE_STATE_EXP_OPEN,
>> + [BLK_ZONE_COND_CLOSED] = NVME_ZRASF_ZONE_STATE_CLOSED,
>> + [BLK_ZONE_COND_READONLY] = NVME_ZRASF_ZONE_STATE_READONLY,
>> + [BLK_ZONE_COND_FULL] = NVME_ZRASF_ZONE_STATE_FULL,
>> + [BLK_ZONE_COND_OFFLINE] = NVME_ZRASF_ZONE_STATE_OFFLINE,
>> + };
> This creates a sparse array bigger than it needs to be. If you reverse here and
> use the ZRASF values as indexes (blk_zrasf_to_zcond[]), the array will shrink
> and not be sparse, then... See below...
>
>> +
>> + if (rz->zrasf == NVME_ZRASF_ZONE_REPORT_ALL)
>> + goto record_zone;
>> +
>> + /*
>> + * Make sure this zone condition's value is mapped to NVMe ZNS zone
>> + * condition value.
>> + */
>> + if (z->cond > ARRAY_SIZE(blk_zcond_to_nvme_zstate) ||
>> + !blk_zcond_to_nvme_zstate[z->cond])
>> + return -EINVAL;
>> +
>> + /* filter zone by condition */
>> + if (blk_zcond_to_nvme_zstate[z->cond] != rz->zrasf)
>> + return 0;
> ...since zrasf is already validated, all of the above becomes:
>
> /* filter zones by condition */
> if (rz->zrasf != NVME_ZRASF_ZONE_REPORT_ALL &&
> z->cond != blk_zrasf_to_zcond[rz->zrasf])
> return 0;
Since you are okay with this, I will make this change, except that the array
name should be nvme_zrasf_to_blk_zcond.
>> +
>> +record_zone:
> This label can go away too.
Yes.
>
>> +
>> + entries[rz->nr_zones].zcap = nvmet_sect_to_lba(ns, z->capacity);
>> + entries[rz->nr_zones].zslba = nvmet_sect_to_lba(ns, z->start);
>> + entries[rz->nr_zones].wp = nvmet_sect_to_lba(ns, z->wp);
>> + entries[rz->nr_zones].za = z->reset ? 1 << 2 : 0;
>> + entries[rz->nr_zones].zs = z->cond << 4;
>> + entries[rz->nr_zones].zt = z->type;
>> +
>> + rz->nr_zones++;
>> +
>> + return 0;
>> +}
>> +
>> +unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req)
[...]
>> +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
>> +{
>> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
>> + u16 status = NVME_SC_SUCCESS;
>> + u8 zsa = req->cmd->zms.zsa;
>> + sector_t nr_sects;
>> + enum req_opf op;
>> + int ret;
>> + const unsigned int zsa_to_op[] = {
>> + [NVME_ZONE_OPEN] = REQ_OP_ZONE_OPEN,
>> + [NVME_ZONE_CLOSE] = REQ_OP_ZONE_CLOSE,
>> + [NVME_ZONE_FINISH] = REQ_OP_ZONE_FINISH,
>> + [NVME_ZONE_RESET] = REQ_OP_ZONE_RESET,
>> + };
>> +
>> + if (zsa > ARRAY_SIZE(zsa_to_op)) {
>> + status = NVME_SC_INVALID_FIELD;
>> + goto out;
>> + }
>> +
>> + op = zsa_to_op[zsa];
>> +
>> + if (req->cmd->zms.select_all) {
>> + sect = 0;
>> + nr_sects = get_capacity(req->ns->bdev->bd_disk);
>> + } else {
>> + sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
>> + nr_sects = bdev_zone_sectors(req->ns->bdev);
>> + }
>> +
>> + ret = blkdev_zone_mgmt(req->ns->bdev, op, sect, nr_sects, GFP_KERNEL);
>> + if (ret)
>> + status = NVME_SC_INTERNAL;
> This one is a little odd with regard to the ALL bit. In the block layer, only
> zone reset all is supported, which mean that the above will not do
> open/close/finish all. Only reset all will work. Open/close/finish all need to
> be emulated here: do a full report zone and based on the zone condition and op,
> call blkdev_zone_mgmt() for each zone that need a operation. Ideally,
> blkdev_zone_mgmt() should be called in the report cb, but I am not sure if that
> cannot create some context problems...
Please see the explanation at the end [1].
>> +out:
>> + nvmet_req_complete(req, status);
>> +}
>> +
>> +static void nvmet_bdev_zone_append_bio_done(struct bio *bio)
>> +{
>> + struct nvmet_req *req = bio->bi_private;
>> +
>> + req->cqe->result.u64 = nvmet_sect_to_lba(req->ns,
>> + bio->bi_iter.bi_sector);
> You should do this only if status is success, no ?
I'll add a BLK_STS_OK check before the assignment.
>> + nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
>> + if (bio != &req->b.inline_bio)
>> + bio_put(bio);
>> +}
>> +
>> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
>> +{
>> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
>> + u16 status = NVME_SC_SUCCESS;
>> + unsigned int total_len = 0;
>> + struct scatterlist *sg;
>> + int ret = 0, sg_cnt;
>> + struct bio *bio;
>> +
>> + /* Request is completed on len mismatch in nvmet_check_transter_len() */
>> + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
>> + return;
>> +
>> + if (!req->sg_cnt) {
>> + nvmet_req_complete(req, 0);
> isn't this an error ? (not entirely sure)
If the transfer length matches the rw length and it is zero, we cannot send any
data, so we just complete the command.
>
>> + return;
>> + }
>> +
>> + if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
>> + bio = &req->b.inline_bio;
>> + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
>>
[1] Zone Mgmt send emulation with the all bit, comment:
I ran the test where all the zones are set to the EMPTY state right after
creating a controller on the host (initiator), see [A]. With the passthru
command with the all bit set, I was able to change the state of all the zones,
see [B]. So I did not understand your comment about how the all bit fails to
change the state of all zones with the passthru command?
A. All zones are empty.
# nvme zns report-zones /dev/nvme1n1
nr_zones: 32
SLBA: 0x0 WP: 0x0 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x4000 WP: 0x4000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x8000 WP: 0x8000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0xc000 WP: 0xc000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x10000 WP: 0x10000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x14000 WP: 0x14000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x18000 WP: 0x18000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x1c000 WP: 0x1c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x20000 WP: 0x20000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x24000 WP: 0x24000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x28000 WP: 0x28000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x2c000 WP: 0x2c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x30000 WP: 0x30000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x34000 WP: 0x34000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x38000 WP: 0x38000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x3c000 WP: 0x3c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x40000 WP: 0x40000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x44000 WP: 0x44000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x48000 WP: 0x48000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x4c000 WP: 0x4c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x50000 WP: 0x50000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x54000 WP: 0x54000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x58000 WP: 0x58000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x5c000 WP: 0x5c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x60000 WP: 0x60000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x64000 WP: 0x64000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x68000 WP: 0x68000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x6c000 WP: 0x6c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x70000 WP: 0x70000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x74000 WP: 0x74000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x78000 WP: 0x78000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x7c000 WP: 0x7c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
B. Now we send the passthru command with the all bit set for
zsa values RESET 0x04, OPEN 0x03, CLOSE 0x01, FINISH 0x02,
in the same order as above. All the zone states are changed.
# for zsa in 4 3 1 2 ; do
>nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x${zsa}
>nvme zns report-zones /dev/nvme1n1
>done
+ nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x4
zone-mgmt-send: Success, action:4 zone:0 all:1 nsid:1
+ nvme zns report-zones /dev/nvme1n1
nr_zones: 32
SLBA: 0x0 WP: 0x0 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x4000 WP: 0x4000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x8000 WP: 0x8000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0xc000 WP: 0xc000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x10000 WP: 0x10000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x14000 WP: 0x14000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x18000 WP: 0x18000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x1c000 WP: 0x1c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x20000 WP: 0x20000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x24000 WP: 0x24000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x28000 WP: 0x28000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x2c000 WP: 0x2c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x30000 WP: 0x30000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x34000 WP: 0x34000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x38000 WP: 0x38000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x3c000 WP: 0x3c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x40000 WP: 0x40000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x44000 WP: 0x44000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x48000 WP: 0x48000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x4c000 WP: 0x4c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x50000 WP: 0x50000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x54000 WP: 0x54000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x58000 WP: 0x58000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x5c000 WP: 0x5c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x60000 WP: 0x60000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x64000 WP: 0x64000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x68000 WP: 0x68000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x6c000 WP: 0x6c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x70000 WP: 0x70000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x74000 WP: 0x74000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x78000 WP: 0x78000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x7c000 WP: 0x7c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
+ for i in 4 3 1 2
+ nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x3
zone-mgmt-send: Success, action:3 zone:0 all:1 nsid:1
+ nvme zns report-zones /dev/nvme1n1
nr_zones: 32
SLBA: 0x0 WP: 0x0 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x4000 WP: 0x4000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x8000 WP: 0x8000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0xc000 WP: 0xc000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x10000 WP: 0x10000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x14000 WP: 0x14000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x18000 WP: 0x18000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x1c000 WP: 0x1c000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x20000 WP: 0x20000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x24000 WP: 0x24000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x28000 WP: 0x28000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x2c000 WP: 0x2c000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x30000 WP: 0x30000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x34000 WP: 0x34000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x38000 WP: 0x38000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x3c000 WP: 0x3c000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x40000 WP: 0x40000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x44000 WP: 0x44000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x48000 WP: 0x48000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x4c000 WP: 0x4c000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x50000 WP: 0x50000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x54000 WP: 0x54000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x58000 WP: 0x58000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x5c000 WP: 0x5c000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x60000 WP: 0x60000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x64000 WP: 0x64000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x68000 WP: 0x68000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x6c000 WP: 0x6c000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x70000 WP: 0x70000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x74000 WP: 0x74000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x78000 WP: 0x78000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
SLBA: 0x7c000 WP: 0x7c000 Cap: 0x4000 State: EXP_OPENED Type:
SEQWRITE_REQ Attrs: 0x0
+ for i in 4 3 1 2
+ nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x1
zone-mgmt-send: Success, action:1 zone:0 all:1 nsid:1
+ nvme zns report-zones /dev/nvme1n1
nr_zones: 32
SLBA: 0x0 WP: 0x0 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x4000 WP: 0x4000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x8000 WP: 0x8000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0xc000 WP: 0xc000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x10000 WP: 0x10000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x14000 WP: 0x14000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x18000 WP: 0x18000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x1c000 WP: 0x1c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x20000 WP: 0x20000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x24000 WP: 0x24000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x28000 WP: 0x28000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x2c000 WP: 0x2c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x30000 WP: 0x30000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x34000 WP: 0x34000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x38000 WP: 0x38000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x3c000 WP: 0x3c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x40000 WP: 0x40000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x44000 WP: 0x44000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x48000 WP: 0x48000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x4c000 WP: 0x4c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x50000 WP: 0x50000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x54000 WP: 0x54000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x58000 WP: 0x58000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x5c000 WP: 0x5c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x60000 WP: 0x60000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x64000 WP: 0x64000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x68000 WP: 0x68000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x6c000 WP: 0x6c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x70000 WP: 0x70000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x74000 WP: 0x74000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x78000 WP: 0x78000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x7c000 WP: 0x7c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
Attrs: 0x0
+ for i in 4 3 1 2
+ nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x2
zone-mgmt-send: Success, action:2 zone:0 all:1 nsid:1
+ nvme zns report-zones /dev/nvme1n1
nr_zones: 32
SLBA: 0x0 WP: 0x4000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x4000 WP: 0x8000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x8000 WP: 0xc000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0xc000 WP: 0x10000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x10000 WP: 0x14000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x14000 WP: 0x18000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x18000 WP: 0x1c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x1c000 WP: 0x20000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x20000 WP: 0x24000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x24000 WP: 0x28000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x28000 WP: 0x2c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x2c000 WP: 0x30000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x30000 WP: 0x34000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x34000 WP: 0x38000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x38000 WP: 0x3c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x3c000 WP: 0x40000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x40000 WP: 0x44000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x44000 WP: 0x48000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x48000 WP: 0x4c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x4c000 WP: 0x50000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x50000 WP: 0x54000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x54000 WP: 0x58000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x58000 WP: 0x5c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x5c000 WP: 0x60000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x60000 WP: 0x64000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x64000 WP: 0x68000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x68000 WP: 0x6c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x6c000 WP: 0x70000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x70000 WP: 0x74000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x74000 WP: 0x78000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x78000 WP: 0x7c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
SLBA: 0x7c000 WP: 0x80000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
Attrs: 0x0
+ set +x
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-08 22:06 ` Chaitanya Kulkarni
@ 2021-04-08 22:33 ` Damien Le Moal
0 siblings, 0 replies; 17+ messages in thread
From: Damien Le Moal @ 2021-04-08 22:33 UTC (permalink / raw)
To: Chaitanya Kulkarni; +Cc: linux-nvme, hch
On 2021/04/09 7:06, Chaitanya Kulkarni wrote:
> On 4/8/21 01:01, Damien Le Moal wrote:
>>> +struct nvmet_report_zone_data {
>>> + struct nvme_zone_report *rz;
>>> + struct nvmet_ns *ns;
>>> + u64 nr_zones;
>>> + u8 zrasf;
>>> +};
>>> +
>>> +static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d)
>>> +{
>>> + struct nvmet_report_zone_data *rz = d;
>>> + struct nvme_zone_descriptor *entries = rz->rz->entries;
>>> + struct nvmet_ns *ns = rz->ns;
>>> + static const unsigned int blk_zcond_to_nvme_zstate[] = {
>>> + [BLK_ZONE_COND_EMPTY] = NVME_ZRASF_ZONE_STATE_EMPTY,
>>> + [BLK_ZONE_COND_IMP_OPEN] = NVME_ZRASF_ZONE_STATE_IMP_OPEN,
>>> + [BLK_ZONE_COND_EXP_OPEN] = NVME_ZRASF_ZONE_STATE_EXP_OPEN,
>>> + [BLK_ZONE_COND_CLOSED] = NVME_ZRASF_ZONE_STATE_CLOSED,
>>> + [BLK_ZONE_COND_READONLY] = NVME_ZRASF_ZONE_STATE_READONLY,
>>> + [BLK_ZONE_COND_FULL] = NVME_ZRASF_ZONE_STATE_FULL,
>>> + [BLK_ZONE_COND_OFFLINE] = NVME_ZRASF_ZONE_STATE_OFFLINE,
>>> + };
>> This creates a sparse array bigger than it needs to be. If you reverse here and
>> use the ZRASF values as indexes (blk_zrasf_to_zcond[]), the array will shrink
>> and not be sparse, then... See below...
>>
>>> +
>>> + if (rz->zrasf == NVME_ZRASF_ZONE_REPORT_ALL)
>>> + goto record_zone;
>>> +
>>> + /*
>>> + * Make sure this zone condition's value is mapped to NVMe ZNS zone
>>> + * condition value.
>>> + */
>>> + if (z->cond > ARRAY_SIZE(blk_zcond_to_nvme_zstate) ||
>>> + !blk_zcond_to_nvme_zstate[z->cond])
>>> + return -EINVAL;
>>> +
>>> + /* filter zone by condition */
>>> + if (blk_zcond_to_nvme_zstate[z->cond] != rz->zrasf)
>>> + return 0;
>> ...since zrasf is already validated, all of the above becomes:
>>
>> /* filter zones by condition */
>> if (rz->zrasf != NVME_ZRASF_ZONE_REPORT_ALL &&
>> z->cond != blk_zrasf_to_zcond[rz->zrasf])
>> return 0;
>
> Since you are okay with this will make this change, except the array
> name should be:- nvme_zrasf_to_blk_zcond.
>
>>> +
>>> +record_zone:
>> This label can go away too.
>
> Yes.
>
>>
>>> +
>>> + entries[rz->nr_zones].zcap = nvmet_sect_to_lba(ns, z->capacity);
>>> + entries[rz->nr_zones].zslba = nvmet_sect_to_lba(ns, z->start);
>>> + entries[rz->nr_zones].wp = nvmet_sect_to_lba(ns, z->wp);
>>> + entries[rz->nr_zones].za = z->reset ? 1 << 2 : 0;
>>> + entries[rz->nr_zones].zs = z->cond << 4;
>>> + entries[rz->nr_zones].zt = z->type;
>>> +
>>> + rz->nr_zones++;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req)
>
> [...]
>
>>> +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
>>> +{
>>> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
>>> + u16 status = NVME_SC_SUCCESS;
>>> + u8 zsa = req->cmd->zms.zsa;
>>> + sector_t nr_sects;
>>> + enum req_opf op;
>>> + int ret;
>>> + const unsigned int zsa_to_op[] = {
>>> + [NVME_ZONE_OPEN] = REQ_OP_ZONE_OPEN,
>>> + [NVME_ZONE_CLOSE] = REQ_OP_ZONE_CLOSE,
>>> + [NVME_ZONE_FINISH] = REQ_OP_ZONE_FINISH,
>>> + [NVME_ZONE_RESET] = REQ_OP_ZONE_RESET,
>>> + };
>>> +
>>> + if (zsa > ARRAY_SIZE(zsa_to_op)) {
>>> + status = NVME_SC_INVALID_FIELD;
>>> + goto out;
>>> + }
>>> +
>>> + op = zsa_to_op[zsa];
>>> +
>>> + if (req->cmd->zms.select_all) {
>>> + sect = 0;
>>> + nr_sects = get_capacity(req->ns->bdev->bd_disk);
>>> + } else {
>>> + sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
>>> + nr_sects = bdev_zone_sectors(req->ns->bdev);
>>> + }
>>> +
>>> + ret = blkdev_zone_mgmt(req->ns->bdev, op, sect, nr_sects, GFP_KERNEL);
>>> + if (ret)
>>> + status = NVME_SC_INTERNAL;
>> This one is a little odd with regard to the ALL bit. In the block layer, only
>> zone reset all is supported, which mean that the above will not do
>> open/close/finish all. Only reset all will work. Open/close/finish all need to
>> be emulated here: do a full report zone and based on the zone condition and op,
>> call blkdev_zone_mgmt() for each zone that need a operation. Ideally,
>> blkdev_zone_mgmt() should be called in the report cb, but I am not sure if that
>> cannot create some context problems...
>
> Please see the explanation at the end [1].
>
>>> +out:
>>> + nvmet_req_complete(req, status);
>>> +}
>>> +
>>> +static void nvmet_bdev_zone_append_bio_done(struct bio *bio)
>>> +{
>>> + struct nvmet_req *req = bio->bi_private;
>>> +
>>> + req->cqe->result.u64 = nvmet_sect_to_lba(req->ns,
>>> + bio->bi_iter.bi_sector);
>> You should do this only if status is success, no ?
>
> I'll add a BLK_STS_OK check before the assignment.
>
>>> + nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
>>> + if (bio != &req->b.inline_bio)
>>> + bio_put(bio);
>>> +}
>>> +
>>> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
>>> +{
>>> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
>>> + u16 status = NVME_SC_SUCCESS;
>>> + unsigned int total_len = 0;
>>> + struct scatterlist *sg;
>>> + int ret = 0, sg_cnt;
>>> + struct bio *bio;
>>> +
>>> + /* Request is completed on len mismatch in nvmet_check_transter_len() */
>>> + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
>>> + return;
>>> +
>>> + if (!req->sg_cnt) {
>>> + nvmet_req_complete(req, 0);
>> isn't this an error ? (not entirely sure)
>
> if transfer len matches the rw len and it is zero we cannot send any data
> so just complete the command.
That I understand. But I have not checked if len == 0 is allowed by the
standard. Did you check? Looking at it now, zone append NLB field is 0 based,
so I do not see how the command can ever have len == 0. That seems to me like an
incorrect command that should be aborted.
>
>>
>>> + return;
>>> + }
>>> +
>>> + if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
>>> + bio = &req->b.inline_bio;
>>> + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
>>>
>
> [1] Zone Mgmt send emulation with all bit comment :-
>
> I ran the test where all the zones are set to the EMPTY right after creating
> a controller on the host (initiator), see [A]. With the passthru command
> with
> all bit set, I was able to change the state of all the zones, see [B].
>
> So I did not understand your comment about how all bit is failing to
> change the
> state of all zones with passthru commnd ?
The behavior of the management commands is different based on the ALL bit being
set or not. E.g., doing a close zone operation on all zones of the device with
the ALL bit not set is not equivalent to doing a single close zone operation
with the ALL bit set:
1) With the ALL bit not set:
a) imp and exp open zones will be transitioned to close
b) the command will fail for empty, full, read-only and offline zones
2) With the all bit set:
a) All imp and exp open zones will be transitioned to close
b) All other zones will be ignored
1.b and 2.b are the difference. That is why you need filtering and cannot simply
call blkdev_zone_mgmt() over the entire capacity. Doing so will result in (1)
instead of (2). Similar differences exist for open and finish zones. I.e. Finish
all will ignore empty zones whereas a finish for a single zone will act on an
empty zone. Please check the specs to see these differences. They are clearly
stated in section 4.3.
>
>
> A. All zones are empty.
>
> # nvme zns report-zones /dev/nvme1n1
> nr_zones: 32
> SLBA: 0x0 WP: 0x0 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x4000 WP: 0x4000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x8000 WP: 0x8000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0xc000 WP: 0xc000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x10000 WP: 0x10000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x14000 WP: 0x14000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x18000 WP: 0x18000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x1c000 WP: 0x1c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x20000 WP: 0x20000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x24000 WP: 0x24000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x28000 WP: 0x28000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x2c000 WP: 0x2c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x30000 WP: 0x30000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x34000 WP: 0x34000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x38000 WP: 0x38000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x3c000 WP: 0x3c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x40000 WP: 0x40000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x44000 WP: 0x44000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x48000 WP: 0x48000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x4c000 WP: 0x4c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x50000 WP: 0x50000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x54000 WP: 0x54000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x58000 WP: 0x58000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x5c000 WP: 0x5c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x60000 WP: 0x60000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x64000 WP: 0x64000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x68000 WP: 0x68000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x6c000 WP: 0x6c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x70000 WP: 0x70000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x74000 WP: 0x74000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x78000 WP: 0x78000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x7c000 WP: 0x7c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
>
> B. Now we send the passthru command with all bit set for
> zsa values RESET 0x04, OPEN 0x03, CLOSE 0x01, FINISH 0x02
> in the same oder as above. All the zones state are changed.
>
> # for zsa in 4 3 1 2 ; do
>> nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x${zsa}
>> nvme zns report-zones /dev/nvme1n1
>> done
>
> + nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x4
> zone-mgmt-send: Success, action:4 zone:0 all:1 nsid:1
> + nvme zns report-zones /dev/nvme1n1
> nr_zones: 32
> SLBA: 0x0 WP: 0x0 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x4000 WP: 0x4000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x8000 WP: 0x8000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0xc000 WP: 0xc000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x10000 WP: 0x10000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x14000 WP: 0x14000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x18000 WP: 0x18000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x1c000 WP: 0x1c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x20000 WP: 0x20000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x24000 WP: 0x24000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x28000 WP: 0x28000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x2c000 WP: 0x2c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x30000 WP: 0x30000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x34000 WP: 0x34000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x38000 WP: 0x38000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x3c000 WP: 0x3c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x40000 WP: 0x40000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x44000 WP: 0x44000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x48000 WP: 0x48000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x4c000 WP: 0x4c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x50000 WP: 0x50000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x54000 WP: 0x54000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x58000 WP: 0x58000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x5c000 WP: 0x5c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x60000 WP: 0x60000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x64000 WP: 0x64000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x68000 WP: 0x68000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x6c000 WP: 0x6c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x70000 WP: 0x70000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x74000 WP: 0x74000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x78000 WP: 0x78000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x7c000 WP: 0x7c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> + for i in 4 3 1 2
> + nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x3
> zone-mgmt-send: Success, action:3 zone:0 all:1 nsid:1
> + nvme zns report-zones /dev/nvme1n1
> nr_zones: 32
> SLBA: 0x0 WP: 0x0 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x4000 WP: 0x4000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x8000 WP: 0x8000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0xc000 WP: 0xc000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x10000 WP: 0x10000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x14000 WP: 0x14000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x18000 WP: 0x18000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x1c000 WP: 0x1c000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x20000 WP: 0x20000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x24000 WP: 0x24000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x28000 WP: 0x28000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x2c000 WP: 0x2c000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x30000 WP: 0x30000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x34000 WP: 0x34000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x38000 WP: 0x38000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x3c000 WP: 0x3c000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x40000 WP: 0x40000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x44000 WP: 0x44000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x48000 WP: 0x48000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x4c000 WP: 0x4c000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x50000 WP: 0x50000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x54000 WP: 0x54000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x58000 WP: 0x58000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x5c000 WP: 0x5c000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x60000 WP: 0x60000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x64000 WP: 0x64000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x68000 WP: 0x68000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x6c000 WP: 0x6c000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x70000 WP: 0x70000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x74000 WP: 0x74000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x78000 WP: 0x78000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> SLBA: 0x7c000 WP: 0x7c000 Cap: 0x4000 State: EXP_OPENED Type:
> SEQWRITE_REQ Attrs: 0x0
> + for i in 4 3 1 2
> + nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x1
> zone-mgmt-send: Success, action:1 zone:0 all:1 nsid:1
> + nvme zns report-zones /dev/nvme1n1
> nr_zones: 32
> SLBA: 0x0 WP: 0x0 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x4000 WP: 0x4000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x8000 WP: 0x8000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0xc000 WP: 0xc000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x10000 WP: 0x10000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x14000 WP: 0x14000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x18000 WP: 0x18000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x1c000 WP: 0x1c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x20000 WP: 0x20000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x24000 WP: 0x24000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x28000 WP: 0x28000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x2c000 WP: 0x2c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x30000 WP: 0x30000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x34000 WP: 0x34000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x38000 WP: 0x38000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x3c000 WP: 0x3c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x40000 WP: 0x40000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x44000 WP: 0x44000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x48000 WP: 0x48000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x4c000 WP: 0x4c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x50000 WP: 0x50000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x54000 WP: 0x54000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x58000 WP: 0x58000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x5c000 WP: 0x5c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x60000 WP: 0x60000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x64000 WP: 0x64000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x68000 WP: 0x68000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x6c000 WP: 0x6c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x70000 WP: 0x70000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x74000 WP: 0x74000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x78000 WP: 0x78000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x7c000 WP: 0x7c000 Cap: 0x4000 State: EMPTY Type: SEQWRITE_REQ
> Attrs: 0x0
> + for i in 4 3 1 2
> + nvme zns zone-mgmt-send /dev/nvme1n1 -a -zsa=0x2
> zone-mgmt-send: Success, action:2 zone:0 all:1 nsid:1
> + nvme zns report-zones /dev/nvme1n1
> nr_zones: 32
> SLBA: 0x0 WP: 0x4000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
This is incorrect. Finish all ignores empty zones.
> SLBA: 0x4000 WP: 0x8000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x8000 WP: 0xc000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0xc000 WP: 0x10000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x10000 WP: 0x14000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x14000 WP: 0x18000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x18000 WP: 0x1c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x1c000 WP: 0x20000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x20000 WP: 0x24000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x24000 WP: 0x28000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x28000 WP: 0x2c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x2c000 WP: 0x30000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x30000 WP: 0x34000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x34000 WP: 0x38000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x38000 WP: 0x3c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x3c000 WP: 0x40000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x40000 WP: 0x44000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x44000 WP: 0x48000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x48000 WP: 0x4c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x4c000 WP: 0x50000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x50000 WP: 0x54000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x54000 WP: 0x58000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x58000 WP: 0x5c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x5c000 WP: 0x60000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x60000 WP: 0x64000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x64000 WP: 0x68000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x68000 WP: 0x6c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x6c000 WP: 0x70000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x70000 WP: 0x74000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x74000 WP: 0x78000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x78000 WP: 0x7c000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> SLBA: 0x7c000 WP: 0x80000 Cap: 0x4000 State: FULL Type: SEQWRITE_REQ
> Attrs: 0x0
> + set +x
>
>
>
--
Damien Le Moal
Western Digital Research
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support
2021-04-08 0:14 ` [PATCH V13 2/4] nvmet: add ZBD over ZNS backend support Chaitanya Kulkarni
2021-04-08 7:23 ` Christoph Hellwig
2021-04-08 8:01 ` Damien Le Moal
@ 2021-04-08 8:15 ` Damien Le Moal
2 siblings, 0 replies; 17+ messages in thread
From: Damien Le Moal @ 2021-04-08 8:15 UTC (permalink / raw)
To: Chaitanya Kulkarni, linux-nvme; +Cc: hch
On 2021/04/08 9:14, Chaitanya Kulkarni wrote:
> NVMe TP 4053 – Zoned Namespaces (ZNS) allows host software to
> communicate with a non-volatile memory subsystem using zones for NVMe
> protocol-based controllers. NVMeOF already support the ZNS NVMe
> Protocol compliant devices on the target in the passthru mode. There
> are Generic zoned block devices like Shingled Magnetic Recording (SMR)
> HDDs that are not based on the NVMe protocol.
>
> This patch adds ZNS backend to support the ZBDs for NVMeOF target.
>
> This support includes implementing the new command set NVME_CSI_ZNS,
> adding different command handlers for ZNS command set such as NVMe
> Identify Controller, NVMe Identify Namespace, NVMe Zone Append,
> NVMe Zone Management Send and NVMe Zone Management Receive.
>
> With the new command set identifier, we also update the target command
> effects logs to reflect the ZNS compliant commands.
>
> Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
> ---
> drivers/nvme/target/Makefile | 1 +
> drivers/nvme/target/admin-cmd.c | 27 ++
> drivers/nvme/target/io-cmd-bdev.c | 35 ++-
> drivers/nvme/target/nvmet.h | 47 +++
> drivers/nvme/target/zns.c | 477 ++++++++++++++++++++++++++++++
> include/linux/nvme.h | 7 +
> 6 files changed, 585 insertions(+), 9 deletions(-)
> create mode 100644 drivers/nvme/target/zns.c
>
> diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
> index ebf91fc4c72e..9837e580fa7e 100644
> --- a/drivers/nvme/target/Makefile
> +++ b/drivers/nvme/target/Makefile
> @@ -12,6 +12,7 @@ obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
> nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
> discovery.o io-cmd-file.o io-cmd-bdev.o
> nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
> +nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
> nvme-loop-y += loop.o
> nvmet-rdma-y += rdma.o
> nvmet-fc-y += fc.o
> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
> index 176c8593d341..bf4876df624a 100644
> --- a/drivers/nvme/target/admin-cmd.c
> +++ b/drivers/nvme/target/admin-cmd.c
> @@ -179,6 +179,13 @@ static void nvmet_set_csi_nvm_effects(struct nvme_effects_log *log)
> log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0);
> }
>
> +static void nvmet_set_csi_zns_effects(struct nvme_effects_log *log)
> +{
> + log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0);
> + log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0);
> + log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0);
> +}
> +
> static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
> {
> struct nvme_effects_log *log;
> @@ -194,6 +201,15 @@ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
> case NVME_CSI_NVM:
> nvmet_set_csi_nvm_effects(log);
> break;
> + case NVME_CSI_ZNS:
> + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
> + status = NVME_SC_INVALID_IO_CMD_SET;
> + goto free;
> + }
> +
> + nvmet_set_csi_nvm_effects(log);
> + nvmet_set_csi_zns_effects(log);
> + break;
> default:
> status = NVME_SC_INVALID_LOG_PAGE;
> goto free;
> @@ -630,6 +646,13 @@ static u16 nvmet_execute_identify_desclist_csi(struct nvmet_req *req, off_t *o)
> {
> switch (req->ns->csi) {
> case NVME_CSI_NVM:
> + return nvmet_copy_ns_identifier(req, NVME_NIDT_CSI,
> + NVME_NIDT_CSI_LEN,
> + &req->ns->csi, o);
> + case NVME_CSI_ZNS:
> + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
> + return NVME_SC_INVALID_IO_CMD_SET;
> +
> return nvmet_copy_ns_identifier(req, NVME_NIDT_CSI,
> NVME_NIDT_CSI_LEN,
> &req->ns->csi, o);
> @@ -682,8 +705,12 @@ static void nvmet_execute_identify(struct nvmet_req *req)
> switch (req->cmd->identify.cns) {
> case NVME_ID_CNS_NS:
> return nvmet_execute_identify_ns(req);
> + case NVME_ID_CNS_CS_NS:
> + return nvmet_execute_identify_cns_cs_ns(req);
> case NVME_ID_CNS_CTRL:
> return nvmet_execute_identify_ctrl(req);
> + case NVME_ID_CNS_CS_CTRL:
> + return nvmet_execute_identify_cns_cs_ctrl(req);
> case NVME_ID_CNS_NS_ACTIVE_LIST:
> return nvmet_execute_identify_nslist(req);
> case NVME_ID_CNS_NS_DESC_LIST:
> diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
> index 9a8b3726a37c..1e54e7478735 100644
> --- a/drivers/nvme/target/io-cmd-bdev.c
> +++ b/drivers/nvme/target/io-cmd-bdev.c
> @@ -63,6 +63,14 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
> }
> }
>
> +void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
> +{
> + if (ns->bdev) {
> + blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
> + ns->bdev = NULL;
> + }
> +}
> +
> int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
> {
> int ret;
> @@ -86,15 +94,15 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
> if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
> nvmet_bdev_ns_enable_integrity(ns);
>
> - return 0;
> -}
> -
> -void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
> -{
> - if (ns->bdev) {
> - blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
> - ns->bdev = NULL;
> + if (bdev_is_zoned(ns->bdev)) {
> + if (!nvmet_bdev_zns_enable(ns)) {
> + nvmet_bdev_ns_disable(ns);
> + return -EINVAL;
> + }
> + ns->csi = NVME_CSI_ZNS;
> }
> +
> + return 0;
> }
>
> void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
> @@ -102,7 +110,7 @@ void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
> ns->size = i_size_read(ns->bdev->bd_inode);
> }
>
> -static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
> +u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
> {
> u16 status = NVME_SC_SUCCESS;
>
> @@ -448,6 +456,15 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
> case nvme_cmd_write_zeroes:
> req->execute = nvmet_bdev_execute_write_zeroes;
> return 0;
> + case nvme_cmd_zone_append:
> + req->execute = nvmet_bdev_execute_zone_append;
> + return 0;
> + case nvme_cmd_zone_mgmt_recv:
> + req->execute = nvmet_bdev_execute_zone_mgmt_recv;
> + return 0;
> + case nvme_cmd_zone_mgmt_send:
> + req->execute = nvmet_bdev_execute_zone_mgmt_send;
> + return 0;
> default:
> return nvmet_report_invalid_opcode(req);
> }
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index ab878fb96fbd..5e6514565f8c 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -248,6 +248,10 @@ struct nvmet_subsys {
> unsigned int admin_timeout;
> unsigned int io_timeout;
> #endif /* CONFIG_NVME_TARGET_PASSTHRU */
> +
> +#ifdef CONFIG_BLK_DEV_ZONED
> + u8 zasl;
> +#endif /* CONFIG_BLK_DEV_ZONED */
> };
>
> static inline struct nvmet_subsys *to_subsys(struct config_item *item)
> @@ -528,6 +532,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
> void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
> int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
> void nvmet_ns_revalidate(struct nvmet_ns *ns);
> +u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
>
> static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
> {
> @@ -585,6 +590,48 @@ static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
> }
> #endif /* CONFIG_NVME_TARGET_PASSTHRU */
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
> +void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req);
> +void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req);
> +void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req);
> +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req);
> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req);
> +#else /* CONFIG_BLK_DEV_ZONED */
> +static inline bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
> +{
> + return false;
> +}
> +static inline void
> +nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
> +{
> + pr_err("unhandled identify cns %d on qid %d\n",
> + req->cmd->identify.cns, req->sq->qid);
> + req->error_loc = offsetof(struct nvme_identify, cns);
> + nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
> +}
> +static inline void
> +nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
> +{
> + pr_err("unhandled identify cns %d on qid %d\n",
> + req->cmd->identify.cns, req->sq->qid);
> + req->error_loc = offsetof(struct nvme_identify, cns);
> + nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
> +}
> +static inline void
> +nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req)
> +{
> +}
> +static inline void
> +nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
> +{
> +}
> +static inline void
> +nvmet_bdev_execute_zone_append(struct nvmet_req *req)
> +{
> +}
> +#endif /* CONFIG_BLK_DEV_ZONED */
> +
> static inline struct nvme_ctrl *
> nvmet_req_passthru_ctrl(struct nvmet_req *req)
> {
> diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
> new file mode 100644
> index 000000000000..308198dd580b
> --- /dev/null
> +++ b/drivers/nvme/target/zns.c
> @@ -0,0 +1,477 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * NVMe ZNS-ZBD command implementation.
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + */
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +#include <linux/nvme.h>
> +#include <linux/blkdev.h>
> +#include "nvmet.h"
> +
> +/*
> + * We set the Memory Page Size Minimum (MPSMIN) for target controller to 0
> + * which gets added by 12 in the nvme_enable_ctrl() which results in 2^12 = 4k
> + * as page_shift value. When calculating the ZASL use shift by 12.
> + */
> +#define NVMET_MPSMIN_SHIFT 12
> +
> +static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
> +{
> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
> + u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
> +
> + if (!bdev_is_zoned(req->ns->bdev))
> + return NVME_SC_INVALID_NS | NVME_SC_DNR;
> +
> + if (sect > get_capacity(req->ns->bdev->bd_disk)) {
> + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, slba);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + /*
> + * Make sure out buffer size at least matches nvme report zone header.
> + * Reporting partial 64 bit nr_zones value can lead to unwanted side
> + * effects.
> + */
> + if (out_bufsize < sizeof(struct nvme_zone_report)) {
> + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, numd);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + if (req->cmd->zmr.zra != NVME_ZRA_ZONE_REPORT) {
> + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, zra);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + switch (req->cmd->zmr.pr) {
> + case 0:
> + case 1:
> + break;
> + default:
> + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, pr);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + switch (req->cmd->zmr.zrasf) {
> + case NVME_ZRASF_ZONE_REPORT_ALL:
> + case NVME_ZRASF_ZONE_STATE_EMPTY:
> + case NVME_ZRASF_ZONE_STATE_IMP_OPEN:
> + case NVME_ZRASF_ZONE_STATE_EXP_OPEN:
> + case NVME_ZRASF_ZONE_STATE_CLOSED:
> + case NVME_ZRASF_ZONE_STATE_FULL:
> + case NVME_ZRASF_ZONE_STATE_READONLY:
> + case NVME_ZRASF_ZONE_STATE_OFFLINE:
> + break;
> + default:
> + req->error_loc =
> + offsetof(struct nvme_zone_mgmt_recv_cmd, zrasf);
> + return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> + }
> +
> + return NVME_SC_SUCCESS;
> +}
> +
> +static inline u8 nvmet_zasl(unsigned int zone_append_sects)
> +{
> + /*
> + * Zone Append Size Limit is the value expressed in the units of minimum
> + * memory page size (i.e. 12) and is reported power of 2.
> + */
> + return ilog2(zone_append_sects >> (NVMET_MPSMIN_SHIFT - 9));
> +}
> +
> +static inline bool nvmet_zns_update_zasl(struct nvmet_ns *ns)
> +{
> + struct request_queue *q = ns->bdev->bd_disk->queue;
> + u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
> +
> + if (ns->subsys->zasl)
> + return ns->subsys->zasl < zasl;
> +
> + ns->subsys->zasl = zasl;
> + return true;
> +}
> +
> +static int nvmet_bdev_validate_zns_zones_cb(struct blk_zone *z,
> + unsigned int i, void *data)
> +{
> + if (z->type == BLK_ZONE_TYPE_CONVENTIONAL)
> + return -EOPNOTSUPP;
> + return 0;
> +}
> +
> +static bool nvmet_bdev_has_conv_zones(struct block_device *bdev)
> +{
> + int ret;
> +
> + if (bdev->bd_disk->queue->conv_zones_bitmap)
> + return true;
> +
> + ret = blkdev_report_zones(bdev, 0, blkdev_nr_zones(bdev->bd_disk),
> + nvmet_bdev_validate_zns_zones_cb, NULL);
> +
> + return ret <= 0;
> +}
> +
> +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
> +{
> + if (nvmet_bdev_has_conv_zones(ns->bdev))
> + return false;
> +
> + ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
> +
> + if (!nvmet_zns_update_zasl(ns))
> + return false;
> + /*
> + * Generic zoned block devices may have a smaller last zone which is
> + * not supported by ZNS. Excludes zoned drives that have such smaller
> + * last zone.
> + */
> + return !(get_capacity(ns->bdev->bd_disk) &
> + (bdev_zone_sectors(ns->bdev) - 1));
> +}
> +
> +void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
> +{
> + u8 zasl = req->sq->ctrl->subsys->zasl;
> + struct nvmet_ctrl *ctrl = req->sq->ctrl;
> + struct nvme_id_ctrl_zns *id;
> + u16 status;
> +
> + if (req->cmd->identify.csi != NVME_CSI_ZNS) {
> + req->error_loc = offsetof(struct nvme_common_command, opcode);
> + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
> + goto out;
> + }
> +
> + id = kzalloc(sizeof(*id), GFP_KERNEL);
> + if (!id) {
> + status = NVME_SC_INTERNAL;
> + goto out;
> + }
> +
> + if (ctrl->ops->get_mdts)
> + id->zasl = min_t(u8, ctrl->ops->get_mdts(ctrl), zasl);
> + else
> + id->zasl = zasl;
> +
> + status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
> +
> + kfree(id);
> +out:
> + nvmet_req_complete(req, status);
> +}
> +
> +void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
> +{
> + struct nvme_id_ns_zns *id_zns;
> + u64 zsze;
> + u16 status;
> +
> + if (req->cmd->identify.csi != NVME_CSI_ZNS) {
> + req->error_loc = offsetof(struct nvme_common_command, opcode);
> + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
> + goto out;
> + }
> +
> + if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
> + req->error_loc = offsetof(struct nvme_identify, nsid);
> + status = NVME_SC_INVALID_NS | NVME_SC_DNR;
> + goto out;
> + }
> +
> + id_zns = kzalloc(sizeof(*id_zns), GFP_KERNEL);
> + if (!id_zns) {
> + status = NVME_SC_INTERNAL;
> + goto out;
> + }
> +
> + status = nvmet_req_find_ns(req);
> + if (status) {
> + status = NVME_SC_INTERNAL;
> + goto done;
> + }
> +
> + if (!bdev_is_zoned(req->ns->bdev)) {
> + req->error_loc = offsetof(struct nvme_identify, nsid);
> + status = NVME_SC_INVALID_NS | NVME_SC_DNR;
> + goto done;
> + }
> +
> + nvmet_ns_revalidate(req->ns);
> + zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
> + req->ns->blksize_shift;
> + id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
> + id_zns->mor = cpu_to_le32(bdev_max_open_zones(req->ns->bdev));
> + id_zns->mar = cpu_to_le32(bdev_max_active_zones(req->ns->bdev));
> +
> +done:
> + status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns));
> + kfree(id_zns);
> +out:
> + nvmet_req_complete(req, status);
> +}
> +
> +struct nvmet_report_zone_data {
> + struct nvme_zone_report *rz;
> + struct nvmet_ns *ns;
> + u64 nr_zones;
> + u8 zrasf;
> +};
> +
> +static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d)
> +{
> + struct nvmet_report_zone_data *rz = d;
> + struct nvme_zone_descriptor *entries = rz->rz->entries;
> + struct nvmet_ns *ns = rz->ns;
> + static const unsigned int blk_zcond_to_nvme_zstate[] = {
> + [BLK_ZONE_COND_EMPTY] = NVME_ZRASF_ZONE_STATE_EMPTY,
> + [BLK_ZONE_COND_IMP_OPEN] = NVME_ZRASF_ZONE_STATE_IMP_OPEN,
> + [BLK_ZONE_COND_EXP_OPEN] = NVME_ZRASF_ZONE_STATE_EXP_OPEN,
> + [BLK_ZONE_COND_CLOSED] = NVME_ZRASF_ZONE_STATE_CLOSED,
> + [BLK_ZONE_COND_READONLY] = NVME_ZRASF_ZONE_STATE_READONLY,
> + [BLK_ZONE_COND_FULL] = NVME_ZRASF_ZONE_STATE_FULL,
> + [BLK_ZONE_COND_OFFLINE] = NVME_ZRASF_ZONE_STATE_OFFLINE,
> + };
> +
> + if (rz->zrasf == NVME_ZRASF_ZONE_REPORT_ALL)
> + goto record_zone;
> +
> + /*
> + * Make sure this zone condition's value is mapped to NVMe ZNS zone
> + * condition value.
> + */
> + if (z->cond > ARRAY_SIZE(blk_zcond_to_nvme_zstate) ||
> + !blk_zcond_to_nvme_zstate[z->cond])
> + return -EINVAL;
> +
> + /* filter zone by condition */
> + if (blk_zcond_to_nvme_zstate[z->cond] != rz->zrasf)
> + return 0;
> +
> +record_zone:
> +
> + entries[rz->nr_zones].zcap = nvmet_sect_to_lba(ns, z->capacity);
> + entries[rz->nr_zones].zslba = nvmet_sect_to_lba(ns, z->start);
> + entries[rz->nr_zones].wp = nvmet_sect_to_lba(ns, z->wp);
> + entries[rz->nr_zones].za = z->reset ? 1 << 2 : 0;
> + entries[rz->nr_zones].zs = z->cond << 4;
> + entries[rz->nr_zones].zt = z->type;
> +
> + rz->nr_zones++;
> +
> + return 0;
> +}
> +
> +unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req)
> +{
> + sector_t total_sect_from_slba;
> +
> + total_sect_from_slba = get_capacity(req->ns->bdev->bd_disk) -
> + nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
> +
> + return total_sect_from_slba / bdev_zone_sectors(req->ns->bdev);
> +}
> +
> +unsigned long get_nr_zones_from_buf(struct nvmet_req *req, u32 out_bufsize)
> +{
> + if (out_bufsize < sizeof(struct nvme_zone_report))
> + return 0;
> +
> + return (out_bufsize - sizeof(struct nvme_zone_report)) /
> + sizeof(struct nvme_zone_descriptor);
> +}
> +
> +unsigned long bufsize_from_zones(unsigned long nr_zones)
> +{
> + return sizeof(struct nvme_zone_report) +
> + (sizeof(struct nvme_zone_descriptor) * nr_zones);
> +}
> +
> +void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req)
> +{
> + unsigned long req_slba_nr_zones = nvmet_req_nr_zones_from_slba(req);
> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
> + u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
> + unsigned long out_nr_zones = get_nr_zones_from_buf(req, out_bufsize);
> + int reported_zones;
> + u32 bufsize;
> + u16 status;
> + struct nvmet_report_zone_data data = {
> + .ns = req->ns,
> + .zrasf = req->cmd->zmr.zrasf
> + };
> +
> + status = nvmet_bdev_validate_zone_mgmt_recv(req);
> + if (status)
> + goto out;
> +
> + /* nothing to report */
> + if (!req_slba_nr_zones) {
> + status = NVME_SC_SUCCESS;
> + goto out;
> + }
> +
> + /*
> + * Allocate Zone descriptors based on the number of zones that fit from
> + * zmr.slba to disk capacity.
> + */
> + bufsize = bufsize_from_zones(req_slba_nr_zones);
> +
> + data.rz = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY | __GFP_ZERO);
> + if (!data.rz) {
> + status = NVME_SC_INTERNAL;
> + goto out;
> + }
Forgot: for slba == 0, this will allocate a buffer for all zones of the backend.
If that backend is an SMR drive with high capacity, we are talking about a
multi-MB buffer here (e.g. a 20TB drive has over 75000 zones x 64B -> 4.8 MB).
A nasty host sending lots of full report-zones commands will end up consuming a
lot of memory on the target. So it may be better to cap bufsize to something
reasonable (say the backend max_sectors) and do the report in a loop here. That
will complicate the code a little, but not by much, since the buffer filling is
done in the cb and that one does not need to change because of the loop.
> +
> + reported_zones = blkdev_report_zones(req->ns->bdev, sect,
> + req_slba_nr_zones,
> + nvmet_bdev_report_zone_cb, &data);
> + if (reported_zones < 0) {
> + status = NVME_SC_INTERNAL;
> + goto out_free_report_zones;
> + }
> +
> + if (req->cmd->zmr.pr) {
> + /*
> + * When partial bit is set nr_zones == zone desc transferred. So
> + * if captured zones are less than the nr zones that can fit in
> + * out buf, then trim the out_bufsize to avoid extra copy also
> + * update the number of zones that we can transfer in out buf.
> + */
> + if (data.nr_zones < out_nr_zones) {
> + out_bufsize = bufsize_from_zones(data.nr_zones);
> + out_nr_zones = data.nr_zones;
> + }
> + } else {
> + /*
> + * When partial bit is not set nr_zone == zones for which ZSLBA
> + * value is greater than or equal to the ZSLBA value of the zone
> + * specified by the SLBA value in the command and match the
> + * criteria in the Zone Receive Action Specific field ZRASF.
> + */
> + out_nr_zones = data.nr_zones;
> +
> + /* trim out_bufsize to avoid extra copy */
> + if (data.nr_zones < out_nr_zones)
> + out_bufsize = bufsize_from_zones(data.nr_zones);
> + }
> +
> + data.rz->nr_zones = cpu_to_le64(out_nr_zones);
> +
> + status = nvmet_copy_to_sgl(req, 0, data.rz, out_bufsize);
> +
> +out_free_report_zones:
> + kvfree(data.rz);
> +out:
> + nvmet_req_complete(req, status);
> +}
> +
> +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
> +{
> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
> + u16 status = NVME_SC_SUCCESS;
> + u8 zsa = req->cmd->zms.zsa;
> + sector_t nr_sects;
> + enum req_opf op;
> + int ret;
> + const unsigned int zsa_to_op[] = {
> + [NVME_ZONE_OPEN] = REQ_OP_ZONE_OPEN,
> + [NVME_ZONE_CLOSE] = REQ_OP_ZONE_CLOSE,
> + [NVME_ZONE_FINISH] = REQ_OP_ZONE_FINISH,
> + [NVME_ZONE_RESET] = REQ_OP_ZONE_RESET,
> + };
> +
> + if (zsa > ARRAY_SIZE(zsa_to_op)) {
> + status = NVME_SC_INVALID_FIELD;
> + goto out;
> + }
> +
> + op = zsa_to_op[zsa];
> +
> + if (req->cmd->zms.select_all) {
> + sect = 0;
> + nr_sects = get_capacity(req->ns->bdev->bd_disk);
> + } else {
> + sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
> + nr_sects = bdev_zone_sectors(req->ns->bdev);
> + }
> +
> + ret = blkdev_zone_mgmt(req->ns->bdev, op, sect, nr_sects, GFP_KERNEL);
> + if (ret)
> + status = NVME_SC_INTERNAL;
> +out:
> + nvmet_req_complete(req, status);
> +}
> +
> +static void nvmet_bdev_zone_append_bio_done(struct bio *bio)
> +{
> + struct nvmet_req *req = bio->bi_private;
> +
> + req->cqe->result.u64 = nvmet_sect_to_lba(req->ns,
> + bio->bi_iter.bi_sector);
> + nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
> + if (bio != &req->b.inline_bio)
> + bio_put(bio);
> +}
> +
> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
> +{
> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
> + u16 status = NVME_SC_SUCCESS;
> + unsigned int total_len = 0;
> + struct scatterlist *sg;
> + int ret = 0, sg_cnt;
> + struct bio *bio;
> +
> + /* Request is completed on len mismatch in nvmet_check_transter_len() */
> + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
> + return;
> +
> + if (!req->sg_cnt) {
> + nvmet_req_complete(req, 0);
> + return;
> + }
> +
> + if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
> + bio = &req->b.inline_bio;
> + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
> + } else {
> + bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
> + }
> +
> + bio_set_dev(bio, req->ns->bdev);
> + bio->bi_iter.bi_sector = sect;
> + bio->bi_private = req;
> + bio->bi_end_io = nvmet_bdev_zone_append_bio_done;
> + bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
> + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
> + bio->bi_opf |= REQ_FUA;
> +
> + for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
> + struct page *p = sg_page(sg);
> + unsigned int l = sg->length;
> + unsigned int o = sg->offset;
> +
> + ret = bio_add_zone_append_page(bio, p, l, o);
> + if (ret != sg->length) {
> + status = NVME_SC_INTERNAL;
> + goto out_bio_put;
> + }
> +
> + total_len += sg->length;
> + }
> +
> + if (total_len != nvmet_rw_data_len(req)) {
> + status = NVME_SC_INTERNAL | NVME_SC_DNR;
> + goto out_bio_put;
> + }
> +
> + submit_bio(bio);
> + return;
> +
> +out_bio_put:
> + if (bio != &req->b.inline_bio)
> + bio_put(bio);
> + nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL : status);
> +}
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index c7ba83144d52..cb1197f1cfed 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -944,6 +944,13 @@ struct nvme_zone_mgmt_recv_cmd {
> enum {
> NVME_ZRA_ZONE_REPORT = 0,
> NVME_ZRASF_ZONE_REPORT_ALL = 0,
> + NVME_ZRASF_ZONE_STATE_EMPTY = 0x01,
> + NVME_ZRASF_ZONE_STATE_IMP_OPEN = 0x02,
> + NVME_ZRASF_ZONE_STATE_EXP_OPEN = 0x03,
> + NVME_ZRASF_ZONE_STATE_CLOSED = 0x04,
> + NVME_ZRASF_ZONE_STATE_READONLY = 0x05,
> + NVME_ZRASF_ZONE_STATE_FULL = 0x06,
> + NVME_ZRASF_ZONE_STATE_OFFLINE = 0x07,
> NVME_REPORT_ZONE_PARTIAL = 1,
> };
>
>
--
Damien Le Moal
Western Digital Research
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 17+ messages in thread