On Mar 10 11:57, Keith Busch wrote:
> Commands that access LBA contents without a data transfer between the
> host historically have not had a spec-defined upper limit. The driver
> set the queue constraints for such commands to the max data transfer
> size just to be safe, but this artificial constraint frequently limits
> devices below their capabilities.
> 
> The NVMe Workgroup-ratified TP4040 defines how a controller may
> advertise its non-MDTS limits. Use these if provided, and default
> to the current constraints if not.
> 
> Signed-off-by: Keith Busch
> ---
>  drivers/nvme/host/core.c | 89 +++++++++++++++++++++++++++++++---------
>  drivers/nvme/host/nvme.h |  3 ++
>  include/linux/nvme.h     | 10 +++++
>  3 files changed, 82 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index f7bc808da3d0..af299487f7a8 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1939,7 +1939,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>          struct request_queue *queue = disk->queue;
>          u32 size = queue_logical_block_size(queue);
>  
> -        if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
> +        if (!(ctrl->max_discard_sectors)) {
>                  blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
>                  return;
>          }
> @@ -1957,8 +1957,8 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>          if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
>                  return;
>  
> -        blk_queue_max_discard_sectors(queue, UINT_MAX);
> -        blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
> +        blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
> +        blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
>  
>          if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
>                  blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
> @@ -1966,25 +1966,10 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
>  
>  static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
>  {
> -        u64 max_blocks;
> +        u64 max_blocks = ns->ctrl->max_zeroes_sectors;
>  
> -        if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
> -            (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
> +        if (max_blocks == 0)
>                  return;
> -        /*
> -         * Even though NVMe spec explicitly states that MDTS is not
> -         * applicable to the write-zeroes:- "The restriction does not apply to
> -         * commands that do not transfer data between the host and the
> -         * controller (e.g., Write Uncorrectable ro Write Zeroes command).".
> -         * In order to be more cautious use controller's max_hw_sectors value
> -         * to configure the maximum sectors for the write-zeroes which is
> -         * configured based on the controller's MDTS field in the
> -         * nvme_init_ctrl_finish() if available.
> -         */
> -        if (ns->ctrl->max_hw_sectors == UINT_MAX)
> -                max_blocks = (u64)USHRT_MAX + 1;
> -        else
> -                max_blocks = ns->ctrl->max_hw_sectors + 1;
>  
>          blk_queue_max_write_zeroes_sectors(disk->queue,
>                                             nvme_lba_to_sect(ns, max_blocks));
> @@ -3063,6 +3048,65 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
>          return 0;
>  }
>  
> +static inline u32 nvme_mps_size_to_bytes(u8 size)
> +{
> +        /* XXX: modify if NVME_CTRL_PAGE_SHIFT ever changes */
> +        return 1 << (size + 3);
> +}
> +
> +static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
> +{
> +        struct nvme_command c = { };
> +        struct nvme_id_ctrl_nvm *id;
> +        int ret;
> +
> +        if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
> +                ctrl->max_discard_sectors = 0;
> +                ctrl->max_discard_segments = 0;
> +        } else {
> +                ctrl->max_discard_sectors = UINT_MAX;
> +                ctrl->max_discard_segments = NVME_DSM_MAX_RANGES;
> +        }
> +
> +        /*
> +         * Even though NVMe spec explicitly states that MDTS is not applicable
> +         * to the write-zeroes, we are cautious and limit the default size to
> +         * the controller's max_hw_sectors value, which is based on the MDTS
> +         * field and possibly other limiting factors.
> +         */
> +        if (!(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES) &&
> +            (ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES))
> +                ctrl->max_zeroes_sectors = ctrl->max_hw_sectors;
> +        else
> +                ctrl->max_zeroes_sectors = 0;
> +
> +        if (ctrl->vs < NVME_VS(1, 2, 0))
> +                return 0;
> +
> +        id = kzalloc(sizeof(*id), GFP_KERNEL);
> +        if (!id)
> +                return 0;
> +
> +        c.identify.opcode = nvme_admin_identify;
> +        c.identify.cns = NVME_ID_CNS_CS_CTRL;
> +        c.identify.csi = NVME_CSI_NVM;
> +
> +        ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
> +        if (ret)
> +                goto free_data;
> +
> +        if (id->dmrl)
> +                ctrl->max_discard_segments = id->dmrl;
> +        if (id->dmrsl)
> +                ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);

Since DMRSL is in terms of LBAs, should this use nvme_lba_to_sect?
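To make the unit mismatch concrete, here is a throwaway userspace sketch; the DMRSL value and LBA shifts are made-up examples, and lba_to_sect() below just mirrors the shift the driver's nvme_lba_to_sect() helper does:

/*
 * DMRSL counts logical blocks in the namespace's formatted LBA size,
 * while max_discard_sectors is in 512-byte block layer sectors.
 */
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9	/* block layer sectors are always 512 bytes */

/* mirrors nvme_lba_to_sect(): LBA count -> 512-byte sectors */
static uint64_t lba_to_sect(uint64_t lba, unsigned int lba_shift)
{
	return lba << (lba_shift - SECTOR_SHIFT);
}

int main(void)
{
	uint32_t dmrsl = 0x10000;	/* hypothetical DMRSL: 65536 LBAs */

	/* 512-byte formatted namespace: LBAs and sectors happen to coincide */
	printf("512B LBAs: %llu sectors\n",
	       (unsigned long long)lba_to_sect(dmrsl, 9));
	/* 4K formatted namespace: the raw value undersells the limit by 8x */
	printf("4K   LBAs: %llu sectors\n",
	       (unsigned long long)lba_to_sect(dmrsl, 12));
	return 0;
}

With a 4K-formatted namespace the raw DMRSL would be 8x more restrictive than the device actually allows, so the conversion probably wants to happen per-namespace, where the LBA shift is known.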
> +        if (id->wzsl)
> +                ctrl->max_zeroes_sectors = nvme_mps_size_to_bytes(id->wzsl);
> +
> +free_data:
> +        kfree(id);
> +        return ret;
> +}
> +
>  static int nvme_init_identify(struct nvme_ctrl *ctrl)
>  {
>          struct nvme_id_ctrl *id;
> @@ -3238,6 +3282,10 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
>          if (ret)
>                  return ret;
>  
> +        ret = nvme_init_non_mdts_limits(ctrl);
> +        if (ret < 0)
> +                return ret;
> +
>          ret = nvme_configure_apst(ctrl);
>          if (ret < 0)
>                  return ret;
> @@ -4768,6 +4816,7 @@ static inline void _nvme_check_size(void)
>          BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
>          BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
>          BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
> +        BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
>          BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
>          BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
>          BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 76de7ed55d90..cf63ab07be4c 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -276,6 +276,9 @@ struct nvme_ctrl {
>          u32 max_hw_sectors;
>          u32 max_segments;
>          u32 max_integrity_segments;
> +        u32 max_discard_sectors;
> +        u32 max_discard_segments;
> +        u32 max_zeroes_sectors;
>  #ifdef CONFIG_BLK_DEV_ZONED
>          u32 max_zone_append;
>  #endif
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index b08787cd0881..edcbd60b88b9 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -405,6 +405,16 @@ struct nvme_id_ctrl_zns {
>          __u8 rsvd1[4095];
>  };
>  
> +struct nvme_id_ctrl_nvm {
> +        __u8 vsl;
> +        __u8 wzsl;
> +        __u8 wusl;
> +        __u8 dmrl;
> +        __le32 dmrsl;
> +        __le64 dmsl;
> +        __u8 rsvd16[4080];
> +};
> +
>  enum {
>          NVME_ID_CNS_NS = 0x00,
>          NVME_ID_CNS_CTRL = 0x01,
> -- 
> 2.25.4

-- 
One of us - No more doubt, silence or taboo about mental illness.
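P.S. For anyone else puzzling over the "+ 3" in nvme_mps_size_to_bytes(): my reading, assuming NVME_CTRL_PAGE_SHIFT stays at 12 as the XXX comment notes, is that WZSL reports the Write Zeroes limit as a power-of-two multiple of the minimum memory page size, so 1 << (wzsl + 3) comes out in 512-byte units (12 - 9 = 3). A quick standalone check of that arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int wzsl;

	/* 1 << (wzsl + 3) sectors == (1 << wzsl) * 4 KiB pages */
	for (wzsl = 0; wzsl <= 8; wzsl++)
		printf("wzsl=%u -> %4u sectors (%4u KiB)\n", wzsl,
		       1u << (wzsl + 3), (1u << (wzsl + 3)) / 2);
	return 0;
}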