All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs
@ 2023-12-07 12:36 Daniel Wagner
  2023-12-07 12:36 ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Daniel Wagner
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Daniel Wagner @ 2023-12-07 12:36 UTC (permalink / raw)
  To: linux-nvme
  Cc: linux-kernel, Keith Busch, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke, Daniel Wagner

As already reported in v3, the nvme_ns_head change was not gaining us anything.
Instead a simple repacking of nvme_ns gives better performance. Thus these
patches are gone. 

Thanks,
Daniel

libnvme changes:
  https://github.com/igaw/libnvme/tree/tree-no-cmd
  
changes:
v4:
 - drop 'use nvme_ns_head instead nvme_ns' patches
 - ratelimit nuse update per namespace not globally
 - rename ns attribute group
 - added non-multipath nuse update logic
 - added cacheline optimization

v3:
 - cut overlong lines shorter
 - fixed disk (queuedata) initialization order
 - more testing with blktest
 - added nuse ratelimit
 - added reviewed tags
 - https://lore.kernel.org/linux-nvme/20231206081244.32733-1-dwagner@suse.de/

v2:
 - moved ns id data to nvme_ns_head
 - dropped ds, nsze
 - https://lore.kernel.org/linux-nvme/20231201092735.28592-1-dwagner@suse.de/

v1:
 - initial version
 - https://lore.kernel.org/linux-nvme/20231127103208.25748-1-dwagner@suse.de/

Daniel Wagner (4):
  nvme: move ns id info to struct nvme_ns_head
  nvme: rename ns attribute group
  nvme: add csi, ms and nuse to sysfs
  nvme: repack struct nvme_ns_head

 drivers/nvme/host/core.c      |  87 ++++++++++++++--------------
 drivers/nvme/host/ioctl.c     |   8 +--
 drivers/nvme/host/multipath.c |   2 +-
 drivers/nvme/host/nvme.h      |  38 +++++++------
 drivers/nvme/host/rdma.c      |   2 +-
 drivers/nvme/host/sysfs.c     | 103 +++++++++++++++++++++++++++++++---
 drivers/nvme/host/zns.c       |  17 +++---
 7 files changed, 179 insertions(+), 78 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head
  2023-12-07 12:36 [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
@ 2023-12-07 12:36 ` Daniel Wagner
  2023-12-07 15:31   ` [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs Christoph Hellwig
  2023-12-07 16:31   ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Keith Busch
  2023-12-07 12:36 ` [PATCH v4 2/4] nvme: rename ns attribute group Daniel Wagner
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 15+ messages in thread
From: Daniel Wagner @ 2023-12-07 12:36 UTC (permalink / raw)
  To: linux-nvme
  Cc: linux-kernel, Keith Busch, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke, Daniel Wagner

Move the namesapce info to struct nvme_ns_head, because it's the same
for all associated namespaces.

Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/nvme/host/core.c  | 81 ++++++++++++++++++++-------------------
 drivers/nvme/host/ioctl.c |  8 ++--
 drivers/nvme/host/nvme.h  | 28 +++++++-------
 drivers/nvme/host/rdma.c  |  2 +-
 drivers/nvme/host/zns.c   | 17 ++++----
 5 files changed, 70 insertions(+), 66 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d699f0c8b13e..72908e622049 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -312,12 +312,12 @@ static void nvme_log_error(struct request *req)
 	struct nvme_request *nr = nvme_req(req);
 
 	if (ns) {
-		pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n",
+		pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %u blocks, %s (sct 0x%x / sc 0x%x) %s%s\n",
 		       ns->disk ? ns->disk->disk_name : "?",
 		       nvme_get_opcode_str(nr->cmd->common.opcode),
 		       nr->cmd->common.opcode,
-		       (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)),
-		       (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift,
+		       nvme_sect_to_lba(ns, blk_rq_pos(req)),
+		       blk_rq_bytes(req) >> ns->head->lba_shift,
 		       nvme_get_error_status_str(nr->status),
 		       nr->status >> 8 & 7,	/* Status Code Type */
 		       nr->status & 0xff,	/* Status Code */
@@ -794,7 +794,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
 	if (queue_max_discard_segments(req->q) == 1) {
 		u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req));
-		u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9);
+		u32 nlb = blk_rq_sectors(req) >> (ns->head->lba_shift - 9);
 
 		range[0].cattr = cpu_to_le32(0);
 		range[0].nlb = cpu_to_le32(nlb);
@@ -803,7 +803,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 	} else {
 		__rq_for_each_bio(bio, req) {
 			u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
-			u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+			u32 nlb = bio->bi_iter.bi_size >> ns->head->lba_shift;
 
 			if (n < segments) {
 				range[n].cattr = cpu_to_le32(0);
@@ -841,7 +841,7 @@ static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd,
 	u64 ref48;
 
 	/* both rw and write zeroes share the same reftag format */
-	switch (ns->guard_type) {
+	switch (ns->head->guard_type) {
 	case NVME_NVM_NS_16B_GUARD:
 		cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
 		break;
@@ -871,15 +871,16 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
 	cmnd->write_zeroes.slba =
 		cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
 	cmnd->write_zeroes.length =
-		cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+		cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1);
 
-	if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC))
+	if (!(req->cmd_flags & REQ_NOUNMAP) &&
+	    (ns->head->features & NVME_NS_DEAC))
 		cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC);
 
 	if (nvme_ns_has_pi(ns)) {
 		cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);
 
-		switch (ns->pi_type) {
+		switch (ns->head->pi_type) {
 		case NVME_NS_DPS_PI_TYPE1:
 		case NVME_NS_DPS_PI_TYPE2:
 			nvme_set_ref_tag(ns, cmnd, req);
@@ -912,12 +913,13 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 	cmnd->rw.cdw3 = 0;
 	cmnd->rw.metadata = 0;
 	cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
-	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+	cmnd->rw.length =
+		cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1);
 	cmnd->rw.reftag = 0;
 	cmnd->rw.apptag = 0;
 	cmnd->rw.appmask = 0;
 
-	if (ns->ms) {
+	if (ns->head->ms) {
 		/*
 		 * If formated with metadata, the block layer always provides a
 		 * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled.  Else
@@ -930,7 +932,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 			control |= NVME_RW_PRINFO_PRACT;
 		}
 
-		switch (ns->pi_type) {
+		switch (ns->head->pi_type) {
 		case NVME_NS_DPS_PI_TYPE3:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD;
 			break;
@@ -1676,9 +1678,9 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
 {
 	struct blk_integrity integrity = { };
 
-	switch (ns->pi_type) {
+	switch (ns->head->pi_type) {
 	case NVME_NS_DPS_PI_TYPE3:
-		switch (ns->guard_type) {
+		switch (ns->head->guard_type) {
 		case NVME_NVM_NS_16B_GUARD:
 			integrity.profile = &t10_pi_type3_crc;
 			integrity.tag_size = sizeof(u16) + sizeof(u32);
@@ -1696,7 +1698,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
 		break;
 	case NVME_NS_DPS_PI_TYPE1:
 	case NVME_NS_DPS_PI_TYPE2:
-		switch (ns->guard_type) {
+		switch (ns->head->guard_type) {
 		case NVME_NVM_NS_16B_GUARD:
 			integrity.profile = &t10_pi_type1_crc;
 			integrity.tag_size = sizeof(u16);
@@ -1717,7 +1719,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
 		break;
 	}
 
-	integrity.tuple_size = ns->ms;
+	integrity.tuple_size = ns->head->ms;
 	blk_integrity_register(disk, &integrity);
 	blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
 }
@@ -1776,11 +1778,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
 	int ret = 0;
 	u32 elbaf;
 
-	ns->pi_size = 0;
-	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+	ns->head->pi_size = 0;
+	ns->head->ms = le16_to_cpu(id->lbaf[lbaf].ms);
 	if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
-		ns->pi_size = sizeof(struct t10_pi_tuple);
-		ns->guard_type = NVME_NVM_NS_16B_GUARD;
+		ns->head->pi_size = sizeof(struct t10_pi_tuple);
+		ns->head->guard_type = NVME_NVM_NS_16B_GUARD;
 		goto set_pi;
 	}
 
@@ -1803,13 +1805,13 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
 	if (nvme_elbaf_sts(elbaf))
 		goto free_data;
 
-	ns->guard_type = nvme_elbaf_guard_type(elbaf);
-	switch (ns->guard_type) {
+	ns->head->guard_type = nvme_elbaf_guard_type(elbaf);
+	switch (ns->head->guard_type) {
 	case NVME_NVM_NS_64B_GUARD:
-		ns->pi_size = sizeof(struct crc64_pi_tuple);
+		ns->head->pi_size = sizeof(struct crc64_pi_tuple);
 		break;
 	case NVME_NVM_NS_16B_GUARD:
-		ns->pi_size = sizeof(struct t10_pi_tuple);
+		ns->head->pi_size = sizeof(struct t10_pi_tuple);
 		break;
 	default:
 		break;
@@ -1818,10 +1820,10 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
 free_data:
 	kfree(nvm);
 set_pi:
-	if (ns->pi_size && (first || ns->ms == ns->pi_size))
-		ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+	if (ns->head->pi_size && (first || ns->head->ms == ns->head->pi_size))
+		ns->head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
 	else
-		ns->pi_type = 0;
+		ns->head->pi_type = 0;
 
 	return ret;
 }
@@ -1835,8 +1837,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 	if (ret)
 		return ret;
 
-	ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
-	if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
+	ns->head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
+	if (!ns->head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
 		return 0;
 
 	if (ctrl->ops->flags & NVME_F_FABRICS) {
@@ -1848,7 +1850,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 		if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
 			return 0;
 
-		ns->features |= NVME_NS_EXT_LBAS;
+		ns->head->features |= NVME_NS_EXT_LBAS;
 
 		/*
 		 * The current fabrics transport drivers support namespace
@@ -1860,7 +1862,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 		 * gain the ability to use other metadata formats.
 		 */
 		if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns))
-			ns->features |= NVME_NS_METADATA_SUPPORTED;
+			ns->head->features |= NVME_NS_METADATA_SUPPORTED;
 	} else {
 		/*
 		 * For PCIe controllers, we can't easily remap the separate
@@ -1869,9 +1871,9 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 		 * We allow extended LBAs for the passthrough interface, though.
 		 */
 		if (id->flbas & NVME_NS_FLBAS_META_EXT)
-			ns->features |= NVME_NS_EXT_LBAS;
+			ns->head->features |= NVME_NS_EXT_LBAS;
 		else
-			ns->features |= NVME_NS_METADATA_SUPPORTED;
+			ns->head->features |= NVME_NS_METADATA_SUPPORTED;
 	}
 	return 0;
 }
@@ -1898,7 +1900,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
 		struct nvme_ns *ns, struct nvme_id_ns *id)
 {
 	sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
-	u32 bs = 1U << ns->lba_shift;
+	u32 bs = 1U << ns->head->lba_shift;
 	u32 atomic_bs, phys_bs, io_opt = 0;
 
 	/*
@@ -1906,7 +1908,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	 * or smaller than a sector size yet, so catch this early and don't
 	 * allow block I/O.
 	 */
-	if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
+	if (ns->head->lba_shift > PAGE_SHIFT ||
+	    ns->head->lba_shift < SECTOR_SHIFT) {
 		capacity = 0;
 		bs = (1 << 9);
 	}
@@ -1949,9 +1952,9 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	 * I/O to namespaces with metadata except when the namespace supports
 	 * PI, as it can strip/insert in that case.
 	 */
-	if (ns->ms) {
+	if (ns->head->ms) {
 		if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
-		    (ns->features & NVME_NS_METADATA_SUPPORTED))
+		    (ns->head->features & NVME_NS_METADATA_SUPPORTED))
 			nvme_init_integrity(disk, ns,
 					    ns->ctrl->max_integrity_segments);
 		else if (!nvme_ns_has_pi(ns))
@@ -2052,7 +2055,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 
 	blk_mq_freeze_queue(ns->disk->queue);
 	lbaf = nvme_lbaf_index(id->flbas);
-	ns->lba_shift = id->lbaf[lbaf].ds;
+	ns->head->lba_shift = id->lbaf[lbaf].ds;
 	nvme_set_queue_limits(ns->ctrl, ns->queue);
 
 	ret = nvme_configure_metadata(ns, id);
@@ -2078,7 +2081,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	 * do not return zeroes.
 	 */
 	if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
-		ns->features |= NVME_NS_DEAC;
+		ns->head->features |= NVME_NS_DEAC;
 	set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
 	set_bit(NVME_NS_READY, &ns->flags);
 	blk_mq_unfreeze_queue(ns->disk->queue);
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 529b9954d2b8..feee9cf50670 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -283,10 +283,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 		return -EINVAL;
 	}
 
-	length = (io.nblocks + 1) << ns->lba_shift;
+	length = (io.nblocks + 1) << ns->head->lba_shift;
 
 	if ((io.control & NVME_RW_PRINFO_PRACT) &&
-	    ns->ms == sizeof(struct t10_pi_tuple)) {
+	    ns->head->ms == sizeof(struct t10_pi_tuple)) {
 		/*
 		 * Protection information is stripped/inserted by the
 		 * controller.
@@ -296,11 +296,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 		meta_len = 0;
 		metadata = NULL;
 	} else {
-		meta_len = (io.nblocks + 1) * ns->ms;
+		meta_len = (io.nblocks + 1) * ns->head->ms;
 		metadata = nvme_to_user_ptr(io.metadata);
 	}
 
-	if (ns->features & NVME_NS_EXT_LBAS) {
+	if (ns->head->features & NVME_NS_EXT_LBAS) {
 		length += meta_len;
 		meta_len = 0;
 	} else if (meta_len) {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 578e6d311bc9..1ebe6a9b42c9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -451,6 +451,17 @@ struct nvme_ns_head {
 	bool			shared;
 	int			instance;
 	struct nvme_effects_log *effects;
+	int			lba_shift;
+	u16			ms;
+	u16			pi_size;
+	u16			sgs;
+	u32			sws;
+	u8			pi_type;
+	u8			guard_type;
+#ifdef CONFIG_BLK_DEV_ZONED
+	u64			zsze;
+#endif
+	unsigned long		features;
 
 	struct cdev		cdev;
 	struct device		cdev_device;
@@ -492,17 +503,6 @@ struct nvme_ns {
 	struct kref kref;
 	struct nvme_ns_head *head;
 
-	int lba_shift;
-	u16 ms;
-	u16 pi_size;
-	u16 sgs;
-	u32 sws;
-	u8 pi_type;
-	u8 guard_type;
-#ifdef CONFIG_BLK_DEV_ZONED
-	u64 zsze;
-#endif
-	unsigned long features;
 	unsigned long flags;
 #define NVME_NS_REMOVING	0
 #define NVME_NS_ANA_PENDING	2
@@ -519,7 +519,7 @@ struct nvme_ns {
 /* NVMe ns supports metadata actions by the controller (generate/strip) */
 static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
 {
-	return ns->pi_type && ns->ms == ns->pi_size;
+	return ns->head->pi_type && ns->head->ms == ns->head->pi_size;
 }
 
 struct nvme_ctrl_ops {
@@ -653,7 +653,7 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
  */
 static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
 {
-	return sector >> (ns->lba_shift - SECTOR_SHIFT);
+	return sector >> (ns->head->lba_shift - SECTOR_SHIFT);
 }
 
 /*
@@ -661,7 +661,7 @@ static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
  */
 static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
 {
-	return lba << (ns->lba_shift - SECTOR_SHIFT);
+	return lba << (ns->head->lba_shift - SECTOR_SHIFT);
 }
 
 /*
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 81e2621169e5..fc0df91e6b36 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1423,7 +1423,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
 		goto mr_put;
 
 	nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
-				req->mr->sig_attrs, ns->pi_type);
+				req->mr->sig_attrs, ns->head->pi_type);
 	nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
 
 	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index ec8557810c21..2a8871e0cc9b 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -11,7 +11,7 @@ int nvme_revalidate_zones(struct nvme_ns *ns)
 {
 	struct request_queue *q = ns->queue;
 
-	blk_queue_chunk_sectors(q, ns->zsze);
+	blk_queue_chunk_sectors(q, ns->head->zsze);
 	blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
 
 	return blk_revalidate_disk_zones(ns->disk, NULL);
@@ -99,11 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
 		goto free_data;
 	}
 
-	ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
-	if (!is_power_of_2(ns->zsze)) {
+	ns->head->zsze =
+		nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
+	if (!is_power_of_2(ns->head->zsze)) {
 		dev_warn(ns->ctrl->device,
 			"invalid zone size:%llu for namespace:%u\n",
-			ns->zsze, ns->head->ns_id);
+			ns->head->zsze, ns->head->ns_id);
 		status = -ENODEV;
 		goto free_data;
 	}
@@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
 				   sizeof(struct nvme_zone_descriptor);
 
 	nr_zones = min_t(unsigned int, nr_zones,
-			 get_capacity(ns->disk) >> ilog2(ns->zsze));
+			 get_capacity(ns->head->disk) >> ilog2(ns->head->zsze));
 
 	bufsize = sizeof(struct nvme_zone_report) +
 		nr_zones * sizeof(struct nvme_zone_descriptor);
@@ -162,7 +163,7 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
 
 	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
 	zone.cond = entry->zs >> 4;
-	zone.len = ns->zsze;
+	zone.len = ns->head->zsze;
 	zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
 	zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
 	if (zone.cond == BLK_ZONE_COND_FULL)
@@ -196,7 +197,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
 	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
 	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
 
-	sector &= ~(ns->zsze - 1);
+	sector &= ~(ns->head->zsze - 1);
 	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
 		memset(report, 0, buflen);
 
@@ -220,7 +221,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
 			zone_idx++;
 		}
 
-		sector += ns->zsze * nz;
+		sector += ns->head->zsze * nz;
 	}
 
 	if (zone_idx > 0)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v4 2/4] nvme: rename ns attribute group
  2023-12-07 12:36 [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
  2023-12-07 12:36 ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Daniel Wagner
@ 2023-12-07 12:36 ` Daniel Wagner
  2023-12-07 15:32   ` Christoph Hellwig
  2023-12-07 12:36 ` [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
  2023-12-07 12:36 ` [PATCH v4 4/4] nvme: repack struct nvme_ns_head Daniel Wagner
  3 siblings, 1 reply; 15+ messages in thread
From: Daniel Wagner @ 2023-12-07 12:36 UTC (permalink / raw)
  To: linux-nvme
  Cc: linux-kernel, Keith Busch, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke, Daniel Wagner

Drop the 'id' part of the attribute group name because we want to expose
non 'id' related attributes via the ns attribute group.

Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/nvme/host/core.c      |  2 +-
 drivers/nvme/host/multipath.c |  2 +-
 drivers/nvme/host/nvme.h      |  2 +-
 drivers/nvme/host/sysfs.c     | 14 +++++++-------
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 72908e622049..c3270818fa0d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3669,7 +3669,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 	up_write(&ctrl->namespaces_rwsem);
 	nvme_get_ctrl(ctrl);
 
-	if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups))
+	if (device_add_disk(ctrl->device, ns->disk, nvme_ns_attr_groups))
 		goto out_cleanup_ns_from_list;
 
 	if (!nvme_ns_head_multipath(ns->head))
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 0a88d7bdc5e3..2dd4137a08b2 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -579,7 +579,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 	 */
 	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		rc = device_add_disk(&head->subsys->dev, head->disk,
-				     nvme_ns_id_attr_groups);
+				     nvme_ns_attr_groups);
 		if (rc) {
 			clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags);
 			return;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1ebe6a9b42c9..32ec7ca30d7c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -870,7 +870,7 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 
-extern const struct attribute_group *nvme_ns_id_attr_groups[];
+extern const struct attribute_group *nvme_ns_attr_groups[];
 extern const struct pr_ops nvme_pr_ops;
 extern const struct block_device_operations nvme_ns_head_ops;
 extern const struct attribute_group nvme_dev_attrs_group;
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index c6b7fbd4d34d..d682d0a667a0 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -114,7 +114,7 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(nsid);
 
-static struct attribute *nvme_ns_id_attrs[] = {
+static struct attribute *nvme_ns_attrs[] = {
 	&dev_attr_wwid.attr,
 	&dev_attr_uuid.attr,
 	&dev_attr_nguid.attr,
@@ -127,7 +127,7 @@ static struct attribute *nvme_ns_id_attrs[] = {
 	NULL,
 };
 
-static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
+static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
 		struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -157,13 +157,13 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
 	return a->mode;
 }
 
-static const struct attribute_group nvme_ns_id_attr_group = {
-	.attrs		= nvme_ns_id_attrs,
-	.is_visible	= nvme_ns_id_attrs_are_visible,
+static const struct attribute_group nvme_ns_attr_group = {
+	.attrs		= nvme_ns_attrs,
+	.is_visible	= nvme_ns_attrs_are_visible,
 };
 
-const struct attribute_group *nvme_ns_id_attr_groups[] = {
-	&nvme_ns_id_attr_group,
+const struct attribute_group *nvme_ns_attr_groups[] = {
+	&nvme_ns_attr_group,
 	NULL,
 };
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs
  2023-12-07 12:36 [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
  2023-12-07 12:36 ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Daniel Wagner
  2023-12-07 12:36 ` [PATCH v4 2/4] nvme: rename ns attribute group Daniel Wagner
@ 2023-12-07 12:36 ` Daniel Wagner
  2023-12-07 12:42   ` Daniel Wagner
                     ` (2 more replies)
  2023-12-07 12:36 ` [PATCH v4 4/4] nvme: repack struct nvme_ns_head Daniel Wagner
  3 siblings, 3 replies; 15+ messages in thread
From: Daniel Wagner @ 2023-12-07 12:36 UTC (permalink / raw)
  To: linux-nvme
  Cc: linux-kernel, Keith Busch, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke, Daniel Wagner

libnvme is using the sysfs for enumarating the nvme resources. Though
there are few missing attritbutes in the sysfs. For these libnvme issues
commands during discovering.

As the kernel already knows all these attributes and we would like to
avoid libnvme to issue commands all the time, expose these missing
attributes.

The nuse value is updated on request because the nuse is a volatile
value. Since any user can read the sysfs attribute, a very simple rate
limit is added (update once every 5 seconds). A more sophisticated
update strategy can be added later if there is actually a need for it.

Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/nvme/host/core.c  |  4 +-
 drivers/nvme/host/nvme.h  |  6 +++
 drivers/nvme/host/sysfs.c | 89 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c3270818fa0d..82c6faf424d6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1454,7 +1454,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl,
 	return status;
 }
 
-static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
+int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 			struct nvme_id_ns **id)
 {
 	struct nvme_command c = { };
@@ -2056,6 +2056,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	blk_mq_freeze_queue(ns->disk->queue);
 	lbaf = nvme_lbaf_index(id->flbas);
 	ns->head->lba_shift = id->lbaf[lbaf].ds;
+	ns->head->nuse = le64_to_cpu(id->nuse);
 	nvme_set_queue_limits(ns->ctrl, ns->queue);
 
 	ret = nvme_configure_metadata(ns, id);
@@ -3418,6 +3419,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	head->ns_id = info->nsid;
 	head->ids = info->ids;
 	head->shared = info->is_shared;
+	ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
 	kref_init(&head->ref);
 
 	if (head->ids.csi) {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 32ec7ca30d7c..2b31641a97b9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -16,6 +16,7 @@
 #include <linux/rcupdate.h>
 #include <linux/wait.h>
 #include <linux/t10-pi.h>
+#include <linux/ratelimit_types.h>
 
 #include <trace/events/block.h>
 
@@ -456,6 +457,7 @@ struct nvme_ns_head {
 	u16			pi_size;
 	u16			sgs;
 	u32			sws;
+	u64			nuse;
 	u8			pi_type;
 	u8			guard_type;
 #ifdef CONFIG_BLK_DEV_ZONED
@@ -463,6 +465,8 @@ struct nvme_ns_head {
 #endif
 	unsigned long		features;
 
+	struct ratelimit_state	rs_nuse;
+
 	struct cdev		cdev;
 	struct device		cdev_device;
 
@@ -867,6 +871,8 @@ int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 		unsigned int issue_flags);
 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 		unsigned int issue_flags);
+int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
+		struct nvme_id_ns **id);
 int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index d682d0a667a0..79e362e0ac47 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -114,12 +114,101 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(nsid);
 
+static ssize_t csi_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ids.csi);
+}
+static DEVICE_ATTR_RO(csi);
+
+static ssize_t metadata_bytes_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ms);
+}
+static DEVICE_ATTR_RO(metadata_bytes);
+
+static int ns_head_update_nuse(struct nvme_ns_head *head)
+{
+	struct nvme_id_ns *id;
+	struct nvme_ns *ns;
+	int srcu_idx, ret = -EWOULDBLOCK;
+
+	/* Avoid issuing commands too often by rate limiting the update */
+	if (!__ratelimit(&head->rs_nuse))
+		return 0;
+
+	pr_info("%s: head %p\n", __func__, head);
+
+	srcu_idx = srcu_read_lock(&head->srcu);
+	ns = nvme_find_path(head);
+	if (!ns)
+		goto out_unlock;
+
+	ret = nvme_identify_ns(ns->ctrl, head->ns_id, &id);
+	if (ret)
+		goto out_unlock;
+
+	head->nuse = le64_to_cpu(id->nuse);
+	kfree(id);
+
+out_unlock:
+	srcu_read_unlock(&head->srcu, srcu_idx);
+	return ret;
+}
+
+static int ns_update_nuse(struct nvme_ns *ns)
+{
+	struct nvme_id_ns *id;
+	int ret;
+
+	/* Avoid issuing commands too often by rate limiting the update. */
+	if (!__ratelimit(&ns->head->rs_nuse))
+		return 0;
+
+	pr_info("%s: ns %p\n", __func__, ns);
+
+	ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id);
+	if (ret)
+		goto out_free_id;
+
+	ns->head->nuse = le64_to_cpu(id->nuse);
+
+out_free_id:
+	kfree(id);
+
+	return ret;
+}
+
+static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ns_head *head = dev_to_ns_head(dev);
+	struct gendisk *disk = dev_to_disk(dev);
+	struct block_device *bdev = disk->part0;
+	int ret;
+
+	if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
+	    bdev->bd_disk->fops == &nvme_ns_head_ops)
+		ret = ns_head_update_nuse(head);
+	else
+		ret = ns_update_nuse(bdev->bd_disk->private_data);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%llu\n", head->nuse);
+}
+static DEVICE_ATTR_RO(nuse);
+
 static struct attribute *nvme_ns_attrs[] = {
 	&dev_attr_wwid.attr,
 	&dev_attr_uuid.attr,
 	&dev_attr_nguid.attr,
 	&dev_attr_eui.attr,
+	&dev_attr_csi.attr,
 	&dev_attr_nsid.attr,
+	&dev_attr_metadata_bytes.attr,
+	&dev_attr_nuse.attr,
 #ifdef CONFIG_NVME_MULTIPATH
 	&dev_attr_ana_grpid.attr,
 	&dev_attr_ana_state.attr,
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v4 4/4] nvme: repack struct nvme_ns_head
  2023-12-07 12:36 [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
                   ` (2 preceding siblings ...)
  2023-12-07 12:36 ` [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
@ 2023-12-07 12:36 ` Daniel Wagner
  2023-12-07 15:33   ` Christoph Hellwig
  3 siblings, 1 reply; 15+ messages in thread
From: Daniel Wagner @ 2023-12-07 12:36 UTC (permalink / raw)
  To: linux-nvme
  Cc: linux-kernel, Keith Busch, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke, Daniel Wagner

ns_id, lba_shift and ms are always accessed for every read/write I/O in
nvme_setup_rw. By grouping these variables into one cacheline we can
safe some cycles.

4k sequential reads:

           baseline   patched
Bandwidth: 1620       1634
IOPs       66345579   66910939

Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/nvme/host/nvme.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2b31641a97b9..4a4beaecc832 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -445,21 +445,21 @@ struct nvme_ns_head {
 	struct list_head	list;
 	struct srcu_struct      srcu;
 	struct nvme_subsystem	*subsys;
-	unsigned		ns_id;
 	struct nvme_ns_ids	ids;
 	struct list_head	entry;
 	struct kref		ref;
 	bool			shared;
 	int			instance;
 	struct nvme_effects_log *effects;
+	u64			nuse;
+	unsigned		ns_id;
 	int			lba_shift;
 	u16			ms;
 	u16			pi_size;
-	u16			sgs;
-	u32			sws;
-	u64			nuse;
 	u8			pi_type;
 	u8			guard_type;
+	u16			sgs;
+	u32			sws;
 #ifdef CONFIG_BLK_DEV_ZONED
 	u64			zsze;
 #endif
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs
  2023-12-07 12:36 ` [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
@ 2023-12-07 12:42   ` Daniel Wagner
  2023-12-07 15:33   ` Christoph Hellwig
  2023-12-07 16:44   ` Keith Busch
  2 siblings, 0 replies; 15+ messages in thread
From: Daniel Wagner @ 2023-12-07 12:42 UTC (permalink / raw)
  To: linux-nvme
  Cc: linux-kernel, Keith Busch, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke

On Thu, Dec 07, 2023 at 01:36:23PM +0100, Daniel Wagner wrote:
> +static int ns_head_update_nuse(struct nvme_ns_head *head)
> +{
> +	struct nvme_id_ns *id;
> +	struct nvme_ns *ns;
> +	int srcu_idx, ret = -EWOULDBLOCK;
> +
> +	/* Avoid issuing commands too often by rate limiting the update */
> +	if (!__ratelimit(&head->rs_nuse))
> +		return 0;
> +
> +	pr_info("%s: head %p\n", __func__, head);

Forgot to remove this debug print.

> +static int ns_update_nuse(struct nvme_ns *ns)
> +{
> +	struct nvme_id_ns *id;
> +	int ret;
> +
> +	/* Avoid issuing commands too often by rate limiting the update. */
> +	if (!__ratelimit(&ns->head->rs_nuse))
> +		return 0;
> +
> +	pr_info("%s: ns %p\n", __func__, ns);

ditto

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs
  2023-12-07 12:36 ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Daniel Wagner
@ 2023-12-07 15:31   ` Christoph Hellwig
  2023-12-08  9:10     ` Daniel Wagner
  2023-12-07 16:31   ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Keith Busch
  1 sibling, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2023-12-07 15:31 UTC (permalink / raw)
  To: Daniel Wagner
  Cc: linux-nvme, linux-kernel, Keith Busch, Christoph Hellwig,
	Sagi Grimberg, Hannes Reinecke

Hi Daniel,

this looks generally good to me.  A few comments, all but one are
cosmetic.

> @@ -1676,9 +1678,9 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,

This an now take the ns_head instead of the ns.

> @@ -1776,11 +1778,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)

This can take a ctrl and ns_head instead of the ns.

> @@ -1835,8 +1837,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)

This as well after updating nvme_ns_has_pi to take the ns_head.

> @@ -1898,7 +1900,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
>  		struct nvme_ns *ns, struct nvme_id_ns *id)

This as well after fixing up nvme_lba_to_sect to take the ns_head.

> @@ -2052,7 +2055,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,

This one as well.

> @@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
>  				   sizeof(struct nvme_zone_descriptor);
>  
>  	nr_zones = min_t(unsigned int, nr_zones,
> -			 get_capacity(ns->disk) >> ilog2(ns->zsze));
> +			 get_capacity(ns->head->disk) >> ilog2(ns->head->zsze));

This doesn't work, as the head disk doesn't exist for !multipath setups.

> @@ -162,7 +163,7 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,

This could do with ctrl + ns_head now

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 2/4] nvme: rename ns attribute group
  2023-12-07 12:36 ` [PATCH v4 2/4] nvme: rename ns attribute group Daniel Wagner
@ 2023-12-07 15:32   ` Christoph Hellwig
  0 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2023-12-07 15:32 UTC (permalink / raw)
  To: Daniel Wagner
  Cc: linux-nvme, linux-kernel, Keith Busch, Christoph Hellwig,
	Sagi Grimberg, Hannes Reinecke

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs
  2023-12-07 12:36 ` [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
  2023-12-07 12:42   ` Daniel Wagner
@ 2023-12-07 15:33   ` Christoph Hellwig
  2023-12-07 16:44   ` Keith Busch
  2 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2023-12-07 15:33 UTC (permalink / raw)
  To: Daniel Wagner
  Cc: linux-nvme, linux-kernel, Keith Busch, Christoph Hellwig,
	Sagi Grimberg, Hannes Reinecke

With the extra debugging removed:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 4/4] nvme: repack struct nvme_ns_head
  2023-12-07 12:36 ` [PATCH v4 4/4] nvme: repack struct nvme_ns_head Daniel Wagner
@ 2023-12-07 15:33   ` Christoph Hellwig
  0 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2023-12-07 15:33 UTC (permalink / raw)
  To: Daniel Wagner
  Cc: linux-nvme, linux-kernel, Keith Busch, Christoph Hellwig,
	Sagi Grimberg, Hannes Reinecke

On Thu, Dec 07, 2023 at 01:36:24PM +0100, Daniel Wagner wrote:
> ns_id, lba_shift and ms are always accessed for every read/write I/O in
> nvme_setup_rw. By grouping these variables into one cacheline we can
> safe some cycles.
> 
> 4k sequential reads:
> 
>            baseline   patched
> Bandwidth: 1620       1634
> IOPs       66345579   66910939

Nice:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head
  2023-12-07 12:36 ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Daniel Wagner
  2023-12-07 15:31   ` [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs Christoph Hellwig
@ 2023-12-07 16:31   ` Keith Busch
  2023-12-08 13:04     ` Daniel Wagner
  1 sibling, 1 reply; 15+ messages in thread
From: Keith Busch @ 2023-12-07 16:31 UTC (permalink / raw)
  To: Daniel Wagner
  Cc: linux-nvme, linux-kernel, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke

On Thu, Dec 07, 2023 at 01:36:21PM +0100, Daniel Wagner wrote:
> @@ -1906,7 +1908,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
>  	 * or smaller than a sector size yet, so catch this early and don't
>  	 * allow block I/O.
>  	 */
> -	if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
> +	if (ns->head->lba_shift > PAGE_SHIFT ||
> +	    ns->head->lba_shift < SECTOR_SHIFT) {
>  		capacity = 0;
>  		bs = (1 << 9);
>  	}

A minor conflict here: this series would target nvme-6.8, but the block
tree we're based on doesn't have this code. I'll patch it up for the
current 6.8 tree and make a note of the conflict for the next merge
window.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs
  2023-12-07 12:36 ` [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
  2023-12-07 12:42   ` Daniel Wagner
  2023-12-07 15:33   ` Christoph Hellwig
@ 2023-12-07 16:44   ` Keith Busch
  2023-12-08  9:10     ` Daniel Wagner
  2 siblings, 1 reply; 15+ messages in thread
From: Keith Busch @ 2023-12-07 16:44 UTC (permalink / raw)
  To: Daniel Wagner
  Cc: linux-nvme, linux-kernel, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke

On Thu, Dec 07, 2023 at 01:36:23PM +0100, Daniel Wagner wrote:
> @@ -3418,6 +3419,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
>  	head->ns_id = info->nsid;
>  	head->ids = info->ids;
>  	head->shared = info->is_shared;
> +	ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
>  	kref_init(&head->ref);

I think we need to add:

	ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);	

So that we don't get periodic messages like:

	[   60.469730] ns_head_update_nuse: 39 callbacks suppressed
	[  159.532901] ns_head_update_nuse: 1999 callbacks suppressed

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs
  2023-12-07 15:31   ` [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs Christoph Hellwig
@ 2023-12-08  9:10     ` Daniel Wagner
  0 siblings, 0 replies; 15+ messages in thread
From: Daniel Wagner @ 2023-12-08  9:10 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-nvme, linux-kernel, Keith Busch, Sagi Grimberg, Hannes Reinecke

Hi Christoph,

On Thu, Dec 07, 2023 at 04:31:38PM +0100, Christoph Hellwig wrote:
> this looks generally good to me.  A few comments, all but one are
> cosmetic.
> 
> > @@ -1676,9 +1678,9 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
> 
> This an now take the ns_head instead of the ns.

Okay

> > @@ -1776,11 +1778,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
> 
> This can take a ctrl and ns_head instead of the ns.

Okay

> > @@ -1835,8 +1837,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
> 
> This as well after updating nvme_ns_has_pi to take the ns_head.

Okay

> > @@ -1898,7 +1900,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
> >  		struct nvme_ns *ns, struct nvme_id_ns *id)
> 
> This as well after fixing up nvme_lba_to_sect to take the ns_head.

Okay

> 
> > @@ -2052,7 +2055,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> 
> This one as well.

When trying to refactor this function I run into a bit of trouble with
dependencies. Basically, we would need to pass through nvme_ctrl,
request_queue, gendisk and nvme_ns_head and still need nvme_ns in
nvme_update_zone_info:

nvme_update_ns_info_block
  blk_mq_freeze_queue(ns->disk->queue)
  nvme_set_chuck_sector(ns->queue)
  nvme_update_zone_info()
    set_bit(NVME_NS_FORCE_RO, ns->flags)
    blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);

I suggest we don't refactor this part now. At least it I don't see an
good way to avoid passing down nvme_ns as we still need this data
structure in deeper down the call chain.

> > @@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
> >  				   sizeof(struct nvme_zone_descriptor);
> >  
> >  	nr_zones = min_t(unsigned int, nr_zones,
> > -			 get_capacity(ns->disk) >> ilog2(ns->zsze));
> > +			 get_capacity(ns->head->disk) >> ilog2(ns->head->zsze));
> 
> This doesn't work, as the head disk doesn't exist for !multipath
> setups.

I was a bit trigger happy here. Changed it back to 'get_capacity(ns->disk)'.

> > @@ -162,7 +163,7 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
> 
> This could do with ctrl + ns_head now

Okay.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs
  2023-12-07 16:44   ` Keith Busch
@ 2023-12-08  9:10     ` Daniel Wagner
  0 siblings, 0 replies; 15+ messages in thread
From: Daniel Wagner @ 2023-12-08  9:10 UTC (permalink / raw)
  To: Keith Busch
  Cc: linux-nvme, linux-kernel, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke

On Thu, Dec 07, 2023 at 09:44:58AM -0700, Keith Busch wrote:
> On Thu, Dec 07, 2023 at 01:36:23PM +0100, Daniel Wagner wrote:
> > @@ -3418,6 +3419,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
> >  	head->ns_id = info->nsid;
> >  	head->ids = info->ids;
> >  	head->shared = info->is_shared;
> > +	ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
> >  	kref_init(&head->ref);
> 
> I think we need to add:
> 
> 	ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);	
> 
> So that we don't get periodic messages like:
> 
> 	[   60.469730] ns_head_update_nuse: 39 callbacks suppressed
> 	[  159.532901] ns_head_update_nuse: 1999 callbacks suppressed

Good idea. I'll add this.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head
  2023-12-07 16:31   ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Keith Busch
@ 2023-12-08 13:04     ` Daniel Wagner
  0 siblings, 0 replies; 15+ messages in thread
From: Daniel Wagner @ 2023-12-08 13:04 UTC (permalink / raw)
  To: Keith Busch
  Cc: linux-nvme, linux-kernel, Christoph Hellwig, Sagi Grimberg,
	Hannes Reinecke

On Thu, Dec 07, 2023 at 09:31:16AM -0700, Keith Busch wrote:
> On Thu, Dec 07, 2023 at 01:36:21PM +0100, Daniel Wagner wrote:
> > @@ -1906,7 +1908,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
> >  	 * or smaller than a sector size yet, so catch this early and don't
> >  	 * allow block I/O.
> >  	 */
> > -	if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
> > +	if (ns->head->lba_shift > PAGE_SHIFT ||
> > +	    ns->head->lba_shift < SECTOR_SHIFT) {
> >  		capacity = 0;
> >  		bs = (1 << 9);
> >  	}
> 
> A minor conflict here: this series would target nvme-6.8, but the block
> tree we're based on doesn't have this code. I'll patch it up for the
> current 6.8 tree and make a note of the conflict for the next merge
> window.

I've missed that nvme-6.8 was available now. I can rebase v5 ontop of
nvme-6.8 if you want me to do it. Whatever is simpler for you.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-12-08 13:04 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-07 12:36 [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
2023-12-07 12:36 ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Daniel Wagner
2023-12-07 15:31   ` [PATCH v4 0/4] nvme: add csi, ms and nuse to sysfs Christoph Hellwig
2023-12-08  9:10     ` Daniel Wagner
2023-12-07 16:31   ` [PATCH v4 1/4] nvme: move ns id info to struct nvme_ns_head Keith Busch
2023-12-08 13:04     ` Daniel Wagner
2023-12-07 12:36 ` [PATCH v4 2/4] nvme: rename ns attribute group Daniel Wagner
2023-12-07 15:32   ` Christoph Hellwig
2023-12-07 12:36 ` [PATCH v4 3/4] nvme: add csi, ms and nuse to sysfs Daniel Wagner
2023-12-07 12:42   ` Daniel Wagner
2023-12-07 15:33   ` Christoph Hellwig
2023-12-07 16:44   ` Keith Busch
2023-12-08  9:10     ` Daniel Wagner
2023-12-07 12:36 ` [PATCH v4 4/4] nvme: repack struct nvme_ns_head Daniel Wagner
2023-12-07 15:33   ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.