* [PATCH] nvme: enable FDP support
       [not found] <CGME20240510134740epcas5p24ef1c2d6e8934c1c79b01c849e7ccb41@epcas5p2.samsung.com>
@ 2024-05-10 13:40 ` Kanchan Joshi
  2024-05-10 19:30   ` Keith Busch
                     ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Kanchan Joshi @ 2024-05-10 13:40 UTC (permalink / raw)
  To: axboe, kbusch, hch
  Cc: linux-nvme, linux-block, javier.gonz, bvanassche, david, slava,
	gost.dev, Kanchan Joshi, Hui Qi, Nitesh Shetty

Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
to control the placement of logical blocks so as to reduce the SSD WAF.

Userspace can send data lifetime information using write hints. The SCSI
driver (sd) can already pass this information to SCSI devices. This patch
does the same for NVMe.

Fetch the placement identifiers (plids) if the device supports FDP, and
map the incoming write hints to plids.

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Hui Qi <hui81.qi@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
---
 drivers/nvme/host/core.c | 67 ++++++++++++++++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h |  4 +++
 include/linux/nvme.h     | 19 ++++++++++++
 3 files changed, 90 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8ae0a2dc5eda..c3de06cff12f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -42,6 +42,20 @@ struct nvme_ns_info {
 	bool is_removed;
 };
 
+struct nvme_fdp_ruh_status_desc {
+	u16 pid;
+	u16 ruhid;
+	u32 earutr;
+	u64 ruamw;
+	u8  rsvd16[16];
+};
+
+struct nvme_fdp_ruh_status {
+	u8  rsvd0[14];
+	u16 nruhsd;
+	struct nvme_fdp_ruh_status_desc ruhsd[];
+};
+
 unsigned int admin_timeout = 60;
 module_param(admin_timeout, uint, 0644);
 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
@@ -943,6 +957,16 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
 	return BLK_STS_OK;
 }
 
+static inline void nvme_assign_placement_id(struct nvme_ns *ns,
+					struct request *req,
+					struct nvme_command *cmd)
+{
+	enum rw_hint h = min(ns->head->nr_plids, req->write_hint);
+
+	cmd->rw.control |= cpu_to_le16(NVME_RW_DTYPE_DPLCMT);
+	cmd->rw.dsmgmt |= cpu_to_le32(ns->head->plids[h] << 16);
+}
+
 static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		struct request *req, struct nvme_command *cmnd,
 		enum nvme_opcode op)
@@ -1058,6 +1082,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
 		break;
 	case REQ_OP_WRITE:
 		ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write);
+		if (!ret && ns->head->nr_plids)
+			nvme_assign_placement_id(ns, req, cmd);
 		break;
 	case REQ_OP_ZONE_APPEND:
 		ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
@@ -2070,6 +2096,40 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
 	return ret;
 }
 
+static int nvme_fetch_fdp_plids(struct nvme_ns *ns, u32 nsid)
+{
+	struct nvme_command c = {};
+	struct nvme_fdp_ruh_status *ruhs;
+	struct nvme_fdp_ruh_status_desc *ruhsd;
+	int size, ret, i;
+
+	size = sizeof(*ruhs) + NVME_MAX_PLIDS * sizeof(*ruhsd);
+	ruhs = kzalloc(size, GFP_KERNEL);
+	if (!ruhs)
+		return -ENOMEM;
+
+	c.imr.opcode = nvme_cmd_io_mgmt_recv;
+	c.imr.nsid = cpu_to_le32(nsid);
+	c.imr.mo = 0x1;
+	c.imr.numd =  cpu_to_le32((size >> 2) - 1);
+
+	ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
+	if (ret)
+		goto out;
+
+	ns->head->nr_plids = le16_to_cpu(ruhs->nruhsd);
+	ns->head->nr_plids =
+		min_t(u16, ns->head->nr_plids, NVME_MAX_PLIDS);
+
+	for (i = 0; i < ns->head->nr_plids; i++) {
+		ruhsd = &ruhs->ruhsd[i];
+		ns->head->plids[i] = le16_to_cpu(ruhsd->pid);
+	}
+out:
+	kfree(ruhs);
+	return ret;
+}
+
 static int nvme_update_ns_info_block(struct nvme_ns *ns,
 		struct nvme_ns_info *info)
 {
@@ -2157,6 +2217,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 		if (ret && !nvme_first_scan(ns->disk))
 			goto out;
 	}
+	if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) {
+		ret = nvme_fetch_fdp_plids(ns, info->nsid);
+		if (ret)
+			dev_warn(ns->ctrl->device,
+				"FDP failure status:0x%x\n", ret);
+	}
+
 
 	ret = 0;
 out:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d0ed64dc7380..67dad29fe289 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -440,6 +440,8 @@ struct nvme_ns_ids {
 	u8	csi;
 };
 
+#define NVME_MAX_PLIDS   (128)
+
 /*
  * Anchor structure for namespaces.  There is one for each namespace in a
  * NVMe subsystem that any of our controllers can see, and the namespace
@@ -457,6 +459,8 @@ struct nvme_ns_head {
 	bool			shared;
 	bool			passthru_err_log_enabled;
 	int			instance;
+	u16			nr_plids;
+	u16			plids[NVME_MAX_PLIDS];
 	struct nvme_effects_log *effects;
 	u64			nuse;
 	unsigned		ns_id;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 425573202295..fc07ba1b5ec5 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -270,6 +270,7 @@ enum nvme_ctrl_attr {
 	NVME_CTRL_ATTR_HID_128_BIT	= (1 << 0),
 	NVME_CTRL_ATTR_TBKAS		= (1 << 6),
 	NVME_CTRL_ATTR_ELBAS		= (1 << 15),
+	NVME_CTRL_ATTR_FDPS		= (1 << 19),
 };
 
 struct nvme_id_ctrl {
@@ -829,6 +830,7 @@ enum nvme_opcode {
 	nvme_cmd_resv_register	= 0x0d,
 	nvme_cmd_resv_report	= 0x0e,
 	nvme_cmd_resv_acquire	= 0x11,
+	nvme_cmd_io_mgmt_recv	= 0x12,
 	nvme_cmd_resv_release	= 0x15,
 	nvme_cmd_zone_mgmt_send	= 0x79,
 	nvme_cmd_zone_mgmt_recv	= 0x7a,
@@ -850,6 +852,7 @@ enum nvme_opcode {
 		nvme_opcode_name(nvme_cmd_resv_register),	\
 		nvme_opcode_name(nvme_cmd_resv_report),		\
 		nvme_opcode_name(nvme_cmd_resv_acquire),	\
+		nvme_opcode_name(nvme_cmd_io_mgmt_recv),	\
 		nvme_opcode_name(nvme_cmd_resv_release),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_send),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_recv),	\
@@ -1001,6 +1004,7 @@ enum {
 	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
 	NVME_RW_PRINFO_PRACT		= 1 << 13,
 	NVME_RW_DTYPE_STREAMS		= 1 << 4,
+	NVME_RW_DTYPE_DPLCMT		= 2 << 4,
 	NVME_WZ_DEAC			= 1 << 9,
 };
 
@@ -1088,6 +1092,20 @@ struct nvme_zone_mgmt_recv_cmd {
 	__le32			cdw14[2];
 };
 
+struct nvme_io_mgmt_recv_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__le64			rsvd2[2];
+	union nvme_data_ptr	dptr;
+	__u8			mo;
+	__u8			rsvd11;
+	__u16			mos;
+	__le32			numd;
+	__le32			cdw12[4];
+};
+
 enum {
 	NVME_ZRA_ZONE_REPORT		= 0,
 	NVME_ZRASF_ZONE_REPORT_ALL	= 0,
@@ -1808,6 +1826,7 @@ struct nvme_command {
 		struct nvmf_auth_receive_command auth_receive;
 		struct nvme_dbbuf dbbuf;
 		struct nvme_directive_cmd directive;
+		struct nvme_io_mgmt_recv_cmd imr;
 	};
 };
 
-- 
2.25.1



* Re: [PATCH] nvme: enable FDP support
  2024-05-10 13:40 ` [PATCH] nvme: enable FDP support Kanchan Joshi
@ 2024-05-10 19:30   ` Keith Busch
  2024-05-11  9:20   ` kernel test robot
  2024-05-13  7:56   ` Viacheslav Dubeyko
  2 siblings, 0 replies; 11+ messages in thread
From: Keith Busch @ 2024-05-10 19:30 UTC (permalink / raw)
  To: Kanchan Joshi
  Cc: axboe, hch, linux-nvme, linux-block, javier.gonz, bvanassche,
	david, slava, gost.dev, Hui Qi, Nitesh Shetty

On Fri, May 10, 2024 at 07:10:15PM +0530, Kanchan Joshi wrote:
> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> to control the placement of logical blocks so as to reduce the SSD WAF.
> 
> Userspace can send the data lifetime information using the write hints.
> The SCSI driver (sd) can already pass this information to the SCSI
> devices. This patch does the same for NVMe.
> 
> Fetches the placement-identifiers (plids) if the device supports FDP.
> And map the incoming write-hints to plids.

Just some additional background, since this looks similar to when the
driver supported "streams".

Supporting streams in the driver was pretty much a non-issue. The feature was
removed because devices didn't work with streams as expected, and
supporting it carried more maintenance overhead for the upper layers.

Since the block layer re-introduced write hints anyway outside of this
use case, this looks fine to me to re-introduce support for those hints.

So why not add stream support back? As far as I know, devices never
implemented that feature as expected, the driver had to enable it on
start up, and there's no required feedback mechanism to see if it's even
working or hurting.

For FDP, the user had to have configured the namespace that way in order
to get this, so it's still an optional, opt-in feature. It's also
mandatory for FDP capable drives to report WAF through the endurance
log, so users can see the effects of using it.

It would be nice to compare endurance logs with and without the FDP
configuration enabled for your various workloads. This will be great to
discuss at LSFMM next week.

> +static int nvme_fetch_fdp_plids(struct nvme_ns *ns, u32 nsid)
> +{
> +	struct nvme_command c = {};
> +	struct nvme_fdp_ruh_status *ruhs;
> +	struct nvme_fdp_ruh_status_desc *ruhsd;
> +	int size, ret, i;
> +
> +	size = sizeof(*ruhs) + NVME_MAX_PLIDS * sizeof(*ruhsd);

	size = struct_size(ruhs, ruhsd, NVME_MAX_PLIDS);

> +#define NVME_MAX_PLIDS   (128)
> +
>  /*
>   * Anchor structure for namespaces.  There is one for each namespace in a
>   * NVMe subsystem that any of our controllers can see, and the namespace
> @@ -457,6 +459,8 @@ struct nvme_ns_head {
>  	bool			shared;
>  	bool			passthru_err_log_enabled;
>  	int			instance;
> +	u16			nr_plids;
> +	u16			plids[NVME_MAX_PLIDS];

The largest index needed is WRITE_LIFE_EXTREME, which is "5", so I think
NVME_MAX_PLIDS should be the same value. And it will save space in the
struct.
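
For reference, these hint values come from enum rw_hint, which mirrors the
RWH_WRITE_LIFE_* constants in the fcntl uapi header. A from-memory sketch,
worth double-checking against include/linux/rw_hint.h in your tree:

	enum rw_hint {
		WRITE_LIFE_NOT_SET	= 0,	/* RWH_WRITE_LIFE_NOT_SET */
		WRITE_LIFE_NONE		= 1,	/* RWH_WRITE_LIFE_NONE */
		WRITE_LIFE_SHORT	= 2,	/* RWH_WRITE_LIFE_SHORT */
		WRITE_LIFE_MEDIUM	= 3,	/* RWH_WRITE_LIFE_MEDIUM */
		WRITE_LIFE_LONG		= 4,	/* RWH_WRITE_LIFE_LONG */
		WRITE_LIFE_EXTREME	= 5,	/* RWH_WRITE_LIFE_EXTREME */
	};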


* Re: [PATCH] nvme: enable FDP support
  2024-05-10 13:40 ` [PATCH] nvme: enable FDP support Kanchan Joshi
  2024-05-10 19:30   ` Keith Busch
@ 2024-05-11  9:20   ` kernel test robot
  2024-05-13  7:56   ` Viacheslav Dubeyko
  2 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2024-05-11  9:20 UTC (permalink / raw)
  To: Kanchan Joshi, axboe, kbusch, hch
  Cc: oe-kbuild-all, linux-nvme, linux-block, javier.gonz, bvanassche,
	david, slava, gost.dev, Kanchan Joshi, Hui Qi, Nitesh Shetty

Hi Kanchan,

kernel test robot noticed the following build warnings:

[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on linus/master v6.9-rc7 next-20240510]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Kanchan-Joshi/nvme-enable-FDP-support/20240510-214900
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link:    https://lore.kernel.org/r/20240510134015.29717-1-joshi.k%40samsung.com
patch subject: [PATCH] nvme: enable FDP support
config: x86_64-randconfig-121-20240511 (https://download.01.org/0day-ci/archive/20240511/202405111758.Ts2xnoZH-lkp@intel.com/config)
compiler: gcc-9 (Ubuntu 9.5.0-4ubuntu2) 9.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240511/202405111758.Ts2xnoZH-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405111758.Ts2xnoZH-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> drivers/nvme/host/core.c:2120:30: sparse: sparse: cast to restricted __le16
   drivers/nvme/host/core.c:2126:38: sparse: sparse: cast to restricted __le16
   drivers/nvme/host/core.c: note: in included file (through include/linux/wait.h, include/linux/wait_bit.h, include/linux/fs.h, ...):
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true

vim +2120 drivers/nvme/host/core.c

  2098	
  2099	static int nvme_fetch_fdp_plids(struct nvme_ns *ns, u32 nsid)
  2100	{
  2101		struct nvme_command c = {};
  2102		struct nvme_fdp_ruh_status *ruhs;
  2103		struct nvme_fdp_ruh_status_desc *ruhsd;
  2104		int size, ret, i;
  2105	
  2106		size = sizeof(*ruhs) + NVME_MAX_PLIDS * sizeof(*ruhsd);
  2107		ruhs = kzalloc(size, GFP_KERNEL);
  2108		if (!ruhs)
  2109			return -ENOMEM;
  2110	
  2111		c.imr.opcode = nvme_cmd_io_mgmt_recv;
  2112		c.imr.nsid = cpu_to_le32(nsid);
  2113		c.imr.mo = 0x1;
  2114		c.imr.numd =  cpu_to_le32((size >> 2) - 1);
  2115	
  2116		ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
  2117		if (ret)
  2118			goto out;
  2119	
> 2120		ns->head->nr_plids = le16_to_cpu(ruhs->nruhsd);
  2121		ns->head->nr_plids =
  2122			min_t(u16, ns->head->nr_plids, NVME_MAX_PLIDS);
  2123	
  2124		for (i = 0; i < ns->head->nr_plids; i++) {
  2125			ruhsd = &ruhs->ruhsd[i];
  2126			ns->head->plids[i] = le16_to_cpu(ruhsd->pid);
  2127		}
  2128	out:
  2129		kfree(ruhs);
  2130		return ret;
  2131	}
  2132	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


* Re: [PATCH] nvme: enable FDP support
  2024-05-10 13:40 ` [PATCH] nvme: enable FDP support Kanchan Joshi
  2024-05-10 19:30   ` Keith Busch
  2024-05-11  9:20   ` kernel test robot
@ 2024-05-13  7:56   ` Viacheslav Dubeyko
  2024-05-14  8:44     ` Joel Granados
  2024-05-14 18:47     ` Kanchan Joshi
  2 siblings, 2 replies; 11+ messages in thread
From: Viacheslav Dubeyko @ 2024-05-13  7:56 UTC (permalink / raw)
  To: Kanchan Joshi
  Cc: Jens Axboe, Keith Busch, Christoph Hellwig, linux-nvme,
	linux-block, Javier González, Bart Van Assche, david,
	gost.dev, Hui Qi, Nitesh Shetty



> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
> 
> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> to control the placement of logical blocks so as to reduce the SSD WAF.
> 
> Userspace can send the data lifetime information using the write hints.
> The SCSI driver (sd) can already pass this information to the SCSI
> devices. This patch does the same for NVMe.
> 
> Fetches the placement-identifiers (plids) if the device supports FDP.
> And map the incoming write-hints to plids.
> 


Great! Thanks for sharing the patch.

Do we have documentation that explains how, for example, a kernel-space
file system can work with the block layer to employ FDP?

Do we already have FDP support in QEMU, in case there is no access to a
real device for testing?

Thanks,
Slava.



* Re: [PATCH] nvme: enable FDP support
  2024-05-13  7:56   ` Viacheslav Dubeyko
@ 2024-05-14  8:44     ` Joel Granados
  2024-05-14 18:47     ` Kanchan Joshi
  1 sibling, 0 replies; 11+ messages in thread
From: Joel Granados @ 2024-05-14  8:44 UTC (permalink / raw)
  To: Viacheslav Dubeyko
  Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
	linux-nvme, linux-block, Javier González, Bart Van Assche,
	david, gost.dev, Hui Qi, Nitesh Shetty


On Mon, May 13, 2024 at 10:56:00AM +0300, Viacheslav Dubeyko wrote:
> 
> 
> > On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
> > 
> > Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> > to control the placement of logical blocks so as to reduce the SSD WAF.
> > 
> > Userspace can send the data lifetime information using the write hints.
> > The SCSI driver (sd) can already pass this information to the SCSI
> > devices. This patch does the same for NVMe.
> > 
> > Fetches the placement-identifiers (plids) if the device supports FDP.
> > And map the incoming write-hints to plids.
> > 
> 
> 
> Great! Thanks for sharing  the patch.
> 
> Do  we have documentation that explains how, for example, kernel-space
> file system can work with block layer to employ FDP?
> 
> Do  we have FDP support in QEMU already if there is no access to real
> device for testing?
I believe FDP has been in QEMU for some time. Look for commit 73064edfb8
("hw/nvme: flexible data placement emulation" [Jesper Devantier]).

best

-- 

Joel Granados



* Re: [PATCH] nvme: enable FDP support
  2024-05-13  7:56   ` Viacheslav Dubeyko
  2024-05-14  8:44     ` Joel Granados
@ 2024-05-14 18:47     ` Kanchan Joshi
  2024-05-14 19:00       ` Viacheslav Dubeyko
  1 sibling, 1 reply; 11+ messages in thread
From: Kanchan Joshi @ 2024-05-14 18:47 UTC (permalink / raw)
  To: Viacheslav Dubeyko
  Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
	linux-nvme, linux-block, Javier González, Bart Van Assche,
	david, gost.dev, Hui Qi, Nitesh Shetty

On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>
>
>
> > On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
> >
> > Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> > to control the placement of logical blocks so as to reduce the SSD WAF.
> >
> > Userspace can send the data lifetime information using the write hints.
> > The SCSI driver (sd) can already pass this information to the SCSI
> > devices. This patch does the same for NVMe.
> >
> > Fetches the placement-identifiers (plids) if the device supports FDP.
> > And map the incoming write-hints to plids.
> >
>
>
> Great! Thanks for sharing  the patch.
>
> Do  we have documentation that explains how, for example, kernel-space
> file system can work with block layer to employ FDP?

This is primarily for user-driven/exposed hints. For file-system-driven
hints, the scheme is really file-system specific and will therefore vary
from one to another.
F2FS is the only example at the moment. Its 'fs-based' policy can act as
a reference for one way to go about it.
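
To illustrate the user-driven path, here is a minimal userspace sketch (not
part of this patch; the file name is arbitrary, and the fallback defines
carry the uapi values, so please verify them against your headers). The
hint set with F_SET_RW_HINT travels down with subsequent writes on that fd:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	#ifndef F_SET_RW_HINT
	#define F_SET_RW_HINT		1036	/* F_LINUX_SPECIFIC_BASE + 12 */
	#endif
	#ifndef RWH_WRITE_LIFE_SHORT
	#define RWH_WRITE_LIFE_SHORT	2
	#endif

	int main(void)
	{
		uint64_t hint = RWH_WRITE_LIFE_SHORT;	/* short-lived data */
		int fd = open("testfile", O_WRONLY | O_CREAT, 0644);

		if (fd < 0 || fcntl(fd, F_SET_RW_HINT, &hint) < 0) {
			perror("F_SET_RW_HINT");
			return 1;
		}
		/* writes on this fd now carry the lifetime hint down the
		 * stack, where sd/nvme can map it (to a plid, for FDP) */
		if (write(fd, "hello", 5) < 0)
			perror("write");
		close(fd);
		return 0;
	}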


* Re: [PATCH] nvme: enable FDP support
  2024-05-14 18:47     ` Kanchan Joshi
@ 2024-05-14 19:00       ` Viacheslav Dubeyko
  2024-05-15  3:30         ` Kanchan Joshi
  0 siblings, 1 reply; 11+ messages in thread
From: Viacheslav Dubeyko @ 2024-05-14 19:00 UTC (permalink / raw)
  To: Kanchan Joshi
  Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
	linux-nvme, linux-block, Javier González, Bart Van Assche,
	david, gost.dev, Hui Qi, Nitesh Shetty



> On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
> 
> On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>> 
>> 
>> 
>>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
>>> 
>>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
>>> to control the placement of logical blocks so as to reduce the SSD WAF.
>>> 
>>> Userspace can send the data lifetime information using the write hints.
>>> The SCSI driver (sd) can already pass this information to the SCSI
>>> devices. This patch does the same for NVMe.
>>> 
>>> Fetches the placement-identifiers (plids) if the device supports FDP.
>>> And map the incoming write-hints to plids.
>>> 
>> 
>> 
>> Great! Thanks for sharing  the patch.
>> 
>> Do  we have documentation that explains how, for example, kernel-space
>> file system can work with block layer to employ FDP?
> 
> This is primarily for user driven/exposed hints. For file system
> driven hints, the scheme is really file system specific and therefore,
> will vary from one to another.
> F2FS is one (and only at the moment) example. Its 'fs-based' policy
> can act as a reference for one way to go about it.

Yes, I completely see the point. I would like to employ FDP in my
kernel-space file system (SSDFS), and I have a vision of how I can do it.
But I would simply like to see some documentation explaining the API and
the limitations of FDP for the case of kernel-space file systems.

Thanks,
Slava.



* Re: [PATCH] nvme: enable FDP support
  2024-05-15  3:30         ` Kanchan Joshi
@ 2024-05-14 21:40           ` Viacheslav Dubeyko
  2024-05-17 16:27             ` Kanchan Joshi
  0 siblings, 1 reply; 11+ messages in thread
From: Viacheslav Dubeyko @ 2024-05-14 21:40 UTC (permalink / raw)
  To: Kanchan Joshi
  Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
	linux-nvme, linux-block, Javier González, Bart Van Assche,
	david, gost.dev, Hui Qi, Nitesh Shetty



> On May 15, 2024, at 6:30 AM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
> 
> On Tue, May 14, 2024 at 1:00 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>> On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
>>> 
>>> On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>>> 
>>>> 
>>>> 
>>>>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
>>>>> 
>>>>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
>>>>> to control the placement of logical blocks so as to reduce the SSD WAF.
>>>>> 
>>>>> Userspace can send the data lifetime information using the write hints.
>>>>> The SCSI driver (sd) can already pass this information to the SCSI
>>>>> devices. This patch does the same for NVMe.
>>>>> 
>>>>> Fetches the placement-identifiers (plids) if the device supports FDP.
>>>>> And map the incoming write-hints to plids.
>>>>> 
>>>> 
>>>> 
>>>> Great! Thanks for sharing  the patch.
>>>> 
>>>> Do  we have documentation that explains how, for example, kernel-space
>>>> file system can work with block layer to employ FDP?
>>> 
>>> This is primarily for user driven/exposed hints. For file system
>>> driven hints, the scheme is really file system specific and therefore,
>>> will vary from one to another.
>>> F2FS is one (and only at the moment) example. Its 'fs-based' policy
>>> can act as a reference for one way to go about it.
>> 
>> Yes, I completely see the point. I would like to employ the FDP in my
>> kernel-space file system (SSDFS). And I have a vision how I can do it.
>> But I simply would like to see some documentation with the explanation of
>> API and limitations of FDP for the case of kernel-space file systems.
> 
> Nothing complicated for early experimentation.
> Once FS has determined the hint value, it can put that into
> bio->bi_write_hint and send bio down.
> 
> If SSDFS cares about user-exposed hints too, it can choose different
> hint values than what is exposed to user-space.
> Or it can do what F2FS does - use the mount option as a toggle to
> reuse the same values either for user-hints or fs-defined hints.

How many hint values can a file system use? Any limitations here?

And how can a file system detect that it is an FDP-based device?

Thanks,
Slava.



* Re: [PATCH] nvme: enable FDP support
  2024-05-14 19:00       ` Viacheslav Dubeyko
@ 2024-05-15  3:30         ` Kanchan Joshi
  2024-05-14 21:40           ` Viacheslav Dubeyko
  0 siblings, 1 reply; 11+ messages in thread
From: Kanchan Joshi @ 2024-05-15  3:30 UTC (permalink / raw)
  To: Viacheslav Dubeyko
  Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
	linux-nvme, linux-block, Javier González, Bart Van Assche,
	david, gost.dev, Hui Qi, Nitesh Shetty

On Tue, May 14, 2024 at 1:00 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
> > On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
> >
> > On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
> >>
> >>
> >>
> >>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
> >>>
> >>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> >>> to control the placement of logical blocks so as to reduce the SSD WAF.
> >>>
> >>> Userspace can send the data lifetime information using the write hints.
> >>> The SCSI driver (sd) can already pass this information to the SCSI
> >>> devices. This patch does the same for NVMe.
> >>>
> >>> Fetches the placement-identifiers (plids) if the device supports FDP.
> >>> And map the incoming write-hints to plids.
> >>>
> >>
> >>
> >> Great! Thanks for sharing  the patch.
> >>
> >> Do  we have documentation that explains how, for example, kernel-space
> >> file system can work with block layer to employ FDP?
> >
> > This is primarily for user driven/exposed hints. For file system
> > driven hints, the scheme is really file system specific and therefore,
> > will vary from one to another.
> > F2FS is one (and only at the moment) example. Its 'fs-based' policy
> > can act as a reference for one way to go about it.
>
> Yes, I completely see the point. I would like to employ the FDP in my
> kernel-space file system (SSDFS). And I have a vision how I can do it.
> But I simply would like to see some documentation with the explanation of
> API and limitations of FDP for the case of kernel-space file systems.

Nothing complicated for early experimentation.
Once FS has determined the hint value, it can put that into
bio->bi_write_hint and send bio down.

If SSDFS cares about user-exposed hints too, it can choose different
hint values than what is exposed to user-space.
Or it can do what F2FS does - use the mount option as a toggle to
reuse the same values either for user-hints or fs-defined hints.
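
To make that concrete, here is a minimal kernel-side sketch (the function
name is hypothetical, not from any tree; it assumes the bio allocation and
write-hint fields in current mainline, so adjust to your kernel) of a file
system tagging a write bio with its chosen hint:

	static void ssdfs_submit_hinted_write(struct block_device *bdev,
					      struct page *page,
					      sector_t sector,
					      enum rw_hint hint)
	{
		struct bio *bio;

		bio = bio_alloc(bdev, 1, REQ_OP_WRITE, GFP_NOFS);
		bio->bi_iter.bi_sector = sector;
		bio->bi_write_hint = hint;	/* e.g. WRITE_LIFE_SHORT */
		__bio_add_page(bio, page, PAGE_SIZE, 0);

		/* synchronous submission only to keep the example short */
		submit_bio_wait(bio);
		bio_put(bio);
	}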


* Re: [PATCH] nvme: enable FDP support
  2024-05-14 21:40           ` Viacheslav Dubeyko
@ 2024-05-17 16:27             ` Kanchan Joshi
  2024-05-17 17:22               ` Viacheslav Dubeyko
  0 siblings, 1 reply; 11+ messages in thread
From: Kanchan Joshi @ 2024-05-17 16:27 UTC (permalink / raw)
  To: Viacheslav Dubeyko
  Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
	linux-nvme, linux-block, Javier González, Bart Van Assche,
	david, gost.dev, Hui Qi, Nitesh Shetty

On Tue, May 14, 2024 at 2:40 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>
>
>
> > On May 15, 2024, at 6:30 AM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
> >
> > On Tue, May 14, 2024 at 1:00 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
> >>> On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
> >>>
> >>> On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
> >>>>
> >>>>
> >>>>
> >>>>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
> >>>>>
> >>>>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> >>>>> to control the placement of logical blocks so as to reduce the SSD WAF.
> >>>>>
> >>>>> Userspace can send the data lifetime information using the write hints.
> >>>>> The SCSI driver (sd) can already pass this information to the SCSI
> >>>>> devices. This patch does the same for NVMe.
> >>>>>
> >>>>> Fetches the placement-identifiers (plids) if the device supports FDP.
> >>>>> And map the incoming write-hints to plids.
> >>>>>
> >>>>
> >>>>
> >>>> Great! Thanks for sharing  the patch.
> >>>>
> >>>> Do  we have documentation that explains how, for example, kernel-space
> >>>> file system can work with block layer to employ FDP?
> >>>
> >>> This is primarily for user driven/exposed hints. For file system
> >>> driven hints, the scheme is really file system specific and therefore,
> >>> will vary from one to another.
> >>> F2FS is one (and only at the moment) example. Its 'fs-based' policy
> >>> can act as a reference for one way to go about it.
> >>
> >> Yes, I completely see the point. I would like to employ the FDP in my
> >> kernel-space file system (SSDFS). And I have a vision how I can do it.
> >> But I simply would like to see some documentation with the explanation of
> >> API and limitations of FDP for the case of kernel-space file systems.
> >
> > Nothing complicated for early experimentation.
> > Once FS has determined the hint value, it can put that into
> > bio->bi_write_hint and send bio down.
> >
> > If SSDFS cares about user-exposed hints too, it can choose different
> > hint values than what is exposed to user-space.
> > Or it can do what F2FS does - use the mount option as a toggle to
> > reuse the same values either for user-hints or fs-defined hints.
>
> How many hint values file system can use? Any limitations here?

As many as are already defined (in rw_hint.h). It is possible to go higher
too; there is no hard limitation per se. A write is not going to fail even
if it carries a hint that does not exist.

> And how file system can detect that it’s FDP-based device?

It does not need to detect that. The file system sees write hints; FDP is
a lower-level detail.


* Re: [PATCH] nvme: enable FDP support
  2024-05-17 16:27             ` Kanchan Joshi
@ 2024-05-17 17:22               ` Viacheslav Dubeyko
  0 siblings, 0 replies; 11+ messages in thread
From: Viacheslav Dubeyko @ 2024-05-17 17:22 UTC (permalink / raw)
  To: Kanchan Joshi
  Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
	linux-nvme, linux-block, Javier González, Bart Van Assche,
	david, gost.dev, Hui Qi, Nitesh Shetty



> On May 17, 2024, at 7:27 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
> 
> On Tue, May 14, 2024 at 2:40 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>> 
>> 
>> 
>>> On May 15, 2024, at 6:30 AM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
>>> 
>>> On Tue, May 14, 2024 at 1:00 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>>>> On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
>>>>> 
>>>>> On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>>>>> 
>>>>>> 
>>>>>> 
>>>>>>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
>>>>>>> 
>>>>>>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
>>>>>>> to control the placement of logical blocks so as to reduce the SSD WAF.
>>>>>>> 
>>>>>>> Userspace can send the data lifetime information using the write hints.
>>>>>>> The SCSI driver (sd) can already pass this information to the SCSI
>>>>>>> devices. This patch does the same for NVMe.
>>>>>>> 
>>>>>>> Fetches the placement-identifiers (plids) if the device supports FDP.
>>>>>>> And map the incoming write-hints to plids.
>>>>>>> 
>>>>>> 
>>>>>> 
>>>>>> Great! Thanks for sharing  the patch.
>>>>>> 
>>>>>> Do  we have documentation that explains how, for example, kernel-space
>>>>>> file system can work with block layer to employ FDP?
>>>>> 
>>>>> This is primarily for user driven/exposed hints. For file system
>>>>> driven hints, the scheme is really file system specific and therefore,
>>>>> will vary from one to another.
>>>>> F2FS is one (and only at the moment) example. Its 'fs-based' policy
>>>>> can act as a reference for one way to go about it.
>>>> 
>>>> Yes, I completely see the point. I would like to employ the FDP in my
>>>> kernel-space file system (SSDFS). And I have a vision how I can do it.
>>>> But I simply would like to see some documentation with the explanation of
>>>> API and limitations of FDP for the case of kernel-space file systems.
>>> 
>>> Nothing complicated for early experimentation.
>>> Once FS has determined the hint value, it can put that into
>>> bio->bi_write_hint and send bio down.
>>> 
>>> If SSDFS cares about user-exposed hints too, it can choose different
>>> hint values than what is exposed to user-space.
>>> Or it can do what F2FS does - use the mount option as a toggle to
>>> reuse the same values either for user-hints or fs-defined hints.
>> 
>> How many hint values file system can use? Any limitations here?
> 
> As many as already defined (in rw_hint.h). Possible to go higher too.
> No hard limitation per se. Write is not going to fail even if it sends
> a hint that does not exist.
> 

OK. I see. Thanks.

>> And how file system can detect that it’s FDP-based device?
> 
> It does not need to detect. File system sees write-hints; FDP is a
> lower-level detail.

I see your point. But SSDFS doesn't need hints from the user-space side.
SSDFS has various types of segments (several types of metadata segments and
a user data segment), and I would like to use hints for these different
segment types. I mean that SSDFS needs to decide when, and for what type of
data or metadata, to send such hints, without any instructions from the
user-space side.

Technically speaking, the user-space side doesn't need to provide any hints
to SSDFS, because SSDFS can manage everything without them. So I would like
to have the opportunity to change SSDFS behavior for different types of
devices:

if (zns_device)
   execute_zns_related_logic
else if (fdp_device)
   execute_fdp_related_logic
else // conventional SSD
   execute_conventional_ssd_logic

Does this mean that there is no way to detect an FDP-based device?

Thanks,
Slava.


