* [PATCH V4] nvmet: add buffered I/O support for file backed ns
@ 2018-06-20  4:01 Chaitanya Kulkarni
  2018-06-20  8:21 ` Christoph Hellwig
  2018-06-20 17:44 ` Sagi Grimberg
  0 siblings, 2 replies; 5+ messages in thread
From: Chaitanya Kulkarni @ 2018-06-20  4:01 UTC (permalink / raw)


This is an incremental patch based on adding support for file backed
namespaces for NVMeOF target:-

http://lists.infradead.org/pipermail/linux-nvme/2018-May/017775.html
http://lists.infradead.org/pipermail/linux-nvme/2018-May/017777.html

We introduce a new target namespace attribute, "buffered_io". By default
"buffered_io" is false: we open the backend file with the O_DIRECT flag
and set the appropriate direct I/O flags when submitting I/Os. To switch
between the modes, the user needs to disable the namespace through
configfs, set the new "buffered_io" value, and re-enable the namespace;
the new value only takes effect once the namespace is enabled again.
A usage sketch follows.
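
For illustration, a minimal user-space sketch of the toggle sequence (the
subsystem name "testnqn" and nsid 1 are hypothetical; adjust the configfs
paths for your setup):

	#include <stdio.h>

	/* hypothetical example path; depends on your subsystem/nsid */
	#define NS_DIR "/sys/kernel/config/nvmet/subsystems/testnqn/namespaces/1"

	static void set_attr(const char *attr, const char *val)
	{
		char path[256];
		FILE *f;

		snprintf(path, sizeof(path), NS_DIR "/%s", attr);
		f = fopen(path, "w");
		if (!f)
			return;
		fputs(val, f);
		fclose(f);
	}

	int main(void)
	{
		set_attr("enable", "0");	/* disable the ns first */
		set_attr("buffered_io", "1");	/* set while disabled */
		set_attr("enable", "1");	/* new value takes effect here */
		return 0;
	}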

To handle Read/Write requests when "buffered_io" is set, we introduce a
per-namespace workqueue.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
Changes since V3 :-
1. Remove the transparent mode switch and allow switching between buffered
   and direct I/O mode only when the ns is disabled manually.
2. Change the patch title.

Changes since V2 :-
1. Add support for transparent mode switch between buffered and
   direct I/O.

Changes since V1 :-
1. Rename the "vwc" namespace configfs attribute to "buffered_io".
---
 drivers/nvme/target/configfs.c    | 30 +++++++++++++++++++++
 drivers/nvme/target/core.c        |  1 +
 drivers/nvme/target/io-cmd-file.c | 43 +++++++++++++++++++++++++++----
 drivers/nvme/target/nvmet.h       |  2 ++
 4 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index d3f3b3ec4d1a..3c06c517f009 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -407,11 +407,41 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item,
 
 CONFIGFS_ATTR(nvmet_ns_, enable);
 
+static ssize_t nvmet_ns_buffered_io_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%d\n", to_nvmet_ns(item)->buffered_io);
+}
+
+static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_ns *ns = to_nvmet_ns(item);
+	bool buffered_io;
+	int ret = 0;
+
+	if (strtobool(page, &buffered_io))
+		return -EINVAL;
+
+	mutex_lock(&ns->subsys->lock);
+	if (ns->enabled == false)
+		ns->buffered_io = buffered_io;
+	else {
+		pr_err("disable ns before setting buffered_io value.\n");
+		ret = -EINVAL;
+	}
+	mutex_unlock(&ns->subsys->lock);
+
+	return ret ? ret : count;
+}
+
+CONFIGFS_ATTR(nvmet_ns_, buffered_io);
+
 static struct configfs_attribute *nvmet_ns_attrs[] = {
 	&nvmet_ns_attr_device_path,
 	&nvmet_ns_attr_device_nguid,
 	&nvmet_ns_attr_device_uuid,
 	&nvmet_ns_attr_enable,
+	&nvmet_ns_attr_buffered_io,
 	NULL,
 };
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a03da764ecae..2888ed6d8665 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -437,6 +437,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 	ns->nsid = nsid;
 	ns->subsys = subsys;
 	uuid_gen(&ns->uuid);
+	ns->buffered_io = false;
 
 	return ns;
 }
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 8c42b3a8c420..09d793442833 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -16,6 +16,11 @@
 void nvmet_file_ns_disable(struct nvmet_ns *ns)
 {
 	if (ns->file) {
+		if (ns->buffered_io) {
+			flush_workqueue(ns->file_wq);
+			destroy_workqueue(ns->file_wq);
+			ns->file_wq = NULL;
+		}
 		mempool_destroy(ns->bvec_pool);
 		ns->bvec_pool = NULL;
 		kmem_cache_destroy(ns->bvec_cache);
@@ -27,11 +32,14 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
 
 int nvmet_file_ns_enable(struct nvmet_ns *ns)
 {
-	int ret;
+	int flags = O_RDWR | O_LARGEFILE;
 	struct kstat stat;
+	int ret;
+
+	if (!ns->buffered_io)
+		flags |= O_DIRECT;
 
-	ns->file = filp_open(ns->device_path,
-			O_RDWR | O_LARGEFILE | O_DIRECT, 0);
+	ns->file = filp_open(ns->device_path, flags, 0);
 	if (IS_ERR(ns->file)) {
 		pr_err("failed to open file %s: (%ld)\n",
 				ns->device_path, PTR_ERR(ns->file));
@@ -62,6 +70,15 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
 		goto err;
 	}
 
+	if (ns->buffered_io) {
+		ns->file_wq = alloc_workqueue("nvmet-file",
+				WQ_UNBOUND_MAX_ACTIVE | WQ_MEM_RECLAIM, 0);
+		if (!ns->file_wq) {
+			ret = -ENOMEM;
+			goto err;
+		}
+	}
+
 	return ret;
 err:
 	ns->size = 0;
@@ -100,7 +117,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
 
 	iocb->ki_pos = pos;
 	iocb->ki_filp = req->ns->file;
-	iocb->ki_flags = IOCB_DIRECT | ki_flags;
+	iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);
 
 	ret = call_iter(iocb, &iter);
 
@@ -189,6 +206,19 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
 	nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
 }
 
+static void nvmet_file_buffered_io_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+
+	nvmet_file_execute_rw(req);
+}
+
+static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
+{
+	INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
+	queue_work(req->ns->file_wq, &req->f.work);
+}
+
 static void nvmet_file_flush_work(struct work_struct *w)
 {
 	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
@@ -280,7 +310,10 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
 	switch (cmd->common.opcode) {
 	case nvme_cmd_read:
 	case nvme_cmd_write:
-		req->execute = nvmet_file_execute_rw;
+		if (!req->ns->buffered_io)
+			req->execute = nvmet_file_execute_rw;
+		else
+			req->execute = nvmet_file_execute_rw_buffered_io;
 		req->data_len = nvmet_rw_len(req);
 		return 0;
 	case nvme_cmd_flush:
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 480dfe10fad9..c4aa54d933f1 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -65,6 +65,8 @@ struct nvmet_ns {
 	u8			nguid[16];
 	uuid_t			uuid;
 
+	bool			buffered_io;
+	struct workqueue_struct *file_wq;
 	bool			enabled;
 	struct nvmet_subsys	*subsys;
 	const char		*device_path;
-- 
2.17.0


* [PATCH V4] nvmet: add buffered I/O support for file backed ns
  2018-06-20  4:01 [PATCH V4] nvmet: add buffered I/O support for file backed ns Chaitanya Kulkarni
@ 2018-06-20  8:21 ` Christoph Hellwig
  2018-06-20 17:44 ` Sagi Grimberg
  1 sibling, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2018-06-20  8:21 UTC (permalink / raw)


> +		ns->file_wq = alloc_workqueue("nvmet-file",
> +				WQ_UNBOUND_MAX_ACTIVE | WQ_MEM_RECLAIM, 0);

The WQ_UNBOUND_MAX_ACTIVE here looks bogus; as far as I can tell it
isn't a flag for alloc_workqueue but needs to be passed as the last
(max_active) argument.

I've removed it for now, and applied the patch with a few minor cleanups
to the nvme-4.19 branch.
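
I.e., alloc_workqueue() takes (fmt, flags, max_active), so with the bogus
flag dropped the call becomes something like:

	ns->file_wq = alloc_workqueue("nvmet-file", WQ_MEM_RECLAIM, 0);

and if a raised concurrency limit on an unbound wq was the intent, that
would be spelled:

	ns->file_wq = alloc_workqueue("nvmet-file",
			WQ_UNBOUND | WQ_MEM_RECLAIM, WQ_UNBOUND_MAX_ACTIVE);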


* [PATCH V4] nvmet: add buffered I/O support for file backed ns
  2018-06-20  4:01 [PATCH V4] nvmet: add buffered I/O support for file backed ns Chaitanya Kulkarni
  2018-06-20  8:21 ` Christoph Hellwig
@ 2018-06-20 17:44 ` Sagi Grimberg
  2018-06-20 21:44   ` Chaitanya Kulkarni
  1 sibling, 1 reply; 5+ messages in thread
From: Sagi Grimberg @ 2018-06-20 17:44 UTC (permalink / raw)



> +static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
> +		const char *page, size_t count)
> +{
> +	struct nvmet_ns *ns = to_nvmet_ns(item);
> +	bool buffered_io;
> +	int ret = 0;
> +
> +	if (strtobool(page, &buffered_io))
> +		return -EINVAL;
> +
> +	mutex_unlock(&ns->subsys->lock);
> +	if (ns->enabled == false)
> +		ns->buffered_io = buffered_io;
> +	else {
> +		pr_err("disable ns before setting buffered_io value.\n");
> +		ret = -EINVAL;
> +	}
> +	mutex_unlock(&ns->subsys->lock);
> +
> +	return ret ? ret : count;

Consider changing this to:

	mutex_lock(&ns->subsys->lock);
	if (ns->enabled) {
		pr_err("cannot modify ns while enabled\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	ns->buffered_io = buffered_io;
	ret = count;
out_unlock:
	mutex_unlock(&ns->subsys->lock);
	return ret;

> +}
> +
> +CONFIGFS_ATTR(nvmet_ns_, buffered_io);
> +
>   static struct configfs_attribute *nvmet_ns_attrs[] = {
>   	&nvmet_ns_attr_device_path,
>   	&nvmet_ns_attr_device_nguid,
>   	&nvmet_ns_attr_device_uuid,
>   	&nvmet_ns_attr_enable,
> +	&nvmet_ns_attr_buffered_io,
>   	NULL,
>   };
>   
> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index a03da764ecae..2888ed6d8665 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -437,6 +437,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
>   	ns->nsid = nsid;
>   	ns->subsys = subsys;
>   	uuid_gen(&ns->uuid);
> +	ns->buffered_io = false;
>   
>   	return ns;
>   }
> diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
> index 8c42b3a8c420..09d793442833 100644
> --- a/drivers/nvme/target/io-cmd-file.c
> +++ b/drivers/nvme/target/io-cmd-file.c
> @@ -16,6 +16,11 @@
>   void nvmet_file_ns_disable(struct nvmet_ns *ns)
>   {
>   	if (ns->file) {
> +		if (ns->buffered_io) {
> +			flush_workqueue(ns->file_wq);
> +			destroy_workqueue(ns->file_wq);

destroy_workqueue flushes, so no need to do that explicitly.
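
I.e. the disable path could shrink to something like:

	if (ns->buffered_io) {
		destroy_workqueue(ns->file_wq);	/* flushes pending work itself */
		ns->file_wq = NULL;
	}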

> +			ns->file_wq = NULL;
> +		}
>   		mempool_destroy(ns->bvec_pool);
>   		ns->bvec_pool = NULL;
>   		kmem_cache_destroy(ns->bvec_cache);
> @@ -27,11 +32,14 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
>   
>   int nvmet_file_ns_enable(struct nvmet_ns *ns)
>   {
> -	int ret;
> +	int flags = O_RDWR | O_LARGEFILE;
>   	struct kstat stat;
> +	int ret;
> +
> +	if (!ns->buffered_io)
> +		flags |= O_DIRECT;
>   
> -	ns->file = filp_open(ns->device_path,
> -			O_RDWR | O_LARGEFILE | O_DIRECT, 0);
> +	ns->file = filp_open(ns->device_path, flags, 0);
>   	if (IS_ERR(ns->file)) {
>   		pr_err("failed to open file %s: (%ld)\n",
>   				ns->device_path, PTR_ERR(ns->file));
> @@ -62,6 +70,15 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
>   		goto err;
>   	}
>   
> +	if (ns->buffered_io) {
> +		ns->file_wq = alloc_workqueue("nvmet-file",
> +				WQ_UNBOUND_MAX_ACTIVE | WQ_MEM_RECLAIM, 0);

I seriously don't think that a wq per ns is a good idea... What was the
consideration for this?

And what's wrong with the system_unbound_wq?
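
I.e. (just a sketch) the buffered path could queue to the shared unbound
workqueue instead of a private one:

	static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
	{
		INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
		queue_work(system_unbound_wq, &req->f.work);
	}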


* [PATCH V4] nvmet: add buffered I/O support for file backed ns
  2018-06-20 17:44 ` Sagi Grimberg
@ 2018-06-20 21:44   ` Chaitanya Kulkarni
  2018-06-21  6:51     ` Sagi Grimberg
  0 siblings, 1 reply; 5+ messages in thread
From: Chaitanya Kulkarni @ 2018-06-20 21:44 UTC (permalink / raw)


From: Sagi Grimberg <sagi@grimberg.me>
Sent: Wednesday, June 20, 2018 10:44 AM
To: Chaitanya Kulkarni; linux-nvme@lists.infradead.org
Cc: hch@lst.de; keith.busch@intel.com
Subject: Re: [PATCH V4] nvmet: add buffered I/O support for file backed ns

> +static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
> +		const char *page, size_t count)
> +{
> +	struct nvmet_ns *ns = to_nvmet_ns(item);
> +	bool buffered_io;
> +	int ret = 0;
> +
> +	if (strtobool(page, &buffered_io))
> +		return -EINVAL;
> +
> +	mutex_lock(&ns->subsys->lock);
> +	if (ns->enabled == false)
> +		ns->buffered_io = buffered_io;
> +	else {
> +		pr_err("disable ns before setting buffered_io value.\n");
> +		ret = -EINVAL;
> +	}
> +	mutex_unlock(&ns->subsys->lock);
> +
> +	return ret ? ret : count;

Consider changing this to:

	mutex_lock(&ns->subsys->lock);
	if (ns->enabled) {
		pr_err("cannot modify ns while enabled\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	ns->buffered_io = buffered_io;
	ret = count;
out_unlock:
	mutex_unlock(&ns->subsys->lock);
	return ret;

[CK] I'm okay to change that if you prefer this style; can you please explain
     whether there is anything wrong with the current code?

> +}
> +
> +CONFIGFS_ATTR(nvmet_ns_, buffered_io);
> +
>  static struct configfs_attribute *nvmet_ns_attrs[] = {
>  	&nvmet_ns_attr_device_path,
>  	&nvmet_ns_attr_device_nguid,
>  	&nvmet_ns_attr_device_uuid,
>  	&nvmet_ns_attr_enable,
> +	&nvmet_ns_attr_buffered_io,
>  	NULL,
>  };
>  
> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index a03da764ecae..2888ed6d8665 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -437,6 +437,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
>  	ns->nsid = nsid;
>  	ns->subsys = subsys;
>  	uuid_gen(&ns->uuid);
> +	ns->buffered_io = false;
>  
>  	return ns;
>  }
> diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
> index 8c42b3a8c420..09d793442833 100644
> --- a/drivers/nvme/target/io-cmd-file.c
> +++ b/drivers/nvme/target/io-cmd-file.c
> @@ -16,6 +16,11 @@
>  void nvmet_file_ns_disable(struct nvmet_ns *ns)
>  {
>  	if (ns->file) {
> +		if (ns->buffered_io) {
> +			flush_workqueue(ns->file_wq);
> +			destroy_workqueue(ns->file_wq);

> destroy_workqueue flushes, so no need to do that explicitly.

[CK] Okay.

> +			ns->file_wq = NULL;
> +		}
>  		mempool_destroy(ns->bvec_pool);
>  		ns->bvec_pool = NULL;
>  		kmem_cache_destroy(ns->bvec_cache);
> @@ -27,11 +32,14 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
>  
>  int nvmet_file_ns_enable(struct nvmet_ns *ns)
>  {
> -	int ret;
> +	int flags = O_RDWR | O_LARGEFILE;
>  	struct kstat stat;
> +	int ret;
> +
> +	if (!ns->buffered_io)
> +		flags |= O_DIRECT;
>  
> -	ns->file = filp_open(ns->device_path,
> -			O_RDWR | O_LARGEFILE | O_DIRECT, 0);
> +	ns->file = filp_open(ns->device_path, flags, 0);
>  	if (IS_ERR(ns->file)) {
>  		pr_err("failed to open file %s: (%ld)\n",
>  				ns->device_path, PTR_ERR(ns->file));
> @@ -62,6 +70,15 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
>  		goto err;
>  	}
>  
> +	if (ns->buffered_io) {
> +		ns->file_wq = alloc_workqueue("nvmet-file",
> +				WQ_UNBOUND_MAX_ACTIVE | WQ_MEM_RECLAIM, 0);

> I seriously don't think that a wq per ns is a good idea... What was the
> consideration for this?

[CK] Since I/O submission is done on a per-ns basis, it is important to
     encapsulate the workqueue and make it a property of the ns. This way,
     I/Os from different namespaces will not interfere with each other.

     Can you please explain the benefits of using the following models:-

     1. 1:N (1 workqueue : N namespaces).
     2. M:N (M workqueues : N namespaces).

[CK] Now that the code is in, I'll send the configfs buffered_io parameter
     style change and the flush-wq removal as separate patches.

> And what's wrong with the system_unbound_wq?

Regards,
Chaitanya


* [PATCH V4] nvmet: add buffered I/O support for file backed ns
  2018-06-20 21:44   ` Chaitanya Kulkarni
@ 2018-06-21  6:51     ` Sagi Grimberg
  0 siblings, 0 replies; 5+ messages in thread
From: Sagi Grimberg @ 2018-06-21  6:51 UTC (permalink / raw)



> [CK] I'm okay to change that if you prefer this style; can you please explain
>      whether there is anything wrong with the current code?

It's not wrong, it's just a styling suggestion.

>> @@ -27,11 +32,14 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
>>  
>>  int nvmet_file_ns_enable(struct nvmet_ns *ns)
>>  {
>> -	int ret;
>> +	int flags = O_RDWR | O_LARGEFILE;
>>  	struct kstat stat;
>> +	int ret;
>> +
>> +	if (!ns->buffered_io)
>> +		flags |= O_DIRECT;
>>  
>> -	ns->file = filp_open(ns->device_path,
>> -			O_RDWR | O_LARGEFILE | O_DIRECT, 0);
>> +	ns->file = filp_open(ns->device_path, flags, 0);
>>  	if (IS_ERR(ns->file)) {
>>  		pr_err("failed to open file %s: (%ld)\n",
>>  				ns->device_path, PTR_ERR(ns->file));
>> @@ -62,6 +70,15 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
>>  		goto err;
>>  	}
>>  
>> +	if (ns->buffered_io) {
>> +		ns->file_wq = alloc_workqueue("nvmet-file",
>> +				WQ_UNBOUND_MAX_ACTIVE | WQ_MEM_RECLAIM, 0);
> 
> I seriously don't think that a wq per ns is a good idea... What was the
> consideration for this?
> 
> [CK] Since I/O submission is done on a per-ns basis, it is important to
>      encapsulate the workqueue and make it a property of the ns. This way,
>      I/Os from different namespaces will not interfere with each other.
> 
>      Can you please explain the benefits of using the following models:-
> 
>      1. 1:N (1 workqueue : N namespaces).
>      2. M:N (M workqueues : N namespaces).

wqs still share the same kworkers, and workqueue access is RCU protected,
so I'm not at all sure why this should make a difference.
Have you benchmarked per-ns workqueue vs. a shared one? I'd be
interested in knowing if it makes a difference. It would surprise
me if it did...

> [CK] Now that the code is in, I'll send the configfs buffered_io parameter
>      style change and the flush-wq removal as separate patches.

We have time to fix it in place; it's not sent to Jens yet...


