* [PATCH V3 0/2] nvmet: implement flush all support
@ 2018-06-26  3:15 Chaitanya Kulkarni
  2018-06-26  3:15 ` [PATCH V3 1/2] nvmet: move struct work_struct out of union Chaitanya Kulkarni
  2018-06-26  3:15 ` [PATCH V3 2/2] nvmet: add support for flush all Chaitanya Kulkarni
  0 siblings, 2 replies; 6+ messages in thread
From: Chaitanya Kulkarni @ 2018-06-26  3:15 UTC (permalink / raw)


Hi,

This implements the Flush command behavior described in NVMe 1.3a
TP 4035 for an NSID value of FFFFFFFFh: when the NSID field is set to
0xFFFFFFFF, the Flush command applies to all namespaces attached to the
controller processing it.
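
For reference, a host can exercise this path from userspace via the NVMe
passthrough interface; a minimal sketch (the device path is hypothetical,
and opcode 0x00 is Flush per the NVMe spec):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvme_ioctl.h>

int main(void)
{
	struct nvme_passthru_cmd cmd;
	int fd, ret;

	fd = open("/dev/nvme0n1", O_RDWR);	/* hypothetical attached ns */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = 0x00;		/* nvme_cmd_flush */
	cmd.nsid = 0xffffffff;		/* broadcast NSID per TP 4035 */

	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
	printf("flush all returned %d\n", ret);
	close(fd);
	return ret ? 1 : 0;
}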

Regards,
Chaitanya

Changes since V2 :-
1. Remove the nvmet_flush_work struct and use the nvmet_req directly.
2. Small cleanups.
3. Use system workqueue instead of high priority system workqueue.
4. Flush each work item as we iterate.
5. Don't use a worker thread context for the block device flush (submit_bio()).
6. Add an error message if a flush fails.
7. Remove the space/tab formatting changes from the preparation patch.

Changes since V1 :-
1. Use system workqueues and parallelize the flush command submission.
2. Incorporate the coding style changes suggested by Sagi.
3. Add a new struct nvmet_subsys member to keep track of the active
   namespaces belonging to a subsystem.

Chaitanya Kulkarni (2):
  nvmet: move struct work_struct out of union
  nvmet: add support for flush all

 drivers/nvme/target/admin-cmd.c   |  2 +-
 drivers/nvme/target/core.c        | 84 +++++++++++++++++++++++++++++++
 drivers/nvme/target/io-cmd-bdev.c | 12 +++--
 drivers/nvme/target/io-cmd-file.c | 33 ++++++------
 drivers/nvme/target/nvmet.h       | 12 ++++-
 5 files changed, 124 insertions(+), 19 deletions(-)

-- 
2.17.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH V3 1/2] nvmet: move struct work_struct out of union
  2018-06-26  3:15 [PATCH V3 0/2] nvmet: implement flush all support Chaitanya Kulkarni
@ 2018-06-26  3:15 ` Chaitanya Kulkarni
  2018-06-26  9:10   ` Sagi Grimberg
  2018-06-26  3:15 ` [PATCH V3 2/2] nvmet: add support for flush all Chaitanya Kulkarni
  1 sibling, 1 reply; 6+ messages in thread
From: Chaitanya Kulkarni @ 2018-06-26  3:15 UTC (permalink / raw)


This is a preparation patch which moves the struct work_struct member
out of the anonymous union in the request structure so that it can be
used to submit generic work.
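
To see why the member had to move, consider a toy userspace model of the
layout (field names and sizes here are illustrative, not the real struct):
union arms share storage, so a work item inside the file-backend arm
aliases the block-backend state and cannot carry backend-independent work.

#include <stdio.h>
#include <string.h>

struct toy_req {
	union {
		struct { char inline_bio[32]; } b;	/* block device backend */
		struct { char iocb[16]; char work[16]; } f;	/* file backend */
	};
};

int main(void)
{
	struct toy_req req;

	memset(req.b.inline_bio, 'B', sizeof(req.b.inline_bio));
	memset(req.f.work, 'W', sizeof(req.f.work));	/* aliases b's bytes */

	/* the tail of inline_bio was clobbered by the work write */
	printf("inline_bio[31] = '%c'\n", req.b.inline_bio[31]);	/* 'W' */
	return 0;
}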

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
---
 drivers/nvme/target/io-cmd-file.c | 24 ++++++++++++------------
 drivers/nvme/target/nvmet.h       |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 16502018fbfd..4e6e6c255a58 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -207,20 +207,20 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
 
 static void nvmet_file_buffered_io_work(struct work_struct *w)
 {
-	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+	struct nvmet_req *req = container_of(w, struct nvmet_req, work);
 
 	nvmet_file_execute_rw(req);
 }
 
 static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
 {
-	INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
-	queue_work(req->ns->file_wq, &req->f.work);
+	INIT_WORK(&req->work, nvmet_file_buffered_io_work);
+	queue_work(req->ns->file_wq, &req->work);
 }
 
 static void nvmet_file_flush_work(struct work_struct *w)
 {
-	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+	struct nvmet_req *req = container_of(w, struct nvmet_req, work);
 	int ret;
 
 	ret = vfs_fsync(req->ns->file, 1);
@@ -230,8 +230,8 @@ static void nvmet_file_flush_work(struct work_struct *w)
 
 static void nvmet_file_execute_flush(struct nvmet_req *req)
 {
-	INIT_WORK(&req->f.work, nvmet_file_flush_work);
-	schedule_work(&req->f.work);
+	INIT_WORK(&req->work, nvmet_file_flush_work);
+	schedule_work(&req->work);
 }
 
 static void nvmet_file_execute_discard(struct nvmet_req *req)
@@ -258,7 +258,7 @@ static void nvmet_file_execute_discard(struct nvmet_req *req)
 
 static void nvmet_file_dsm_work(struct work_struct *w)
 {
-	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+	struct nvmet_req *req = container_of(w, struct nvmet_req, work);
 
 	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
 	case NVME_DSMGMT_AD:
@@ -275,13 +275,13 @@ static void nvmet_file_dsm_work(struct work_struct *w)
 
 static void nvmet_file_execute_dsm(struct nvmet_req *req)
 {
-	INIT_WORK(&req->f.work, nvmet_file_dsm_work);
-	schedule_work(&req->f.work);
+	INIT_WORK(&req->work, nvmet_file_dsm_work);
+	schedule_work(&req->work);
 }
 
 static void nvmet_file_write_zeroes_work(struct work_struct *w)
 {
-	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+	struct nvmet_req *req = container_of(w, struct nvmet_req, work);
 	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
 	int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
 	loff_t offset;
@@ -298,8 +298,8 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
 
 static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
 {
-	INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
-	schedule_work(&req->f.work);
+	INIT_WORK(&req->work, nvmet_file_write_zeroes_work);
+	schedule_work(&req->work);
 }
 
 u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index c4aa54d933f1..b1cd8eb6717f 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -256,9 +256,9 @@ struct nvmet_req {
 			bool			mpool_alloc;
 			struct kiocb            iocb;
 			struct bio_vec          *bvec;
-			struct work_struct      work;
 		} f;
 	};
+	struct work_struct      work;
 	int			sg_cnt;
 	/* data length as parsed from the command: */
 	size_t			data_len;
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH V3 2/2] nvmet: add support for flush all
  2018-06-26  3:15 [PATCH V3 0/2] nvmet: implement flush all support Chaitanya Kulkarni
  2018-06-26  3:15 ` [PATCH V3 1/2] nvmet: move struct work_struct out of union Chaitanya Kulkarni
@ 2018-06-26  3:15 ` Chaitanya Kulkarni
  2018-06-26 10:50   ` Sagi Grimberg
  1 sibling, 1 reply; 6+ messages in thread
From: Chaitanya Kulkarni @ 2018-06-26  3:15 UTC (permalink / raw)


This patch implements the Flush command behavior described in NVMe 1.3a
TP 4035 for an NSID value of FFFFFFFFh: when the NSID field is set to
0xFFFFFFFF, the Flush command applies to all namespaces attached to the
controller processing it.

For a block device backed ns we execute the flush from the primary worker
thread (nvmet_flush_all_work()); for a file-backed ns we execute the flush
in a separate worker thread context (nvmet_file_flush_work()).

We also add a new struct nvmet_subsys member, "active_ns", to keep track
of the number of currently active namespaces. We update "active_ns" in
nvmet_ns_enable()/nvmet_ns_disable().
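
As context for the admin-cmd.c hunk below: the series also advertises the
capability through the Identify Controller VWC byte. A standalone sketch of
the bit arithmetic; the bits 2:1 encoding is our reading of TP 4035, and
NVME_CTRL_VWC_FLUSH_BROADCAST is a made-up name:

#include <stdio.h>

#define NVME_CTRL_VWC_PRESENT		(1 << 0)	/* volatile write cache present */
/* hypothetical name: bits 2:1 = 11b, Flush with NSID FFFFFFFFh supported */
#define NVME_CTRL_VWC_FLUSH_BROADCAST	((1 << 2) | (1 << 1))

int main(void)
{
	unsigned char vwc = NVME_CTRL_VWC_PRESENT | NVME_CTRL_VWC_FLUSH_BROADCAST;

	printf("id->vwc = 0x%02x\n", vwc);	/* prints 0x07 */
	return 0;
}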

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
---
 drivers/nvme/target/admin-cmd.c   |  2 +-
 drivers/nvme/target/core.c        | 84 +++++++++++++++++++++++++++++++
 drivers/nvme/target/io-cmd-bdev.c | 12 +++--
 drivers/nvme/target/io-cmd-file.c |  9 +++-
 drivers/nvme/target/nvmet.h       | 10 ++++
 5 files changed, 111 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index e2c6f8b39388..2ad52e5cdaed 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -256,7 +256,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 			NVME_CTRL_ONCS_WRITE_ZEROES);
 
 	/* XXX: don't report vwc if the underlying device is write through */
-	id->vwc = NVME_CTRL_VWC_PRESENT;
+	id->vwc = NVME_CTRL_VWC_PRESENT | 1 << 2 | 1 << 1;
 
 	/*
 	 * We can't support atomic writes bigger than a LBA without support
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 322f7cfc37a4..dd1030d3bb0a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -370,6 +370,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
 	}
 
+	subsys->active_ns++;
 	nvmet_ns_changed(subsys, ns->nsid);
 	ns->enabled = true;
 	ret = 0;
@@ -411,6 +412,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
 	mutex_lock(&subsys->lock);
 	nvmet_ns_changed(subsys, ns->nsid);
 	nvmet_ns_dev_disable(ns);
+	subsys->active_ns--;
 out_unlock:
 	mutex_unlock(&subsys->lock);
 }
@@ -543,6 +545,80 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 
+static void nvmet_flush_all_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, work);
+	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+	u16 status = NVME_SC_SUCCESS;
+	struct nvmet_req *req_arr;
+	struct nvmet_ns *ns;
+	unsigned int max_ns;
+	int cnt = 0, i;
+
+	req->ns = NULL;
+
+	/* only process namespaces present prior to flush command */
+	mutex_lock(&subsys->lock);
+	max_ns = subsys->active_ns;
+	mutex_unlock(&subsys->lock);
+
+	req_arr = kmalloc_array(max_ns, sizeof(*req_arr), GFP_KERNEL);
+	if (!req_arr) {
+		status = NVME_SC_INTERNAL;
+		goto out;
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ns, &subsys->namespaces, dev_link) {
+		struct nvmet_req *r = &req_arr[cnt];
+
+		if (cnt > max_ns)
+			break;
+
+		r->rsp = kmalloc(sizeof(*r->rsp), GFP_KERNEL);
+		if (!r->rsp) {
+			status = NVME_SC_INTERNAL;
+			break;
+		}
+		r->ns = ns;
+		r->cmd = req->cmd;
+		r->rsp->status = cpu_to_le16(0);
+
+		percpu_ref_get(&r->ns->ref);
+		rcu_read_unlock();
+
+		if (ns->file) {
+			INIT_WORK(&r->work, nvmet_file_flush_work);
+			schedule_work(&r->work);
+			flush_work(&r->work);
+		} else
+			nvmet_bdev_execute_flush(r);
+		cnt++;
+		rcu_read_lock();
+	}
+	rcu_read_unlock();
+
+	for (i = 0; i < cnt; i++) {
+		struct nvmet_req *r = &req_arr[i];
+
+		if (r->rsp->status) {
+			status = NVME_SC_INTERNAL | NVME_SC_DNR;
+			pr_err("flush all failed nsid: %u\n", r->ns->nsid);
+		}
+		percpu_ref_put(&r->ns->ref);
+		kfree(r->rsp);
+	}
+	kfree(req_arr);
+out:
+	nvmet_req_complete(req, status);
+}
+
+static void nvmet_execute_flush_all(struct nvmet_req *req)
+{
+	INIT_WORK(&req->work, nvmet_flush_all_work);
+	schedule_work(&req->work);
+}
+
 static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
@@ -552,6 +628,12 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 	if (unlikely(ret))
 		return ret;
 
+	if (unlikely(nvmet_cmd_flush_all(req))) {
+		req->execute = nvmet_execute_flush_all;
+		req->data_len = 0;
+		return NVME_SC_SUCCESS;
+	}
+
 	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
 	if (unlikely(!req->ns))
 		return NVME_SC_INVALID_NS | NVME_SC_DNR;
@@ -1077,6 +1159,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 	INIT_LIST_HEAD(&subsys->ctrls);
 	INIT_LIST_HEAD(&subsys->hosts);
 
+	subsys->active_ns = 0;
+
 	return subsys;
 }
 
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index e0b0f7df70c2..2aa9e790c0c8 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -46,10 +46,16 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
 
 static void nvmet_bio_done(struct bio *bio)
 {
+	u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
 	struct nvmet_req *req = bio->bi_private;
 
-	nvmet_req_complete(req,
-		bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+	if (unlikely(nvmet_cmd_flush_all(req))) {
+		if (req->b.inline_bio.bi_status)
+			req->rsp->status = cpu_to_le16(status);
+		return;
+	}
+
+	nvmet_req_complete(req, bio->bi_status ? status : 0);
 
 	if (bio != &req->b.inline_bio)
 		bio_put(bio);
@@ -111,7 +117,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 	blk_poll(bdev_get_queue(req->ns->bdev), cookie);
 }
 
-static void nvmet_bdev_execute_flush(struct nvmet_req *req)
+void nvmet_bdev_execute_flush(struct nvmet_req *req)
 {
 	struct bio *bio = &req->b.inline_bio;
 
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 4e6e6c255a58..d1c79a3c4f9b 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -218,14 +218,19 @@ static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
 	queue_work(req->ns->file_wq, &req->work);
 }
 
-static void nvmet_file_flush_work(struct work_struct *w)
+void nvmet_file_flush_work(struct work_struct *w)
 {
 	struct nvmet_req *req = container_of(w, struct nvmet_req, work);
+	u16 status;
 	int ret;
 
 	ret = vfs_fsync(req->ns->file, 1);
+	status = ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : NVME_SC_SUCCESS;
 
-	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+	if (nvmet_cmd_flush_all(req))
+		req->rsp->status = cpu_to_le16(status);
+	else
+		nvmet_req_complete(req, status);
 }
 
 static void nvmet_file_execute_flush(struct nvmet_req *req)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index b1cd8eb6717f..e9aae023a198 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -169,6 +169,7 @@ struct nvmet_subsys {
 
 	struct list_head	namespaces;
 	unsigned int		max_nsid;
+	unsigned int		active_ns;
 
 	struct list_head	ctrls;
 
@@ -380,9 +381,18 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns);
 void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
 void nvmet_file_ns_disable(struct nvmet_ns *ns);
 
+void nvmet_bdev_execute_flush(struct nvmet_req *req);
+void nvmet_file_flush_work(struct work_struct *w);
+
 static inline u32 nvmet_rw_len(struct nvmet_req *req)
 {
 	return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
 			req->ns->blksize_shift;
 }
+
+static inline int nvmet_cmd_flush_all(struct nvmet_req *req)
+{
+	return (req->cmd->common.opcode == nvme_cmd_flush) &&
+		(le32_to_cpu(req->cmd->rw.nsid) == 0xFFFFFFFF);
+}
 #endif /* _NVMET_H */
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH V3 1/2] nvmet: move struct work_struct out of union
  2018-06-26  3:15 ` [PATCH V3 1/2] nvmet: move struct work_struct out of union Chaitanya Kulkarni
@ 2018-06-26  9:10   ` Sagi Grimberg
  0 siblings, 0 replies; 6+ messages in thread
From: Sagi Grimberg @ 2018-06-26  9:10 UTC (permalink / raw)


Reviewed-by: Sagi Grimberg <sagi at grimberg.me>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH V3 2/2] nvmet: add support for flush all
  2018-06-26  3:15 ` [PATCH V3 2/2] nvmet: add support for flush all Chaitanya Kulkarni
@ 2018-06-26 10:50   ` Sagi Grimberg
  2018-06-26 17:38     ` Chaitanya Kulkarni
  0 siblings, 1 reply; 6+ messages in thread
From: Sagi Grimberg @ 2018-06-26 10:50 UTC (permalink / raw)



> +static void nvmet_flush_all_work(struct work_struct *w)
> +{
> +	struct nvmet_req *req = container_of(w, struct nvmet_req, work);
> +	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
> +	u16 status = NVME_SC_SUCCESS;
> +	struct nvmet_req *req_arr;
> +	struct nvmet_ns *ns;
> +	unsigned int max_ns;
> +	int cnt = 0, i;
> +
> +	req->ns = NULL;
> +
> +	/* only process namespaces present prior to flush command */
> +	mutex_lock(&subsys->lock);
> +	max_ns = subsys->active_ns;
> +	mutex_unlock(&subsys->lock);
> +
> +	req_arr = kmalloc_array(max_ns, sizeof(*req_arr), GFP_KERNEL);
> +	if (!req_arr) {
> +		status = NVME_SC_INTERNAL;
> +		goto out;
> +	}
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(ns, &subsys->namespaces, dev_link) {
> +		struct nvmet_req *r = &req_arr[cnt];

Don't access the array before the check below, namespaces could be added
after the allocation.

> +
> +		if (cnt > max_ns)
> +			break;
> +
> +		r->rsp = kmalloc(sizeof(*r->rsp), GFP_KERNEL);

We can't allocate under rcu_read_lock. Move it right next to the req_arr
allocation.

> +		if (!r->rsp) {
> +			status = NVME_SC_INTERNAL;
> +			break;
> +		}
> +		r->ns = ns;
> +		r->cmd = req->cmd;
> +		r->rsp->status = cpu_to_le16(0);
> +
> +		percpu_ref_get(&r->ns->ref);
> +		rcu_read_unlock();
> +
> +		if (ns->file) {
> +			INIT_WORK(&r->work, nvmet_file_flush_work);
> +			schedule_work(&r->work);
> +			flush_work(&r->work);

That's back to sequential. Why not flush it in the second iteration like
I proposed?
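
To make the distinction concrete, here is a rough userspace analogue, with
pthread_create()/pthread_join() standing in for schedule_work()/flush_work():

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define NR_NS 4

static void *flush_one(void *arg)
{
	(void)arg;
	sleep(1);	/* stand-in for one namespace flush */
	return NULL;
}

int main(void)
{
	pthread_t w[NR_NS];
	int i;

	/* queue everything first, then wait: total ~1s, not ~NR_NS seconds */
	for (i = 0; i < NR_NS; i++)
		pthread_create(&w[i], NULL, flush_one, NULL);
	for (i = 0; i < NR_NS; i++)
		pthread_join(w[i], NULL);

	printf("flushed %d namespaces in parallel\n", NR_NS);
	return 0;
}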

> +		} else
> +			nvmet_bdev_execute_flush(r);

I sorta liked having both flows symmetric as being executed from work,
why did you change it back?

> +		cnt++;
> +		rcu_read_lock();

I still don't think this is needed...

How about the below?
--
     nvmet: add support for flush all

     This patch implements the Flush command behavior described in NVMe 1.3a
     TP 4035 for an NSID value of FFFFFFFFh: when the NSID field is set to
     0xFFFFFFFF, the Flush command applies to all namespaces attached to the
     controller processing it.

     For a block device backed ns we execute the flush from the primary worker
     thread (nvmet_flush_all_work()); for a file-backed ns we execute the flush
     in a separate worker thread context (nvmet_file_flush_work()).

     We also add a new struct nvmet_subsys member, "active_ns", to keep track
     of the number of currently active namespaces. We update "active_ns" in
     nvmet_ns_enable()/nvmet_ns_disable().

     Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index a093191c9f49..60c3ced9511f 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -257,7 +257,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
                         NVME_CTRL_ONCS_WRITE_ZEROES);

         /* XXX: don't report vwc if the underlying device is write through */
-       id->vwc = NVME_CTRL_VWC_PRESENT;
+       id->vwc = NVME_CTRL_VWC_PRESENT | 1 << 2 | 1 << 1;

         /*
          * We can't support atomic writes bigger than a LBA without support
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index e8d46bde909c..8bfb0995d470 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -380,6 +380,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
                 list_add_tail_rcu(&ns->dev_link, &old->dev_link);
         }

+       subsys->active_ns++;
         nvmet_ns_changed(subsys, ns->nsid);
         ns->enabled = true;
         ret = 0;
@@ -421,6 +422,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
         mutex_lock(&subsys->lock);
         nvmet_ns_changed(subsys, ns->nsid);
         nvmet_ns_dev_disable(ns);
+       subsys->active_ns--;
  out_unlock:
         mutex_unlock(&subsys->lock);
  }
@@ -563,6 +565,85 @@ int nvmet_sq_init(struct nvmet_sq *sq)
  }
  EXPORT_SYMBOL_GPL(nvmet_sq_init);

+static void nvmet_flush_all_work(struct work_struct *w)
+{
+       struct nvmet_req *req = container_of(w, struct nvmet_req, work);
+       struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+       u16 status = NVME_SC_SUCCESS;
+       struct nvmet_req *r, *req_arr;
+       struct nvmet_ns *ns;
+       unsigned int max_ns;
+       int cnt = 0, i;
+
+       req->ns = NULL;
+
+       /* only process namespaces present prior to flush command */
+       mutex_lock(&subsys->lock);
+       max_ns = subsys->active_ns;
+       mutex_unlock(&subsys->lock);
+
+       req_arr = kmalloc_array(max_ns, sizeof(*req_arr), GFP_KERNEL);
+       if (!req_arr) {
+               status = NVME_SC_INTERNAL;
+               goto out;
+       }
+
+       for (i = 0; i < max_ns; i++) {
+               r = &req_arr[i];
+
+               r->rsp = kmalloc(sizeof(*r->rsp), GFP_KERNEL);
+               if (!r->rsp) {
+                       status = NVME_SC_INTERNAL;
+                       goto free_rsps;
+               }
+       }
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ns, &subsys->namespaces, dev_link) {
+               if (cnt > max_ns)
+                       break;
+
+               r = &req_arr[cnt];
+               r->ns = ns;
+               r->cmd = req->cmd;
+               r->rsp->status = cpu_to_le16(0);
+
+               if (ns->file)
+                       INIT_WORK(&r->work, nvmet_file_flush_work);
+               else
+                       INIT_WORK(&r->work, nvmet_bdev_flush_work);
+
+               percpu_ref_get(&r->ns->ref);
+               schedule_work(&r->work);
+               cnt++;
+       }
+       rcu_read_unlock();
+
+       while (--cnt >= 0) {
+               r = &req_arr[cnt];
+
+               flush_work(&r->work);
+               percpu_ref_put(&r->ns->ref);
+               if (r->rsp->status) {
+                       pr_err("flush all failed nsid: %u\n", r->ns->nsid);
+                       status = NVME_SC_INTERNAL | NVME_SC_DNR;
+               }
+       }
+
+free_rsps:
+       while (--i)
+               kfree(req_arr[i].rsp);
+       kfree(req_arr);
+out:
+       nvmet_req_complete(req, status);
+}
+
+static void nvmet_execute_flush_all(struct nvmet_req *req)
+{
+       INIT_WORK(&req->work, nvmet_flush_all_work);
+       schedule_work(&req->work);
+}
+
  static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
  {
         struct nvme_command *cmd = req->cmd;
@@ -572,6 +653,12 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
         if (unlikely(ret))
                 return ret;

+       if (unlikely(nvmet_cmd_flush_all(req))) {
+               req->execute = nvmet_execute_flush_all;
+               req->data_len = 0;
+               return NVME_SC_SUCCESS;
+       }
+
         req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
         if (unlikely(!req->ns))
                 return NVME_SC_INVALID_NS | NVME_SC_DNR;
@@ -1100,6 +1187,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
         INIT_LIST_HEAD(&subsys->ctrls);
         INIT_LIST_HEAD(&subsys->hosts);

+       subsys->active_ns = 0;
+
         return subsys;
  }

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index e0b0f7df70c2..08d38544ad74 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -46,10 +46,16 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns)

  static void nvmet_bio_done(struct bio *bio)
  {
+       u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
         struct nvmet_req *req = bio->bi_private;

-       nvmet_req_complete(req,
-               bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+       if (unlikely(nvmet_cmd_flush_all(req))) {
+               if (req->b.inline_bio.bi_status)
+                       req->rsp->status = cpu_to_le16(status);
+               return;
+       }
+
+       nvmet_req_complete(req, bio->bi_status ? status : 0);

         if (bio != &req->b.inline_bio)
                 bio_put(bio);
@@ -111,7 +117,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
         blk_poll(bdev_get_queue(req->ns->bdev), cookie);
  }

-static void nvmet_bdev_execute_flush(struct nvmet_req *req)
+static struct bio *nvmet_bdev_init_flush_bio(struct nvmet_req *req)
  {
         struct bio *bio = &req->b.inline_bio;

@@ -121,6 +127,21 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
         bio->bi_end_io = nvmet_bio_done;
         bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

+       return bio;
+}
+
+static void nvmet_bdev_execute_flush(struct nvmet_req *req)
+{
+       struct bio *bio = nvmet_bdev_init_flush_bio(req);
+
+       submit_bio(bio);
+}
+
+void nvmet_bdev_flush_work(struct work_struct *w)
+{
+       struct nvmet_req *req = container_of(w, struct nvmet_req, work);
+       struct bio *bio = nvmet_bdev_init_flush_bio(req);
+
         submit_bio(bio);
  }

diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 484a8e55b8e9..785032a377cb 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -207,14 +207,19 @@ static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
         queue_work(nvmet_buffered_io_wq, &req->work);
  }

-static void nvmet_file_flush_work(struct work_struct *w)
+void nvmet_file_flush_work(struct work_struct *w)
  {
         struct nvmet_req *req = container_of(w, struct nvmet_req, work);
+       u16 status;
         int ret;

         ret = vfs_fsync(req->ns->file, 1);
+       status = ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : NVME_SC_SUCCESS;

-       nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+       if (nvmet_cmd_flush_all(req))
+               req->rsp->status = cpu_to_le16(status);
+       else
+               nvmet_req_complete(req, status);
  }

  static void nvmet_file_execute_flush(struct nvmet_req *req)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index ac221f4d6591..b3b1639228aa 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -173,6 +173,7 @@ struct nvmet_subsys {

         struct list_head        namespaces;
         unsigned int            max_nsid;
+       unsigned int            active_ns;

         struct list_head        ctrls;

@@ -388,9 +389,18 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns);
  void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
  void nvmet_file_ns_disable(struct nvmet_ns *ns);

+void nvmet_bdev_flush_work(struct work_struct *w);
+void nvmet_file_flush_work(struct work_struct *w);
+
  static inline u32 nvmet_rw_len(struct nvmet_req *req)
  {
         return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
                         req->ns->blksize_shift;
  }
+
+static inline int nvmet_cmd_flush_all(struct nvmet_req *req)
+{
+       return (req->cmd->common.opcode == nvme_cmd_flush) &&
+               (le32_to_cpu(req->cmd->rw.nsid) == 0xFFFFFFFF);
+}
  #endif /* _NVMET_H */
--

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH V3 2/2] nvmet: add support for flush all
  2018-06-26 10:50   ` Sagi Grimberg
@ 2018-06-26 17:38     ` Chaitanya Kulkarni
  0 siblings, 0 replies; 6+ messages in thread
From: Chaitanya Kulkarni @ 2018-06-26 17:38 UTC (permalink / raw)




From: Sagi Grimberg <sagi@grimberg.me>
Sent: Tuesday, June 26, 2018 3:50 AM
To: Chaitanya Kulkarni; linux-nvme at lists.infradead.org
Cc: hch at lst.de; keith.busch at intel.com
Subject: Re: [PATCH V3 2/2] nvmet: add support for flush all

> +static void nvmet_flush_all_work(struct work_struct *w)
> +{
> +	struct nvmet_req *req = container_of(w, struct nvmet_req, work);
> +	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
> +	u16 status = NVME_SC_SUCCESS;
> +	struct nvmet_req *req_arr;
> +	struct nvmet_ns *ns;
> +	unsigned int max_ns;
> +	int cnt = 0, i;
> +
> +	req->ns = NULL;
> +
> +	/* only process namespaces present prior to flush command */
> +	mutex_lock(&subsys->lock);
> +	max_ns = subsys->active_ns;
> +	mutex_unlock(&subsys->lock);
> +
> +	req_arr = kmalloc_array(max_ns, sizeof(*req_arr), GFP_KERNEL);
> +	if (!req_arr) {
> +		status = NVME_SC_INTERNAL;
> +		goto out;
> +	}
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(ns, &subsys->namespaces, dev_link) {
> +		struct nvmet_req *r = &req_arr[cnt];

Don't access the array before the check below, namespaces could be added
after the allocation.

> +
> +		if (cnt > max_ns)
> +			break;
> +
> +		r->rsp = kmalloc(sizeof(*r->rsp), GFP_KERNEL);

We can't allocate under rcu_read_lock. Move it right next to the req_arr
allocation.

> +		if (!r->rsp) {
> +			status = NVME_SC_INTERNAL;
> +			break;
> +		}
> +		r->ns = ns;
> +		r->cmd = req->cmd;
> +		r->rsp->status = cpu_to_le16(0);
> +
> +		percpu_ref_get(&r->ns->ref);
> +		rcu_read_unlock();
> +
> +		if (ns->file) {
> +			INIT_WORK(&r->work, nvmet_file_flush_work);
> +			schedule_work(&r->work);
> +			flush_work(&r->work);

That's back to sequential. Why not flush it in the second iteration like
I proposed?

> +		} else
> +			nvmet_bdev_execute_flush(r);

I sorta liked having both flows symmetric as being executed from work,
why did you change it back?

[CK] Maybe I misunderstood your previous comment; I also liked the
symmetry, so we will keep it that way:-
"
I don't think that submit_bio needs to be executed from a work context.
[CK] Okay, will execute in the same context.
>        submit_bio(bio);
>  }
>
"

> +		cnt++;
> +		rcu_read_lock();

I still don't think this is needed...

How about the below?

[CK] Looks good to me, except we can get rid of nvmet_bdev_init_flush_bio()
and call nvmet_bdev_execute_flush() directly from nvmet_bdev_flush_work();
I'll add these changes and send the next version.
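
A sketch of that simplification (kernel context; it assumes
nvmet_bdev_execute_flush() is exported as in the original patch, instead
of the nvmet_bdev_init_flush_bio() split above):

void nvmet_bdev_flush_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, work);

	/* reuse the existing flush submission path; no bio-init helper */
	nvmet_bdev_execute_flush(req);
}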


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-06-26 17:38 UTC | newest]

Thread overview: 6+ messages
2018-06-26  3:15 [PATCH V3 0/2] nvmet: implement flush all support Chaitanya Kulkarni
2018-06-26  3:15 ` [PATCH V3 1/2] nvmet: move struct work_struct out of union Chaitanya Kulkarni
2018-06-26  9:10   ` Sagi Grimberg
2018-06-26  3:15 ` [PATCH V3 2/2] nvmet: add support for flush all Chaitanya Kulkarni
2018-06-26 10:50   ` Sagi Grimberg
2018-06-26 17:38     ` Chaitanya Kulkarni
