* [PATCH v3 0/3] outstanding nvme_fc/nvmet_fc fixes
@ 2017-05-22 22:28 James Smart
  2017-05-22 22:28 ` [PATCH v3 1/3] nvme_fc: replace ioabort msleep loop with completion James Smart
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: James Smart @ 2017-05-22 22:28 UTC (permalink / raw)


These are the remaining outstanding patches, reworked per the
prior review comments.

cut against nvme-4.12

-- james

James Smart (3):
  nvme_fc: replace ioabort msleep loop with completion
  nvmet_fc: Reduce work_q count
  nvme_fcloop: fix port deletes and callbacks

 drivers/nvme/host/fc.c       |  14 +--
 drivers/nvme/target/fc.c     | 206 ++++++++++++++++++++++++++++++++-----------
 drivers/nvme/target/fcloop.c |  86 ++++++++----------
 3 files changed, 199 insertions(+), 107 deletions(-)

-- 
2.11.0


* [PATCH v3 1/3] nvme_fc: replace ioabort msleep loop with completion
  2017-05-22 22:28 [PATCH v3 0/3] outstanding nvme_fc/nvmet_fc fixes James Smart
@ 2017-05-22 22:28 ` James Smart
  2017-05-23  7:12   ` Christoph Hellwig
  2017-05-22 22:28 ` [PATCH v3 2/3] nvmet_fc: Reduce work_q count James Smart
  2017-05-22 22:28 ` [PATCH v3 3/3] nvme_fcloop: fix port deletes and callbacks James Smart
  2 siblings, 1 reply; 9+ messages in thread
From: James Smart @ 2017-05-22 22:28 UTC (permalink / raw)


Per the recommendation by Sagi on:
http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html

Convert the wait for io aborts to complete from an msleep() polling loop to
a waitqueue and wait_event_lock_irq().

Signed-off-by: James Smart <james.smart@broadcom.com>
---
This is the 3rd version of this patch: 
v2: removed unneeded inner braces
v3: converted to waitqueue and wait_event_lock_irq()

 drivers/nvme/host/fc.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 14a009e43aa5..1ba693f7bdf0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -167,6 +167,7 @@ struct nvme_fc_ctrl {
 	struct kref		ref;
 	u32			flags;
 	u32			iocnt;
+	wait_queue_head_t	ioabort_wait;
 
 	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];
 
@@ -1241,8 +1242,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 
 	spin_lock_irqsave(&ctrl->lock, flags);
 	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-		if (ctrl->flags & FCCTRL_TERMIO)
-			ctrl->iocnt--;
+		if (ctrl->flags & FCCTRL_TERMIO) {
+			if (!--ctrl->iocnt)
+				wake_up(&ctrl->ioabort_wait);
+		}
 	}
 	if (op->flags & FCOP_FLAGS_RELEASED)
 		complete_rq = true;
@@ -2479,11 +2482,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 
 	/* wait for all io that had to be aborted */
 	spin_lock_irqsave(&ctrl->lock, flags);
-	while (ctrl->iocnt) {
-		spin_unlock_irqrestore(&ctrl->lock, flags);
-		msleep(1000);
-		spin_lock_irqsave(&ctrl->lock, flags);
-	}
+	wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
 	ctrl->flags &= ~FCCTRL_TERMIO;
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
-- 
2.11.0
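
For readers outside the nvme-fc code, a minimal sketch of the pattern the
patch moves to (hypothetical names, not the driver's actual structures): the
per-io completion path decrements the outstanding-abort count under the
controller lock and wakes the waiter when it reaches zero; the teardown path
sleeps on the waitqueue instead of polling with msleep().

#include <linux/spinlock.h>
#include <linux/wait.h>

/* sketch only; assumes spin_lock_init()/init_waitqueue_head() at setup */
struct my_ctrl {
	spinlock_t		lock;
	u32			iocnt;		/* aborted ios still outstanding */
	wait_queue_head_t	ioabort_wait;
};

/* completion side: drop the count, wake the waiter on the last abort */
static void my_io_aborted(struct my_ctrl *ctrl)
{
	unsigned long flags;

	spin_lock_irqsave(&ctrl->lock, flags);
	if (!--ctrl->iocnt)
		wake_up(&ctrl->ioabort_wait);
	spin_unlock_irqrestore(&ctrl->lock, flags);
}

/* teardown side: replaces the old msleep(1000) polling loop */
static void my_wait_for_aborts(struct my_ctrl *ctrl)
{
	unsigned long flags;

	spin_lock_irqsave(&ctrl->lock, flags);
	wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
	spin_unlock_irqrestore(&ctrl->lock, flags);
}

The waitqueue form wakes the waiter as soon as the last abort completes rather
than on the next one-second poll, and wait_event_lock_irq() drops the lock
while sleeping and retakes it before re-checking the condition.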


* [PATCH v3 2/3] nvmet_fc: Reduce work_q count
  2017-05-22 22:28 [PATCH v3 0/3] outstanding nvme_fc/nvmet_fc fixes James Smart
  2017-05-22 22:28 ` [PATCH v3 1/3] nvme_fc: replace ioabort msleep loop with completion James Smart
@ 2017-05-22 22:28 ` James Smart
  2017-05-23  7:15   ` Christoph Hellwig
  2017-05-22 22:28 ` [PATCH v3 3/3] nvme_fcloop: fix port deletes and callbacks James Smart
  2 siblings, 1 reply; 9+ messages in thread
From: James Smart @ 2017-05-22 22:28 UTC (permalink / raw)


Instead of a work_q per controller queue, use the system workqueues.
Create per-cpu "work lists" that the driver ISR posts to and the
workqueue pulls from.

Signed-off-by: James Smart <james.smart@broadcom.com>
---
This is the 5th version of this patch: 
v2:
converted to use DEFINE_PER_CPU()
reworked do {} while into more readable for loop in
 nvmet_fc_do_work_on_cpu()
renamed create/delete_threads to create/delete_workqueues

v3: recut on nvme-4.12

v4: use per_cpu_ptr() instead of per_cpu()

v5: remove own workqueues, use system workqueues.
renamed create/delete_workqueues to create/delete_worklists

 drivers/nvme/target/fc.c | 206 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 156 insertions(+), 50 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 2006fae61980..c5417d3a1797 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -20,6 +20,7 @@
 #include <linux/blk-mq.h>
 #include <linux/parser.h>
 #include <linux/random.h>
+#include <linux/threads.h>
 #include <uapi/scsi/fc/fc_fs.h>
 #include <uapi/scsi/fc/fc_els.h>
 
@@ -81,6 +82,7 @@ struct nvmet_fc_fcp_iod {
 	u32				offset;
 	enum nvmet_fcp_datadir		io_dir;
 	bool				active;
+	bool				started;
 	bool				abort;
 	bool				aborted;
 	bool				writedataactive;
@@ -88,12 +90,12 @@ struct nvmet_fc_fcp_iod {
 
 	struct nvmet_req		req;
 	struct work_struct		work;
-	struct work_struct		done_work;
 
 	struct nvmet_fc_tgtport		*tgtport;
 	struct nvmet_fc_tgt_queue	*queue;
 
-	struct list_head		fcp_list;	/* tgtport->fcp_list */
+	struct list_head		fcp_list;	/* queue->fod_list */
+	struct list_head		work_list;	/* workcpu->work_list */
 };
 
 struct nvmet_fc_tgtport {
@@ -132,7 +134,6 @@ struct nvmet_fc_tgt_queue {
 	struct nvmet_fc_tgt_assoc	*assoc;
 	struct nvmet_fc_fcp_iod		*fod;		/* array of fcp_iods */
 	struct list_head		fod_list;
-	struct workqueue_struct		*work_q;
 	struct kref			ref;
 } __aligned(sizeof(unsigned long long));
 
@@ -145,6 +146,21 @@ struct nvmet_fc_tgt_assoc {
 	struct kref			ref;
 };
 
+enum nvmet_fc_workcpu_flags {
+	NVMET_FC_CPU_RUNNING		= (1 << 0),
+	NVMET_FC_CPU_TERMINATING	= (1 << 1),
+};
+
+struct nvmet_fc_work_by_cpu {
+	struct list_head		fod_list;
+	spinlock_t			clock;
+	int				cpu;
+	bool				running;
+	struct work_struct		cpu_work;
+};
+
+#define NVMET_FC_MAX_WORK_BUDGET	4096
+
 
 static inline int
 nvmet_fc_iodnum(struct nvmet_fc_ls_iod *iodptr)
@@ -213,10 +229,11 @@ static DEFINE_SPINLOCK(nvmet_fc_tgtlock);
 static LIST_HEAD(nvmet_fc_target_list);
 static DEFINE_IDA(nvmet_fc_tgtport_cnt);
 
+static u32 nvmet_fc_cpu_cnt;
+static DEFINE_PER_CPU(struct nvmet_fc_work_by_cpu, nvmet_fc_cpu_workcpu);
+#define nvmet_fc_workcpu(cpu)	per_cpu_ptr(&nvmet_fc_cpu_workcpu, cpu)
 
 static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
-static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work);
-static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work);
 static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
 static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
 static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
@@ -417,11 +434,10 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport,
 	int i;
 
 	for (i = 0; i < queue->sqsize; fod++, i++) {
-		INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work);
-		INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work);
 		fod->tgtport = tgtport;
 		fod->queue = queue;
 		fod->active = false;
+		fod->started = false;
 		fod->abort = false;
 		fod->aborted = false;
 		fod->fcpreq = NULL;
@@ -498,6 +514,7 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 	spin_lock_irqsave(&queue->qlock, flags);
 	list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
 	fod->active = false;
+	fod->started = false;
 	fod->abort = false;
 	fod->aborted = false;
 	fod->writedataactive = false;
@@ -556,12 +573,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	if (!nvmet_fc_tgt_a_get(assoc))
 		goto out_free_queue;
 
-	queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0,
-				assoc->tgtport->fc_target_port.port_num,
-				assoc->a_id, qid);
-	if (!queue->work_q)
-		goto out_a_put;
-
 	queue->fod = (struct nvmet_fc_fcp_iod *)&queue[1];
 	queue->qid = qid;
 	queue->sqsize = sqsize;
@@ -591,8 +602,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 
 out_fail_iodlist:
 	nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue);
-	destroy_workqueue(queue->work_q);
-out_a_put:
 	nvmet_fc_tgt_a_put(assoc);
 out_free_queue:
 	kfree(queue);
@@ -615,8 +624,6 @@ nvmet_fc_tgt_queue_free(struct kref *ref)
 
 	nvmet_fc_tgt_a_put(queue->assoc);
 
-	destroy_workqueue(queue->work_q);
-
 	kfree(queue);
 }
 
@@ -668,8 +675,6 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 	}
 	spin_unlock_irqrestore(&queue->qlock, flags);
 
-	flush_workqueue(queue->work_q);
-
 	if (disconnect)
 		nvmet_sq_destroy(&queue->nvme_sq);
 
@@ -1962,24 +1967,27 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
 }
 
 static void
-nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work)
-{
-	struct nvmet_fc_fcp_iod *fod =
-		container_of(work, struct nvmet_fc_fcp_iod, done_work);
-
-	nvmet_fc_fod_op_done(fod);
-}
-
-static void
 nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
 {
 	struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
 	struct nvmet_fc_tgt_queue *queue = fod->queue;
+	struct nvmet_fc_work_by_cpu *workcpu = nvmet_fc_workcpu(queue->cpu);
+	unsigned long flags;
+	bool running;
 
-	if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR)
-		/* context switch so completion is not in ISR context */
-		queue_work_on(queue->cpu, queue->work_q, &fod->done_work);
-	else
+	if (fod->tgtport->ops->target_features &
+				NVMET_FCTGTFEAT_OPDONE_IN_ISR) {
+		/* context switch for processing */
+
+		spin_lock_irqsave(&workcpu->clock, flags);
+		list_add_tail(&fod->work_list, &workcpu->fod_list);
+		running = workcpu->running;
+		workcpu->running = true;
+		spin_unlock_irqrestore(&workcpu->clock, flags);
+
+		if (!running)
+			schedule_work_on(workcpu->cpu, &workcpu->cpu_work);
+	} else
 		nvmet_fc_fod_op_done(fod);
 }
 
@@ -2069,6 +2077,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 	 * layer until we have both based on csn.
 	 */
 
+	fod->started = true;
 	fod->fcpreq->done = nvmet_fc_xmt_fcp_op_done;
 
 	fod->total_length = be32_to_cpu(cmdiu->data_len);
@@ -2144,19 +2153,6 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 	nvmet_fc_abort_op(tgtport, fod);
 }
 
-/*
- * Actual processing routine for received FC-NVME LS Requests from the LLD
- */
-static void
-nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
-{
-	struct nvmet_fc_fcp_iod *fod =
-		container_of(work, struct nvmet_fc_fcp_iod, work);
-	struct nvmet_fc_tgtport *tgtport = fod->tgtport;
-
-	nvmet_fc_handle_fcp_rqst(tgtport, fod);
-}
-
 /**
  * nvmet_fc_rcv_fcp_req - transport entry point called by an LLDD
  *                       upon the reception of a NVME FCP CMD IU.
@@ -2186,6 +2182,9 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 	struct nvme_fc_cmd_iu *cmdiu = cmdiubuf;
 	struct nvmet_fc_tgt_queue *queue;
 	struct nvmet_fc_fcp_iod *fod;
+	struct nvmet_fc_work_by_cpu *workcpu;
+	unsigned long flags;
+	bool running;
 
 	/* validate iu, so the connection id can be used to find the queue */
 	if ((cmdiubuf_len != sizeof(*cmdiu)) ||
@@ -2223,9 +2222,20 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 			((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
 	memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
 
-	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
-		queue_work_on(queue->cpu, queue->work_q, &fod->work);
-	else
+	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) {
+		/* context switch for processing */
+
+		workcpu = nvmet_fc_workcpu(queue->cpu);
+
+		spin_lock_irqsave(&workcpu->clock, flags);
+		list_add_tail(&fod->work_list, &workcpu->fod_list);
+		running = workcpu->running;
+		workcpu->running = true;
+		spin_unlock_irqrestore(&workcpu->clock, flags);
+
+		if (!running)
+			schedule_work_on(workcpu->cpu, &workcpu->cpu_work);
+	} else
 		nvmet_fc_handle_fcp_rqst(tgtport, fod);
 
 	return 0;
@@ -2391,13 +2401,17 @@ nvmet_fc_remove_port(struct nvmet_port *port)
 {
 	struct nvmet_fc_tgtport *tgtport = port->priv;
 	unsigned long flags;
+	bool matched = false;
 
 	spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
 	if (tgtport->port == port) {
-		nvmet_fc_tgtport_put(tgtport);
+		matched = true;
 		tgtport->port = NULL;
 	}
 	spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+
+	if (matched)
+		nvmet_fc_tgtport_put(tgtport);
 }
 
 static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
@@ -2410,9 +2424,99 @@ static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
 	.delete_ctrl		= nvmet_fc_delete_ctrl,
 };
 
+static void
+nvmet_fc_do_work_on_cpu(struct work_struct *work)
+{
+	struct nvmet_fc_work_by_cpu *workcpu =
+		container_of(work, struct nvmet_fc_work_by_cpu, cpu_work);
+	struct nvmet_fc_fcp_iod *fod;
+	unsigned long flags;
+	int workcnt = 0;
+
+	spin_lock_irqsave(&workcpu->clock, flags);
+
+	fod = list_first_entry_or_null(&workcpu->fod_list,
+				struct nvmet_fc_fcp_iod, work_list);
+	for ( ; fod; ) {
+		list_del(&fod->work_list);
+
+		spin_unlock_irqrestore(&workcpu->clock, flags);
+
+		if (fod->started)
+			nvmet_fc_fod_op_done(fod);
+		else
+			nvmet_fc_handle_fcp_rqst(fod->tgtport, fod);
+
+		spin_lock_irqsave(&workcpu->clock, flags);
+
+		if (++workcnt >= NVMET_FC_MAX_WORK_BUDGET)
+			goto exit_reschedule;
+
+		fod = list_first_entry_or_null(&workcpu->fod_list,
+					struct nvmet_fc_fcp_iod, work_list);
+	}
+
+	workcpu->running = false;
+
+	spin_unlock_irqrestore(&workcpu->clock, flags);
+
+	return;
+
+exit_reschedule:
+	spin_unlock_irqrestore(&workcpu->clock, flags);
+	schedule_work_on(workcpu->cpu, &workcpu->cpu_work);
+}
+
+static int
+nvmet_fc_create_worklists(void)
+{
+	struct nvmet_fc_work_by_cpu *workcpu;
+	int i;
+
+	nvmet_fc_cpu_cnt = num_active_cpus();
+	for (i = 0; i < nvmet_fc_cpu_cnt; i++) {
+		workcpu = nvmet_fc_workcpu(i);
+
+		workcpu->cpu = i;
+		workcpu->running = false;
+		spin_lock_init(&workcpu->clock);
+		INIT_LIST_HEAD(&workcpu->fod_list);
+		INIT_WORK(&workcpu->cpu_work, nvmet_fc_do_work_on_cpu);
+	}
+
+	return 0;
+}
+
+static void
+nvmet_fc_delete_worklists(void)
+{
+	struct nvmet_fc_work_by_cpu *workcpu;
+	int i;
+
+	for (i = 0; i < nvmet_fc_cpu_cnt; i++) {
+		workcpu = nvmet_fc_workcpu(i);
+
+		/* sanity check - all work should be removed */
+		if (!list_empty(&workcpu->fod_list))
+			pr_warn("%s: cpu %d worklist not empty\n", __func__, i);
+	}
+}
+
 static int __init nvmet_fc_init_module(void)
 {
-	return nvmet_register_transport(&nvmet_fc_tgt_fcp_ops);
+	int ret;
+
+	ret = nvmet_fc_create_worklists();
+	if (ret)
+		goto fail;
+
+	ret = nvmet_register_transport(&nvmet_fc_tgt_fcp_ops);
+	if (!ret)
+		return 0;
+
+fail:
+	nvmet_fc_delete_worklists();
+	return -ENXIO;
 }
 
 static void __exit nvmet_fc_exit_module(void)
@@ -2423,6 +2527,8 @@ static void __exit nvmet_fc_exit_module(void)
 
 	nvmet_unregister_transport(&nvmet_fc_tgt_fcp_ops);
 
+	nvmet_fc_delete_worklists();
+
 	ida_destroy(&nvmet_fc_tgtport_cnt);
 }
 
-- 
2.11.0
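
Condensed, for readers skimming the diff: the producer/consumer shape the
patch introduces is roughly the sketch below (hypothetical names; the real
code is nvmet_fc_rcv_fcp_req()/nvmet_fc_xmt_fcp_op_done() posting and
nvmet_fc_do_work_on_cpu() draining). The ISR side appends to a per-cpu list
and schedules the per-cpu work item only on the idle-to-running transition;
the work function drains the list under the lock up to a budget, then
reschedules itself.

#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct my_item {
	struct list_head	link;
};

struct my_workcpu {
	struct list_head	list;
	spinlock_t		lock;
	bool			running;	/* work item already scheduled/active */
	struct work_struct	work;
};

/* INIT_WORK()/spin_lock_init()/INIT_LIST_HEAD() done at module init, as above */
static DEFINE_PER_CPU(struct my_workcpu, my_workcpu);

/* producer (ISR context): queue the item, kick the work item only if idle */
static void my_post(struct my_item *item, int cpu)
{
	struct my_workcpu *wc = per_cpu_ptr(&my_workcpu, cpu);
	unsigned long flags;
	bool running;

	spin_lock_irqsave(&wc->lock, flags);
	list_add_tail(&item->link, &wc->list);
	running = wc->running;
	wc->running = true;
	spin_unlock_irqrestore(&wc->lock, flags);

	if (!running)
		schedule_work_on(cpu, &wc->work);
}

The "running" flag is what keeps this to at most one queued work item per cpu
regardless of how many completions the ISR posts.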


* [PATCH v3 3/3] nvme_fcloop: fix port deletes and callbacks
  2017-05-22 22:28 [PATCH v3 0/3] outstanding nvme_fc/nvmet_fc fixes James Smart
  2017-05-22 22:28 ` [PATCH v3 1/3] nvme_fc: replace ioabort msleep loop with completion James Smart
  2017-05-22 22:28 ` [PATCH v3 2/3] nvmet_fc: Reduce work_q count James Smart
@ 2017-05-22 22:28 ` James Smart
  2017-05-23  7:16   ` Christoph Hellwig
  2 siblings, 1 reply; 9+ messages in thread
From: James Smart @ 2017-05-22 22:28 UTC (permalink / raw)


Now that there are potentially long delays between when a
remoteport or targetport delete call is made and when the
callback occurs (dev_loss_tmo timeout), no longer block in the
delete routines; instead, move the final nport puts to the callbacks.

Moved the fcloop_nport_get/put/free routines to avoid forward
declarations.

Ensure the port_info structs used in registrations are zero-initialized so
fields that are not explicitly set (ex: devloss_tmo values) do not carry
stack garbage.

Signed-off-by: James Smart <james.smart@broadcom.com>
---
This is the 3rd version of this patch: 
v2: cut on nvme-4.12
v3: remove remoteport and localport completions

 drivers/nvme/host/fc.c       |  1 +
 drivers/nvme/target/fcloop.c | 86 +++++++++++++++++++-------------------------
 2 files changed, 37 insertions(+), 50 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 1ba693f7bdf0..8ac109981960 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2728,6 +2728,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
 	spin_lock_init(&ctrl->lock);
+	init_waitqueue_head(&ctrl->ioabort_wait);
 
 	/* io queue count */
 	ctrl->queue_count = min_t(unsigned int,
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 294a6611fb24..70445d5e5a6a 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -227,8 +227,6 @@ struct fcloop_nport {
 	struct fcloop_lport *lport;
 	struct list_head nport_list;
 	struct kref ref;
-	struct completion rport_unreg_done;
-	struct completion tport_unreg_done;
 	u64 node_name;
 	u64 port_name;
 	u32 port_role;
@@ -636,6 +634,32 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
 }
 
 static void
+fcloop_nport_free(struct kref *ref)
+{
+	struct fcloop_nport *nport =
+		container_of(ref, struct fcloop_nport, ref);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fcloop_lock, flags);
+	list_del(&nport->nport_list);
+	spin_unlock_irqrestore(&fcloop_lock, flags);
+
+	kfree(nport);
+}
+
+static void
+fcloop_nport_put(struct fcloop_nport *nport)
+{
+	kref_put(&nport->ref, fcloop_nport_free);
+}
+
+static int
+fcloop_nport_get(struct fcloop_nport *nport)
+{
+	return kref_get_unless_zero(&nport->ref);
+}
+
+static void
 fcloop_localport_delete(struct nvme_fc_local_port *localport)
 {
 	struct fcloop_lport *lport = localport->private;
@@ -649,8 +673,7 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport)
 {
 	struct fcloop_rport *rport = remoteport->private;
 
-	/* release any threads waiting for the unreg to complete */
-	complete(&rport->nport->rport_unreg_done);
+	fcloop_nport_put(rport->nport);
 }
 
 static void
@@ -658,8 +681,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
 {
 	struct fcloop_tport *tport = targetport->private;
 
-	/* release any threads waiting for the unreg to complete */
-	complete(&tport->nport->tport_unreg_done);
+	fcloop_nport_put(tport->nport);
 }
 
 #define	FCLOOP_HW_QUEUES		4
@@ -727,6 +749,7 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr,
 		goto out_free_opts;
 	}
 
+	memset(&pinfo, 0, sizeof(pinfo));
 	pinfo.node_name = opts->wwnn;
 	pinfo.port_name = opts->wwpn;
 	pinfo.port_role = opts->roles;
@@ -809,32 +832,6 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr,
 	return ret ? ret : count;
 }
 
-static void
-fcloop_nport_free(struct kref *ref)
-{
-	struct fcloop_nport *nport =
-		container_of(ref, struct fcloop_nport, ref);
-	unsigned long flags;
-
-	spin_lock_irqsave(&fcloop_lock, flags);
-	list_del(&nport->nport_list);
-	spin_unlock_irqrestore(&fcloop_lock, flags);
-
-	kfree(nport);
-}
-
-static void
-fcloop_nport_put(struct fcloop_nport *nport)
-{
-	kref_put(&nport->ref, fcloop_nport_free);
-}
-
-static int
-fcloop_nport_get(struct fcloop_nport *nport)
-{
-	return kref_get_unless_zero(&nport->ref);
-}
-
 static struct fcloop_nport *
 fcloop_alloc_nport(const char *buf, size_t count, bool remoteport)
 {
@@ -943,6 +940,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -EIO;
 
+	memset(&pinfo, 0, sizeof(pinfo));
 	pinfo.node_name = nport->node_name;
 	pinfo.port_name = nport->port_name;
 	pinfo.port_role = nport->port_role;
@@ -984,23 +982,17 @@ __unlink_remote_port(struct fcloop_nport *nport)
 }
 
 static int
-__wait_remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
+__remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
 {
 	int ret;
 
 	if (!rport)
 		return -EALREADY;
 
-	init_completion(&nport->rport_unreg_done);
-
 	ret = nvme_fc_unregister_remoteport(rport->remoteport);
 	if (ret)
 		return ret;
 
-	wait_for_completion(&nport->rport_unreg_done);
-
-	fcloop_nport_put(nport);
-
 	return ret;
 }
 
@@ -1034,7 +1026,7 @@ fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -ENOENT;
 
-	ret = __wait_remoteport_unreg(nport, rport);
+	ret = __remoteport_unreg(nport, rport);
 
 	return ret ? ret : count;
 }
@@ -1091,23 +1083,17 @@ __unlink_target_port(struct fcloop_nport *nport)
 }
 
 static int
-__wait_targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
+__targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
 {
 	int ret;
 
 	if (!tport)
 		return -EALREADY;
 
-	init_completion(&nport->tport_unreg_done);
-
 	ret = nvmet_fc_unregister_targetport(tport->targetport);
 	if (ret)
 		return ret;
 
-	wait_for_completion(&nport->tport_unreg_done);
-
-	fcloop_nport_put(nport);
-
 	return ret;
 }
 
@@ -1141,7 +1127,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -ENOENT;
 
-	ret = __wait_targetport_unreg(nport, tport);
+	ret = __targetport_unreg(nport, tport);
 
 	return ret ? ret : count;
 }
@@ -1228,11 +1214,11 @@ static void __exit fcloop_exit(void)
 
 		spin_unlock_irqrestore(&fcloop_lock, flags);
 
-		ret = __wait_targetport_unreg(nport, tport);
+		ret = __targetport_unreg(nport, tport);
 		if (ret)
 			pr_warn("%s: Failed deleting target port\n", __func__);
 
-		ret = __wait_remoteport_unreg(nport, rport);
+		ret = __remoteport_unreg(nport, rport);
 		if (ret)
 			pr_warn("%s: Failed deleting remote port\n", __func__);
 
-- 
2.11.0
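
Stripped to its essence, the lifetime change in this patch is the standard
kref pattern sketched below (hypothetical names; the fcloop specifics are in
the diff above): the delete/unregister path no longer waits on a completion,
and the transport's delete callback, which may fire dev_loss_tmo seconds
later, drops the final reference.

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/nvme-fc-driver.h>

struct my_nport {
	struct kref		ref;
	/* node/port names, list linkage, ... */
};

struct my_rport {
	struct my_nport		*nport;
};

static void my_nport_free(struct kref *ref)
{
	struct my_nport *nport = container_of(ref, struct my_nport, ref);

	/* unlink from any global list under its lock, then free */
	kfree(nport);
}

static void my_nport_put(struct my_nport *nport)
{
	kref_put(&nport->ref, my_nport_free);
}

/* transport callback, invoked once the remoteport is really gone;
 * this put replaces the old complete(&nport->rport_unreg_done)
 */
static void my_remoteport_delete(struct nvme_fc_remote_port *remoteport)
{
	struct my_rport *rport = remoteport->private;

	my_nport_put(rport->nport);
}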


* [PATCH v3 1/3] nvme_fc: replace ioabort msleep loop with completion
  2017-05-22 22:28 ` [PATCH v3 1/3] nvme_fc: replace ioabort msleep loop with completion James Smart
@ 2017-05-23  7:12   ` Christoph Hellwig
  0 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2017-05-23  7:12 UTC (permalink / raw)


Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>


* [PATCH v3 2/3] nvmet_fc: Reduce work_q count
  2017-05-22 22:28 ` [PATCH v3 2/3] nvmet_fc: Reduce work_q count James Smart
@ 2017-05-23  7:15   ` Christoph Hellwig
  2017-05-23 19:31     ` James Smart
  0 siblings, 1 reply; 9+ messages in thread
From: Christoph Hellwig @ 2017-05-23  7:15 UTC (permalink / raw)


On Mon, May 22, 2017 at 03:28:43PM -0700, James Smart wrote:
> Instead of a work_q per controller queue, use system workqueues.
> Create "work lists" per cpu that driver ISR posts to and workqueue
> pulls from.

Why?  The whole point of workqueues is to avoid this sort of open coded
work lists in drivers.  To me it seems like you should simply make
the existing workqueue global, and maybe mark it as cpu intensive based
on profiling, but that's about it.


* [PATCH v3 3/3] nvme_fcloop: fix port deletes and callbacks
  2017-05-22 22:28 ` [PATCH v3 3/3] nvme_fcloop: fix port deletes and callbacks James Smart
@ 2017-05-23  7:16   ` Christoph Hellwig
  0 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2017-05-23  7:16 UTC (permalink / raw)


Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>


* [PATCH v3 2/3] nvmet_fc: Reduce work_q count
  2017-05-23  7:15   ` Christoph Hellwig
@ 2017-05-23 19:31     ` James Smart
  2017-05-24 17:35       ` Christoph Hellwig
  0 siblings, 1 reply; 9+ messages in thread
From: James Smart @ 2017-05-23 19:31 UTC (permalink / raw)


On 5/23/2017 12:15 AM, Christoph Hellwig wrote:
> On Mon, May 22, 2017 at 03:28:43PM -0700, James Smart wrote:
>> Instead of a work_q per controller queue, use system workqueues.
>> Create "work lists" per cpu that driver ISR posts to and workqueue
>> pulls from.
>
> Why?  The whole point of workqueues is to avoid this sort of open coded
> work lists in drivers.  To me it seems like you should simply make
> the existing workqueue global, and maybe mark it as cpu intensive based
> on profiling, but that's about it.

Why: to have parallelism and cpu affinity and its benefits for all the 
interim work the transport does for moving data/responses.

So I'm not sure how this differs from rdma. The ib cq bottom-half handler,
which can be a workqueue element or a softirq instance, sits in a loop
and processes the cq elements, calling the rdma transport done routine 
for each one, which does equivalent work. So both fc and rdma can be a 
workqueue element, both pull variable numbers of work items with caps on 
items per call, and the work per item is similar. So the only difference 
is rdma is pulling from a memory ring vs nvme-fc pulling from a linked list.

I can certainly remove the work list and go back to a work queue element 
work item. But I have to believe a work queue element for every 
completion is not as efficient as a simple linked list of the completions.

The idea of a global workqueue means you are effectively limited by the 
transaction rate of a single workqueue on a single cpu - no parallelism 
- which I've already seen exceeded in older implementations or 
combinations set up with the lldd. At a minimum, nvme-fc needs 
parallelization of work across cpus, and there has to be benefit in 
scheduling completions for the same queue on the same cpu and avoiding 
cross-cpu contention.

I'm open to alternative implementations - but it needs to be 
parallelized and minimize cross-cpu contention.

-- james


* [PATCH v3 2/3] nvmet_fc: Reduce work_q count
  2017-05-23 19:31     ` James Smart
@ 2017-05-24 17:35       ` Christoph Hellwig
  0 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2017-05-24 17:35 UTC (permalink / raw)


On Tue, May 23, 2017 at 12:31:07PM -0700, James Smart wrote:
> > Why?  The whole point of workqueues is to avoid this sort of open coded
> > work lists in drivers.  To me it seems like you should simply make
> > the existing workqueue global, and maybe mark it as cpu intensive based
> > on profiling, but that's about it.
> 
> Why: to have parallelism and cpu affinity and its benefits for all the
> interim work the transport does for moving data/responses.

Which is exactly the reason for using a workqueue - they do have
per cpu helpers and keep the affinity, and they keep the logic
for this in one place.

Documentation/core-api/workqueue.rst will explain quite a bit of the
details.

> So I'm not sure how this differs from rdma. The bottom ib cq handler, which
> can be a workqueue element or a soft_irq instance, sits in a loop and
> processes the cq elements, calling the rdma transport done routine for each
> one, which does equivalent work. So both fc and rdma can be a workqueue
> element, both pull variable numbers of work items with caps on items per
> call, and the work per item is similar. So the only difference is rdma is
> pulling from a memory ring vs nvme-fc pulling from a linked list.

The difference is that by the time they are called we don't even know
how many elements we'll process.

> I can certainly remove the work list and go back to a work queue element
> work item. But I have to believe a work queue element for every completion
> is not as efficient as a simple linked list of the completions.


It should be very similar. The whole point of workqueues is to factor
this pattern into common helpers.
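
For readers following along, the alternative being suggested would look
roughly like the sketch below (an illustration of the review comment, not
code from either poster): one shared workqueue for the transport, with each
fod keeping its own work_struct and queue_work_on() preserving the per-queue
cpu affinity; WQ_CPU_INTENSIVE would only be added if profiling shows the
handlers monopolizing a worker.

#include <linux/workqueue.h>

/* hypothetical shared workqueue for nvmet_fc */
static struct workqueue_struct *nvmet_fc_wq;

static int __init my_transport_init(void)
{
	nvmet_fc_wq = alloc_workqueue("nvmet_fc", 0, 0);
	if (!nvmet_fc_wq)
		return -ENOMEM;
	return 0;
}

/* submission keeps cpu affinity via the target queue's cpu */
static void my_queue_fod(struct nvmet_fc_fcp_iod *fod, int cpu)
{
	queue_work_on(cpu, nvmet_fc_wq, &fod->work);
}

The workqueue core already runs its bound pool workers per cpu, so the
affinity and batching the driver wants come for free; that is the point of
the pointer to Documentation/core-api/workqueue.rst above.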


