All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] qla2xxx: Convert to percpu_ida session tag pre-allocation
@ 2014-05-24  0:43 Nicholas A. Bellinger
  2014-05-24  2:33 ` Nicholas A. Bellinger
  0 siblings, 1 reply; 6+ messages in thread
From: Nicholas A. Bellinger @ 2014-05-24  0:43 UTC (permalink / raw)
  To: target-devel
  Cc: linux-scsi, Nicholas Bellinger, Saurav Kashyap, Quinn Tran,
	Giridhar Malavali, Chad Dupuis, Roland Dreier, Christoph Hellwig

From: Nicholas Bellinger <nab@linux-iscsi.org>

This patch converts qla2xxx target code to use generic percpu_ida
tag allocation provided by target-core, thus removing the original
kmem_cache_zalloc() for each struct qla_tgt_cmd descriptor in the
incoming ATIO packet fast-path.

This includes the conversion of qlt_handle_cmd_for_atio() to perform
qla_tgt_sess lookup before dispatching a command descriptor into
qla_tgt_wq process context, along with handling the case where no
active session exists, and subsequently kicking off a seperate
process context for qlt_create_sess_from_atio() to create a new one.

It also includes moving tag allocation into generic code within
qlt_get_tag(), so that the same logic can be shared between
qlt_handle_cmd_for_atio() + qlt_create_sess_from_atio() contexts.
Also, __qlt_do_work() has been made generic between both normal
process context in qlt_do_work() + qlt_create_sess_from_atio().

Next, update qlt_free_cmd() to release the percpu-ida tags, and
drop the now-unused global qla_tgt_cmd_cachep.

Finally in tcm_qla2xxx code, tcm_qla2xxx_check_initiator_node_acl()
has been updated to use transport_init_session_tags() along with a
hardcoded TCM_QLA2XXX_DEFAULT_TAGS=512 as the number of qla_tgt_cmd
descriptors to pre-allocate per qla_tgt_sess instance.

Cc: Saurav Kashyap <saurav.kashyap@qlogic.com>
Cc: Quinn Tran <quinn.tran@qlogic.com>
Cc: Giridhar Malavali <giridhar.malavali@qlogic.com>
Cc: Chad Dupuis <chad.dupuis@qlogic.com>
Cc: Roland Dreier <roland@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_target.c  |  195 ++++++++++++++++++++++++------------
 drivers/scsi/qla2xxx/qla_target.h  |    6 ++
 drivers/scsi/qla2xxx/tcm_qla2xxx.c |    4 +-
 drivers/scsi/qla2xxx/tcm_qla2xxx.h |    2 +
 4 files changed, 142 insertions(+), 65 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 0cb7307..bd9c725 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -104,7 +104,6 @@ static void qlt_reject_free_srr_imm(struct scsi_qla_host *ha,
 /*
  * Global Variables
  */
-static struct kmem_cache *qla_tgt_cmd_cachep;
 static struct kmem_cache *qla_tgt_mgmt_cmd_cachep;
 static mempool_t *qla_tgt_mgmt_cmd_mempool;
 static struct workqueue_struct *qla_tgt_wq;
@@ -2165,11 +2164,18 @@ done:
 
 void qlt_free_cmd(struct qla_tgt_cmd *cmd)
 {
+	struct qla_tgt_sess *sess = cmd->sess;
+
 	BUG_ON(cmd->sg_mapped);
 
 	if (unlikely(cmd->free_sg))
 		kfree(cmd->sg);
-	kmem_cache_free(qla_tgt_cmd_cachep, cmd);
+
+	if (!sess || !sess->se_sess) {
+		WARN_ON(1);
+		return;
+	}
+	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
 }
 EXPORT_SYMBOL(qlt_free_cmd);
 
@@ -2489,13 +2495,12 @@ static struct qla_tgt_sess *qlt_make_local_sess(struct scsi_qla_host *,
 /*
  * Process context for I/O path into tcm_qla2xxx code
  */
-static void qlt_do_work(struct work_struct *work)
+static void __qlt_do_work(struct qla_tgt_cmd *cmd)
 {
-	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
 	scsi_qla_host_t *vha = cmd->vha;
 	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
-	struct qla_tgt_sess *sess = NULL;
+	struct qla_tgt_sess *sess = cmd->sess;
 	struct atio_from_isp *atio = &cmd->atio;
 	unsigned char *cdb;
 	unsigned long flags;
@@ -2505,41 +2510,6 @@ static void qlt_do_work(struct work_struct *work)
 	if (tgt->tgt_stop)
 		goto out_term;
 
-	spin_lock_irqsave(&ha->hardware_lock, flags);
-	sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha,
-	    atio->u.isp24.fcp_hdr.s_id);
-	/* Do kref_get() before dropping qla_hw_data->hardware_lock. */
-	if (sess)
-		kref_get(&sess->se_sess->sess_kref);
-	spin_unlock_irqrestore(&ha->hardware_lock, flags);
-
-	if (unlikely(!sess)) {
-		uint8_t *s_id =	atio->u.isp24.fcp_hdr.s_id;
-
-		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022,
-			"qla_target(%d): Unable to find wwn login"
-			" (s_id %x:%x:%x), trying to create it manually\n",
-			vha->vp_idx, s_id[0], s_id[1], s_id[2]);
-
-		if (atio->u.raw.entry_count > 1) {
-			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023,
-				"Dropping multy entry cmd %p\n", cmd);
-			goto out_term;
-		}
-
-		mutex_lock(&vha->vha_tgt.tgt_mutex);
-		sess = qlt_make_local_sess(vha, s_id);
-		/* sess has an extra creation ref. */
-		mutex_unlock(&vha->vha_tgt.tgt_mutex);
-
-		if (!sess)
-			goto out_term;
-	}
-
-	cmd->sess = sess;
-	cmd->loop_id = sess->loop_id;
-	cmd->conf_compl_supported = sess->conf_compl_supported;
-
 	cdb = &atio->u.isp24.fcp_cmnd.cdb[0];
 	cmd->tag = atio->u.isp24.exchange_addr;
 	cmd->unpacked_lun = scsilun_to_int(
@@ -2566,8 +2536,8 @@ static void qlt_do_work(struct work_struct *work)
 	    "qla_target: START qla command: %p lun: 0x%04x (tag %d)\n",
 	    cmd, cmd->unpacked_lun, cmd->tag);
 
-	ret = vha->hw->tgt.tgt_ops->handle_cmd(vha, cmd, cdb, data_length,
-	    fcp_task_attr, data_dir, bidi);
+	ret = ha->tgt.tgt_ops->handle_cmd(vha, cmd, cdb, data_length,
+				          fcp_task_attr, data_dir, bidi);
 	if (ret != 0)
 		goto out_term;
 	/*
@@ -2586,17 +2556,114 @@ out_term:
 	 */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	qlt_send_term_exchange(vha, NULL, &cmd->atio, 1);
-	kmem_cache_free(qla_tgt_cmd_cachep, cmd);
-	if (sess)
+	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
+	ha->tgt.tgt_ops->put_sess(sess);
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+}
+
+static void qlt_do_work(struct work_struct *work)
+{
+	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
+
+	__qlt_do_work(cmd);
+}
+
+static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
+				       struct qla_tgt_sess *sess,
+				       struct atio_from_isp *atio)
+{
+	struct se_session *se_sess = sess->se_sess;
+	struct qla_tgt_cmd *cmd;
+	int tag;
+
+	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	if (tag < 0)
+		return NULL;
+
+	cmd = &((struct qla_tgt_cmd *)se_sess->sess_cmd_map)[tag];
+	memset(cmd, 0, sizeof(struct qla_tgt_cmd));
+
+	memcpy(&cmd->atio, atio, sizeof(*atio));
+	cmd->state = QLA_TGT_STATE_NEW;
+	cmd->tgt = vha->vha_tgt.qla_tgt;
+	cmd->vha = vha;
+	cmd->se_cmd.map_tag = tag;
+	cmd->sess = sess;
+	cmd->loop_id = sess->loop_id;
+	cmd->conf_compl_supported = sess->conf_compl_supported;
+
+	return cmd;
+}
+
+static void qlt_send_busy(struct scsi_qla_host *, struct atio_from_isp *,
+			  uint16_t);
+
+static void qlt_create_sess_from_atio(struct work_struct *work)
+{
+	struct qla_tgt_sess_op *op = container_of(work,
+					struct qla_tgt_sess_op, work);
+	scsi_qla_host_t *vha = op->vha;
+	struct qla_hw_data *ha = vha->hw;
+	struct qla_tgt_sess *sess;
+	struct qla_tgt_cmd *cmd;
+	unsigned long flags;
+	uint8_t *s_id = op->atio.u.isp24.fcp_hdr.s_id;
+
+	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022,
+		"qla_target(%d): Unable to find wwn login"
+		" (s_id %x:%x:%x), trying to create it manually\n",
+		vha->vp_idx, s_id[0], s_id[1], s_id[2]);
+
+	if (op->atio.u.raw.entry_count > 1) {
+		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023,
+		        "Dropping multy entry atio %p\n", &op->atio);
+		goto out_term;
+	}
+
+	mutex_lock(&vha->vha_tgt.tgt_mutex);
+	sess = qlt_make_local_sess(vha, s_id);
+	/* sess has an extra creation ref. */
+	mutex_unlock(&vha->vha_tgt.tgt_mutex);
+
+	if (!sess)
+		goto out_term;
+	/*
+	 * Now obtain a pre-allocated session tag using the original op->atio
+	 * packet header, and dispatch into __qlt_do_work() using the existing
+	 * process context.
+	 */
+	cmd = qlt_get_tag(vha, sess, &op->atio);
+	if (!cmd) {
+		spin_lock_irqsave(&ha->hardware_lock, flags);
+		qlt_send_busy(vha, &op->atio, SAM_STAT_BUSY);
 		ha->tgt.tgt_ops->put_sess(sess);
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+		kfree(op);
+		return;
+	}
+	/*
+	 * __qlt_do_work() will call ha->tgt.tgt_ops->put_sess() to release
+	 * the extra reference taken above by qlt_make_local_sess()
+	 */
+	__qlt_do_work(cmd);
+	kfree(op);
+	return;
+
+out_term:
+	spin_lock_irqsave(&ha->hardware_lock, flags);
+	qlt_send_term_exchange(vha, NULL, &op->atio, 1);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+	kfree(op);
+
 }
 
 /* ha->hardware_lock supposed to be held on entry */
 static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
 	struct atio_from_isp *atio)
 {
+	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
+	struct qla_tgt_sess *sess;
 	struct qla_tgt_cmd *cmd;
 
 	if (unlikely(tgt->tgt_stop)) {
@@ -2605,18 +2672,31 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
 		return -EFAULT;
 	}
 
-	cmd = kmem_cache_zalloc(qla_tgt_cmd_cachep, GFP_ATOMIC);
+	sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, atio->u.isp24.fcp_hdr.s_id);
+	if (unlikely(!sess)) {
+		struct qla_tgt_sess_op *op = kzalloc(sizeof(struct qla_tgt_sess_op),
+						     GFP_ATOMIC);
+		if (!op)
+			return -ENOMEM;
+
+		memcpy(&op->atio, atio, sizeof(*atio));
+		INIT_WORK(&op->work, qlt_create_sess_from_atio);
+		queue_work(qla_tgt_wq, &op->work);
+		return 0;
+	}
+	/*
+	 * Do kref_get() before returning + dropping qla_hw_data->hardware_lock.
+	 */
+	kref_get(&sess->se_sess->sess_kref);
+
+	cmd = qlt_get_tag(vha, sess, atio);
 	if (!cmd) {
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05e,
 		    "qla_target(%d): Allocation of cmd failed\n", vha->vp_idx);
+		ha->tgt.tgt_ops->put_sess(sess);
 		return -ENOMEM;
 	}
 
-	memcpy(&cmd->atio, atio, sizeof(*atio));
-	cmd->state = QLA_TGT_STATE_NEW;
-	cmd->tgt = vha->vha_tgt.qla_tgt;
-	cmd->vha = vha;
-
 	INIT_WORK(&cmd->work, qlt_do_work);
 	queue_work(qla_tgt_wq, &cmd->work);
 	return 0;
@@ -4911,23 +4991,13 @@ int __init qlt_init(void)
 	if (!QLA_TGT_MODE_ENABLED())
 		return 0;
 
-	qla_tgt_cmd_cachep = kmem_cache_create("qla_tgt_cmd_cachep",
-	    sizeof(struct qla_tgt_cmd), __alignof__(struct qla_tgt_cmd), 0,
-	    NULL);
-	if (!qla_tgt_cmd_cachep) {
-		ql_log(ql_log_fatal, NULL, 0xe06c,
-		    "kmem_cache_create for qla_tgt_cmd_cachep failed\n");
-		return -ENOMEM;
-	}
-
 	qla_tgt_mgmt_cmd_cachep = kmem_cache_create("qla_tgt_mgmt_cmd_cachep",
 	    sizeof(struct qla_tgt_mgmt_cmd), __alignof__(struct
 	    qla_tgt_mgmt_cmd), 0, NULL);
 	if (!qla_tgt_mgmt_cmd_cachep) {
 		ql_log(ql_log_fatal, NULL, 0xe06d,
 		    "kmem_cache_create for qla_tgt_mgmt_cmd_cachep failed\n");
-		ret = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 
 	qla_tgt_mgmt_cmd_mempool = mempool_create(25, mempool_alloc_slab,
@@ -4955,8 +5025,6 @@ out_cmd_mempool:
 	mempool_destroy(qla_tgt_mgmt_cmd_mempool);
 out_mgmt_cmd_cachep:
 	kmem_cache_destroy(qla_tgt_mgmt_cmd_cachep);
-out:
-	kmem_cache_destroy(qla_tgt_cmd_cachep);
 	return ret;
 }
 
@@ -4968,5 +5036,4 @@ void qlt_exit(void)
 	destroy_workqueue(qla_tgt_wq);
 	mempool_destroy(qla_tgt_mgmt_cmd_mempool);
 	kmem_cache_destroy(qla_tgt_mgmt_cmd_cachep);
-	kmem_cache_destroy(qla_tgt_cmd_cachep);
 }
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index ce33d8c..63283c5 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -805,6 +805,12 @@ struct qla_tgt {
 	struct list_head tgt_list_entry;
 };
 
+struct qla_tgt_sess_op {
+	struct scsi_qla_host *vha;
+	struct atio_from_isp atio;
+	struct work_struct work;
+};
+
 /*
  * Equivilant to IT Nexus (Initiator-Target)
  */
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 68fb66f..34db344 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -1482,7 +1482,9 @@ static int tcm_qla2xxx_check_initiator_node_acl(
 	}
 	se_tpg = &tpg->se_tpg;
 
-	se_sess = transport_init_session(TARGET_PROT_NORMAL);
+	se_sess = transport_init_session_tags(TCM_QLA2XXX_DEFAULT_TAGS,
+					      sizeof(struct qla_tgt_cmd),
+					      TARGET_PROT_NORMAL);
 	if (IS_ERR(se_sess)) {
 		pr_err("Unable to initialize struct se_session\n");
 		return PTR_ERR(se_sess);
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
index 33aaac8..b0a3ea5 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
@@ -4,6 +4,8 @@
 #define TCM_QLA2XXX_VERSION	"v0.1"
 /* length of ASCII WWPNs including pad */
 #define TCM_QLA2XXX_NAMELEN	32
+/* Number of pre-allocated per-session tags */
+#define TCM_QLA2XXX_DEFAULT_TAGS 512
 
 #include "qla_target.h"
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] qla2xxx: Convert to percpu_ida session tag pre-allocation
  2014-05-24  0:43 [PATCH] qla2xxx: Convert to percpu_ida session tag pre-allocation Nicholas A. Bellinger
@ 2014-05-24  2:33 ` Nicholas A. Bellinger
  2014-05-29 20:29   ` Quinn Tran
  0 siblings, 1 reply; 6+ messages in thread
From: Nicholas A. Bellinger @ 2014-05-24  2:33 UTC (permalink / raw)
  To: Nicholas A. Bellinger
  Cc: target-devel, linux-scsi, Saurav Kashyap, Quinn Tran,
	Giridhar Malavali, Chad Dupuis, Roland Dreier, Christoph Hellwig

Hi Qlogic folks,

A question for you below..

On Sat, 2014-05-24 at 00:43 +0000, Nicholas A. Bellinger wrote:
> From: Nicholas Bellinger <nab@linux-iscsi.org>
> 
> This patch converts qla2xxx target code to use generic percpu_ida
> tag allocation provided by target-core, thus removing the original
> kmem_cache_zalloc() for each struct qla_tgt_cmd descriptor in the
> incoming ATIO packet fast-path.
> 
> This includes the conversion of qlt_handle_cmd_for_atio() to perform
> qla_tgt_sess lookup before dispatching a command descriptor into
> qla_tgt_wq process context, along with handling the case where no
> active session exists, and subsequently kicking off a seperate
> process context for qlt_create_sess_from_atio() to create a new one.
> 
> It also includes moving tag allocation into generic code within
> qlt_get_tag(), so that the same logic can be shared between
> qlt_handle_cmd_for_atio() + qlt_create_sess_from_atio() contexts.
> Also, __qlt_do_work() has been made generic between both normal
> process context in qlt_do_work() + qlt_create_sess_from_atio().
> 
> Next, update qlt_free_cmd() to release the percpu-ida tags, and
> drop the now-unused global qla_tgt_cmd_cachep.
> 
> Finally in tcm_qla2xxx code, tcm_qla2xxx_check_initiator_node_acl()
> has been updated to use transport_init_session_tags() along with a
> hardcoded TCM_QLA2XXX_DEFAULT_TAGS=512 as the number of qla_tgt_cmd
> descriptors to pre-allocate per qla_tgt_sess instance.
> 
> Cc: Saurav Kashyap <saurav.kashyap@qlogic.com>
> Cc: Quinn Tran <quinn.tran@qlogic.com>
> Cc: Giridhar Malavali <giridhar.malavali@qlogic.com>
> Cc: Chad Dupuis <chad.dupuis@qlogic.com>
> Cc: Roland Dreier <roland@kernel.org>
> Cc: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
> ---
>  drivers/scsi/qla2xxx/qla_target.c  |  195 ++++++++++++++++++++++++------------
>  drivers/scsi/qla2xxx/qla_target.h  |    6 ++
>  drivers/scsi/qla2xxx/tcm_qla2xxx.c |    4 +-
>  drivers/scsi/qla2xxx/tcm_qla2xxx.h |    2 +
>  4 files changed, 142 insertions(+), 65 deletions(-)
> 

<SNIP>

> diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> index 68fb66f..34db344 100644
> --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> @@ -1482,7 +1482,9 @@ static int tcm_qla2xxx_check_initiator_node_acl(
>  	}
>  	se_tpg = &tpg->se_tpg;
>  
> -	se_sess = transport_init_session(TARGET_PROT_NORMAL);
> +	se_sess = transport_init_session_tags(TCM_QLA2XXX_DEFAULT_TAGS,
> +					      sizeof(struct qla_tgt_cmd),
> +					      TARGET_PROT_NORMAL);
>  	if (IS_ERR(se_sess)) {
>  		pr_err("Unable to initialize struct se_session\n");
>  		return PTR_ERR(se_sess);
> diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
> index 33aaac8..b0a3ea5 100644
> --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
> +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
> @@ -4,6 +4,8 @@
>  #define TCM_QLA2XXX_VERSION	"v0.1"
>  /* length of ASCII WWPNs including pad */
>  #define TCM_QLA2XXX_NAMELEN	32
> +/* Number of pre-allocated per-session tags */
> +#define TCM_QLA2XXX_DEFAULT_TAGS 512
>  

So a question wrt to the TCM_QLA2XXX_DEFAULT_TAGS value above used to
determine the number of qla_tgt_cmd descriptors to pre-allocate at
qla_tgt_sess creation time..

This value needs to line up with the total number of possible incoming
ATIO packets, plus the number of qla_tgt_cmd descriptors that have not
been acknowledged with a CTIO packet.  Typically with other fabrics that
have been converted to use percpu_ida, we over-allocate the number of
per-session tags in order to completely avoid the slow-path where
percpu_ida has to steal tags from another CPU.

AFAICT, there is no way at the qla_target driver level to enforce
per-session queue_depth + reflect the depth at the initiator port, so
the number of tags needs to be the worst case number of HW descriptors
that a single session can consume + number of unacknowledged
descriptors.

So the question is, is there already a define somewhere in qla2xxx code
that TCM_QLA2XXX_DEFAULT_TAGS can use as a starting point..?  If not,
what is the total number of outstanding commands that a single session
(or single port..?) can expect to handle at a given time..?

--nab


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] qla2xxx: Convert to percpu_ida session tag pre-allocation
  2014-05-24  2:33 ` Nicholas A. Bellinger
@ 2014-05-29 20:29   ` Quinn Tran
  2014-06-02 22:12     ` Nicholas A. Bellinger
  0 siblings, 1 reply; 6+ messages in thread
From: Quinn Tran @ 2014-05-29 20:29 UTC (permalink / raw)
  To: Nicholas A. Bellinger
  Cc: target-devel, linux-scsi, Saurav Kashyap, Giridhar Malavali,
	Chad Dupuis, Roland Dreier, Christoph Hellwig

[-- Attachment #1: Type: text/plain, Size: 5697 bytes --]

Nicholas,

Answer is below.

Regards,
Quinn Tran




On 5/23/14 7:33 PM, "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

>Hi Qlogic folks,
>
>A question for you below..
>
>On Sat, 2014-05-24 at 00:43 +0000, Nicholas A. Bellinger wrote:
>> From: Nicholas Bellinger <nab@linux-iscsi.org>
>>
>> This patch converts qla2xxx target code to use generic percpu_ida
>> tag allocation provided by target-core, thus removing the original
>> kmem_cache_zalloc() for each struct qla_tgt_cmd descriptor in the
>> incoming ATIO packet fast-path.
>>
>> This includes the conversion of qlt_handle_cmd_for_atio() to perform
>> qla_tgt_sess lookup before dispatching a command descriptor into
>> qla_tgt_wq process context, along with handling the case where no
>> active session exists, and subsequently kicking off a seperate
>> process context for qlt_create_sess_from_atio() to create a new one.
>>
>> It also includes moving tag allocation into generic code within
>> qlt_get_tag(), so that the same logic can be shared between
>> qlt_handle_cmd_for_atio() + qlt_create_sess_from_atio() contexts.
>> Also, __qlt_do_work() has been made generic between both normal
>> process context in qlt_do_work() + qlt_create_sess_from_atio().
>>
>> Next, update qlt_free_cmd() to release the percpu-ida tags, and
>> drop the now-unused global qla_tgt_cmd_cachep.
>>
>> Finally in tcm_qla2xxx code, tcm_qla2xxx_check_initiator_node_acl()
>> has been updated to use transport_init_session_tags() along with a
>> hardcoded TCM_QLA2XXX_DEFAULT_TAGS=512 as the number of qla_tgt_cmd
>> descriptors to pre-allocate per qla_tgt_sess instance.
>>
>> Cc: Saurav Kashyap <saurav.kashyap@qlogic.com>
>> Cc: Quinn Tran <quinn.tran@qlogic.com>
>> Cc: Giridhar Malavali <giridhar.malavali@qlogic.com>
>> Cc: Chad Dupuis <chad.dupuis@qlogic.com>
>> Cc: Roland Dreier <roland@kernel.org>
>> Cc: Christoph Hellwig <hch@lst.de>
>> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
>> ---
>>  drivers/scsi/qla2xxx/qla_target.c  |  195
>>++++++++++++++++++++++++------------
>>  drivers/scsi/qla2xxx/qla_target.h  |    6 ++
>>  drivers/scsi/qla2xxx/tcm_qla2xxx.c |    4 +-
>>  drivers/scsi/qla2xxx/tcm_qla2xxx.h |    2 +
>>  4 files changed, 142 insertions(+), 65 deletions(-)
>>
>
><SNIP>
>
>> diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
>>b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
>> index 68fb66f..34db344 100644
>> --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
>> +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
>> @@ -1482,7 +1482,9 @@ static int tcm_qla2xxx_check_initiator_node_acl(
>>      }
>>      se_tpg = &tpg->se_tpg;
>>
>> -    se_sess = transport_init_session(TARGET_PROT_NORMAL);
>> +    se_sess = transport_init_session_tags(TCM_QLA2XXX_DEFAULT_TAGS,
>> +                                          sizeof(struct qla_tgt_cmd),
>> +                                          TARGET_PROT_NORMAL);
>>      if (IS_ERR(se_sess)) {
>>              pr_err("Unable to initialize struct se_session\n");
>>              return PTR_ERR(se_sess);
>> diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
>>b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
>> index 33aaac8..b0a3ea5 100644
>> --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
>> +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
>> @@ -4,6 +4,8 @@
>>  #define TCM_QLA2XXX_VERSION "v0.1"
>>  /* length of ASCII WWPNs including pad */
>>  #define TCM_QLA2XXX_NAMELEN 32
>> +/* Number of pre-allocated per-session tags */
>> +#define TCM_QLA2XXX_DEFAULT_TAGS 512
>>
>
>So a question wrt to the TCM_QLA2XXX_DEFAULT_TAGS value above used to
>determine the number of qla_tgt_cmd descriptors to pre-allocate at
>qla_tgt_sess creation time..
>
>This value needs to line up with the total number of possible incoming
>ATIO packets, plus the number of qla_tgt_cmd descriptors that have not
>been acknowledged with a CTIO packet.  Typically with other fabrics that
>have been converted to use percpu_ida, we over-allocate the number of
>per-session tags in order to completely avoid the slow-path where
>percpu_ida has to steal tags from another CPU.
>
>AFAICT, there is no way at the qla_target driver level to enforce
>per-session queue_depth + reflect the depth at the initiator port, so
>the number of tags needs to be the worst case number of HW descriptors
>that a single session can consume + number of unacknowledged
>descriptors.

QT> QLA driver & FW currently do not have queue_depth control at a per
session level. Instead, the descriptor pool (i.e. Exchange pool) is manage
by FW at the Port level.

>
>So the question is, is there already a define somewhere in qla2xxx code
>that TCM_QLA2XXX_DEFAULT_TAGS can use as a starting point..?  If not,
>what is the total number of outstanding commands that a single session
>(or single port..?) can expect to handle at a given time..?

QT> Typically, the worst case value we see is 2048 for each port.  It's
currently not #define.  This value can change over time.  A good starting
default value for TCM_QLA2XXX_DEFAULT_TAGS would be 2048 + %2 pad = 2088.

Extra size note, the true value should be extracted from
"ha->fw_xcb_count", if this field is set.  Otherwise, default back to
TCM_QLA2XXX_DEFAULT_TAGS.

>
>--nab
>


________________________________

This message and any attached documents contain information from QLogic Corporation or its wholly-owned subsidiaries that may be confidential. If you are not the intended recipient, you may not read, copy, distribute, or use this information. If you have received this transmission in error, please notify the sender immediately by reply e-mail and then delete this message.

[-- Attachment #2: winmail.dat --]
[-- Type: application/ms-tnef, Size: 6714 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] qla2xxx: Convert to percpu_ida session tag pre-allocation
  2014-05-29 20:29   ` Quinn Tran
@ 2014-06-02 22:12     ` Nicholas A. Bellinger
  2014-06-02 23:32       ` Quinn Tran
  0 siblings, 1 reply; 6+ messages in thread
From: Nicholas A. Bellinger @ 2014-06-02 22:12 UTC (permalink / raw)
  To: Quinn Tran
  Cc: target-devel, linux-scsi, Saurav Kashyap, Giridhar Malavali,
	Chad Dupuis, Roland Dreier, Christoph Hellwig

On Thu, 2014-05-29 at 20:29 +0000, Quinn Tran wrote:
> On 5/23/14 7:33 PM, "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

<SNIP>

> >> diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> >>b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> >> index 68fb66f..34db344 100644
> >> --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> >> +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> >> @@ -1482,7 +1482,9 @@ static int tcm_qla2xxx_check_initiator_node_acl(
> >>      }
> >>      se_tpg = &tpg->se_tpg;
> >>
> >> -    se_sess = transport_init_session(TARGET_PROT_NORMAL);
> >> +    se_sess = transport_init_session_tags(TCM_QLA2XXX_DEFAULT_TAGS,
> >> +                                          sizeof(struct qla_tgt_cmd),
> >> +                                          TARGET_PROT_NORMAL);
> >>      if (IS_ERR(se_sess)) {
> >>              pr_err("Unable to initialize struct se_session\n");
> >>              return PTR_ERR(se_sess);
> >> diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
> >>b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
> >> index 33aaac8..b0a3ea5 100644
> >> --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
> >> +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
> >> @@ -4,6 +4,8 @@
> >>  #define TCM_QLA2XXX_VERSION "v0.1"
> >>  /* length of ASCII WWPNs including pad */
> >>  #define TCM_QLA2XXX_NAMELEN 32
> >> +/* Number of pre-allocated per-session tags */
> >> +#define TCM_QLA2XXX_DEFAULT_TAGS 512
> >>
> >
> >So a question wrt to the TCM_QLA2XXX_DEFAULT_TAGS value above used to
> >determine the number of qla_tgt_cmd descriptors to pre-allocate at
> >qla_tgt_sess creation time..
> >
> >This value needs to line up with the total number of possible incoming
> >ATIO packets, plus the number of qla_tgt_cmd descriptors that have not
> >been acknowledged with a CTIO packet.  Typically with other fabrics that
> >have been converted to use percpu_ida, we over-allocate the number of
> >per-session tags in order to completely avoid the slow-path where
> >percpu_ida has to steal tags from another CPU.
> >
> >AFAICT, there is no way at the qla_target driver level to enforce
> >per-session queue_depth + reflect the depth at the initiator port, so
> >the number of tags needs to be the worst case number of HW descriptors
> >that a single session can consume + number of unacknowledged
> >descriptors.
> 
> QT> QLA driver & FW currently do not have queue_depth control at a per
> session level. Instead, the descriptor pool (i.e. Exchange pool) is manage
> by FW at the Port level.
> 
> >
> >So the question is, is there already a define somewhere in qla2xxx code
> >that TCM_QLA2XXX_DEFAULT_TAGS can use as a starting point..?  If not,
> >what is the total number of outstanding commands that a single session
> >(or single port..?) can expect to handle at a given time..?
> 
> QT> Typically, the worst case value we see is 2048 for each port.  It's
> currently not #define.  This value can change over time.  A good starting
> default value for TCM_QLA2XXX_DEFAULT_TAGS would be 2048 + %2 pad = 2088.
> 

<nod>, thanks for the extra background.

> Extra size note, the true value should be extracted from
> "ha->fw_xcb_count", if this field is set.  Otherwise, default back to
> TCM_QLA2XXX_DEFAULT_TAGS.
> 

Ok, squashing the following patch into the original to use
ha->fw_xcb_count (if available) for determining the worst-case per
session number of tags.  Note this currently ends up being ~1.7 MB of
pre-allocated descriptors per session.

Also, I assume the pad is required for ha->fw_xcb_count as well..  In
that case, the patch below also adds a extra define for 40 extra tags
following your pad comment above.

--nab

diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 34db344..8b7743f 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -1465,6 +1465,8 @@ static int tcm_qla2xxx_check_initiator_node_acl(
        struct qla_tgt_sess *sess = qla_tgt_sess;
        unsigned char port_name[36];
        unsigned long flags;
+       int num_tags = (ha->fw_xcb_count) ? ha->fw_xcb_count :
+                      TCM_QLA2XXX_DEFAULT_TAGS;
 
        lport = vha->vha_tgt.target_lport_ptr;
        if (!lport) {
@@ -1482,7 +1484,7 @@ static int tcm_qla2xxx_check_initiator_node_acl(
        }
        se_tpg = &tpg->se_tpg;
 
-       se_sess = transport_init_session_tags(TCM_QLA2XXX_DEFAULT_TAGS,
+       se_sess = transport_init_session_tags(num_tags + TCM_QLA2XXX_PAD_TAGS,
                                              sizeof(struct qla_tgt_cmd),
                                              TARGET_PROT_NORMAL);
        if (IS_ERR(se_sess)) {
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
index b0a3ea5..04d6728 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
@@ -4,8 +4,12 @@
 #define TCM_QLA2XXX_VERSION    "v0.1"
 /* length of ASCII WWPNs including pad */
 #define TCM_QLA2XXX_NAMELEN    32
-/* Number of pre-allocated per-session tags */
-#define TCM_QLA2XXX_DEFAULT_TAGS 512
+/*
+ * Number of pre-allocated per-session tags, based upon the worst-case
+ * per port number of iocbs
+ */
+#define TCM_QLA2XXX_DEFAULT_TAGS 2048
+#define TCM_QLA2XXX_PAD_TAGS   40
 
 #include "qla_target.h"
 

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] qla2xxx: Convert to percpu_ida session tag pre-allocation
  2014-06-02 22:12     ` Nicholas A. Bellinger
@ 2014-06-02 23:32       ` Quinn Tran
  2014-06-03  0:09         ` Nicholas A. Bellinger
  0 siblings, 1 reply; 6+ messages in thread
From: Quinn Tran @ 2014-06-02 23:32 UTC (permalink / raw)
  To: Nicholas A. Bellinger
  Cc: target-devel, linux-scsi, Saurav Kashyap, Giridhar Malavali,
	Chad Dupuis, Roland Dreier, Christoph Hellwig

[-- Attachment #1: Type: text/plain, Size: 1222 bytes --]


Regards,
Quinn Tran




On 6/2/14 3:12 PM, "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

>>
>>Extra size note, the true value should be extracted from
>>"ha->fw_xcb_count", if this field is set.  Otherwise, default back to
>>TCM_QLA2XXX_DEFAULT_TAGS.
>
>Ok, squashing the following patch into the original to use
>ha->fw_xcb_count (if available) for determining the worst-case per
>session number of tags.  Note this currently ends up being ~1.7 MB of
>pre-allocated descriptors per session.

QT>  Ack.

>
>Also, I assume the pad is required for ha->fw_xcb_count as well..  In
>that case, the patch below also adds a extra define for 40 extra tags
>following your pad comment above.

QT> Pad is not required if the value is read from ha->fw_xcb_count.


>
>--nab
>


________________________________

This message and any attached documents contain information from QLogic Corporation or its wholly-owned subsidiaries that may be confidential. If you are not the intended recipient, you may not read, copy, distribute, or use this information. If you have received this transmission in error, please notify the sender immediately by reply e-mail and then delete this message.

[-- Attachment #2: winmail.dat --]
[-- Type: application/ms-tnef, Size: 4818 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] qla2xxx: Convert to percpu_ida session tag pre-allocation
  2014-06-02 23:32       ` Quinn Tran
@ 2014-06-03  0:09         ` Nicholas A. Bellinger
  0 siblings, 0 replies; 6+ messages in thread
From: Nicholas A. Bellinger @ 2014-06-03  0:09 UTC (permalink / raw)
  To: Quinn Tran
  Cc: target-devel, linux-scsi, Saurav Kashyap, Giridhar Malavali,
	Chad Dupuis, Roland Dreier, Christoph Hellwig

On Mon, 2014-06-02 at 23:32 +0000, Quinn Tran wrote:
> Regards,
> Quinn Tran
> 
> 
> 
> 
> On 6/2/14 3:12 PM, "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> >>
> >>Extra size note, the true value should be extracted from
> >>"ha->fw_xcb_count", if this field is set.  Otherwise, default back to
> >>TCM_QLA2XXX_DEFAULT_TAGS.
> >
> >Ok, squashing the following patch into the original to use
> >ha->fw_xcb_count (if available) for determining the worst-case per
> >session number of tags.  Note this currently ends up being ~1.7 MB of
> >pre-allocated descriptors per session.
> 
> QT>  Ack.
> 
> >
> >Also, I assume the pad is required for ha->fw_xcb_count as well..  In
> >that case, the patch below also adds a extra define for 40 extra tags
> >following your pad comment above.
> 
> QT> Pad is not required if the value is read from ha->fw_xcb_count.
> 

Fixed.

Thanks Quinn!

--nab

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2014-06-03  0:09 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-24  0:43 [PATCH] qla2xxx: Convert to percpu_ida session tag pre-allocation Nicholas A. Bellinger
2014-05-24  2:33 ` Nicholas A. Bellinger
2014-05-29 20:29   ` Quinn Tran
2014-06-02 22:12     ` Nicholas A. Bellinger
2014-06-02 23:32       ` Quinn Tran
2014-06-03  0:09         ` Nicholas A. Bellinger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.