* [PATCH 00/10] mpt3sas: full mq support
@ 2017-01-31  9:25 Hannes Reinecke
  2017-01-31  9:25 ` [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors Hannes Reinecke
                   ` (11 more replies)
  0 siblings, 12 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke

Hi all,

this is a patchset to enable full multiqueue support for the mpt3sas driver.
While the HBA only has a single mailbox register for submitting commands,
it does have individual receive queues per MSI-X interrupt, and so it
benefits from being converted to full multiqueue support.
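
To illustrate the principle with a rough sketch (names here are
illustrative only and simplified from where the series ends up; see
patches 9 and 10 for the actual plumbing): each blk-mq hardware queue
is tied to one MSI-X vector, and a command submitted on a given
hardware queue asks the IOC to post its reply on that queue's vector:

static void sketch_submit(struct MPT3SAS_ADAPTER *ioc,
			  struct scsi_cmnd *scmd, u16 smid, u16 dev_handle)
{
	u32 unique_tag = blk_mq_unique_tag(scmd->request);
	u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag);

	/* the reply descriptor for this command is posted on vector 'hwq' */
	ioc->put_smid_scsi_io(ioc, smid, dev_handle, hwq);
}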

As usual, comments and reviews are welcome.

Hannes Reinecke (10):
  mpt3sas: switch to pci_alloc_irq_vectors
  mpt3sas: set default value for cb_idx
  mpt3sas: implement _dechain_st()
  mpt3sas: separate out _base_recovery_check()
  mpt3sas: open-code _scsih_scsi_lookup_get()
  mpt3sas: Introduce mpt3sas_get_st_from_smid()
  mpt3sas: use hi-priority queue for TMFs
  mpt3sas: lockless command submission for scsi-mq
  mpt3sas: Use 'msix_index' as argument for put_smid functions
  mpt3sas: scsi-mq interrupt steering

 drivers/scsi/mpt3sas/mpt3sas_base.c      | 360 ++++++++++++++++++++-----------
 drivers/scsi/mpt3sas/mpt3sas_base.h      |  24 ++-
 drivers/scsi/mpt3sas/mpt3sas_config.c    |   2 +-
 drivers/scsi/mpt3sas/mpt3sas_ctl.c       | 134 ++++++++----
 drivers/scsi/mpt3sas/mpt3sas_scsih.c     | 303 +++++++++++++++++++++++---
 drivers/scsi/mpt3sas/mpt3sas_transport.c |   8 +-
 drivers/scsi/mpt3sas/mpt3sas_warpdrive.c |   4 +-
 7 files changed, 623 insertions(+), 212 deletions(-)

-- 
1.8.5.6



* [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
@ 2017-01-31  9:25 ` Hannes Reinecke
  2017-02-07 13:15   ` Christoph Hellwig
  2017-02-16  9:32   ` Sreekanth Reddy
  2017-01-31  9:25 ` [PATCH 02/10] mpt3sas: set default value for cb_idx Hannes Reinecke
                   ` (10 subsequent siblings)
  11 siblings, 2 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

Clean up the MSI-X handling, allowing us to use the
PCI-layer-provided vector allocation.
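
As a minimal standalone sketch of the pattern adopted here (assumed
names; error unwinding and the legacy INTx fallback are omitted):

static irqreturn_t sketch_isr(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int sketch_setup_irqs(struct pci_dev *pdev, int max_queues)
{
	int i, nvec, ret;

	/* one vector per queue, spread across CPUs by the PCI layer */
	nvec = pci_alloc_irq_vectors(pdev, 1, max_queues,
				     PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
	if (nvec < 0)
		return nvec;

	for (i = 0; i < nvec; i++) {
		ret = request_irq(pci_irq_vector(pdev, i), sketch_isr,
				  IRQF_SHARED, "sketch", pdev);
		if (ret)
			return ret;
	}
	/* pci_irq_get_affinity(pdev, i) now yields each vector's CPU mask */
	return nvec;
}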

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c | 100 +++++++++++++++++-------------------
 drivers/scsi/mpt3sas/mpt3sas_base.h |   2 -
 2 files changed, 46 insertions(+), 56 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index f00ef88..0185a8d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -1129,7 +1129,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 		/* TMs are on msix_index == 0 */
 		if (reply_q->msix_index == 0)
 			continue;
-		synchronize_irq(reply_q->vector);
+		synchronize_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index));
 	}
 }
 
@@ -1818,11 +1818,8 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 
 	list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) {
 		list_del(&reply_q->list);
-		if (smp_affinity_enable) {
-			irq_set_affinity_hint(reply_q->vector, NULL);
-			free_cpumask_var(reply_q->affinity_hint);
-		}
-		free_irq(reply_q->vector, reply_q);
+		free_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index),
+			 reply_q);
 		kfree(reply_q);
 	}
 }
@@ -1831,13 +1828,13 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
  * _base_request_irq - request irq
  * @ioc: per adapter object
  * @index: msix index into vector table
- * @vector: irq vector
  *
  * Inserting respective reply_queue into the list.
  */
 static int
-_base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index, u32 vector)
+_base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index)
 {
+	struct pci_dev *pdev = ioc->pdev;
 	struct adapter_reply_queue *reply_q;
 	int r;
 
@@ -1849,14 +1846,6 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	}
 	reply_q->ioc = ioc;
 	reply_q->msix_index = index;
-	reply_q->vector = vector;
-
-	if (smp_affinity_enable) {
-		if (!zalloc_cpumask_var(&reply_q->affinity_hint, GFP_KERNEL)) {
-			kfree(reply_q);
-			return -ENOMEM;
-		}
-	}
 
 	atomic_set(&reply_q->busy, 0);
 	if (ioc->msix_enable)
@@ -1865,12 +1854,11 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	else
 		snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d",
 		    ioc->driver_name, ioc->id);
-	r = request_irq(vector, _base_interrupt, IRQF_SHARED, reply_q->name,
-	    reply_q);
+	r = request_irq(pci_irq_vector(pdev, index), _base_interrupt,
+			IRQF_SHARED, reply_q->name, reply_q);
 	if (r) {
 		pr_err(MPT3SAS_FMT "unable to allocate interrupt %d!\n",
-		    reply_q->name, vector);
-		free_cpumask_var(reply_q->affinity_hint);
+		       reply_q->name, pci_irq_vector(pdev, index));
 		kfree(reply_q);
 		return -EBUSY;
 	}
@@ -1906,6 +1894,21 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	if (!nr_msix)
 		return;
 
+	if (smp_affinity_enable) {
+		list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
+			const cpumask_t *mask = pci_irq_get_affinity(ioc->pdev,
+							reply_q->msix_index);
+			if (!mask) {
+				pr_warn(MPT3SAS_FMT "no affinity for msi %x\n",
+					ioc->name, reply_q->msix_index);
+				continue;
+			}
+
+			for_each_cpu(cpu, mask)
+				ioc->cpu_msix_table[cpu] = reply_q->msix_index;
+		}
+		return;
+	}
 	cpu = cpumask_first(cpu_online_mask);
 
 	list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
@@ -1919,18 +1922,9 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 			group++;
 
 		for (i = 0 ; i < group ; i++) {
-			ioc->cpu_msix_table[cpu] = index;
-			if (smp_affinity_enable)
-				cpumask_or(reply_q->affinity_hint,
-				   reply_q->affinity_hint, get_cpu_mask(cpu));
+			ioc->cpu_msix_table[cpu] = reply_q->msix_index;
 			cpu = cpumask_next(cpu, cpu_online_mask);
 		}
-		if (smp_affinity_enable)
-			if (irq_set_affinity_hint(reply_q->vector,
-					   reply_q->affinity_hint))
-				dinitprintk(ioc, pr_info(MPT3SAS_FMT
-				 "Err setting affinity hint to irq vector %d\n",
-				 ioc->name, reply_q->vector));
 		index++;
 	}
 }
@@ -1957,10 +1951,10 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 static int
 _base_enable_msix(struct MPT3SAS_ADAPTER *ioc)
 {
-	struct msix_entry *entries, *a;
 	int r;
 	int i, local_max_msix_vectors;
 	u8 try_msix = 0;
+	unsigned int irq_flags = PCI_IRQ_MSIX;
 
 	if (msix_disable == -1 || msix_disable == 0)
 		try_msix = 1;
@@ -1972,7 +1966,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 		goto try_ioapic;
 
 	ioc->reply_queue_count = min_t(int, ioc->cpu_count,
-	    ioc->msix_vector_count);
+		ioc->msix_vector_count);
 
 	printk(MPT3SAS_FMT "MSI-X vectors supported: %d, no of cores"
 	  ": %d, max_msix_vectors: %d\n", ioc->name, ioc->msix_vector_count,
@@ -1993,46 +1987,42 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	if (ioc->msix_vector_count < ioc->cpu_count)
 		smp_affinity_enable = 0;
 
-	entries = kcalloc(ioc->reply_queue_count, sizeof(struct msix_entry),
-	    GFP_KERNEL);
-	if (!entries) {
-		dfailprintk(ioc, pr_info(MPT3SAS_FMT
-			"kcalloc failed @ at %s:%d/%s() !!!\n",
-			ioc->name, __FILE__, __LINE__, __func__));
-		goto try_ioapic;
-	}
+	if (smp_affinity_enable)
+		irq_flags |= PCI_IRQ_AFFINITY;
 
-	for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++)
-		a->entry = i;
-
-	r = pci_enable_msix_exact(ioc->pdev, entries, ioc->reply_queue_count);
-	if (r) {
+	r = pci_alloc_irq_vectors(ioc->pdev, 1, ioc->reply_queue_count,
+				  irq_flags);
+	if (r < 0) {
 		dfailprintk(ioc, pr_info(MPT3SAS_FMT
-			"pci_enable_msix_exact failed (r=%d) !!!\n",
+			"pci_alloc_irq_vectors failed (r=%d) !!!\n",
 			ioc->name, r));
-		kfree(entries);
 		goto try_ioapic;
 	}
 
 	ioc->msix_enable = 1;
-	for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++) {
-		r = _base_request_irq(ioc, i, a->vector);
+	ioc->msix_vector_count = r;
+	for (i = 0; i < ioc->msix_vector_count; i++) {
+		r = _base_request_irq(ioc, i);
 		if (r) {
 			_base_free_irq(ioc);
 			_base_disable_msix(ioc);
-			kfree(entries);
 			goto try_ioapic;
 		}
 	}
 
-	kfree(entries);
 	return 0;
 
 /* failback to io_apic interrupt routing */
  try_ioapic:
 
 	ioc->reply_queue_count = 1;
-	r = _base_request_irq(ioc, 0, ioc->pdev->irq);
+	r = pci_alloc_irq_vectors(ioc->pdev, 1, 1, PCI_IRQ_LEGACY);
+	if (r < 0) {
+		dfailprintk(ioc, pr_info(MPT3SAS_FMT
+			"pci_alloc_irq_vector(legacy) failed (r=%d) !!!\n",
+			ioc->name, r));
+	} else
+		r = _base_request_irq(ioc, 0);
 
 	return r;
 }
@@ -2203,7 +2193,8 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
 		pr_info(MPT3SAS_FMT "%s: IRQ %d\n",
 		    reply_q->name,  ((ioc->msix_enable) ? "PCI-MSI-X enabled" :
-		    "IO-APIC enabled"), reply_q->vector);
+		    "IO-APIC enabled"),
+		    pci_irq_vector(ioc->pdev, reply_q->msix_index));
 
 	pr_info(MPT3SAS_FMT "iomem(0x%016llx), mapped(0x%p), size(%d)\n",
 	    ioc->name, (unsigned long long)chip_phys, ioc->chip, memap_sz);
@@ -5338,7 +5329,8 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 		    sizeof(resource_size_t *), GFP_KERNEL);
 		if (!ioc->reply_post_host_index) {
 			dfailprintk(ioc, pr_info(MPT3SAS_FMT "allocation "
-				"for cpu_msix_table failed!!!\n", ioc->name));
+				"for reply_post_host_index failed!!!\n",
+				ioc->name));
 			r = -ENOMEM;
 			goto out_free_resources;
 		}
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 394fe13..505574e 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -719,12 +719,10 @@ struct _event_ack_list {
 struct adapter_reply_queue {
 	struct MPT3SAS_ADAPTER	*ioc;
 	u8			msix_index;
-	unsigned int		vector;
 	u32			reply_post_host_index;
 	Mpi2ReplyDescriptorsUnion_t *reply_post_free;
 	char			name[MPT_NAME_LENGTH];
 	atomic_t		busy;
-	cpumask_var_t		affinity_hint;
 	struct list_head	list;
 };
 
-- 
1.8.5.6



* [PATCH 02/10] mpt3sas: set default value for cb_idx
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
  2017-01-31  9:25 ` [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors Hannes Reinecke
@ 2017-01-31  9:25 ` Hannes Reinecke
  2017-02-07 13:15   ` Christoph Hellwig
  2017-01-31  9:25 ` [PATCH 03/10] mpt3sas: implement _dechain_st() Hannes Reinecke
                   ` (9 subsequent siblings)
  11 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

No functional change.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 0185a8d..09d0008 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -873,7 +873,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 _base_get_cb_idx(struct MPT3SAS_ADAPTER *ioc, u16 smid)
 {
 	int i;
-	u8 cb_idx;
+	u8 cb_idx = 0xFF;
 
 	if (smid < ioc->hi_priority_smid) {
 		i = smid - 1;
@@ -884,8 +884,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	} else if (smid <= ioc->hba_queue_depth) {
 		i = smid - ioc->internal_smid;
 		cb_idx = ioc->internal_lookup[i].cb_idx;
-	} else
-		cb_idx = 0xFF;
+	}
 	return cb_idx;
 }
 
-- 
1.8.5.6



* [PATCH 03/10] mpt3sas: implement _dechain_st()
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
  2017-01-31  9:25 ` [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors Hannes Reinecke
  2017-01-31  9:25 ` [PATCH 02/10] mpt3sas: set default value for cb_idx Hannes Reinecke
@ 2017-01-31  9:25 ` Hannes Reinecke
  2017-02-07 13:15   ` Christoph Hellwig
  2017-01-31  9:25 ` [PATCH 04/10] mpt3sas: separate out _base_recovery_check() Hannes Reinecke
                   ` (8 subsequent siblings)
  11 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

Split off _dechain_st() as a separate function.
No functional change.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 09d0008..120b317 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -2365,6 +2365,19 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	return smid;
 }
 
+static void
+_dechain_st(struct MPT3SAS_ADAPTER *ioc, struct scsiio_tracker *st)
+{
+	struct chain_tracker *chain_req;
+
+	while (!list_empty(&st->chain_list)) {
+		chain_req = list_first_entry(&st->chain_list,
+					     struct chain_tracker,
+					     tracker_list);
+		list_move(&chain_req->tracker_list, &ioc->free_chain_list);
+	}
+}
+
 /**
  * mpt3sas_base_free_smid - put smid back on free_list
  * @ioc: per adapter object
@@ -2377,20 +2390,12 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 {
 	unsigned long flags;
 	int i;
-	struct chain_tracker *chain_req, *next;
 
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	if (smid < ioc->hi_priority_smid) {
 		/* scsiio queue */
 		i = smid - 1;
-		if (!list_empty(&ioc->scsi_lookup[i].chain_list)) {
-			list_for_each_entry_safe(chain_req, next,
-			    &ioc->scsi_lookup[i].chain_list, tracker_list) {
-				list_del_init(&chain_req->tracker_list);
-				list_add(&chain_req->tracker_list,
-				    &ioc->free_chain_list);
-			}
-		}
+		_dechain_st(ioc, &ioc->scsi_lookup[i]);
 		ioc->scsi_lookup[i].cb_idx = 0xFF;
 		ioc->scsi_lookup[i].scmd = NULL;
 		ioc->scsi_lookup[i].direct_io = 0;
-- 
1.8.5.6



* [PATCH 04/10] mpt3sas: separate out _base_recovery_check()
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
                   ` (2 preceding siblings ...)
  2017-01-31  9:25 ` [PATCH 03/10] mpt3sas: implement _dechain_st() Hannes Reinecke
@ 2017-01-31  9:25 ` Hannes Reinecke
  2017-02-07 13:16   ` Christoph Hellwig
  2017-02-16  9:53   ` Sreekanth Reddy
  2017-01-31  9:25 ` [PATCH 05/10] mpt3sas: open-code _scsih_scsi_lookup_get() Hannes Reinecke
                   ` (7 subsequent siblings)
  11 siblings, 2 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

No functional change.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 120b317..dd14596 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -2366,6 +2366,19 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 }
 
 static void
+_base_recovery_check(struct MPT3SAS_ADAPTER *ioc)
+{
+	/*
+	 * See _wait_for_commands_to_complete() call with regards to this code.
+	 */
+	if (ioc->shost_recovery && ioc->pending_io_count) {
+		if (ioc->pending_io_count == 1)
+			wake_up(&ioc->reset_wq);
+		ioc->pending_io_count--;
+	}
+}
+
+static void
 _dechain_st(struct MPT3SAS_ADAPTER *ioc, struct scsiio_tracker *st)
 {
 	struct chain_tracker *chain_req;
@@ -2402,15 +2415,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 		list_add(&ioc->scsi_lookup[i].tracker_list, &ioc->free_list);
 		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
 
-		/*
-		 * See _wait_for_commands_to_complete() call with regards
-		 * to this code.
-		 */
-		if (ioc->shost_recovery && ioc->pending_io_count) {
-			if (ioc->pending_io_count == 1)
-				wake_up(&ioc->reset_wq);
-			ioc->pending_io_count--;
-		}
+		_base_recovery_check(ioc);
 		return;
 	} else if (smid < ioc->internal_smid) {
 		/* hi-priority */
-- 
1.8.5.6



* [PATCH 05/10] mpt3sas: open-code _scsih_scsi_lookup_get()
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
                   ` (3 preceding siblings ...)
  2017-01-31  9:25 ` [PATCH 04/10] mpt3sas: separate out _base_recovery_check() Hannes Reinecke
@ 2017-01-31  9:25 ` Hannes Reinecke
  2017-02-07 13:16   ` Christoph Hellwig
  2017-02-16  9:59   ` Sreekanth Reddy
  2017-01-31  9:25 ` [PATCH 06/10] mpt3sas: Introduce mpt3sas_get_st_from_smid() Hannes Reinecke
                   ` (6 subsequent siblings)
  11 siblings, 2 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

_scsih_scsi_lookup_get() is just a wrapper around the scsi lookup array
and is only used in one place, so open-code it.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index b5c966e..979f95a 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -1061,19 +1061,6 @@ struct _sas_node *
 }
 
 /**
- * _scsih_scsi_lookup_get - returns scmd entry
- * @ioc: per adapter object
- * @smid: system request message index
- *
- * Returns the smid stored scmd pointer.
- */
-static struct scsi_cmnd *
-_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid)
-{
-	return ioc->scsi_lookup[smid - 1].scmd;
-}
-
-/**
  * _scsih_scsi_lookup_get_clear - returns scmd entry
  * @ioc: per adapter object
  * @smid: system request message index
@@ -6101,7 +6088,7 @@ static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
 	for (smid = 1; smid <= ioc->scsiio_depth; smid++) {
 		if (ioc->shost_recovery)
 			goto out;
-		scmd = _scsih_scsi_lookup_get(ioc, smid);
+		scmd = ioc->scsi_lookup[smid - 1].scmd;
 		if (!scmd)
 			continue;
 		sdev = scmd->device;
-- 
1.8.5.6



* [PATCH 06/10] mpt3sas: Introduce mpt3sas_get_st_from_smid()
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
                   ` (4 preceding siblings ...)
  2017-01-31  9:25 ` [PATCH 05/10] mpt3sas: open-code _scsih_scsi_lookup_get() Hannes Reinecke
@ 2017-01-31  9:25 ` Hannes Reinecke
  2017-02-07 13:17   ` Christoph Hellwig
  2017-01-31  9:25 ` [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs Hannes Reinecke
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

Abstract accesses to the scsi_lookup array by introducing
mpt3sas_get_st_from_smid(). This isolates callers from the backing
store, which a later patch in this series replaces for scsi-mq.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c      | 20 ++++++++++++++++----
 drivers/scsi/mpt3sas/mpt3sas_base.h      |  2 ++
 drivers/scsi/mpt3sas/mpt3sas_scsih.c     |  2 +-
 drivers/scsi/mpt3sas/mpt3sas_warpdrive.c |  4 +++-
 4 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index dd14596..942fb7e 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -862,6 +862,14 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	return 1;
 }
 
+struct scsiio_tracker *
+mpt3sas_get_st_from_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid)
+{
+	WARN_ON(!smid);
+	WARN_ON(smid >= ioc->hi_priority_smid);
+	return &ioc->scsi_lookup[smid - 1];
+}
+
 /**
  * _base_get_cb_idx - obtain the callback index
  * @ioc: per adapter object
@@ -876,8 +884,11 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	u8 cb_idx = 0xFF;
 
 	if (smid < ioc->hi_priority_smid) {
-		i = smid - 1;
-		cb_idx = ioc->scsi_lookup[i].cb_idx;
+		struct scsiio_tracker *st;
+
+		st = mpt3sas_get_st_from_smid(ioc, smid);
+		if (st)
+			cb_idx = st->cb_idx;
 	} else if (smid < ioc->internal_smid) {
 		i = smid - ioc->hi_priority_smid;
 		cb_idx = ioc->hpr_lookup[i].cb_idx;
@@ -1249,6 +1260,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 _base_get_chain_buffer_tracker(struct MPT3SAS_ADAPTER *ioc, u16 smid)
 {
 	struct chain_tracker *chain_req;
+	struct scsiio_tracker *st;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
@@ -1261,8 +1273,8 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	chain_req = list_entry(ioc->free_chain_list.next,
 	    struct chain_tracker, tracker_list);
 	list_del_init(&chain_req->tracker_list);
-	list_add_tail(&chain_req->tracker_list,
-	    &ioc->scsi_lookup[smid - 1].chain_list);
+	st = mpt3sas_get_st_from_smid(ioc, smid);
+	list_add_tail(&chain_req->tracker_list, &st->chain_list);
 	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
 	return chain_req;
 }
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 505574e..876e7b4 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -1246,6 +1246,8 @@ __le32 mpt3sas_base_get_sense_buffer_dma(struct MPT3SAS_ADAPTER *ioc,
 u16 mpt3sas_base_get_smid_hpr(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx);
 u16 mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx,
 	struct scsi_cmnd *scmd);
+struct scsiio_tracker * mpt3sas_get_st_from_smid(struct MPT3SAS_ADAPTER *ioc,
+	u16 smid);
 
 u16 mpt3sas_base_get_smid(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx);
 void mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 979f95a..ca925ef 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2250,7 +2250,7 @@ struct _sas_node *
 	}
 
 	if (type == MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK)
-		scsi_lookup = &ioc->scsi_lookup[smid_task - 1];
+		scsi_lookup = mpt3sas_get_st_from_smid(ioc, smid_task);
 
 	dtmprintk(ioc, pr_info(MPT3SAS_FMT
 		"sending tm: handle(0x%04x), task_type(0x%02x), smid(%d)\n",
diff --git a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c
index 540bd50..06e3f7d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c
@@ -270,7 +270,9 @@
 inline u8
 mpt3sas_scsi_direct_io_get(struct MPT3SAS_ADAPTER *ioc, u16 smid)
 {
-	return ioc->scsi_lookup[smid - 1].direct_io;
+	struct scsiio_tracker *st = mpt3sas_get_st_from_smid(ioc, smid);
+
+	return st ? st->direct_io : 0;
 }
 
 /**
-- 
1.8.5.6



* [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
                   ` (5 preceding siblings ...)
  2017-01-31  9:25 ` [PATCH 06/10] mpt3sas: Introduce mpt3sas_get_st_from_smid() Hannes Reinecke
@ 2017-01-31  9:25 ` Hannes Reinecke
  2017-02-07 13:19   ` Christoph Hellwig
  2017-02-16 10:09   ` Sreekanth Reddy
  2017-01-31  9:25 ` [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq Hannes Reinecke
                   ` (4 subsequent siblings)
  11 siblings, 2 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

When sending a TMF via the ioctl interface we should be using the
hi-priority queue instead of the scsi queue, to be consistent with
overall TMF usage.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index 95f0f24..e952175 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -720,7 +720,7 @@ enum block_state {
 		}
 	} else {
 
-		smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->ctl_cb_idx, NULL);
+		smid = mpt3sas_base_get_smid_hpr(ioc, ioc->ctl_cb_idx);
 		if (!smid) {
 			pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n",
 			    ioc->name, __func__);
-- 
1.8.5.6



* [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
                   ` (6 preceding siblings ...)
  2017-01-31  9:25 ` [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs Hannes Reinecke
@ 2017-01-31  9:25 ` Hannes Reinecke
  2017-01-31 13:22   ` Christoph Hellwig
  2017-01-31  9:25 ` [PATCH 09/10] mpt3sas: Use 'msix_index' as argument for put_smid functions Hannes Reinecke
                   ` (3 subsequent siblings)
  11 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

Enable lockless command submission for scsi-mq by moving the
scsiio_tracker into the private payload of struct request.
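
The key point is that the smid becomes a pure function of the blk-mq
(hwq, tag) pair and vice versa, so no lookup table and no lock are
needed. A minimal sketch of the mapping used below (illustrative
helpers, not part of the patch; smid 0 is reserved, hence the +1/-1):

static u16 sketch_smid_from_scmd(struct Scsi_Host *shost,
				 struct scsi_cmnd *scmd)
{
	u32 unique_tag = blk_mq_unique_tag(scmd->request);
	u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag);
	u16 tag = blk_mq_unique_tag_to_tag(unique_tag);

	return (tag * shost->nr_hw_queues) + hwq + 1;
}

static struct scsi_cmnd *sketch_scmd_from_smid(struct Scsi_Host *shost,
					       u16 smid)
{
	u16 hwq = (smid - 1) % shost->nr_hw_queues;
	u16 tag = (smid - 1) / shost->nr_hw_queues;
	struct request *req;

	req = blk_mq_tag_to_rq(shost->tag_set.tags[hwq], tag);
	return req ? blk_mq_rq_to_pdu(req) : NULL;
}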

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c  | 125 ++++++++++++++----
 drivers/scsi/mpt3sas/mpt3sas_base.h  |   2 +
 drivers/scsi/mpt3sas/mpt3sas_ctl.c   | 111 ++++++++++++----
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 246 +++++++++++++++++++++++++++++++++--
 4 files changed, 427 insertions(+), 57 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 942fb7e..29e139f 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -867,6 +867,20 @@ struct scsiio_tracker *
 {
 	WARN_ON(!smid);
 	WARN_ON(smid >= ioc->hi_priority_smid);
+	if (shost_use_blk_mq(ioc->shost)) {
+		u16 hwq = (smid - 1) % ioc->shost->nr_hw_queues;
+		u16 tag = (smid - 1) / ioc->shost->nr_hw_queues;
+		struct blk_mq_tag_set *tagset = &ioc->shost->tag_set;
+		struct request *req;
+
+		req = blk_mq_tag_to_rq(tagset->tags[hwq], tag);
+		if (req) {
+			struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+
+			return scsi_cmd_priv(cmd);
+		} else
+			return NULL;
+	}
 	return &ioc->scsi_lookup[smid - 1];
 }
 
@@ -2329,6 +2343,20 @@ struct scsiio_tracker *
 	struct scsiio_tracker *request;
 	u16 smid;
 
+	if (shost_use_blk_mq(ioc->shost)) {
+		unsigned int unique_tag = blk_mq_unique_tag(scmd->request);
+		u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag);
+		u16 tag = blk_mq_unique_tag_to_tag(unique_tag);
+		u16 smid = (tag * ioc->shost->nr_hw_queues) + hwq + 1;
+
+		request = scsi_cmd_priv(scmd);
+		request->scmd = scmd;
+		request->cb_idx = cb_idx;
+		request->msix_io = hwq;
+		request->smid = smid;
+		INIT_LIST_HEAD(&request->chain_list);
+		return request->smid;
+	}
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	if (list_empty(&ioc->free_list)) {
 		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
@@ -2403,6 +2431,23 @@ struct scsiio_tracker *
 	}
 }
 
+void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
+			   struct scsiio_tracker *st)
+{
+	if (WARN_ON(st->smid == 0))
+		return;
+	st->cb_idx = 0xFF;
+	st->scmd = NULL;
+	st->direct_io = 0;
+	if (!list_empty(&st->chain_list)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+		_dechain_st(ioc, st);
+		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+	}
+}
+
 /**
  * mpt3sas_base_free_smid - put smid back on free_list
  * @ioc: per adapter object
@@ -2416,6 +2461,19 @@ struct scsiio_tracker *
 	unsigned long flags;
 	int i;
 
+	if (shost_use_blk_mq(ioc->shost) && smid < ioc->hi_priority_smid) {
+		struct scsiio_tracker *st;
+
+		st = mpt3sas_get_st_from_smid(ioc, smid);
+		if (WARN_ON(!st)) {
+			_base_recovery_check(ioc);
+			return;
+		}
+		mpt3sas_base_clear_st(ioc, st);
+		_base_recovery_check(ioc);
+		return;
+	}
+
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	if (smid < ioc->hi_priority_smid) {
 		/* scsiio queue */
@@ -3559,14 +3617,23 @@ struct scsiio_tracker *
 	    ioc->name, (unsigned long long) ioc->request_dma));
 	total_sz += sz;
 
-	sz = ioc->scsiio_depth * sizeof(struct scsiio_tracker);
-	ioc->scsi_lookup_pages = get_order(sz);
-	ioc->scsi_lookup = (struct scsiio_tracker *)__get_free_pages(
-	    GFP_KERNEL, ioc->scsi_lookup_pages);
-	if (!ioc->scsi_lookup) {
-		pr_err(MPT3SAS_FMT "scsi_lookup: get_free_pages failed, sz(%d)\n",
-			ioc->name, (int)sz);
-		goto out;
+	/*
+	 * Don't need to allocate memory for scsiio_tracker array if we
+	 * are using scsi-mq, we embed it in the scsi_cmnd for that case.
+	 */
+	if (!shost_use_blk_mq(ioc->shost)) {
+		sz = ioc->scsiio_depth * sizeof(struct scsiio_tracker);
+		ioc->scsi_lookup_pages = get_order(sz);
+		ioc->scsi_lookup = (struct scsiio_tracker *)__get_free_pages(
+			GFP_KERNEL, ioc->scsi_lookup_pages);
+		if (!ioc->scsi_lookup) {
+			pr_err(MPT3SAS_FMT "scsi_lookup: get_free_pages "
+			       "failed, sz(%d)\n", ioc->name, (int)sz);
+			goto out;
+		}
+	} else {
+		ioc->scsi_lookup_pages = 0;
+		ioc->scsi_lookup = NULL;
 	}
 
 	dinitprintk(ioc, pr_info(MPT3SAS_FMT "scsiio(0x%p): depth(%d)\n",
@@ -5159,15 +5226,17 @@ struct scsiio_tracker *
 	/* initialize the scsi lookup free list */
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	INIT_LIST_HEAD(&ioc->free_list);
-	smid = 1;
-	for (i = 0; i < ioc->scsiio_depth; i++, smid++) {
-		INIT_LIST_HEAD(&ioc->scsi_lookup[i].chain_list);
-		ioc->scsi_lookup[i].cb_idx = 0xFF;
-		ioc->scsi_lookup[i].smid = smid;
-		ioc->scsi_lookup[i].scmd = NULL;
-		ioc->scsi_lookup[i].direct_io = 0;
-		list_add_tail(&ioc->scsi_lookup[i].tracker_list,
-		    &ioc->free_list);
+	if (!shost_use_blk_mq(ioc->shost)) {
+		smid = 1;
+		for (i = 0; i < ioc->scsiio_depth; i++, smid++) {
+			struct scsiio_tracker *st = &ioc->scsi_lookup[i];
+
+			memset(st, 0, sizeof(struct scsiio_tracker));
+			INIT_LIST_HEAD(&st->chain_list);
+			st->cb_idx = 0xFF;
+			st->smid = smid;
+			list_add_tail(&st->tracker_list, &ioc->free_list);
+		}
 	}
 
 	/* hi-priority queue */
@@ -5667,6 +5736,13 @@ struct scsiio_tracker *
 	}
 }
 
+static void _count_pending(struct request *req, void *data, bool reserved)
+{
+	struct MPT3SAS_ADAPTER *ioc = data;
+
+	ioc->pending_io_count++;
+}
+
 /**
  * _wait_for_commands_to_complete - reset controller
  * @ioc: Pointer to MPT_ADAPTER structure
@@ -5688,11 +5764,16 @@ struct scsiio_tracker *
 		return;
 
 	/* pending command count */
-	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
-	for (i = 0; i < ioc->scsiio_depth; i++)
-		if (ioc->scsi_lookup[i].cb_idx != 0xFF)
-			ioc->pending_io_count++;
-	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+	if (shost_use_blk_mq(ioc->shost))
+		blk_mq_tagset_busy_iter(&ioc->shost->tag_set,
+					_count_pending, ioc);
+	else {
+		spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+		for (i = 0; i < ioc->scsiio_depth; i++)
+			if (ioc->scsi_lookup[i].cb_idx != 0xFF)
+				ioc->pending_io_count++;
+		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+	}
 
 	if (!ioc->pending_io_count)
 		return;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 876e7b4..7a3553e 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -1248,6 +1248,8 @@ u16 mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx,
 	struct scsi_cmnd *scmd);
 struct scsiio_tracker * mpt3sas_get_st_from_smid(struct MPT3SAS_ADAPTER *ioc,
 	u16 smid);
+void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
+			   struct scsiio_tracker *st);
 
 u16 mpt3sas_base_get_smid(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx);
 void mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index e952175..264e239 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -549,6 +549,86 @@ enum block_state {
 	return 0;
 }
 
+static bool
+_scmd_match(struct scsi_cmnd *scmd, u16 handle, u32 lun)
+{
+	struct MPT3SAS_DEVICE *priv_data;
+
+	if (scmd == NULL || scmd->device == NULL ||
+	    scmd->device->hostdata == NULL)
+		return false;
+	if (lun != scmd->device->lun)
+		return false;
+	priv_data = scmd->device->hostdata;
+	if (priv_data->sas_target == NULL)
+		return false;
+	if (priv_data->sas_target->handle != handle)
+		return false;
+
+	return true;
+}
+
+struct smid_match_data {
+	u16 handle;
+	u16 smid;
+	u32 lun;
+};
+
+static void
+_smid_fn(struct request *req, void *data, bool reserved)
+{
+	struct smid_match_data *smd = data;
+	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
+	struct scsiio_tracker *st;
+
+	if (!_scmd_match(scmd, smd->handle, smd->lun))
+		return;
+
+	st = scsi_cmd_priv(scmd);
+	smd->smid = st->smid;
+}
+
+static u16
+_ctl_find_smid_mq(struct MPT3SAS_ADAPTER *ioc, u16 handle, u32 lun)
+{
+	struct smid_match_data smd;
+
+	smd.smid = 0;
+	blk_mq_tagset_busy_iter(&ioc->shost->tag_set, _smid_fn, &smd);
+	return smd.smid;
+}
+
+static u16
+_ctl_find_smid_legacy(struct MPT3SAS_ADAPTER *ioc, u16 handle, u32 lun)
+{
+	struct scsi_cmnd *scmd;
+	unsigned long flags;
+	u16 smid = 0;
+	int i;
+
+	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+	for (i = ioc->scsiio_depth; i; i--) {
+		scmd = ioc->scsi_lookup[i - 1].scmd;
+		if (!_scmd_match(scmd, handle, lun))
+			continue;
+
+		smid = ioc->scsi_lookup[i - 1].smid;
+		break;
+	}
+	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+
+	return smid;
+}
+
+static u16
+_ctl_find_smid(struct MPT3SAS_ADAPTER *ioc, u16 handle, u32 lun)
+{
+	if (shost_use_blk_mq(ioc->shost))
+		return _ctl_find_smid_mq(ioc, handle, lun);
+	else
+		return _ctl_find_smid_legacy(ioc, handle, lun);
+}
+
 /**
  * _ctl_set_task_mid - assign an active smid to tm request
  * @ioc: per adapter object
@@ -562,12 +642,7 @@ enum block_state {
 _ctl_set_task_mid(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command *karg,
 	Mpi2SCSITaskManagementRequest_t *tm_request)
 {
-	u8 found = 0;
-	u16 i;
-	u16 handle;
-	struct scsi_cmnd *scmd;
-	struct MPT3SAS_DEVICE *priv_data;
-	unsigned long flags;
+	u16 smid, handle;
 	Mpi2SCSITaskManagementReply_t *tm_reply;
 	u32 sz;
 	u32 lun;
@@ -581,27 +656,11 @@ enum block_state {
 		return 0;
 
 	lun = scsilun_to_int((struct scsi_lun *)tm_request->LUN);
-
 	handle = le16_to_cpu(tm_request->DevHandle);
-	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
-	for (i = ioc->scsiio_depth; i && !found; i--) {
-		scmd = ioc->scsi_lookup[i - 1].scmd;
-		if (scmd == NULL || scmd->device == NULL ||
-		    scmd->device->hostdata == NULL)
-			continue;
-		if (lun != scmd->device->lun)
-			continue;
-		priv_data = scmd->device->hostdata;
-		if (priv_data->sas_target == NULL)
-			continue;
-		if (priv_data->sas_target->handle != handle)
-			continue;
-		tm_request->TaskMID = cpu_to_le16(ioc->scsi_lookup[i - 1].smid);
-		found = 1;
-	}
-	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
 
-	if (!found) {
+	smid = _ctl_find_smid(ioc, handle, lun);
+
+	if (!smid) {
 		dctlprintk(ioc, pr_info(MPT3SAS_FMT
 			"%s: handle(0x%04x), lun(%d), no active mid!!\n",
 			ioc->name,
@@ -621,6 +680,8 @@ enum block_state {
 		return 1;
 	}
 
+	tm_request->TaskMID = cpu_to_le16(smid);
+
 	dctlprintk(ioc, pr_info(MPT3SAS_FMT
 		"%s: handle(0x%04x), lun(%d), task_mid(%d)\n", ioc->name,
 	    desc, le16_to_cpu(tm_request->DevHandle), lun,
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index ca925ef..bbfbfcc 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -147,7 +147,6 @@ static int _scsih_add_device(struct MPT3SAS_ADAPTER *ioc, u16 handle,
 module_param(prot_mask, int, 0);
 MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=7 ");
 
-
 /* raid transport support */
 struct raid_template *mpt3sas_raid_template;
 struct raid_template *mpt2sas_raid_template;
@@ -1072,8 +1071,19 @@ struct _sas_node *
 _scsih_scsi_lookup_get_clear(struct MPT3SAS_ADAPTER *ioc, u16 smid)
 {
 	unsigned long flags;
-	struct scsi_cmnd *scmd;
+	struct scsi_cmnd *scmd = NULL;
 
+	if (shost_use_blk_mq(ioc->shost)) {
+		u16 hwq = (smid - 1) % ioc->shost->nr_hw_queues;
+		u16 tag = (smid - 1) / ioc->shost->nr_hw_queues;
+		struct blk_mq_tag_set *tagset = &ioc->shost->tag_set;
+		struct request *req;
+
+		req = blk_mq_tag_to_rq(tagset->tags[hwq], tag);
+		if (!req)
+			return NULL;
+		return blk_mq_rq_to_pdu(req);
+	}
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	scmd = ioc->scsi_lookup[smid - 1].scmd;
 	ioc->scsi_lookup[smid - 1].scmd = NULL;
@@ -1100,6 +1110,11 @@ struct _sas_node *
 	unsigned long	flags;
 	int i;
 
+	if (shost_use_blk_mq(ioc->shost)) {
+		struct scsiio_tracker *st = scsi_cmd_priv(scmd);
+
+		return st->smid;
+	}
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	smid = 0;
 	for (i = 0; i < ioc->scsiio_depth; i++) {
@@ -1113,6 +1128,24 @@ struct _sas_node *
 	return smid;
 }
 
+struct _scsih_scsi_lookup_data {
+	int channel;
+	int id;
+	int lun;
+	int result;
+};
+
+static void
+_scsih_scsi_target_lookup_fn(struct request *req, void *data, bool reserved)
+{
+	struct _scsih_scsi_lookup_data *lookup_data = data;
+	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
+
+	if (scmd->device->id == lookup_data->id &&
+	    scmd->device->channel == lookup_data->channel)
+		lookup_data->result++;
+}
+
 /**
  * _scsih_scsi_lookup_find_by_target - search for matching channel:id
  * @ioc: per adapter object
@@ -1131,6 +1164,16 @@ struct _sas_node *
 	unsigned long	flags;
 	int i;
 
+	if (shost_use_blk_mq(ioc->shost)) {
+		struct _scsih_scsi_lookup_data data = {
+			.channel = channel,
+			.id = id,
+			.result = 0,
+		};
+		blk_mq_tagset_busy_iter(&ioc->shost->tag_set,
+					_scsih_scsi_target_lookup_fn, &data);
+		return (data.result > 0);
+	}
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	found = 0;
 	for (i = 0 ; i < ioc->scsiio_depth; i++) {
@@ -1146,6 +1189,18 @@ struct _sas_node *
 	return found;
 }
 
+static void
+_scsih_scsi_lun_lookup_fn(struct request *req, void *data, bool reserved)
+{
+	struct _scsih_scsi_lookup_data *lookup_data = data;
+	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
+
+	if (scmd->device->id == lookup_data->id &&
+	    scmd->device->channel == lookup_data->channel &&
+	    scmd->device->lun == lookup_data->lun)
+		lookup_data->result++;
+}
+
 /**
  * _scsih_scsi_lookup_find_by_lun - search for matching channel:id:lun
  * @ioc: per adapter object
@@ -1165,6 +1220,17 @@ struct _sas_node *
 	unsigned long	flags;
 	int i;
 
+	if (shost_use_blk_mq(ioc->shost)) {
+		struct _scsih_scsi_lookup_data data = {
+			.channel = channel,
+			.id = id,
+			.lun = lun,
+			.result = 0,
+		};
+		blk_mq_tagset_busy_iter(&ioc->shost->tag_set,
+					_scsih_scsi_lun_lookup_fn, &data);
+		return (data.result > 0);
+	}
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	found = 0;
 	for (i = 0 ; i < ioc->scsiio_depth; i++) {
@@ -2309,7 +2375,7 @@ struct _sas_node *
 	switch (type) {
 	case MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK:
 		rc = SUCCESS;
-		if (scsi_lookup->scmd == NULL)
+		if (scsi_lookup->cb_idx == 0xFF)
 			break;
 		rc = FAILED;
 		break;
@@ -3891,6 +3957,26 @@ static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
 	return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
 }
 
+static void _flush_running(struct request *req, void *data, bool reserved)
+{
+	struct MPT3SAS_ADAPTER *ioc = data;
+	struct scsi_cmnd *scmd;
+	struct scsiio_tracker *st;
+
+	scmd = blk_mq_rq_to_pdu(req);
+	st = scsi_cmd_priv(scmd);
+	mpt3sas_base_clear_st(ioc, st);
+	scsi_dma_unmap(scmd);
+
+	if (ioc->pci_error_recovery)
+		scmd->result = DID_NO_CONNECT << 16;
+	else
+		scmd->result = DID_RESET << 16;
+	scmd->scsi_done(scmd);
+
+	ioc->pending_io_count++;
+}
+
 /**
  * _scsih_flush_running_cmds - completing outstanding commands.
  * @ioc: per adapter object
@@ -3905,13 +3991,19 @@ static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
 {
 	struct scsi_cmnd *scmd;
 	u16 smid;
-	u16 count = 0;
+
+	ioc->pending_io_count = 0;
+	if (shost_use_blk_mq(ioc->shost)) {
+		blk_mq_tagset_busy_iter(&ioc->shost->tag_set,
+					_flush_running, ioc);
+		goto out;
+	}
 
 	for (smid = 1; smid <= ioc->scsiio_depth; smid++) {
 		scmd = _scsih_scsi_lookup_get_clear(ioc, smid);
 		if (!scmd)
 			continue;
-		count++;
+		ioc->pending_io_count++;
 		if (ata_12_16_cmd(scmd))
 			scsi_internal_device_unblock(scmd->device,
 							SDEV_RUNNING);
@@ -3923,8 +4015,9 @@ static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
 			scmd->result = DID_RESET << 16;
 		scmd->scsi_done(scmd);
 	}
+out:
 	dtmprintk(ioc, pr_info(MPT3SAS_FMT "completing %d cmds\n",
-	    ioc->name, count));
+	    ioc->name, ioc->pending_io_count));
 }
 
 /**
@@ -4662,9 +4755,11 @@ static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
 	if (mpt3sas_scsi_direct_io_get(ioc, smid) &&
 	     ((ioc_status & MPI2_IOCSTATUS_MASK)
 	      != MPI2_IOCSTATUS_SCSI_TASK_TERMINATED)) {
-		spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
-		ioc->scsi_lookup[smid - 1].scmd = scmd;
-		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+		if (!shost_use_blk_mq(ioc->shost)) {
+			spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+			ioc->scsi_lookup[smid - 1].scmd = scmd;
+			spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+		}
 		mpt3sas_scsi_direct_io_set(ioc, smid, 0);
 		memcpy(mpi_request->CDB.CDB32, scmd->cmnd, scmd->cmd_len);
 		mpi_request->DevHandle =
@@ -4835,11 +4930,13 @@ static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
 		_scsih_scsi_ioc_info(ioc , scmd, mpi_reply, smid);
 
  out:
+	if (shost_use_blk_mq(ioc->shost))
+		mpt3sas_base_clear_st(ioc, scsi_cmd_priv(scmd));
 
 	scsi_dma_unmap(scmd);
 
 	scmd->scsi_done(scmd);
-	return 1;
+	return shost_use_blk_mq(ioc->shost) ? 0 : 1;
 }
 
 /**
@@ -6033,6 +6130,108 @@ static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
 		     fw_event->event_data);
 }
 
+struct _abort_sas_task_data {
+	struct MPT3SAS_ADAPTER *ioc;
+	int query_count;
+	int retry;
+	int termination_count;
+};
+
+static void _abort_sas_task(struct request *req, void *data, bool reserved)
+{
+	struct _abort_sas_task_data *priv = data;
+	struct scsi_cmnd *scmd;
+	struct scsi_device *sdev;
+	u16 handle;
+	u32 lun;
+	struct scsiio_tracker *st;
+	struct MPT3SAS_DEVICE *sas_device_priv_data;
+	Mpi2SCSITaskManagementReply_t *mpi_reply;
+	u16 ioc_status;
+	int r;
+	u8 task_abort_retries;
+
+	scmd = blk_mq_rq_to_pdu(req);
+	st = scsi_cmd_priv(scmd);
+
+	sdev = scmd->device;
+	sas_device_priv_data = sdev->hostdata;
+	if (!sas_device_priv_data || !sas_device_priv_data->sas_target)
+		return;
+	/* skip hidden raid components */
+	if (sas_device_priv_data->sas_target->flags &
+	    MPT_TARGET_FLAGS_RAID_COMPONENT)
+		return;
+	/* skip volumes */
+	if (sas_device_priv_data->sas_target->flags &
+	    MPT_TARGET_FLAGS_VOLUME)
+		return;
+
+	handle = sas_device_priv_data->sas_target->handle;
+	lun = sas_device_priv_data->lun;
+	mpi_reply = priv->ioc->tm_cmds.reply;
+	priv->query_count++;
+
+	if (priv->ioc->shost_recovery)
+		return;
+
+	r = mpt3sas_scsih_issue_tm(priv->ioc, handle, 0, 0, lun,
+		MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK, st->smid, 30);
+	if (r == FAILED) {
+		sdev_printk(KERN_WARNING, sdev,
+			    "mpt3sas_scsih_issue_tm: FAILED when sending "
+			    "QUERY_TASK: scmd(%p)\n", scmd);
+		priv->retry++;
+		return;
+	}
+	ioc_status = le16_to_cpu(mpi_reply->IOCStatus)
+		& MPI2_IOCSTATUS_MASK;
+	if (ioc_status != MPI2_IOCSTATUS_SUCCESS) {
+		sdev_printk(KERN_WARNING, sdev,
+			"query task: FAILED with IOCSTATUS(0x%04x), scmd(%p)\n",
+			ioc_status, scmd);
+		priv->retry++;
+		return;
+	}
+
+	/* see if IO is still owned by IOC and target */
+	if (mpi_reply->ResponseCode ==
+	    MPI2_SCSITASKMGMT_RSP_TM_SUCCEEDED ||
+	    mpi_reply->ResponseCode ==
+	    MPI2_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC)
+		return;
+
+	task_abort_retries = 0;
+ tm_retry:
+	if (task_abort_retries++ == 60) {
+		dewtprintk(priv->ioc, pr_info(MPT3SAS_FMT
+			"%s: ABORT_TASK: giving up\n",
+			priv->ioc->name, __func__));
+		priv->retry++;
+		return;
+	}
+
+	if (priv->ioc->shost_recovery)
+		return;
+
+	r = mpt3sas_scsih_issue_tm(priv->ioc, handle, sdev->channel, sdev->id,
+		sdev->lun, MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, st->smid, 30);
+	if (r == FAILED) {
+		sdev_printk(KERN_WARNING, sdev,
+			    "mpt3sas_scsih_issue_tm: ABORT_TASK: FAILED : "
+			    "scmd(%p)\n", scmd);
+		goto tm_retry;
+	}
+
+	if (task_abort_retries > 1)
+		sdev_printk(KERN_WARNING, sdev,
+			    "mpt3sas_scsih_issue_tm: ABORT_TASK: RETRIES (%d):"
+			    " scmd(%p)\n",
+			    task_abort_retries - 1, scmd);
+
+	priv->termination_count += le32_to_cpu(mpi_reply->TerminationCount);
+}
+
 /**
  * _scsih_sas_broadcast_primitive_event - handle broadcast events
  * @ioc: per adapter object
@@ -6070,6 +6269,31 @@ static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
 
 	_scsih_block_io_all_device(ioc);
 
+	if (shost_use_blk_mq(ioc->shost)) {
+		struct _abort_sas_task_data priv = {
+			.ioc = ioc,
+			.retry = 0,
+			.query_count = 0,
+			.termination_count = 0,
+		};
+
+retry_iter:
+		if (max_retries++ == 5) {
+			dewtprintk(ioc, pr_info(MPT3SAS_FMT "%s: giving up\n",
+				ioc->name, __func__));
+			goto out;
+		} else if (max_retries > 1)
+			dewtprintk(ioc, pr_info(MPT3SAS_FMT "%s: %d retry\n",
+				ioc->name, __func__, max_retries - 1));
+
+		blk_mq_tagset_busy_iter(&ioc->shost->tag_set,
+					_abort_sas_task, &priv);
+		if (priv.retry)
+			goto retry_iter;
+		termination_count = priv.termination_count;
+		query_count = priv.query_count;
+		goto out_no_lock;
+	}
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	mpi_reply = ioc->tm_cmds.reply;
  broadcast_aen_retry:
@@ -8578,6 +8802,7 @@ static void sas_device_make_active(struct MPT3SAS_ADAPTER *ioc,
 	.shost_attrs			= mpt3sas_host_attrs,
 	.sdev_attrs			= mpt3sas_dev_attrs,
 	.track_queue_depth		= 1,
+	.cmd_size			= sizeof(struct scsiio_tracker),
 };
 
 /* raid transport support for SAS 2.0 HBA devices */
@@ -8616,6 +8841,7 @@ static void sas_device_make_active(struct MPT3SAS_ADAPTER *ioc,
 	.shost_attrs			= mpt3sas_host_attrs,
 	.sdev_attrs			= mpt3sas_dev_attrs,
 	.track_queue_depth		= 1,
+	.cmd_size			= sizeof(struct scsiio_tracker),
 };
 
 /* raid transport support for SAS 3.0 HBA devices */
-- 
1.8.5.6



* [PATCH 09/10] mpt3sas: Use 'msix_index' as argument for put_smid functions
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
                   ` (7 preceding siblings ...)
  2017-01-31  9:25 ` [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq Hannes Reinecke
@ 2017-01-31  9:25 ` Hannes Reinecke
  2017-01-31  9:26 ` [PATCH 10/10] mpt3sas: scsi-mq interrupt steering Hannes Reinecke
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:25 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

Use 'msix_index' as an explicit argument for the various put_smid
callbacks, so that scsi-mq completions can later be steered to the
submitting queue's vector.
No functional change.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c      | 56 +++++++++++++++++---------------
 drivers/scsi/mpt3sas/mpt3sas_base.h      | 18 ++++++++--
 drivers/scsi/mpt3sas/mpt3sas_config.c    |  2 +-
 drivers/scsi/mpt3sas/mpt3sas_ctl.c       | 21 ++++++------
 drivers/scsi/mpt3sas/mpt3sas_scsih.c     | 21 ++++++------
 drivers/scsi/mpt3sas/mpt3sas_transport.c |  8 ++---
 6 files changed, 71 insertions(+), 55 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 29e139f..f6564a3 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -849,7 +849,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 	ack_request->EventContext = mpi_reply->EventContext;
 	ack_request->VF_ID = 0;  /* TODO */
 	ack_request->VP_ID = 0;
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 
  out:
 
@@ -2290,12 +2290,6 @@ struct scsiio_tracker *
 	return ioc->reply + (phys_addr - (u32)ioc->reply_dma);
 }
 
-static inline u8
-_base_get_msix_index(struct MPT3SAS_ADAPTER *ioc)
-{
-	return ioc->cpu_msix_table[raw_smp_processor_id()];
-}
-
 /**
  * mpt3sas_base_get_smid - obtain a free smid from internal queue
  * @ioc: per adapter object
@@ -2370,7 +2364,7 @@ struct scsiio_tracker *
 	request->scmd = scmd;
 	request->cb_idx = cb_idx;
 	smid = request->smid;
-	request->msix_io = _base_get_msix_index(ioc);
+	request->msix_io = mpt3sas_base_get_msix_index(ioc);
 	list_del(&request->tracker_list);
 	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
 	return smid;
@@ -2538,18 +2532,20 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
  * @ioc: per adapter object
  * @smid: system request message index
  * @handle: device handle
+ * @msix_index: MSI-X index
  *
  * Return nothing.
  */
 static void
-_base_put_smid_scsi_io(struct MPT3SAS_ADAPTER *ioc, u16 smid, u16 handle)
+_base_put_smid_scsi_io(struct MPT3SAS_ADAPTER *ioc, u16 smid,
+		       u16 handle, u16 msix_index)
 {
 	Mpi2RequestDescriptorUnion_t descriptor;
 	u64 *request = (u64 *)&descriptor;
 
 
 	descriptor.SCSIIO.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO;
-	descriptor.SCSIIO.MSIxIndex =  _base_get_msix_index(ioc);
+	descriptor.SCSIIO.MSIxIndex =  cpu_to_le16(msix_index);
 	descriptor.SCSIIO.SMID = cpu_to_le16(smid);
 	descriptor.SCSIIO.DevHandle = cpu_to_le16(handle);
 	descriptor.SCSIIO.LMID = 0;
@@ -2562,19 +2558,20 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
  * @ioc: per adapter object
  * @smid: system request message index
  * @handle: device handle
+ * @msix_index: MSI-X index
  *
  * Return nothing.
  */
 static void
 _base_put_smid_fast_path(struct MPT3SAS_ADAPTER *ioc, u16 smid,
-	u16 handle)
+	u16 handle, u16 msix_index)
 {
 	Mpi2RequestDescriptorUnion_t descriptor;
 	u64 *request = (u64 *)&descriptor;
 
 	descriptor.SCSIIO.RequestFlags =
 	    MPI25_REQ_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO;
-	descriptor.SCSIIO.MSIxIndex = _base_get_msix_index(ioc);
+	descriptor.SCSIIO.MSIxIndex = cpu_to_le16(msix_index);
 	descriptor.SCSIIO.SMID = cpu_to_le16(smid);
 	descriptor.SCSIIO.DevHandle = cpu_to_le16(handle);
 	descriptor.SCSIIO.LMID = 0;
@@ -2598,7 +2595,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 
 	descriptor.HighPriority.RequestFlags =
 	    MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY;
-	descriptor.HighPriority.MSIxIndex =  msix_task;
+	descriptor.HighPriority.MSIxIndex =  cpu_to_le16(msix_task);
 	descriptor.HighPriority.SMID = cpu_to_le16(smid);
 	descriptor.HighPriority.LMID = 0;
 	descriptor.HighPriority.Reserved1 = 0;
@@ -2610,17 +2607,18 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
  * _base_put_smid_default - Default, primarily used for config pages
  * @ioc: per adapter object
  * @smid: system request message index
+ * @msix_index: MSI-X index
  *
  * Return nothing.
  */
 static void
-_base_put_smid_default(struct MPT3SAS_ADAPTER *ioc, u16 smid)
+_base_put_smid_default(struct MPT3SAS_ADAPTER *ioc, u16 smid, u16 msix_index)
 {
 	Mpi2RequestDescriptorUnion_t descriptor;
 	u64 *request = (u64 *)&descriptor;
 
 	descriptor.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
-	descriptor.Default.MSIxIndex =  _base_get_msix_index(ioc);
+	descriptor.Default.MSIxIndex =  cpu_to_le16(msix_index);
 	descriptor.Default.SMID = cpu_to_le16(smid);
 	descriptor.Default.LMID = 0;
 	descriptor.Default.DescriptorTypeDependent = 0;
@@ -2634,18 +2632,19 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 * @ioc: per adapter object
 * @smid: system request message index
 * @handle: device handle, unused in this function, for function type match
+* @msix_index: MSI-X index
 *
 * Return nothing.
 */
 static void
 _base_put_smid_scsi_io_atomic(struct MPT3SAS_ADAPTER *ioc, u16 smid,
-	u16 handle)
+	u16 handle, u16 msix_index)
 {
 	Mpi26AtomicRequestDescriptor_t descriptor;
 	u32 *request = (u32 *)&descriptor;
 
 	descriptor.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO;
-	descriptor.MSIxIndex = _base_get_msix_index(ioc);
+	descriptor.MSIxIndex = cpu_to_le16(msix_index);
 	descriptor.SMID = cpu_to_le16(smid);
 
 	writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost);
@@ -2657,17 +2656,18 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
  * @ioc: per adapter object
  * @smid: system request message index
  * @handle: device handle, unused in this function, for function type match
+ * @msix_index: MSI-X index
  * Return nothing
  */
 static void
 _base_put_smid_fast_path_atomic(struct MPT3SAS_ADAPTER *ioc, u16 smid,
-	u16 handle)
+	u16 handle, u16 msix_index)
 {
 	Mpi26AtomicRequestDescriptor_t descriptor;
 	u32 *request = (u32 *)&descriptor;
 
 	descriptor.RequestFlags = MPI25_REQ_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO;
-	descriptor.MSIxIndex = _base_get_msix_index(ioc);
+	descriptor.MSIxIndex = cpu_to_le16(msix_index);
 	descriptor.SMID = cpu_to_le16(smid);
 
 	writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost);
@@ -2690,7 +2690,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 	u32 *request = (u32 *)&descriptor;
 
 	descriptor.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY;
-	descriptor.MSIxIndex = msix_task;
+	descriptor.MSIxIndex = cpu_to_le16(msix_task);
 	descriptor.SMID = cpu_to_le16(smid);
 
 	writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost);
@@ -2701,17 +2701,19 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
  * use Atomic Request Descriptor
  * @ioc: per adapter object
  * @smid: system request message index
+ * @msix_index: MSI-X index
  *
  * Return nothing.
  */
 static void
-_base_put_smid_default_atomic(struct MPT3SAS_ADAPTER *ioc, u16 smid)
+_base_put_smid_default_atomic(struct MPT3SAS_ADAPTER *ioc,
+	u16 smid, u16 msix_index)
 {
 	Mpi26AtomicRequestDescriptor_t descriptor;
 	u32 *request = (u32 *)&descriptor;
 
 	descriptor.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
-	descriptor.MSIxIndex = _base_get_msix_index(ioc);
+	descriptor.MSIxIndex = cpu_to_le16(msix_index);
 	descriptor.SMID = cpu_to_le16(smid);
 
 	writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost);
@@ -4240,7 +4242,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 	    mpi_request->Operation == MPI2_SAS_OP_PHY_LINK_RESET)
 		ioc->ioc_link_reset_in_progress = 1;
 	init_completion(&ioc->base_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->base_cmds.done,
 	    msecs_to_jiffies(10000));
 	if ((mpi_request->Operation == MPI2_SAS_OP_PHY_HARD_RESET ||
@@ -4340,7 +4342,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 	ioc->base_cmds.smid = smid;
 	memcpy(request, mpi_request, sizeof(Mpi2SepReply_t));
 	init_completion(&ioc->base_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->base_cmds.done,
 	    msecs_to_jiffies(10000));
 	if (!(ioc->base_cmds.status & MPT3_CMD_COMPLETE)) {
@@ -4754,7 +4756,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 	mpi_request->Function = MPI2_FUNCTION_PORT_ENABLE;
 
 	init_completion(&ioc->port_enable_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->port_enable_cmds.done, 300*HZ);
 	if (!(ioc->port_enable_cmds.status & MPT3_CMD_COMPLETE)) {
 		pr_err(MPT3SAS_FMT "%s: timeout\n",
@@ -4817,7 +4819,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 	memset(mpi_request, 0, sizeof(Mpi2PortEnableRequest_t));
 	mpi_request->Function = MPI2_FUNCTION_PORT_ENABLE;
 
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	return 0;
 }
 
@@ -4936,7 +4938,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 		mpi_request->EventMasks[i] =
 		    cpu_to_le32(ioc->event_masks[i]);
 	init_completion(&ioc->base_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->base_cmds.done, 30*HZ);
 	if (!(ioc->base_cmds.status & MPT3_CMD_COMPLETE)) {
 		pr_err(MPT3SAS_FMT "%s: timeout\n",
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 7a3553e..091de98 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -739,8 +739,9 @@ typedef void (*MPT_BUILD_ZERO_LEN_SGE)(struct MPT3SAS_ADAPTER *ioc,
 
 /* To support atomic and non atomic descriptors*/
 typedef void (*PUT_SMID_IO_FP_HIP) (struct MPT3SAS_ADAPTER *ioc, u16 smid,
-	u16 funcdep);
-typedef void (*PUT_SMID_DEFAULT) (struct MPT3SAS_ADAPTER *ioc, u16 smid);
+				    u16 funcdep, u16 msix_index);
+typedef void (*PUT_SMID_DEFAULT) (struct MPT3SAS_ADAPTER *ioc, u16 smid,
+				  u16 msix_index);
 
 /* IOC Facts and Port Facts converted from little endian to cpu */
 union mpi3_version_union {
@@ -1201,7 +1202,7 @@ struct MPT3SAS_ADAPTER {
 	u8		atomic_desc_capable;
 	PUT_SMID_IO_FP_HIP put_smid_scsi_io;
 	PUT_SMID_IO_FP_HIP put_smid_fast_path;
-	PUT_SMID_IO_FP_HIP put_smid_hi_priority;
+	PUT_SMID_DEFAULT put_smid_hi_priority;
 	PUT_SMID_DEFAULT put_smid_default;
 
 };
@@ -1283,6 +1284,17 @@ void mpt3sas_base_update_missing_delay(struct MPT3SAS_ADAPTER *ioc,
 
 int mpt3sas_port_enable(struct MPT3SAS_ADAPTER *ioc);
 
+static inline u8
+mpt3sas_base_get_msix_index(struct MPT3SAS_ADAPTER *ioc)
+{
+	return ioc->cpu_msix_table[raw_smp_processor_id()];
+}
+
+static inline void
+mpt3sas_base_put_smid_default(struct MPT3SAS_ADAPTER *ioc, u16 smid)
+{
+	ioc->put_smid_default(ioc, smid, mpt3sas_base_get_msix_index(ioc));
+}
 
 /* scsih shared API */
 u8 mpt3sas_scsih_event_callback(struct MPT3SAS_ADAPTER *ioc, u8 msix_index,
diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c
index dd62701..cebfd73 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_config.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_config.c
@@ -384,7 +384,7 @@ struct config_request {
 	memcpy(config_request, mpi_request, sizeof(Mpi2ConfigRequest_t));
 	_config_display_some_debug(ioc, smid, "config_request", NULL);
 	init_completion(&ioc->config_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->config_cmds.done, timeout*HZ);
 	if (!(ioc->config_cmds.status & MPT3_CMD_COMPLETE)) {
 		pr_err(MPT3SAS_FMT "%s: timeout\n",
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index 264e239..a9c3c0d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -873,9 +873,10 @@ struct smid_match_data {
 		ioc->build_sg(ioc, psge, data_out_dma, data_out_sz,
 		    data_in_dma, data_in_sz);
 		if (mpi_request->Function == MPI2_FUNCTION_SCSI_IO_REQUEST)
-			ioc->put_smid_scsi_io(ioc, smid, device_handle);
+			ioc->put_smid_scsi_io(ioc, smid, device_handle,
+					      mpt3sas_base_get_msix_index(ioc));
 		else
-			ioc->put_smid_default(ioc, smid);
+			mpt3sas_base_put_smid_default(ioc, smid);
 		break;
 	}
 	case MPI2_FUNCTION_SCSI_TASK_MGMT:
@@ -941,7 +942,7 @@ struct smid_match_data {
 		}
 		ioc->build_sg(ioc, psge, data_out_dma, data_out_sz, data_in_dma,
 		    data_in_sz);
-		ioc->put_smid_default(ioc, smid);
+		mpt3sas_base_put_smid_default(ioc, smid);
 		break;
 	}
 	case MPI2_FUNCTION_SATA_PASSTHROUGH:
@@ -956,7 +957,7 @@ struct smid_match_data {
 		}
 		ioc->build_sg(ioc, psge, data_out_dma, data_out_sz, data_in_dma,
 		    data_in_sz);
-		ioc->put_smid_default(ioc, smid);
+		mpt3sas_base_put_smid_default(ioc, smid);
 		break;
 	}
 	case MPI2_FUNCTION_FW_DOWNLOAD:
@@ -964,7 +965,7 @@ struct smid_match_data {
 	{
 		ioc->build_sg(ioc, psge, data_out_dma, data_out_sz, data_in_dma,
 		    data_in_sz);
-		ioc->put_smid_default(ioc, smid);
+		mpt3sas_base_put_smid_default(ioc, smid);
 		break;
 	}
 	case MPI2_FUNCTION_TOOLBOX:
@@ -979,7 +980,7 @@ struct smid_match_data {
 			ioc->build_sg_mpi(ioc, psge, data_out_dma, data_out_sz,
 				data_in_dma, data_in_sz);
 		}
-		ioc->put_smid_default(ioc, smid);
+		mpt3sas_base_put_smid_default(ioc, smid);
 		break;
 	}
 	case MPI2_FUNCTION_SAS_IO_UNIT_CONTROL:
@@ -998,7 +999,7 @@ struct smid_match_data {
 	default:
 		ioc->build_sg_mpi(ioc, psge, data_out_dma, data_out_sz,
 		    data_in_dma, data_in_sz);
-		ioc->put_smid_default(ioc, smid);
+		mpt3sas_base_put_smid_default(ioc, smid);
 		break;
 	}
 
@@ -1587,7 +1588,7 @@ struct smid_match_data {
 			cpu_to_le32(ioc->product_specific[buffer_type][i]);
 
 	init_completion(&ioc->ctl_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->ctl_cmds.done,
 	    MPT3_IOCTL_DEFAULT_TIMEOUT*HZ);
 
@@ -1934,7 +1935,7 @@ struct smid_match_data {
 	mpi_request->VP_ID = 0;
 
 	init_completion(&ioc->ctl_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->ctl_cmds.done,
 	    MPT3_IOCTL_DEFAULT_TIMEOUT*HZ);
 
@@ -2201,7 +2202,7 @@ struct smid_match_data {
 	mpi_request->VP_ID = 0;
 
 	init_completion(&ioc->ctl_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->ctl_cmds.done,
 	    MPT3_IOCTL_DEFAULT_TIMEOUT*HZ);
 
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index bbfbfcc..8c451aa 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3353,7 +3353,7 @@ static struct fw_event_work *dequeue_next_fw_event(struct MPT3SAS_ADAPTER *ioc)
 	mpi_request->Function = MPI2_FUNCTION_SAS_IO_UNIT_CONTROL;
 	mpi_request->Operation = MPI2_SAS_OP_REMOVE_DEVICE;
 	mpi_request->DevHandle = mpi_request_tm->DevHandle;
-	ioc->put_smid_default(ioc, smid_sas_ctrl);
+	ioc->put_smid_default(ioc, smid_sas_ctrl, msix_index);
 
 	return _scsih_check_for_pending_tm(ioc, smid);
 }
@@ -3448,7 +3448,7 @@ static struct fw_event_work *dequeue_next_fw_event(struct MPT3SAS_ADAPTER *ioc)
 	mpi_request->Function = MPI2_FUNCTION_SCSI_TASK_MGMT;
 	mpi_request->DevHandle = cpu_to_le16(handle);
 	mpi_request->TaskType = MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET;
-	ioc->put_smid_hi_priority(ioc, smid, 0);
+	ioc->put_smid_hi_priority(ioc, smid, mpt3sas_base_get_msix_index(ioc));
 }
 
 /**
@@ -3540,7 +3540,7 @@ static struct fw_event_work *dequeue_next_fw_event(struct MPT3SAS_ADAPTER *ioc)
 	ack_request->EventContext = event_context;
 	ack_request->VF_ID = 0;  /* TODO */
 	ack_request->VP_ID = 0;
-	ioc->put_smid_default(ioc, smid);
+	ioc->put_smid_default(ioc, smid, mpt3sas_base_get_msix_index(ioc));
 }
 
 /**
@@ -3597,7 +3597,7 @@ static struct fw_event_work *dequeue_next_fw_event(struct MPT3SAS_ADAPTER *ioc)
 	mpi_request->Function = MPI2_FUNCTION_SAS_IO_UNIT_CONTROL;
 	mpi_request->Operation = MPI2_SAS_OP_REMOVE_DEVICE;
 	mpi_request->DevHandle = handle;
-	ioc->put_smid_default(ioc, smid);
+	ioc->put_smid_default(ioc, smid, mpt3sas_base_get_msix_index(ioc));
 }
 
 /**
@@ -4139,6 +4139,7 @@ void _flush_running(struct request *req, void *data, bool reserved)
 	u32 mpi_control;
 	u16 smid;
 	u16 handle;
+	u16 msix_task = mpt3sas_base_get_msix_index(ioc);
 
 	if (ioc->logging_level & MPT_DEBUG_SCSI)
 		scsi_print_command(scmd);
@@ -4258,12 +4259,12 @@ void _flush_running(struct request *req, void *data, bool reserved)
 		if (sas_target_priv_data->flags & MPT_TARGET_FASTPATH_IO) {
 			mpi_request->IoFlags = cpu_to_le16(scmd->cmd_len |
 			    MPI25_SCSIIO_IOFLAGS_FAST_PATH);
-			ioc->put_smid_fast_path(ioc, smid, handle);
+			ioc->put_smid_fast_path(ioc, smid, handle, msix_task);
 		} else
 			ioc->put_smid_scsi_io(ioc, smid,
-			    le16_to_cpu(mpi_request->DevHandle));
+				le16_to_cpu(mpi_request->DevHandle), msix_task);
 	} else
-		ioc->put_smid_default(ioc, smid);
+		ioc->put_smid_default(ioc, smid, msix_task);
 	return 0;
 
  out:
@@ -4765,7 +4766,7 @@ void _flush_running(struct request *req, void *data, bool reserved)
 		mpi_request->DevHandle =
 		    cpu_to_le16(sas_device_priv_data->sas_target->handle);
 		ioc->put_smid_scsi_io(ioc, smid,
-		    sas_device_priv_data->sas_target->handle);
+			sas_device_priv_data->sas_target->handle, msix_index);
 		return 0;
 	}
 	/* turning off TLR */
@@ -6508,7 +6509,7 @@ void _abort_sas_task(struct request *req, void *data, bool reserved)
 	    handle, phys_disk_num));
 
 	init_completion(&ioc->scsih_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->scsih_cmds.done, 10*HZ);
 
 	if (!(ioc->scsih_cmds.status & MPT3_CMD_COMPLETE)) {
@@ -8358,7 +8359,7 @@ void _abort_sas_task(struct request *req, void *data, bool reserved)
 	if (!ioc->hide_ir_msg)
 		pr_info(MPT3SAS_FMT "IR shutdown (sending)\n", ioc->name);
 	init_completion(&ioc->scsih_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	ioc->put_smid_default(ioc, smid, mpt3sas_base_get_msix_index(ioc));
 	wait_for_completion_timeout(&ioc->scsih_cmds.done, 10*HZ);
 
 	if (!(ioc->scsih_cmds.status & MPT3_CMD_COMPLETE)) {
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index 7f1d578..b74faf1 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -392,7 +392,7 @@ struct rep_manu_reply {
 		"report_manufacture - send to sas_addr(0x%016llx)\n",
 		ioc->name, (unsigned long long)sas_address));
 	init_completion(&ioc->transport_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->transport_cmds.done, 10*HZ);
 
 	if (!(ioc->transport_cmds.status & MPT3_CMD_COMPLETE)) {
@@ -1198,7 +1198,7 @@ struct phy_error_log_reply {
 		ioc->name, (unsigned long long)phy->identify.sas_address,
 		phy->number));
 	init_completion(&ioc->transport_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->transport_cmds.done, 10*HZ);
 
 	if (!(ioc->transport_cmds.status & MPT3_CMD_COMPLETE)) {
@@ -1514,7 +1514,7 @@ struct phy_control_reply {
 		ioc->name, (unsigned long long)phy->identify.sas_address,
 		phy->number, phy_operation));
 	init_completion(&ioc->transport_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->transport_cmds.done, 10*HZ);
 
 	if (!(ioc->transport_cmds.status & MPT3_CMD_COMPLETE)) {
@@ -2032,7 +2032,7 @@ struct phy_control_reply {
 		"%s - sending smp request\n", ioc->name, __func__));
 
 	init_completion(&ioc->transport_cmds.done);
-	ioc->put_smid_default(ioc, smid);
+	mpt3sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->transport_cmds.done, 10*HZ);
 
 	if (!(ioc->transport_cmds.status & MPT3_CMD_COMPLETE)) {
-- 
1.8.5.6


^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH 10/10] mpt3sas: scsi-mq interrupt steering
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
                   ` (8 preceding siblings ...)
  2017-01-31  9:25 ` [PATCH 09/10] mpt3sas: Use 'msix_index' as argument for put_smid functions Hannes Reinecke
@ 2017-01-31  9:26 ` Hannes Reinecke
  2017-01-31 10:05   ` Christoph Hellwig
  2017-01-31 10:02 ` [PATCH 00/10] mpt3sas: full mq support Christoph Hellwig
  2017-02-07 13:19 ` Christoph Hellwig
  11 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31  9:26 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Christoph Hellwig, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke,
	Hannes Reinecke

Enable all queues and implement correct I/O steering for scsi-mq.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c  |  8 ++++++--
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 19 +++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index f6564a3..84f808d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -3560,9 +3560,13 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 	 * with some internal commands that could be outstanding
 	 */
 	ioc->shost->can_queue = ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT;
+	if (ioc->shost->nr_hw_queues > 1) {
+		ioc->shost->nr_hw_queues = ioc->msix_vector_count;
+		ioc->shost->can_queue /= ioc->msix_vector_count;
+	}
 	dinitprintk(ioc, pr_info(MPT3SAS_FMT
-		"scsi host: can_queue depth (%d)\n",
-		ioc->name, ioc->shost->can_queue));
+		"scsi host: can_queue depth (%d), nr_hw_queues (%d)\n",
+		ioc->name, ioc->shost->can_queue, ioc->shost->nr_hw_queues));
 
 
 	/* contiguous pool for request and chains, 16 byte align, one extra "
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 8c451aa..e43f928 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -54,6 +54,7 @@
 #include <linux/interrupt.h>
 #include <linux/aer.h>
 #include <linux/raid_class.h>
+#include <linux/blk-mq-pci.h>
 #include <asm/unaligned.h>
 
 #include "mpt3sas_base.h"
@@ -4255,6 +4256,11 @@ void _flush_running(struct request *req, void *data, bool reserved)
 		mpt3sas_setup_direct_io(ioc, scmd, raid_device, mpi_request,
 		    smid);
 
+	if (shost_use_blk_mq(ioc->shost) && (ioc->shost->nr_hw_queues > 1)) {
+		u32 unique_tag = blk_mq_unique_tag(scmd->request);
+
+		msix_task = blk_mq_unique_tag_to_hwq(unique_tag);
+	}
 	if (likely(mpi_request->Function == MPI2_FUNCTION_SCSI_IO_REQUEST)) {
 		if (sas_target_priv_data->flags & MPT_TARGET_FASTPATH_IO) {
 			mpi_request->IoFlags = cpu_to_le16(scmd->cmd_len |
@@ -8775,6 +8781,16 @@ static void sas_device_make_active(struct MPT3SAS_ADAPTER *ioc,
 	return 1;
 }
 
+static int scsih_map_queues(struct Scsi_Host *shost)
+{
+	struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+
+	if (shost->nr_hw_queues > 1)
+		return blk_mq_pci_map_queues(&shost->tag_set, ioc->pdev);
+
+	return -EINVAL;
+}
+
 /* shost template for SAS 2.0 HBA devices */
 static struct scsi_host_template mpt2sas_driver_template = {
 	.module				= THIS_MODULE,
@@ -8788,6 +8804,7 @@ static void sas_device_make_active(struct MPT3SAS_ADAPTER *ioc,
 	.slave_destroy			= scsih_slave_destroy,
 	.scan_finished			= scsih_scan_finished,
 	.scan_start			= scsih_scan_start,
+	.map_queues			= scsih_map_queues,
 	.change_queue_depth		= scsih_change_queue_depth,
 	.eh_abort_handler		= scsih_abort,
 	.eh_device_reset_handler	= scsih_dev_reset,
@@ -9054,6 +9071,8 @@ static void sas_device_make_active(struct MPT3SAS_ADAPTER *ioc,
 	shost->max_lun = max_lun;
 	shost->transportt = mpt3sas_transport_template;
 	shost->unique_id = ioc->id;
+	if (shost->use_blk_mq)
+		shost->nr_hw_queues = num_online_cpus();
 
 	if (max_sectors != 0xFFFF) {
 		if (max_sectors < 64) {
-- 
1.8.5.6


^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10]  mpt3sas: full mq support
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
                   ` (9 preceding siblings ...)
  2017-01-31  9:26 ` [PATCH 10/10] mpt3sas: scsi-mq interrupt steering Hannes Reinecke
@ 2017-01-31 10:02 ` Christoph Hellwig
  2017-01-31 11:16   ` Hannes Reinecke
  2017-02-07 13:19 ` Christoph Hellwig
  11 siblings, 1 reply; 59+ messages in thread
From: Christoph Hellwig @ 2017-01-31 10:02 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl

On Tue, Jan 31, 2017 at 10:25:50AM +0100, Hannes Reinecke wrote:
> Hi all,
> 
> this is a patchset to enable full multiqueue support for the mpt3sas driver.
> While the HBA only has a single mailbox register for submitting commands,
> it does have individual receive queues per MSI-X interrupt and as such
> does benefit from converting it to full multiqueue support.

Explanation and numbers on why this would be beneficial, please.
We should not need multiple submission queues for a single register
to benefit from multiple completion queues.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 10/10] mpt3sas: scsi-mq interrupt steering
  2017-01-31  9:26 ` [PATCH 10/10] mpt3sas: scsi-mq interrupt steering Hannes Reinecke
@ 2017-01-31 10:05   ` Christoph Hellwig
  0 siblings, 0 replies; 59+ messages in thread
From: Christoph Hellwig @ 2017-01-31 10:05 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke


We should not need multiple submission queues for IRQ steering.

We just need to build the cpu to vector reverse map that
blk_mq_pci_map_queues builds.  So maybe we need to refactor
the API for ->map_queues a bit so that it's not tied to
submission queues.  In the meantime you can just opencode
that loop.
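
Something like this (untested sketch; the function name is made up, but
pci_irq_get_affinity() and the driver's existing cpu_msix_table are what
you'd want to use):

	static void _base_build_cpu_msix_table(struct MPT3SAS_ADAPTER *ioc)
	{
		const struct cpumask *mask;
		unsigned int vector, cpu;

		for (vector = 0; vector < ioc->msix_vector_count; vector++) {
			/* which CPUs is this vector affine to? */
			mask = pci_irq_get_affinity(ioc->pdev, vector);
			if (!mask)
				continue;
			for_each_cpu(cpu, mask)
				ioc->cpu_msix_table[cpu] = vector;
		}
	}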

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-01-31 10:02 ` [PATCH 00/10] mpt3sas: full mq support Christoph Hellwig
@ 2017-01-31 11:16   ` Hannes Reinecke
  2017-01-31 17:54     ` Kashyap Desai
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31 11:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl

On 01/31/2017 11:02 AM, Christoph Hellwig wrote:
> On Tue, Jan 31, 2017 at 10:25:50AM +0100, Hannes Reinecke wrote:
>> Hi all,
>>
>> this is a patchset to enable full multiqueue support for the mpt3sas driver.
>> While the HBA only has a single mailbox register for submitting commands,
>> it does have individual receive queues per MSI-X interrupt and as such
>> does benefit from converting it to full multiqueue support.
> 
> Explanation and numbers on why this would be beneficial, please.
> We should not need multiple submission queues for a single register
> to benefit from multiple completion queues.
> 
Well, the actual throughput very strongly depends on the blk-mq-sched
patches from Jens.
As this is barely finished I didn't post any numbers yet.

However:
With multiqueue support:
4k seq read : io=60573MB, bw=1009.2MB/s, iops=258353, runt= 60021msec
With scsi-mq on 1 queue:
4k seq read : io=17369MB, bw=296291KB/s, iops=74072, runt= 60028msec
So yes, there _is_ a benefit.

(Which is actually quite cool, as these tests were done on a SAS3 HBA,
so we're getting close to the theoretical maximum of 1.2GB/s).
(Unlike the single-queue case :-)

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq
  2017-01-31  9:25 ` [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq Hannes Reinecke
@ 2017-01-31 13:22   ` Christoph Hellwig
  2017-01-31 13:46     ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Christoph Hellwig @ 2017-01-31 13:22 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke

On Tue, Jan 31, 2017 at 10:25:58AM +0100, Hannes Reinecke wrote:
> Enable lockless command submission for scsi-mq by moving the
> command structure into the payload for struct request.

We support cmd_size for both the mq and !mq code path, so this
could be simplified by always taking your new block-mq path.
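
i.e. (sketch, assuming struct scsiio_tracker is the per-command payload)
just set

	.cmd_size = sizeof(struct scsiio_tracker),

in the host template and let the midlayer allocate the tracker for both
code paths.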

>  4 files changed, 427 insertions(+), 57 deletions(-)

The amount of new code scares me.  Especially compared to the very
short changelog.

> diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
> index 942fb7e..29e139f 100644
> --- a/drivers/scsi/mpt3sas/mpt3sas_base.c
> +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
> @@ -867,6 +867,20 @@ struct scsiio_tracker *
>  {
>  	WARN_ON(!smid);
>  	WARN_ON(smid >= ioc->hi_priority_smid);
> +	if (shost_use_blk_mq(ioc->shost)) {
> +		u16 hwq = (smid - 1) % ioc->shost->nr_hw_queues;
> +		u16 tag = (smid - 1) / ioc->shost->nr_hw_queues;
> +		struct blk_mq_tag_set *tagset = &ioc->shost->tag_set;
> +		struct request *req;
> +
> +		req = blk_mq_tag_to_rq(tagset->tags[hwq], tag);
> +		if (req) {
> +			struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
> +
> +			return scsi_cmd_priv(cmd);
> +		} else
> +			return NULL;
> +	}

This looks like it basically duplicates scsi_host_find_tag().
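
i.e. modulo the smid-to-tag conversion this could (untested) just be:

	u16 hwq = (smid - 1) % ioc->shost->nr_hw_queues;
	u16 tag = (smid - 1) / ioc->shost->nr_hw_queues;
	struct scsi_cmnd *cmd = scsi_host_find_tag(ioc->shost,
			(hwq << BLK_MQ_UNIQUE_TAG_BITS) | tag);

	return cmd ? scsi_cmd_priv(cmd) : NULL;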

> +	if (shost_use_blk_mq(ioc->shost)) {
> +		unsigned int unique_tag = blk_mq_unique_tag(scmd->request);
> +		u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag);
> +		u16 tag = blk_mq_unique_tag_to_tag(unique_tag);
> +		u16 smid = (tag * ioc->shost->nr_hw_queues) + hwq + 1;
> +
> +		request = scsi_cmd_priv(scmd);
> +		request->scmd = scmd;

no need for a backpointer, blk_mq_rq_from_pdu gets you
back to the request, and scsi_cmd_priv back to the driver private data.

> +		request->cb_idx = cb_idx;
> +		request->msix_io = hwq;
> +		request->smid = smid;

why do we need to store the smid?

> @@ -562,12 +642,7 @@ enum block_state {
>  _ctl_set_task_mid(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command *karg,

I think this function just needs to go away instantly.
Trying to send task management requests from userspace for any
active smid is plain dangerous.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq
  2017-01-31 13:22   ` Christoph Hellwig
@ 2017-01-31 13:46     ` Hannes Reinecke
  2017-01-31 14:24       ` Christoph Hellwig
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-01-31 13:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke

On 01/31/2017 02:22 PM, Christoph Hellwig wrote:
> On Tue, Jan 31, 2017 at 10:25:58AM +0100, Hannes Reinecke wrote:
>> Enable lockless command submission for scsi-mq by moving the
>> command structure into the payload for struct request.
> 
> We support cmd_size for both the mq and !mq code path, so this
> could be simplified by always taking your new block-mq path.
> 
Yes, they are with your latest patchset.
But that sort of overlapped with my patches and hasn't been merged
yet. So I haven't integrated that.

So what is the status?
Should I rebase my patch on top of those patches?

>>  4 files changed, 427 insertions(+), 57 deletions(-)
> 
> The amount of new code scares me.  Especially compared to the very
> short changelog.
> 
Well, these are mainly duplicated code paths as I left the original code
in place.
Simply because the infrastructure is very much in flux and converting it
all in one go might give negative results :-)

>> diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
>> index 942fb7e..29e139f 100644
>> --- a/drivers/scsi/mpt3sas/mpt3sas_base.c
>> +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
>> @@ -867,6 +867,20 @@ struct scsiio_tracker *
>>  {
>>  	WARN_ON(!smid);
>>  	WARN_ON(smid >= ioc->hi_priority_smid);
>> +	if (shost_use_blk_mq(ioc->shost)) {
>> +		u16 hwq = (smid - 1) % ioc->shost->nr_hw_queues;
>> +		u16 tag = (smid - 1) / ioc->shost->nr_hw_queues;
>> +		struct blk_mq_tag_set *tagset = &ioc->shost->tag_set;
>> +		struct request *req;
>> +
>> +		req = blk_mq_tag_to_rq(tagset->tags[hwq], tag);
>> +		if (req) {
>> +			struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
>> +
>> +			return scsi_cmd_priv(cmd);
>> +		} else
>> +			return NULL;
>> +	}
> 
> This looks like it basically duplicates scsi_host_find_tag().
> 
Yes, but scsi_host_find_tag() still relies on req->special, which it
really shouldn't after your patches; scsi_cmd_priv() should be used there.
I got a patch series to remove the last vestiges from using ->special
in the SCSI midlayer; probably time to post it.

>> +	if (shost_use_blk_mq(ioc->shost)) {
>> +		unsigned int unique_tag = blk_mq_unique_tag(scmd->request);
>> +		u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag);
>> +		u16 tag = blk_mq_unique_tag_to_tag(unique_tag);
>> +		u16 smid = (tag * ioc->shost->nr_hw_queues) + hwq + 1;
>> +
>> +		request = scsi_cmd_priv(scmd);
>> +		request->scmd = scmd;
> 
> no need for a backpointer, blk_mq_rq_from_pdu gets you
> back to the request, and scsi_cmd_priv back to the driver private data.
> 
I know. Compatibility with the original code only.

>> +		request->cb_idx = cb_idx;
>> +		request->msix_io = hwq;
>> +		request->smid = smid;
> 
> why do we need to store the smid?
> 
Sanity checking.
Far too many instances where I got the tag <-> smid mapping wrong during
development.
But yeah, if we move the entire driver over and rip out the original
code path this can go.
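
For reference, the smid <-> tag round trip being sanity-checked is just
(sketch, helper names made up; smids are 1-based, hence the +1/-1):

	static inline u16 _smid_from_tag(struct MPT3SAS_ADAPTER *ioc,
					 u16 hwq, u16 tag)
	{
		return (tag * ioc->shost->nr_hw_queues) + hwq + 1;
	}

	static inline u16 _hwq_from_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid)
	{
		return (smid - 1) % ioc->shost->nr_hw_queues;
	}

	static inline u16 _tag_from_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid)
	{
		return (smid - 1) / ioc->shost->nr_hw_queues;
	}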

>> @@ -562,12 +642,7 @@ enum block_state {
>>  _ctl_set_task_mid(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command *karg,
> 
> I think this function just needs to go away instantly.
> Trying to send task management requests from userspace for any
> active smid is plain dangerous.
> 
Hey, I didn't do it; I just did the conversion.

But indeed, it's of questionable value.

I can remove it with the next iteration.

So, I'm happy to rebase it on top of your BLOCK_PC changes. Just tell me
which tree to use.
Then indeed most of the duplicate code-paths could go away.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq
  2017-01-31 13:46     ` Hannes Reinecke
@ 2017-01-31 14:24       ` Christoph Hellwig
  0 siblings, 0 replies; 59+ messages in thread
From: Christoph Hellwig @ 2017-01-31 14:24 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Christoph Hellwig, Martin K. Petersen, James Bottomley,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke

On Tue, Jan 31, 2017 at 02:46:19PM +0100, Hannes Reinecke wrote:
> Yes, they are with your latest patchset.

No, this has been the case since blk-mq support was added to SCSI.

> Yes, but scsi_host_find_tag() still relies on req->special, which it
> really shouldn't after your patches; scsi_cmd_priv() should be used there.

Both are perfectly fine for now.

> > I think this function just needs to go away instantly.
> > Trying to send task management requests from userspace for any
> > active smid is plain dangerous.
> > 
> Hey, I didn't do it; I just did the conversion.
> 
> But indeed, it's of questionable value.
> 
> I can remove it with the next iteration.

I think we need to sort out the mpt-specific passthrough ioctls
one way or another.  A lot of the mess in this patch comes from
the fact that not all I/O goes through the block/scsi layer.

Once that is sorted out things become a lot simpler.

> So, I'm happy to rebase it on top of your BLOCK_PC changes. Just tell me
> which tree to use.

You don't need them.  cmd_size has been there for a long time for SCSI.
The only thing my patches do is add support to the legacy block code
and use that new, simpler and more efficient implementation in SCSI
instead of the old home-grown one.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* RE: [PATCH 00/10] mpt3sas: full mq support
  2017-01-31 11:16   ` Hannes Reinecke
@ 2017-01-31 17:54     ` Kashyap Desai
  2017-02-01  6:51       ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Kashyap Desai @ 2017-01-31 17:54 UTC (permalink / raw)
  To: Hannes Reinecke, Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi,
	Sathya Prakash Veerichetty, PDL-MPT-FUSIONLINUX, Sreekanth Reddy

> -----Original Message-----
> From: Hannes Reinecke [mailto:hare@suse.de]
> Sent: Tuesday, January 31, 2017 4:47 PM
> To: Christoph Hellwig
> Cc: Martin K. Petersen; James Bottomley; linux-scsi@vger.kernel.org;
Sathya
> Prakash; Kashyap Desai; mpt-fusionlinux.pdl@broadcom.com
> Subject: Re: [PATCH 00/10] mpt3sas: full mq support
>
> On 01/31/2017 11:02 AM, Christoph Hellwig wrote:
> > On Tue, Jan 31, 2017 at 10:25:50AM +0100, Hannes Reinecke wrote:
> >> Hi all,
> >>
> >> this is a patchset to enable full multiqueue support for the mpt3sas
> driver.
> >> While the HBA only has a single mailbox register for submitting
> >> commands, it does have individual receive queues per MSI-X interrupt
> >> and as such does benefit from converting it to full multiqueue
support.
> >
> > Explanation and numbers on why this would be beneficial, please.
> > We should not need multiple submission queues for a single register
> > to benefit from multiple completion queues.
> >
> Well, the actual throughput very strongly depends on the blk-mq-sched
> patches from Jens.
> As this is barely finished I didn't post any numbers yet.
>
> However:
> With multiqueue support:
> 4k seq read : io=60573MB, bw=1009.2MB/s, iops=258353, runt= 60021msec
> With scsi-mq on 1 queue:
> 4k seq read : io=17369MB, bw=296291KB/s, iops=74072, runt= 60028msec So
> yes, there _is_ a benefit.
>
> (Which is actually quite cool, as these tests were done on a SAS3 HBA,
so
> we're getting close to the theoretical maximum of 1.2GB/s).
> (Unlike the single-queue case :-)

Hannes -

Can you share details about the setup? How many drives do you have, and
how are they connected (enclosure -> drives)?
To me it looks like the current mpt3sas driver might be taking a bigger
hit on spinlock operations (the penalty on a NUMA arch is higher compared
to a single-socket server), unlike the shared blk tag scheme we have in
the megaraid_sas driver.

I mean " [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq"
patch is improving performance removing spinlock overhead and attempting
to get request using blk_tags.
Are you seeing performance improvement  if you hard code nr_hw_queues = 1
in below code changes part of "[PATCH 10/10] mpt3sas: scsi-mq interrupt
steering"

@@ -9054,6 +9071,8 @@ static void sas_device_make_active(struct
MPT3SAS_ADAPTER *ioc,
 	shost->max_lun = max_lun;
 	shost->transportt = mpt3sas_transport_template;
 	shost->unique_id = ioc->id;
+	if (shost->use_blk_mq)
+		shost->nr_hw_queues = num_online_cpus();


Thanks, Kashyap

>
> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke		   Teamlead Storage & Networking
> hare@suse.de			               +49 911 74053 688
> SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
> GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton HRB
21284
> (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-01-31 17:54     ` Kashyap Desai
@ 2017-02-01  6:51       ` Hannes Reinecke
  2017-02-01  7:07         ` Kashyap Desai
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-01  6:51 UTC (permalink / raw)
  To: Kashyap Desai, Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi,
	Sathya Prakash Veerichetty, PDL-MPT-FUSIONLINUX, Sreekanth Reddy

On 01/31/2017 06:54 PM, Kashyap Desai wrote:
>> -----Original Message-----
>> From: Hannes Reinecke [mailto:hare@suse.de]
>> Sent: Tuesday, January 31, 2017 4:47 PM
>> To: Christoph Hellwig
>> Cc: Martin K. Petersen; James Bottomley; linux-scsi@vger.kernel.org;
> Sathya
>> Prakash; Kashyap Desai; mpt-fusionlinux.pdl@broadcom.com
>> Subject: Re: [PATCH 00/10] mpt3sas: full mq support
>>
>> On 01/31/2017 11:02 AM, Christoph Hellwig wrote:
>>> On Tue, Jan 31, 2017 at 10:25:50AM +0100, Hannes Reinecke wrote:
>>>> Hi all,
>>>>
>>>> this is a patchset to enable full multiqueue support for the mpt3sas
>> driver.
>>>> While the HBA only has a single mailbox register for submitting
>>>> commands, it does have individual receive queues per MSI-X interrupt
>>>> and as such does benefit from converting it to full multiqueue
> support.
>>>
>>> Explanation and numbers on why this would be beneficial, please.
>>> We should not need multiple submission queues for a single register
>>> to benefit from multiple completion queues.
>>>
>> Well, the actual throughput very strongly depends on the blk-mq-sched
>> patches from Jens.
>> As this is barely finished I didn't post any numbers yet.
>>
>> However:
>> With multiqueue support:
>> 4k seq read : io=60573MB, bw=1009.2MB/s, iops=258353, runt= 60021msec
>> With scsi-mq on 1 queue:
>> 4k seq read : io=17369MB, bw=296291KB/s, iops=74072, runt= 60028msec So
>> yes, there _is_ a benefit.
>>
>> (Which is actually quite cool, as these tests were done on a SAS3 HBA,
> so
>> we're getting close to the theoretical maximum of 1.2GB/s).
>> (Unlike the single-queue case :-)
>
> Hannes -
>
> Can you share details about the setup? How many drives do you have, and
> how are they connected (enclosure -> drives)?
> To me it looks like the current mpt3sas driver might be taking a bigger
> hit on spinlock operations (the penalty on a NUMA arch is higher compared
> to a single-socket server), unlike the shared blk tag scheme we have in
> the megaraid_sas driver.
>
The tests were done with a single LSI SAS3008 connected to a NetApp 
E-series (2660), using 4 LUNs under MD-RAID0.

Megaraid_sas is even worse here; due to the odd nature of the 'fusion' 
implementation we're ending up having _two_ sets of tags, making it 
really hard to use scsi-mq here.
(Not that I didn't try; but lacking a proper backend it's really hard to 
evaluate the benefit of those ... spinning HDDs simply don't cut it here)

> I mean " [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq"
> patch is improving performance removing spinlock overhead and attempting
> to get request using blk_tags.
> Are you seeing performance improvement  if you hard code nr_hw_queues = 1
> in below code changes part of "[PATCH 10/10] mpt3sas: scsi-mq interrupt
> steering"
>
No. The numbers posted above are generated with exactly that patch; the 
first line is running with nr_hw_queues=32 and the second line with 
nr_hw_queues=1.

Curiously, though, patch 8/10 also reduces the 'can_queue' value by 
dividing it by the number of CPUs (required for blk tag space scaling).
If I _increase_ can_queue back to the original value after setting up the
tag space, performance _drops_ again.
Most unexpected; I'll be doing more experimenting there.
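
(To illustrate with made-up round numbers: blk-mq sizes each hardware
context's tag space from can_queue, so with a scsiio_depth of 3200 and 32
queues a can_queue of 100 gives 32 * 100 = 3200 tags in total, matching
what the firmware can take; leaving can_queue at 3200 would hand out up
to 32 * 3200 tags and oversubscribe the firmware request pool.)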

Full results will be presented at VAULT, btw :-)

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* RE: [PATCH 00/10] mpt3sas: full mq support
  2017-02-01  6:51       ` Hannes Reinecke
@ 2017-02-01  7:07         ` Kashyap Desai
  2017-02-01  7:43           ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Kashyap Desai @ 2017-02-01  7:07 UTC (permalink / raw)
  To: Hannes Reinecke, Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi,
	Sathya Prakash Veerichetty, PDL-MPT-FUSIONLINUX, Sreekanth Reddy

> -----Original Message-----
> From: Hannes Reinecke [mailto:hare@suse.de]
> Sent: Wednesday, February 01, 2017 12:21 PM
> To: Kashyap Desai; Christoph Hellwig
> Cc: Martin K. Petersen; James Bottomley; linux-scsi@vger.kernel.org;
> Sathya
> Prakash Veerichetty; PDL-MPT-FUSIONLINUX; Sreekanth Reddy
> Subject: Re: [PATCH 00/10] mpt3sas: full mq support
>
> On 01/31/2017 06:54 PM, Kashyap Desai wrote:
> >> -----Original Message-----
> >> From: Hannes Reinecke [mailto:hare@suse.de]
> >> Sent: Tuesday, January 31, 2017 4:47 PM
> >> To: Christoph Hellwig
> >> Cc: Martin K. Petersen; James Bottomley; linux-scsi@vger.kernel.org;
> > Sathya
> >> Prakash; Kashyap Desai; mpt-fusionlinux.pdl@broadcom.com
> >> Subject: Re: [PATCH 00/10] mpt3sas: full mq support
> >>
> >> On 01/31/2017 11:02 AM, Christoph Hellwig wrote:
> >>> On Tue, Jan 31, 2017 at 10:25:50AM +0100, Hannes Reinecke wrote:
> >>>> Hi all,
> >>>>
> >>>> this is a patchset to enable full multiqueue support for the
> >>>> mpt3sas
> >> driver.
> >>>> While the HBA only has a single mailbox register for submitting
> >>>> commands, it does have individual receive queues per MSI-X
> >>>> interrupt and as such does benefit from converting it to full
> >>>> multiqueue
> > support.
> >>>
> >>> Explanation and numbers on why this would be beneficial, please.
> >>> We should not need multiple submission queues for a single register
> >>> to benefit from multiple completion queues.
> >>>
> >> Well, the actual throughput very strongly depends on the blk-mq-sched
> >> patches from Jens.
> >> As this is barely finished I didn't post any numbers yet.
> >>
> >> However:
> >> With multiqueue support:
> >> 4k seq read : io=60573MB, bw=1009.2MB/s, iops=258353, runt=
> 60021msec
> >> With scsi-mq on 1 queue:
> >> 4k seq read : io=17369MB, bw=296291KB/s, iops=74072, runt= 60028msec
> >> So yes, there _is_ a benefit.
> >>
> >> (Which is actually quite cool, as these tests were done on a SAS3
> >> HBA,
> > so
> >> we're getting close to the theoretical maximum of 1.2GB/s).
> >> (Unlike the single-queue case :-)
> >
> > Hannes -
> >
> > Can you share details about the setup? How many drives do you have, and
> > how are they connected (enclosure -> drives)?
> > To me it looks like the current mpt3sas driver might be taking a bigger
> > hit on spinlock operations (the penalty on a NUMA arch is higher
> > compared to a single-socket server), unlike the shared blk tag scheme
> > we have in the megaraid_sas driver.
> >
> The tests were done with a single LSI SAS3008 connected to a NetApp E-
> series (2660), using 4 LUNs under MD-RAID0.
>
> Megaraid_sas is even worse here; due to the odd nature of the 'fusion'
> implementation we're ending up having _two_ sets of tags, making it really
> hard to use scsi-mq here.

The current megaraid_sas driver, with a single submission queue exposed
to blk-mq, will not encounter a similar performance issue.
We may not see a significant performance improvement if we attempt the
same for the megaraid_sas driver.
We had a similar discussion for megaraid_sas and hpsa:
http://www.spinics.net/lists/linux-scsi/msg101838.html

I see this patch series as a similar attempt for mpt3sas. Am I missing
anything?

The megaraid_sas driver just does an index lookup from the blk tag and
fires the I/O quickly enough, unlike mpt3sas, where lock contention at
the driver level is the bottleneck.
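
Roughly (simplified, from memory), the megaraid_sas I/O path does:

	cmd_fusion = fusion->cmd_list[scmd->request->tag];

i.e. a plain array lookup keyed off the block layer tag, with no driver
lock or free list in the hot path.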

> (Not that I didn't try; but lacking a proper backend it's really hard to
> evaluate
> the benefit of those ... spinning HDDs simply don't cut it here)
>
> > I mean " [PATCH 08/10] mpt3sas: lockless command submission for scsi-
> mq"
> > patch is improving performance removing spinlock overhead and
> > attempting to get request using blk_tags.
> > Are you seeing performance improvement  if you hard code nr_hw_queues
> > = 1 in below code changes part of "[PATCH 10/10] mpt3sas: scsi-mq
> > interrupt steering"
> >
> No. The numbers posted above are generated with exactly that patch; the
> first line is running with nr_hw_queues=32 and the second line with
> nr_hw_queues=1.

Thanks Hannes, that clarifies. Can you share the fio script you used?

If my understanding is correct, you will see the theoretical maximum of
1.2GB/s if you restrict your workload to a single NUMA node. This is
just to understand whether the mpt3sas driver spinlocks are adding
overhead. We have seen such overhead on multi-socket servers, and it is
reasonable to reduce locking in the mpt3sas driver; the only concern is
that exposing the HBA to blk-mq as multiple submission queues is really
not required, and we are trying to figure out whether doing so has any
side effects.

>
> Curiously, though, patch 8/10 also reduces the 'can_queue' value by
> dividing
> it by the number of CPUs (required for blk tag space scaling).
> If I _increase_ can_queue after setting up the tagspace to the original
> value
> performance _drops_ again.
> Most unexpected; I'll be doing more experimenting there.
>
> Full results will be presented at VAULT, btw :-)
>
> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke		      zSeries & Storage
> hare@suse.de			      +49 911 74053 688
> SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
> GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-01  7:07         ` Kashyap Desai
@ 2017-02-01  7:43           ` Hannes Reinecke
  2017-02-09 13:03             ` Sreekanth Reddy
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-01  7:43 UTC (permalink / raw)
  To: Kashyap Desai, Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi,
	Sathya Prakash Veerichetty, PDL-MPT-FUSIONLINUX, Sreekanth Reddy

On 02/01/2017 08:07 AM, Kashyap Desai wrote:
>> -----Original Message-----
>> From: Hannes Reinecke [mailto:hare@suse.de]
>> Sent: Wednesday, February 01, 2017 12:21 PM
>> To: Kashyap Desai; Christoph Hellwig
>> Cc: Martin K. Petersen; James Bottomley; linux-scsi@vger.kernel.org;
>> Sathya
>> Prakash Veerichetty; PDL-MPT-FUSIONLINUX; Sreekanth Reddy
>> Subject: Re: [PATCH 00/10] mpt3sas: full mq support
>>
>> On 01/31/2017 06:54 PM, Kashyap Desai wrote:
>>>> -----Original Message-----
>>>> From: Hannes Reinecke [mailto:hare@suse.de]
>>>> Sent: Tuesday, January 31, 2017 4:47 PM
>>>> To: Christoph Hellwig
>>>> Cc: Martin K. Petersen; James Bottomley; linux-scsi@vger.kernel.org;
>>> Sathya
>>>> Prakash; Kashyap Desai; mpt-fusionlinux.pdl@broadcom.com
>>>> Subject: Re: [PATCH 00/10] mpt3sas: full mq support
>>>>
>>>> On 01/31/2017 11:02 AM, Christoph Hellwig wrote:
>>>>> On Tue, Jan 31, 2017 at 10:25:50AM +0100, Hannes Reinecke wrote:
>>>>>> Hi all,
>>>>>>
>>>>>> this is a patchset to enable full multiqueue support for the
>>>>>> mpt3sas
>>>> driver.
>>>>>> While the HBA only has a single mailbox register for submitting
>>>>>> commands, it does have individual receive queues per MSI-X
>>>>>> interrupt and as such does benefit from converting it to full
>>>>>> multiqueue
>>> support.
>>>>>
>>>>> Explanation and numbers on why this would be beneficial, please.
>>>>> We should not need multiple submission queues for a single register
>>>>> to benefit from multiple completion queues.
>>>>>
>>>> Well, the actual throughput very strongly depends on the blk-mq-sched
>>>> patches from Jens.
>>>> As this is barely finished I didn't post any numbers yet.
>>>>
>>>> However:
>>>> With multiqueue support:
>>>> 4k seq read : io=60573MB, bw=1009.2MB/s, iops=258353, runt=
>> 60021msec
>>>> With scsi-mq on 1 queue:
>>>> 4k seq read : io=17369MB, bw=296291KB/s, iops=74072, runt= 60028msec
>>>> So yes, there _is_ a benefit.
>>>>
>>>> (Which is actually quite cool, as these tests were done on a SAS3
>>>> HBA,
>>> so
>>>> we're getting close to the theoretical maximum of 1.2GB/s).
>>>> (Unlike the single-queue case :-)
>>>
>>> Hannes -
>>>
>>> Can you share details about the setup? How many drives do you have,
>>> and how are they connected (enclosure -> drives)?
>>> To me it looks like the current mpt3sas driver might be taking a bigger
>>> hit on spinlock operations (the penalty on a NUMA arch is higher
>>> compared to a single-socket server), unlike the shared blk tag scheme
>>> we have in the megaraid_sas driver.
>>>
>> The tests were done with a single LSI SAS3008 connected to a NetApp E-
>> series (2660), using 4 LUNs under MD-RAID0.
>>
>> Megaraid_sas is even worse here; due to the odd nature of the 'fusion'
>> implementation we're ending up having _two_ sets of tags, making it really
>> hard to use scsi-mq here.
>
> The current megaraid_sas driver, with a single submission queue exposed
> to blk-mq, will not encounter a similar performance issue.
> We may not see a significant performance improvement if we attempt the
> same for the megaraid_sas driver.
> We had a similar discussion for megaraid_sas and hpsa:
> http://www.spinics.net/lists/linux-scsi/msg101838.html
>
> I see this patch series as a similar attempt for mpt3sas. Am I missing
> anything?
>
No, you don't. That is precisely the case.

The difference here is that mpt3sas is actually exposing hardware 
capabilities, whereas with megaraid_sas (and hpsa) we're limited by the 
hardware implementation to a single completion queue shared between HBA 
and OS.
With mpt3sas we have per-interrupt completion queues (well, for 
newer firmware :-) so we can take advantage of scsi-mq.

(And if someone had done a _proper_ design of the megaraid_sas_fusion 
thing by exposing several submission and completion queues for 
megaraid_sas itself, instead of bolting the existing megaraid_sas single 
queue approach on top of the mpt3sas multiqueue design, we could have 
done the same thing there ... sigh)

> The megaraid_sas driver just does an index lookup from the blk tag and
> fires the I/O quickly enough, unlike mpt3sas, where lock contention at
> the driver level is the bottleneck.
>
>> (Not that I didn't try; but lacking a proper backend it's really hard to
>> evaluate
>> the benefit of those ... spinning HDDs simply don't cut it here)
>>
>>> I mean " [PATCH 08/10] mpt3sas: lockless command submission for scsi-
>> mq"
>>> patch is improving performance removing spinlock overhead and
>>> attempting to get request using blk_tags.
>>> Are you seeing performance improvement  if you hard code nr_hw_queues
>>> = 1 in below code changes part of "[PATCH 10/10] mpt3sas: scsi-mq
>>> interrupt steering"
>>>
>> No. The numbers posted above are generated with exactly that patch; the
>> first line is running with nr_hw_queues=32 and the second line with
>> nr_hw_queues=1.
>
> Thanks Hannes, that clarifies. Can you share the fio script you used?
>
> If my understanding is correct, you will see the theoretical maximum of
> 1.2GB/s if you restrict your workload to a single NUMA node. This is
> just to understand whether the mpt3sas driver spinlocks are adding
> overhead. We have seen such overhead on multi-socket servers, and it is
> reasonable to reduce locking in the mpt3sas driver; the only concern is
> that exposing the HBA to blk-mq as multiple submission queues is really
> not required, and we are trying to figure out whether doing so has any
> side effects.
>
Well, the HBA has per-MSIx completion queues, so I don't see any issues 
with exposing them.
blk-mq is designed to handle per-CPU queues, so exposing all hardware 
queues will be beneficial especially in a low-latency context; and as 
the experiments show, even when connected to external storage there 
is a benefit to be had.

But exposing all queues might reduce or even resolve your FW Fault 
status 0x2100 state; with that patch you now have each queue pulling 
requests off the completion queue and updating the reply post host index 
in parallel, making the situation far more unlikely.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors
  2017-01-31  9:25 ` [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors Hannes Reinecke
@ 2017-02-07 13:15   ` Christoph Hellwig
  2017-02-16  9:32   ` Sreekanth Reddy
  1 sibling, 0 replies; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 13:15 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke

Looks fine:

Reviewed-by: Christoph Hellwig <hch@lst.de>

Btw, I think the !smp_affinity_enable path should just go away
sooner or later.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 02/10] mpt3sas: set default value for cb_idx
  2017-01-31  9:25 ` [PATCH 02/10] mpt3sas: set default value for cb_idx Hannes Reinecke
@ 2017-02-07 13:15   ` Christoph Hellwig
  0 siblings, 0 replies; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 13:15 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 03/10] mpt3sas: implement _dechain_st()
  2017-01-31  9:25 ` [PATCH 03/10] mpt3sas: implement _dechain_st() Hannes Reinecke
@ 2017-02-07 13:15   ` Christoph Hellwig
  2017-02-07 13:18     ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 13:15 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke

On Tue, Jan 31, 2017 at 10:25:53AM +0100, Hannes Reinecke wrote:
> Split off _dechain_st() as a separate function.
> No functional change.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>  drivers/scsi/mpt3sas/mpt3sas_base.c | 23 ++++++++++++++---------
>  1 file changed, 14 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
> index 09d0008..120b317 100644
> --- a/drivers/scsi/mpt3sas/mpt3sas_base.c
> +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
> @@ -2365,6 +2365,19 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>  	return smid;
>  }
>  
> +static void
> +_dechain_st(struct MPT3SAS_ADAPTER *ioc, struct scsiio_tracker *st)
> +{
> +	struct chain_tracker *chain_req;
> +
> +	while (!list_empty(&st->chain_list)) {
> +		chain_req = list_first_entry(&st->chain_list,
> +					     struct chain_tracker,
> +					     tracker_list);
> +		list_move(&chain_req->tracker_list, &ioc->free_chain_list);
> +	}

Why not just use list_splice_init?
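
i.e. (untested):

	list_splice_init(&st->chain_list, &ioc->free_chain_list);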

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 04/10] mpt3sas: separate out _base_recovery_check()
  2017-01-31  9:25 ` [PATCH 04/10] mpt3sas: separate out _base_recovery_check() Hannes Reinecke
@ 2017-02-07 13:16   ` Christoph Hellwig
  2017-02-16  9:53   ` Sreekanth Reddy
  1 sibling, 0 replies; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 13:16 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 05/10] mpt3sas: open-code _scsih_scsi_lookup_get()
  2017-01-31  9:25 ` [PATCH 05/10] mpt3sas: open-code _scsih_scsi_lookup_get() Hannes Reinecke
@ 2017-02-07 13:16   ` Christoph Hellwig
  2017-02-16  9:59   ` Sreekanth Reddy
  1 sibling, 0 replies; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 13:16 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 06/10] mpt3sas: Introduce mpt3sas_get_st_from_smid()
  2017-01-31  9:25 ` [PATCH 06/10] mpt3sas: Introduce mpt3sas_get_st_from_smid() Hannes Reinecke
@ 2017-02-07 13:17   ` Christoph Hellwig
  0 siblings, 0 replies; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 13:17 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke

>  inline u8
>  mpt3sas_scsi_direct_io_get(struct MPT3SAS_ADAPTER *ioc, u16 smid)
>  {
> -	return ioc->scsi_lookup[smid - 1].direct_io;
> +	struct scsiio_tracker *st = mpt3sas_get_st_from_smid(ioc, smid);
> +
> +	return st ? st->direct_io : 0;

This wrapper can go away and be merged into the only caller.

Otherwise looks fine:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 03/10] mpt3sas: implement _dechain_st()
  2017-02-07 13:15   ` Christoph Hellwig
@ 2017-02-07 13:18     ` Hannes Reinecke
  0 siblings, 0 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-07 13:18 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl, Hannes Reinecke

On 02/07/2017 02:15 PM, Christoph Hellwig wrote:
> On Tue, Jan 31, 2017 at 10:25:53AM +0100, Hannes Reinecke wrote:
>> Split off _dechain_st() as separate function.
>> No functional change.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>> ---
>>  drivers/scsi/mpt3sas/mpt3sas_base.c | 23 ++++++++++++++---------
>>  1 file changed, 14 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
>> index 09d0008..120b317 100644
>> --- a/drivers/scsi/mpt3sas/mpt3sas_base.c
>> +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
>> @@ -2365,6 +2365,19 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>>  	return smid;
>>  }
>>  
>> +static void
>> +_dechain_st(struct MPT3SAS_ADAPTER *ioc, struct scsiio_tracker *st)
>> +{
>> +	struct chain_tracker *chain_req;
>> +
>> +	while (!list_empty(&st->chain_list)) {
>> +		chain_req = list_first_entry(&st->chain_list,
>> +					     struct chain_tracker,
>> +					     tracker_list);
>> +		list_move(&chain_req->tracker_list, &ioc->free_chain_list);
>> +	}
> 
> Why not just use list_splice_init?
> 
Yep, can do.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs
  2017-01-31  9:25 ` [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs Hannes Reinecke
@ 2017-02-07 13:19   ` Christoph Hellwig
  2017-02-16 10:09   ` Sreekanth Reddy
  1 sibling, 0 replies; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 13:19 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl,
	Hannes Reinecke

On Tue, Jan 31, 2017 at 10:25:57AM +0100, Hannes Reinecke wrote:
> When sending a TMF via the ioctl interface we should be using
> the hi-priority queue instead of the scsi queue to be consistent
> with overall TMF usage.

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10]  mpt3sas: full mq support
  2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
                   ` (10 preceding siblings ...)
  2017-01-31 10:02 ` [PATCH 00/10] mpt3sas: full mq support Christoph Hellwig
@ 2017-02-07 13:19 ` Christoph Hellwig
  2017-02-07 14:38   ` Hannes Reinecke
  2017-02-15  8:15   ` Christoph Hellwig
  11 siblings, 2 replies; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 13:19 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl

Patches 1-7 look fine to me with minor fixups, and I'd love to see
them go into 4.11.  The last one looks really questionable,
and 8 and 9 will need some work so that the MPT passthrough ioctls
either go away or make use of struct request and the block layer
and SCSI infrastructure.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-07 13:19 ` Christoph Hellwig
@ 2017-02-07 14:38   ` Hannes Reinecke
  2017-02-07 15:34     ` Christoph Hellwig
  2017-02-15  8:15   ` Christoph Hellwig
  1 sibling, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-07 14:38 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl

On 02/07/2017 02:19 PM, Christoph Hellwig wrote:
> Patch 1-7 look fine to me with minor fixups, and I'd love to see
> them go into 4.11.  The last one looks really questionable,
> and 8 and 9 will need some work so that the MPT passthrough ioctls
> either go away or make use of struct request and the block layer
> and SCSI infrastructure.
> 
Hmm. That's quite a bit of effort for very little gain.

The SCSI passthrough commands pass in pre-formatted SGLs, so the driver
just has to map them.
If we were to convert that, we would first have to re-format the
(driver-specific) SGLs into Linux sg lists, only to have them converted
back into driver-specific ones once queuecommand is called.
Are you sure it's worth the effort?
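
To illustrate the round trip: in queuecommand a driver rebuilds its
hardware SGL from the Linux scatterlist along these lines ('struct
hw_sge' and its fields are made up for the example):

static int build_hw_sgl(struct scsi_cmnd *scmd, struct hw_sge *sgl)
{
	struct scatterlist *sg;
	int i, nseg;

	nseg = scsi_dma_map(scmd);	/* DMA-map the Linux sg list */
	if (nseg < 0)
		return nseg;
	scsi_for_each_sg(scmd, sg, nseg, i) {
		sgl[i].addr = cpu_to_le64(sg_dma_address(sg));
		sgl[i].len  = cpu_to_le32(sg_dma_len(sg));
	}
	return 0;
}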

The driver already reserves some tags for precisely this use-case, so it
won't conflict with normal I/O operation.
So where's the problem with that?

I know the SCSI passthrough operations are decidedly ugly, but if I were
to change them I'd rather move them over to bsg once we've converted bsg
to operate without a request queue.
But for now ... not sure.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-07 14:38   ` Hannes Reinecke
@ 2017-02-07 15:34     ` Christoph Hellwig
  2017-02-07 15:39       ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 15:34 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Christoph Hellwig, Martin K. Petersen, James Bottomley,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl

On Tue, Feb 07, 2017 at 03:38:51PM +0100, Hannes Reinecke wrote:
> The SCSI passthrough commands pass in pre-formatted SGLs, so the driver
> just has to map them.
> If we were to convert that, we would first have to re-format the
> (driver-specific) SGLs into Linux sg lists, only to have them converted
> back into driver-specific ones once queuecommand is called.
> Are you sure it's worth the effort?
> 
> The driver already reserves some tags for precisely this use-case, so it
> won't conflict with normal I/O operation.
> So where's the problem with that?

If it was an entirely separate path that would be easy, but it's
not - see all the poking into the tag maps that your patch 8
includes.  If it was just a few tags on the side not interacting
with the scsi or blk-mq it wouldn't be such a problem.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-07 15:34     ` Christoph Hellwig
@ 2017-02-07 15:39       ` Hannes Reinecke
  2017-02-07 15:40         ` Christoph Hellwig
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-07 15:39 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl

On 02/07/2017 04:34 PM, Christoph Hellwig wrote:
> On Tue, Feb 07, 2017 at 03:38:51PM +0100, Hannes Reinecke wrote:
>> The SCSI passthrough commands pass in pre-formatted SGLs, so the driver
>> just has to map them.
>> If we were to convert that, we would first have to re-format the
>> (driver-specific) SGLs into Linux sg lists, only to have them converted
>> back into driver-specific ones once queuecommand is called.
>> Are you sure it's worth the effort?
>>
>> The driver already reserves some tags for precisely this use-case, so it
>> won't conflict with normal I/O operation.
>> So where's the problem with that?
> 
> If it was an entirely separate path that would be easy, but it's
> not - see all the poking into the tag maps that your patch 8
> includes.  If it was just a few tags on the side not interacting
> with the scsi or blk-mq it wouldn't be such a problem.
> 
But we do; we're getting the index/tag/smid from the high-priority list,
which is separated from the normal SCSI I/O tag space.
(which reminds me; there's another cleanup patch to be had in
_ctl_do_mpt_command(), but that's beside the point).
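
Roughly how the hi-priority pool hands out smids today -- quoted from
memory and abbreviated, so treat it as a sketch:

u16
mpt3sas_base_get_smid_hpr(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx)
{
	struct request_tracker *request;
	unsigned long flags;
	u16 smid;

	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
	if (list_empty(&ioc->hpr_free_list)) {
		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
		return 0;
	}
	request = list_first_entry(&ioc->hpr_free_list,
				   struct request_tracker, tracker_list);
	request->cb_idx = cb_idx;
	smid = request->smid;
	list_del(&request->tracker_list);
	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
	return smid;
}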

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-07 15:39       ` Hannes Reinecke
@ 2017-02-07 15:40         ` Christoph Hellwig
  2017-02-07 15:49           ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-07 15:40 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Christoph Hellwig, Martin K. Petersen, James Bottomley,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl

On Tue, Feb 07, 2017 at 04:39:01PM +0100, Hannes Reinecke wrote:
> But we do; we're getting the index/tag/smid from the high-priority list,
> which is separated from the normal SCSI I/O tag space.
> (which reminds me; there's another cleanup patch to be had in
> _ctl_do_mpt_command(), but that's beside the point).

The calls to blk_mq_tagset_busy_iter added in patch 8 indicate the
contrary.
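
For reference, the 4.11-era helper in question -- prototypes quoted from
memory, so treat them as approximate:

typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
			     busy_tag_iter_fn *fn, void *priv);

Anything iterating over shost->tag_set this way is, by definition,
interacting with scsi-mq's tag management.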

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-07 15:40         ` Christoph Hellwig
@ 2017-02-07 15:49           ` Hannes Reinecke
  0 siblings, 0 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-07 15:49 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl

On 02/07/2017 04:40 PM, Christoph Hellwig wrote:
> On Tue, Feb 07, 2017 at 04:39:01PM +0100, Hannes Reinecke wrote:
>> But we do; we're getting the index/tag/smid from the high-priority list,
>> which is separated from the normal SCSI I/O tag space.
>> (which reminds me; there's another cleanup patch to be had in
>> _ctl_do_mpt_command(), but that's beside the point).
> 
> The calls to blk_mq_tagset_busy_iter added in patch 8 indicate the
> contrary.
> 
Right. Now I see what you mean.
We should have used reserved_tags here.
Sadly we still don't have an interface to actually _allocate_ reserved
tags, do we?
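
At the raw blk-mq level the pieces do exist, if I remember correctly --
a sketch of my understanding, not of anything scsi-mq currently wires
up:

	set->reserved_tags = 2;	/* carved out of the tag space */
	...
	rq = blk_mq_alloc_request(q, WRITE, BLK_MQ_REQ_RESERVED);

What's missing is a SCSI-level way to hand such a reserved tag to the
driver for its internal commands.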

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-01  7:43           ` Hannes Reinecke
@ 2017-02-09 13:03             ` Sreekanth Reddy
  2017-02-09 13:12               ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Sreekanth Reddy @ 2017-02-09 13:03 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Kashyap Desai, Christoph Hellwig, Martin K. Petersen,
	James Bottomley, linux-scsi, Sathya Prakash Veerichetty,
	PDL-MPT-FUSIONLINUX

On Wed, Feb 1, 2017 at 1:13 PM, Hannes Reinecke <hare@suse.de> wrote:
>
> On 02/01/2017 08:07 AM, Kashyap Desai wrote:
>>>
>>> [ .. ]
>>>>>
>>>>> On 01/31/2017 11:02 AM, Christoph Hellwig wrote:
>>>>>>
>>>>>> On Tue, Jan 31, 2017 at 10:25:50AM +0100, Hannes Reinecke wrote:
>>>>>>>
>>>>>>> Hi all,
>>>>>>>
>>>>>>> this is a patchset to enable full multiqueue support for the
>>>>>>> mpt3sas
>>>>>
>>>>> driver.
>>>>>>>
>>>>>>> While the HBA only has a single mailbox register for submitting
>>>>>>> commands, it does have individual receive queues per MSI-X
>>>>>>> interrupt and as such does benefit from converting it to full
>>>>>>> multiqueue
>>>>
>>>> support.
>>>>>>
>>>>>>
>>>>>> Explanation and numbers on why this would be beneficial, please.
>>>>>> We should not need multiple submission queues for a single register
>>>>>> to benefit from multiple completion queues.
>>>>>>
>>>>> Well, the actual throughput very strongly depends on the blk-mq-sched
>>>>> patches from Jens.
>>>>> As this is barely finished I didn't post any numbers yet.
>>>>>
>>>>> However:
>>>>> With multiqueue support:
>>>>> 4k seq read : io=60573MB, bw=1009.2MB/s, iops=258353, runt=
>>>
>>> 60021msec
>>>>>
>>>>> With scsi-mq on 1 queue:
>>>>> 4k seq read : io=17369MB, bw=296291KB/s, iops=74072, runt= 60028msec
>>>>> So yes, there _is_ a benefit.


Hannes,

I have created an md raid0 with 4 SAS SSD drives using the below command:
#mdadm --create /dev/md0 --level=0 --raid-devices=4 /dev/sdg /dev/sdh
/dev/sdi /dev/sdj

And here is the 'mdadm --detail /dev/md0' command output:
--------------------------------------------------------------------------------------------------------------------------
/dev/md0:
        Version : 1.2
  Creation Time : Thu Feb  9 14:38:47 2017
     Raid Level : raid0
     Array Size : 780918784 (744.74 GiB 799.66 GB)
   Raid Devices : 4
  Total Devices : 4
    Persistence : Superblock is persistent

    Update Time : Thu Feb  9 14:38:47 2017
          State : clean
 Active Devices : 4
Working Devices : 4
 Failed Devices : 0
  Spare Devices : 0

     Chunk Size : 512K

           Name : host_name
           UUID : b63f9da7:b7de9a25:6a46ca00:42214e22
         Events : 0

    Number   Major   Minor   RaidDevice State
       0       8       96        0      active sync   /dev/sdg
       1       8      112        1      active sync   /dev/sdh
       2       8      144        2      active sync   /dev/sdj
       3       8      128        3      active sync   /dev/sdi
------------------------------------------------------------------------------------------------------------------------------

Then I used the below fio profile to run 4K sequential read operations
with the nr_hw_queues=1 driver and with the nr_hw_queues=24 driver (as my
system has two NUMA nodes, each with 12 CPUs).
-----------------------------------------------------
[global]
ioengine=libaio
group_reporting
direct=1
rw=read
bs=4k
allow_mounted_write=0
iodepth=128
runtime=150s

[job1]
filename=/dev/md0
-----------------------------------------------------

Here are the fio results with nr_hw_queues=1 (i.e. a single request
queue) at various job counts:
1JOB 4k read  : io=213268MB, bw=1421.8MB/s, iops=363975, runt=150001msec
2JOBs 4k read : io=309605MB, bw=2064.2MB/s, iops=528389, runt=150001msec
4JOBs 4k read : io=281001MB, bw=1873.4MB/s, iops=479569, runt=150002msec
8JOBs 4k read : io=236297MB, bw=1575.2MB/s, iops=403236, runt=150016msec

Here are the fio results with nr_hw_queues=24 (i.e. multiple request
queues) at various job counts:
1JOB 4k read   : io=95194MB, bw=649852KB/s, iops=162463, runt=150001msec
2JOBs 4k read : io=189343MB, bw=1262.3MB/s, iops=323142, runt=150001msec
4JOBs 4k read : io=314832MB, bw=2098.9MB/s, iops=537309, runt=150001msec
8JOBs 4k read : io=277015MB, bw=1846.8MB/s, iops=472769, runt=150001msec

Here we can see that at lower job counts, a single request queue
(nr_hw_queues=1) gives more IOPS than multiple request queues
(nr_hw_queues=24).

Can you please share your fio profile, so that I can try the same thing
on my system?

Thanks,
Sreekanth


[ .. ]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-09 13:03             ` Sreekanth Reddy
@ 2017-02-09 13:12               ` Hannes Reinecke
  2017-02-10  4:43                 ` Sreekanth Reddy
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-09 13:12 UTC (permalink / raw)
  To: Sreekanth Reddy
  Cc: Kashyap Desai, Christoph Hellwig, Martin K. Petersen,
	James Bottomley, linux-scsi, Sathya Prakash Veerichetty,
	PDL-MPT-FUSIONLINUX

On 02/09/2017 02:03 PM, Sreekanth Reddy wrote:
> On Wed, Feb 1, 2017 at 1:13 PM, Hannes Reinecke <hare@suse.de> wrote:
>>
> [ .. ]
> 
> Can you please share your fio profile, so that I can try the same thing
> on my system?
> 
Have you tried with the latest git update from Jens for-4.11/block (or
for-4.11/next) branch?
I've found that using the mq-deadline scheduler has a noticeable
performance boost.
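
(For anyone reproducing this: the scheduler is selected per block
device, e.g. "echo mq-deadline > /sys/block/sdg/queue/scheduler"; the
device name is just an example.)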

The fio job I'm using is essentially the same; you should just make sure
to specify a 'numjobs=' statement in there.
Otherwise fio will just use a single CPU, which of course leads to
adverse effects in the multiqueue case.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-09 13:12               ` Hannes Reinecke
@ 2017-02-10  4:43                 ` Sreekanth Reddy
  2017-02-10  6:59                   ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Sreekanth Reddy @ 2017-02-10  4:43 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Kashyap Desai, Christoph Hellwig, Martin K. Petersen,
	James Bottomley, linux-scsi, Sathya Prakash Veerichetty,
	PDL-MPT-FUSIONLINUX

On Thu, Feb 9, 2017 at 6:42 PM, Hannes Reinecke <hare@suse.de> wrote:
> On 02/09/2017 02:03 PM, Sreekanth Reddy wrote:
>> On Wed, Feb 1, 2017 at 1:13 PM, Hannes Reinecke <hare@suse.de> wrote:
>>>
>> [ .. ]
>>
> Have you tried with the latest git update from Jens for-4.11/block (or
> for-4.11/next) branch?

I am using the below git repo:

https://git.kernel.org/cgit/linux/kernel/git/mkp/scsi.git/log/?h=4.11/scsi-queue

Today I will try with Jens for-4.11/block.

> I've found that using the mq-deadline scheduler has a noticeable
> performance boost.
>
> The fio job I'm using is essentially the same; you should just make sure
> to specify a 'numjobs=' statement in there.
> Otherwise fio will just use a single CPU, which of course leads to
> adverse effects in the multiqueue case.

Yes, I am providing 'numjobs=' on the fio command line as shown below:

# fio md_fio_profile --numjobs=8 --output=fio_results.txt

Thanks,
Sreekanth

>
> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke                Teamlead Storage & Networking
> hare@suse.de                                   +49 911 74053 688
> SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
> GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
> HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-10  4:43                 ` Sreekanth Reddy
@ 2017-02-10  6:59                   ` Hannes Reinecke
  2017-02-13  6:15                     ` Sreekanth Reddy
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-10  6:59 UTC (permalink / raw)
  To: Sreekanth Reddy
  Cc: Kashyap Desai, Christoph Hellwig, Martin K. Petersen,
	James Bottomley, linux-scsi, Sathya Prakash Veerichetty,
	PDL-MPT-FUSIONLINUX

On 02/10/2017 05:43 AM, Sreekanth Reddy wrote:
> On Thu, Feb 9, 2017 at 6:42 PM, Hannes Reinecke <hare@suse.de> wrote:
>> On 02/09/2017 02:03 PM, Sreekanth Reddy wrote:
[ .. ]
>>>
>> Have you tried with the latest git update from Jens for-4.11/block (or
>> for-4.11/next) branch?
> 
> I am using the below git repo:
> 
> https://git.kernel.org/cgit/linux/kernel/git/mkp/scsi.git/log/?h=4.11/scsi-queue
> 
> Today I will try with Jens for-4.11/block.
> 
By all means, do.

>> I've found that using the mq-deadline scheduler has a noticeable
>> performance boost.
>>
>> The fio job I'm using is essentially the same; you should just make sure
>> to specify a 'numjobs=' statement in there.
>> Otherwise fio will just use a single CPU, which of course leads to
>> adverse effects in the multiqueue case.
> 
> Yes, I am providing 'numjobs=' on the fio command line as shown below:
> 
> # fio md_fio_profile --numjobs=8 --output=fio_results.txt
> 
Still, it looks as if you'd be using fewer jobs than you have CPUs.
Which means you'll be running into a tag starvation scenario on those
CPUs, especially for the small blocksizes.
What are the results if you set 'numjobs' to the number of CPUs?
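
To spell out the starvation arithmetic -- an illustrative sketch with
made-up numbers; the real figures depend on the HBA's queue depth:

	/*
	 * If an HBA-wide pool of tags is divided evenly across the
	 * hardware queues, each CPU's queue only gets a fraction of
	 * it, e.g. 9600 / 24 = 400.  A single fio job pinned to one
	 * CPU is then capped at 400 outstanding commands even while
	 * the other 23 queues sit idle.
	 */
	static inline unsigned int per_queue_depth(unsigned int can_queue,
						   unsigned int nr_hw_queues)
	{
		return can_queue / nr_hw_queues;
	}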

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-10  6:59                   ` Hannes Reinecke
@ 2017-02-13  6:15                     ` Sreekanth Reddy
  2017-02-13 13:11                       ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Sreekanth Reddy @ 2017-02-13  6:15 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Kashyap Desai, Christoph Hellwig, Martin K. Petersen,
	James Bottomley, linux-scsi, Sathya Prakash Veerichetty,
	PDL-MPT-FUSIONLINUX

On Fri, Feb 10, 2017 at 12:29 PM, Hannes Reinecke <hare@suse.de> wrote:
> On 02/10/2017 05:43 AM, Sreekanth Reddy wrote:
>> On Thu, Feb 9, 2017 at 6:42 PM, Hannes Reinecke <hare@suse.de> wrote:
>>> On 02/09/2017 02:03 PM, Sreekanth Reddy wrote:
> [ .. ]
>>
> Still, it looks as if you'd be using fewer jobs than you have CPUs.
> Which means you'll be running into a tag starvation scenario on those
> CPUs, especially for the small blocksizes.
> What are the results if you set 'numjobs' to the number of CPUs?
>

Hannes,

I tried on the Jens for-4.11/block kernel repo and also set each block
PD's scheduler to 'mq-deadline'; here are my results for 4K sequential
reads on md0 (raid0 with 4 drives). I have 24 CPUs and so also tried
setting numjobs=24.

fio results when nr_hw_queues=1 (i.e. single request queue) with
various number of job counts

4k read when numjobs=1 : io=215553MB, bw=1437.9MB/s, iops=367874,
runt=150001msec
4k read when numjobs=2 : io=307771MB, bw=2051.9MB/s, iops=525258,
runt=150001msec
4k read when numjobs=4 : io=300382MB, bw=2002.6MB/s, iops=512644,
runt=150002msec
4k read when numjobs=8 : io=320609MB, bw=2137.4MB/s, iops=547162,
runt=150003msec
4k read when numjobs=24: io=275701MB, bw=1837.1MB/s, iops=470510,
runt=150006msec

fio results when nr_hw_queues=24 (i.e. multiple request queue) with
various number of job counts,

4k read when numjobs=1 : io=177600MB, bw=1183.2MB/s, iops=303102,
runt=150001msec
4k read when numjobs=2 : io=182416MB, bw=1216.1MB/s, iops=311320,
runt=150001msec
4k read when numjobs=4 : io=347553MB, bw=2316.2MB/s, iops=593149,
runt=150002msec
4k read when numjobs=8 : io=349995MB, bw=2333.3MB/s, iops=597312,
runt=150003msec
4k read when numjobs=24: io=350618MB, bw=2337.4MB/s, iops=598359,
runt=150007msec

At lower job counts the single queue performs better, whereas at
higher job counts multi-queue performs better.

Thanks,
Sreekanth


> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke                Teamlead Storage & Networking
> hare@suse.de                                   +49 911 74053 688
> SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
> GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
> HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-13  6:15                     ` Sreekanth Reddy
@ 2017-02-13 13:11                       ` Hannes Reinecke
  2017-02-15  8:27                         ` Sreekanth Reddy
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-13 13:11 UTC (permalink / raw)
  To: Sreekanth Reddy
  Cc: Kashyap Desai, Christoph Hellwig, Martin K. Petersen,
	James Bottomley, linux-scsi, Sathya Prakash Veerichetty,
	PDL-MPT-FUSIONLINUX

On 02/13/2017 07:15 AM, Sreekanth Reddy wrote:
> On Fri, Feb 10, 2017 at 12:29 PM, Hannes Reinecke <hare@suse.de> wrote:
>> On 02/10/2017 05:43 AM, Sreekanth Reddy wrote:
>>> On Thu, Feb 9, 2017 at 6:42 PM, Hannes Reinecke <hare@suse.de> wrote:
>>>> On 02/09/2017 02:03 PM, Sreekanth Reddy wrote:
>> [ .. ]
>>>>>
>>>> [ .. ]
>>>
>>> Still, it looks as if you'd be using fewer jobs than you have CPUs.
>> Which means you'll be running into a tag starvation scenario on those
>> CPUs, especially for the small blocksizes.
>> What are the results if you set 'numjobs' to the number of CPUs?
>>
> 
> Hannes,
> 
> I tried on the Jens for-4.11/block kernel repo and also set each block
> PD's scheduler to 'mq-deadline'; here are my results for 4K sequential
> reads on md0 (raid0 with 4 drives). I have 24 CPUs and so also tried
> setting numjobs=24.
> 
> fio results when nr_hw_queues=1 (i.e. single request queue) with
> various number of job counts
> 
> 4k read when numjobs=1 : io=215553MB, bw=1437.9MB/s, iops=367874,
> runt=150001msec
> 4k read when numjobs=2 : io=307771MB, bw=2051.9MB/s, iops=525258,
> runt=150001msec
> 4k read when numjobs=4 : io=300382MB, bw=2002.6MB/s, iops=512644,
> runt=150002msec
> 4k read when numjobs=8 : io=320609MB, bw=2137.4MB/s, iops=547162,
> runt=150003msec
> 4k read when numjobs=24: io=275701MB, bw=1837.1MB/s, iops=470510,
> runt=150006msec
> 
> fio results when nr_hw_queues=24 (i.e. multiple request queue) with
> various number of job counts,
> 
> 4k read when numjobs=1 : io=177600MB, bw=1183.2MB/s, iops=303102,
> runt=150001msec
> 4k read when numjobs=2 : io=182416MB, bw=1216.1MB/s, iops=311320,
> runt=150001msec
> 4k read when numjobs=4 : io=347553MB, bw=2316.2MB/s, iops=593149,
> runt=150002msec
> 4k read when numjobs=8 : io=349995MB, bw=2333.3MB/s, iops=597312,
> runt=150003msec
> 4k read when numjobs=24: io=350618MB, bw=2337.4MB/s, iops=598359,
> runt=150007msec
> 
> At lower job counts the single queue performs better, whereas at
> higher job counts multi-queue performs better.
> 
Thank you for these numbers. They fit very well with my results.

So it's as I suspected; with more parallelism we do gain from
multiqueue. And with single-issue processes we do suffer a performance
penalty.

However, I strongly suspect that this is an issue with block-mq itself,
and not so much with mpt3sas.
The reason is that block-mq needs to split the tag space into distinct
ranges for each queue, and hence hits tag starvation far earlier the
more queues are registered.
block-mq _can_ work around this by moving the issuing process onto
another CPU (and thus using the tag space from there), but this involves
calling 'schedule' in the hot path, which might well account for the
performance drop here.
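
The 'schedule' in question lives in the tag allocator; from memory, and
abbreviated rather than literal:

	/* blk_mq_get_tag(), once the hw queue's tag bitmap is empty: */
	do {
		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
		...
		blk_mq_run_hw_queue(hctx, false);
		...
		io_schedule();	/* issuing task sleeps and may migrate */
	} while (1);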

I will be doing more tests with a high nr_hw_queues count and a low I/O
issuer count; my guess is that it's the block layer which is performing
suboptimally here.
In any case, we will be discussing blk-mq performance at LSF/MM this
year; I will be bringing up the poor single-queue performance there.

At the end of the day, I strongly suspect that every self-respecting
process doing heavy I/O already _is_ multithreaded, so I would not
try to optimize for the single-queue case.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10]  mpt3sas: full mq support
  2017-02-07 13:19 ` Christoph Hellwig
  2017-02-07 14:38   ` Hannes Reinecke
@ 2017-02-15  8:15   ` Christoph Hellwig
  2017-02-15  8:19     ` Hannes Reinecke
  1 sibling, 1 reply; 59+ messages in thread
From: Christoph Hellwig @ 2017-02-15  8:15 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, mpt-fusionlinux.pdl

On Tue, Feb 07, 2017 at 02:19:09PM +0100, Christoph Hellwig wrote:
> Patch 1-7 look fine to me with minor fixups, and I'd love to see
> them go into 4.11.

Any chance to see a resend of these?

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-15  8:15   ` Christoph Hellwig
@ 2017-02-15  8:19     ` Hannes Reinecke
  0 siblings, 0 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-15  8:19 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Martin K. Petersen, James Bottomley, linux-scsi, Sathya Prakash,
	Kashyap Desai, mpt-fusionlinux.pdl

On 02/15/2017 09:15 AM, Christoph Hellwig wrote:
> On Tue, Feb 07, 2017 at 02:19:09PM +0100, Christoph Hellwig wrote:
>> Patch 1-7 look fine to me with minor fixups, and I'd love to see
>> them go into 4.11.
> 
> Any chance to see a resend of these?
> 
Sure.

Will do shortly.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-13 13:11                       ` Hannes Reinecke
@ 2017-02-15  8:27                         ` Sreekanth Reddy
  2017-02-15  9:18                           ` Kashyap Desai
  0 siblings, 1 reply; 59+ messages in thread
From: Sreekanth Reddy @ 2017-02-15  8:27 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Kashyap Desai, Christoph Hellwig, Martin K. Petersen,
	James Bottomley, linux-scsi, Sathya Prakash Veerichetty,
	PDL-MPT-FUSIONLINUX

On Mon, Feb 13, 2017 at 6:41 PM, Hannes Reinecke <hare@suse.de> wrote:
> On 02/13/2017 07:15 AM, Sreekanth Reddy wrote:
>> On Fri, Feb 10, 2017 at 12:29 PM, Hannes Reinecke <hare@suse.de> wrote:
>>> On 02/10/2017 05:43 AM, Sreekanth Reddy wrote:
>>>> On Thu, Feb 9, 2017 at 6:42 PM, Hannes Reinecke <hare@suse.de> wrote:
>>>>> On 02/09/2017 02:03 PM, Sreekanth Reddy wrote:
>>> [ .. ]
>>>>>>
>>>>> [ .. ]
>>>>
>>>> Still, it looks as if you'd be using fewer jobs than you have CPUs.
>>> Which means you'll be running into a tag starvation scenario on those
>>> CPUs, especially for the small blocksizes.
>>> What are the results if you set 'numjobs' to the number of CPUs?
>>>
>>
>> Hannes,
>>
>> I tried on the Jens for-4.11/block kernel repo and also set each block
>> PD's scheduler to 'mq-deadline'; here are my results for 4K sequential
>> reads on md0 (raid0 with 4 drives). I have 24 CPUs and so also tried
>> setting numjobs=24.
>>
>> fio results when nr_hw_queues=1 (i.e. single request queue) with
>> various number of job counts
>>
>> 4k read when numjobs=1 : io=215553MB, bw=1437.9MB/s, iops=367874,
>> runt=150001msec
>> 4k read when numjobs=2 : io=307771MB, bw=2051.9MB/s, iops=525258,
>> runt=150001msec
>> 4k read when numjobs=4 : io=300382MB, bw=2002.6MB/s, iops=512644,
>> runt=150002msec
>> 4k read when numjobs=8 : io=320609MB, bw=2137.4MB/s, iops=547162,
>> runt=150003msec
>> 4k read when numjobs=24: io=275701MB, bw=1837.1MB/s, iops=470510,
>> runt=150006msec
>>
>> fio results when nr_hw_queues=24 (i.e. multiple request queue) with
>> various number of job counts,
>>
>> 4k read when numjobs=1 : io=177600MB, bw=1183.2MB/s, iops=303102,
>> runt=150001msec
>> 4k read when numjobs=2 : io=182416MB, bw=1216.1MB/s, iops=311320,
>> runt=150001msec
>> 4k read when numjobs=4 : io=347553MB, bw=2316.2MB/s, iops=593149,
>> runt=150002msec
>> 4k read when numjobs=8 : io=349995MB, bw=2333.3MB/s, iops=597312,
>> runt=150003msec
>> 4k read when numjobs=24: io=350618MB, bw=2337.4MB/s, iops=598359,
>> runt=150007msec
>>
>> At lower job counts the single queue performs better, whereas at
>> higher job counts multi-queue performs better.
>>
> Thank you for these numbers. They do very much fit with my results.
>
> So it's as I suspected; with more parallelism we do gain from
> multiqueue. And with single-issue processes we do suffer a performance
> penalty.
>
> However, I strongly suspect that this is an issue with block-mq itself,
> and not so much with mpt3sas.
> The reason is that block-mq needs to split the tag space into distinct
> ranges for each queue, and hence hits tag starvation far earlier the more
> queues are registered.
> block-mq _can_ work around this by moving the issuing process onto
> another CPU (and thus use the tag space from there), but this involves
> calling 'schedule' in the hot path, which might well account for the
> performance drop here.
>
> I will be doing more tests with a high nr_hw_queues count and a low I/O
> issuer count; I really do suspect that it's the block layer which is
> performing suboptimally here.
> In any case, we will be discussing blk-mq performance at LSF/MM this
> year; I will be bringing up the poor single-queue performance there.
>
> At the end of the day, I strongly suspect that every self-respecting
> process doing heavy I/O already _is_ multithreaded, so I would not
> try to optimize for the single-queue case.
>
> Cheers,
>
> Hannes


Hannes,

The result I posted last time was with merging enabled in the
block layer. If I disable merging then I don't see much
improvement with multiple hw request queues. Here is the result,

fio results when nr_hw_queues=1,
4k read when numjobs=24: io=248387MB, bw=1655.1MB/s, iops=423905,
runt=150003msec

fio results when nr_hw_queues=24,
4k read when numjobs=24: io=263904MB, bw=1759.4MB/s, iops=450393,
runt=150001msec

Thanks,
Sreekanth

^ permalink raw reply	[flat|nested] 59+ messages in thread

* RE: [PATCH 00/10] mpt3sas: full mq support
  2017-02-15  8:27                         ` Sreekanth Reddy
@ 2017-02-15  9:18                           ` Kashyap Desai
  2017-02-15 10:05                             ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Kashyap Desai @ 2017-02-15  9:18 UTC (permalink / raw)
  To: Sreekanth Reddy, Hannes Reinecke
  Cc: Christoph Hellwig, Martin K. Petersen, James Bottomley,
	linux-scsi, Sathya Prakash Veerichetty, PDL-MPT-FUSIONLINUX

>
>
> Hannes,
>
> Result I have posted last time is with merge operation enabled in block
> layer. If I disable merge operation then I don't see much improvement
> with
> multiple hw request queues. Here is the result,
>
> fio results when nr_hw_queues=1,
> 4k read when numjobs=24: io=248387MB, bw=1655.1MB/s, iops=423905,
> runt=150003msec
>
> fio results when nr_hw_queues=24,
> 4k read when numjobs=24: io=263904MB, bw=1759.4MB/s, iops=450393,
> runt=150001msec

Hannes -

 I worked with Sreekanth and we also understand the pros/cons of Patch #10.
" [PATCH 10/10] mpt3sas: scsi-mq interrupt steering"

In the above patch, the HBA's can_queue is divided based on the logical CPU
count; in effect we mimic a multi-queue mpt3sas HBA while distributing the
resources of what is actually a single submission H/W queue. This approach
badly impacts many performance areas.

nr_hw_queues = 1 is what I observe as the best-performing approach, since it
never throttles IO if sdev->queue_depth is set to the HBA queue depth.
With nr_hw_queues = "CPUs" we throttle IO at the SCSI level, since we never
allow more than the "updated can_queue" in the LLD.

The code below brings the actual HBA can_queue very low (e.g. on a CPU with
96 logical cores the new can_queue goes to 42, if the HBA queue depth is 4K).
It means we will see lots of IO throttling in the SCSI midlayer, because
shost->can_queue reaches its limit very soon if you have <fio> jobs with
higher QD.

	if (ioc->shost->nr_hw_queues > 1) {
		ioc->shost->nr_hw_queues = ioc->msix_vector_count;
		ioc->shost->can_queue /= ioc->msix_vector_count;
	}
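
As a quick sanity check of that division (a standalone snippet with
illustrative numbers, not driver code):

	#include <stdio.h>

	int main(void)
	{
		int can_queue = 4096;	/* HBA queue depth */
		int nr_cpus = 96;	/* logical cores == MSI-X vectors */

		/* integer division leaves each hw queue only 42 tags */
		printf("per-queue can_queue = %d\n", can_queue / nr_cpus);
		return 0;
	}
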
I observe a performance regression if I have 8 SSD drives attached to Ventura
(the latest IT controller). 16 fio jobs at QD=128 give ~1600K IOPS, but the
moment I switch to nr_hw_queues = "CPUs" I get hardly ~850K IOPS. This is
mainly because host_busy is stuck at a very low ~169 on my setup.

Maybe, as Sreekanth mentioned, the performance improvement you observed is
because nomerges=2 was not set and the OS attempted soft back/front merges.

I debugged a live machine and understood that we never see the parallel
instances of "scsi_dispatch_cmd" we expect, because can_queue is low. If we
really had a *very* large HBA QD, patch #10 exposing multiple SQs might be
useful.

For now, we are looking at an updated version of the patch which will keep
the IT HBA in SQ mode only (like we are doing in the <megaraid_sas> driver)
and add an interface to use blk_tag in both scsi-mq and !scsi-mq mode.
Sreekanth has already started working on it, but we may need a full
performance test run before posting the actual patch.
Maybe we can cherry-pick a few patches from this series and get blk_tag
support to improve <mpt3sas> performance later, without making nr_hw_queues
a tunable.

Thanks, Kashyap


>
> Thanks,
> Sreekanth

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-15  9:18                           ` Kashyap Desai
@ 2017-02-15 10:05                             ` Hannes Reinecke
  2017-02-16  9:48                               ` Kashyap Desai
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-15 10:05 UTC (permalink / raw)
  To: Kashyap Desai, Sreekanth Reddy
  Cc: Christoph Hellwig, Martin K. Petersen, James Bottomley,
	linux-scsi, Sathya Prakash Veerichetty, PDL-MPT-FUSIONLINUX

[-- Attachment #1: Type: text/plain, Size: 4722 bytes --]

On 02/15/2017 10:18 AM, Kashyap Desai wrote:
>>
>>
>> Hannes,
>>
>> Result I have posted last time is with merge operation enabled in block
>> layer. If I disable merge operation then I don't see much improvement
>> with
>> multiple hw request queues. Here is the result,
>>
>> fio results when nr_hw_queues=1,
>> 4k read when numjobs=24: io=248387MB, bw=1655.1MB/s, iops=423905,
>> runt=150003msec
>>
>> fio results when nr_hw_queues=24,
>> 4k read when numjobs=24: io=263904MB, bw=1759.4MB/s, iops=450393,
>> runt=150001msec
> 
> Hannes -
> 
>  I worked with Sreekanth and also understand pros/cons of Patch #10.
> " [PATCH 10/10] mpt3sas: scsi-mq interrupt steering"
> 
> In above patch, can_queue of HBA is divided based on logic CPU, it means we
> want to mimic as if mpt3sas HBA support multi queue distributing actual
> resources which is single Submission H/W Queue. This approach badly impact
> many performance areas.
> 
> nr_hw_queues = 1 is what I observe as best performance approach since it
> never throttle IO if sdev->queue_depth is set to HBA queue depth.
> In case of nr_hw_queues = "CPUs" throttle IO at SCSI level since we never
> allow more than "updated can_queue" in LLD.
> 
True.
And this was actually one of the things I wanted to demonstrate with
this patchset :-)
ATM blk-mq really works best when having a distinct tag space per
port/device. As soon as the hardware provides a _shared_ tag space you
end up with tag starvation issues as blk-mq only allows you to do a
static split of the available tagspace.
While this patchset demonstrates that the HBA itself _does_ benefit from
using block-mq (especially on highly parallel loads), it also
demonstrates that _block-mq_ has issues with singlethreaded loads on
this HBA (or, rather, type of HBA, as I doubt this issue is affecting
mpt3sas only).
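
Schematically, with illustrative numbers (this just mirrors the can_queue
division quoted below, not actual driver code):

	/* a shared 4K tag space statically split across 24 hw queues */
	struct blk_mq_tag_set set = {
		.nr_hw_queues = 24,
		.queue_depth  = 4096 / 24,	/* only ~170 tags per queue */
	};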

> Below code bring actual HBA can_queue very low ( Ea on 96 logical core CPU
> new can_queue goes to 42, if HBA queue depth is 4K). It means we will see
> lots of IO throttling in scsi mid layer due to shost->can_queue reach the
> limit very soon if you have <fio> jobs with higher QD.
> 
> 	if (ioc->shost->nr_hw_queues > 1) {
> 		ioc->shost->nr_hw_queues = ioc->msix_vector_count;
> 		ioc->shost->can_queue /= ioc->msix_vector_count;
> 	}
> I observe negative performance if I have 8 SSD drives attached to Ventura
> (latest IT controller). 16 fio jobs at QD=128 gives ~1600K IOPs and the
> moment I switch to nr_hw_queues = "CPUs", it gave hardly ~850K IOPs. This is
> mainly because of host_busy stuck at very low ~169 on my setup.
> 
Which actually might be an issue with the way scsi is hooked into blk-mq.
The SCSI stack is using 'can_queue' as a check for 'host_busy', i.e. whether
the host is capable of accepting more commands.
As we're limiting can_queue (to get the per-queue command depth
correctly) we should be using the _overall_ command depth for the
can_queue value itself to make the host_busy check work correctly.
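
For reference, a simplified sketch of the gating in question (the real
check lives in scsi_lib.c and also considers host_blocked etc.):

	/* the host is treated as busy once the number of outstanding
	 * commands reaches shost->can_queue */
	static bool host_is_busy(struct Scsi_Host *shost)
	{
		return shost->can_queue > 0 &&
		       atomic_read(&shost->host_busy) >= shost->can_queue;
	}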

I've attached a patch for that; can you test if it makes a difference?

> May be as Sreekanth mentioned, performance improvement you have observed is
> due to nomerges=2 is not set and OS will attempt soft back/front merge.
> 
> I debug live machine and understood we never see parallel instance of
> "scsi_dispatch_cmd" as we expect due to can_queue is less. If we really has
> *very* large HBA QD, this patch #10 to expose multiple SQ may be useful.
> 
As mentioned, the above patch might help here.
The patch actually _reduced_ throughput on my end, as the requests never
stayed long enough in the queue to be merged. Hence I've refrained from
posting it.
But as you're able to test with SSDs this patch really should make a
difference, and certainly should remove the arbitrary stalls due to
host_busy.

> For now, we are looking for updated version of patch which will only keep IT
> HBA in SQ mode (like we are doing in <megaraid_sas> driver) and add
> interface to use blk_tag in both scsi.mq and !scsi.mq mode.  Sreekanth has
> already started working on it, but we may need to check full performance
> test run to post the actual patch.
> May be we can cherry pick few patches from this series and get blk_tag
> support to improve performance of <mpt3sas> later which will not allow use
> to choose nr_hw_queue to be tunable.
> 
Sure, no problem with that.
I'll be preparing another submission round, and we can discuss how we go
from there.

Cheers,

Hannes
> Thanks, Kashyap
> 
> 
>>
>> Thanks,
>> Sreekanth


-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

[-- Attachment #2: 0001-mpt3sas-implement-shared_tags-SCSI-host-flag.patch --]
[-- Type: text/x-patch; name="0001-mpt3sas-implement-shared_tags-SCSI-host-flag.patch", Size: 3253 bytes --]

From df424c8618e0b06ded2d978818e6d3df4a54a61d Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 15 Feb 2017 10:58:01 +0100
Subject: [PATCH] mpt3sas: implement 'shared_tags' SCSI host flag

If the HBA implements a host-wide tagspace we should be signalling
this to the SCSI layer to avoid 'can_queue' being reduced, thereby
inducing I/O stalls.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c  | 5 ++---
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 ++
 drivers/scsi/scsi_lib.c              | 2 ++
 include/scsi/scsi_host.h             | 5 +++++
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 9e31cae..520aee4 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -3544,10 +3544,9 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
 	 */
 	ioc->shost->reserved_cmds = INTERNAL_SCSIIO_CMDS_COUNT;
 	ioc->shost->can_queue = ioc->scsiio_depth - ioc->shost->reserved_cmds;
-	if (ioc->shost->nr_hw_queues > 1) {
+	if (ioc->shost->nr_hw_queues > 1)
 		ioc->shost->nr_hw_queues = ioc->msix_vector_count;
-		ioc->shost->can_queue /= ioc->msix_vector_count;
-	}
+
 	dinitprintk(ioc, pr_info(MPT3SAS_FMT
 		"scsi host: can_queue depth (%d), nr_hw_queues (%d)\n",
 		ioc->name, ioc->shost->can_queue, ioc->shost->nr_hw_queues));
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 14f7a9d..4088e1a 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -8585,6 +8585,7 @@ static int scsih_map_queues(struct Scsi_Host *shost)
 	.shost_attrs			= mpt3sas_host_attrs,
 	.sdev_attrs			= mpt3sas_dev_attrs,
 	.track_queue_depth		= 1,
+	.shared_tags			= 1,
 	.cmd_size			= sizeof(struct scsiio_tracker),
 };
 
@@ -8624,6 +8625,7 @@ static int scsih_map_queues(struct Scsi_Host *shost)
 	.shost_attrs			= mpt3sas_host_attrs,
 	.sdev_attrs			= mpt3sas_dev_attrs,
 	.track_queue_depth		= 1,
+	.shared_tags			= 1,
 	.cmd_size			= sizeof(struct scsiio_tracker),
 };
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7100aaa..6bb06ed 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2143,6 +2143,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 	shost->tag_set.ops = &scsi_mq_ops;
 	shost->tag_set.nr_hw_queues = shost->nr_hw_queues ? : 1;
 	shost->tag_set.queue_depth = shost->can_queue;
+	if (shost->hostt->shared_tags)
+		shost->tag_set.queue_depth /= shost->nr_hw_queues;
 	shost->tag_set.reserved_tags = shost->reserved_cmds;
 	shost->tag_set.cmd_size = cmd_size;
 	shost->tag_set.numa_node = NUMA_NO_NODE;
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index cc83dd6..d344803 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -457,6 +457,11 @@ struct scsi_host_template {
 	unsigned no_async_abort:1;
 
 	/*
+	 * True if the host uses a shared tag space
+	 */
+	unsigned shared_tags:1;
+
+	/*
 	 * Countdown for host blocking with no commands outstanding.
 	 */
 	unsigned int max_host_blocked;
-- 
1.8.5.6


^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors
  2017-01-31  9:25 ` [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors Hannes Reinecke
  2017-02-07 13:15   ` Christoph Hellwig
@ 2017-02-16  9:32   ` Sreekanth Reddy
  2017-02-16 10:01     ` Hannes Reinecke
  1 sibling, 1 reply; 59+ messages in thread
From: Sreekanth Reddy @ 2017-02-16  9:32 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
> Cleanup the MSI-X handling allowing us to use
> the PCI-layer provided vector allocation.
>
> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>  drivers/scsi/mpt3sas/mpt3sas_base.c | 100 +++++++++++++++++-------------------
>  drivers/scsi/mpt3sas/mpt3sas_base.h |   2 -
>  2 files changed, 46 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
> index f00ef88..0185a8d 100644
> --- a/drivers/scsi/mpt3sas/mpt3sas_base.c
> +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
> @@ -1129,7 +1129,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>                 /* TMs are on msix_index == 0 */
>                 if (reply_q->msix_index == 0)
>                         continue;
> -               synchronize_irq(reply_q->vector);
> +               synchronize_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index));
>         }
>  }
>
> @@ -1818,11 +1818,8 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>
>         list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) {
>                 list_del(&reply_q->list);
> -               if (smp_affinity_enable) {
> -                       irq_set_affinity_hint(reply_q->vector, NULL);
> -                       free_cpumask_var(reply_q->affinity_hint);
> -               }
> -               free_irq(reply_q->vector, reply_q);
> +               free_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index),
> +                        reply_q);
>                 kfree(reply_q);
>         }
>  }
> @@ -1831,13 +1828,13 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>   * _base_request_irq - request irq
>   * @ioc: per adapter object
>   * @index: msix index into vector table
> - * @vector: irq vector
>   *
>   * Inserting respective reply_queue into the list.
>   */
>  static int
> -_base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index, u32 vector)
> +_base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index)
>  {
> +       struct pci_dev *pdev = ioc->pdev;
>         struct adapter_reply_queue *reply_q;
>         int r;
>
> @@ -1849,14 +1846,6 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>         }
>         reply_q->ioc = ioc;
>         reply_q->msix_index = index;
> -       reply_q->vector = vector;
> -
> -       if (smp_affinity_enable) {
> -               if (!zalloc_cpumask_var(&reply_q->affinity_hint, GFP_KERNEL)) {
> -                       kfree(reply_q);
> -                       return -ENOMEM;
> -               }
> -       }
>
>         atomic_set(&reply_q->busy, 0);
>         if (ioc->msix_enable)
> @@ -1865,12 +1854,11 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>         else
>                 snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d",
>                     ioc->driver_name, ioc->id);
> -       r = request_irq(vector, _base_interrupt, IRQF_SHARED, reply_q->name,
> -           reply_q);
> +       r = request_irq(pci_irq_vector(pdev, index), _base_interrupt,
> +                       IRQF_SHARED, reply_q->name, reply_q);
>         if (r) {
>                 pr_err(MPT3SAS_FMT "unable to allocate interrupt %d!\n",
> -                   reply_q->name, vector);
> -               free_cpumask_var(reply_q->affinity_hint);
> +                      reply_q->name, pci_irq_vector(pdev, index));
>                 kfree(reply_q);
>                 return -EBUSY;
>         }
> @@ -1906,6 +1894,21 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>         if (!nr_msix)
>                 return;
>
> +       if (smp_affinity_enable) {
> +               list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
> +                       const cpumask_t *mask = pci_irq_get_affinity(ioc->pdev,
> +                                                       reply_q->msix_index);
> +                       if (!mask) {
> +                               pr_warn(MPT3SAS_FMT "no affinity for msi %x\n",
> +                                       ioc->name, reply_q->msix_index);
> +                               continue;
> +                       }
> +
> +                       for_each_cpu(cpu, mask)
> +                               ioc->cpu_msix_table[cpu] = reply_q->msix_index;
> +               }
> +               return;
> +       }
>         cpu = cpumask_first(cpu_online_mask);
>
>         list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
> @@ -1919,18 +1922,9 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>                         group++;
>
>                 for (i = 0 ; i < group ; i++) {
> -                       ioc->cpu_msix_table[cpu] = index;
> -                       if (smp_affinity_enable)
> -                               cpumask_or(reply_q->affinity_hint,
> -                                  reply_q->affinity_hint, get_cpu_mask(cpu));
> +                       ioc->cpu_msix_table[cpu] = reply_q->msix_index;
>                         cpu = cpumask_next(cpu, cpu_online_mask);
>                 }
> -               if (smp_affinity_enable)
> -                       if (irq_set_affinity_hint(reply_q->vector,
> -                                          reply_q->affinity_hint))
> -                               dinitprintk(ioc, pr_info(MPT3SAS_FMT
> -                                "Err setting affinity hint to irq vector %d\n",
> -                                ioc->name, reply_q->vector));
>                 index++;
>         }
>  }
> @@ -1957,10 +1951,10 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>  static int
>  _base_enable_msix(struct MPT3SAS_ADAPTER *ioc)
>  {
> -       struct msix_entry *entries, *a;
>         int r;
>         int i, local_max_msix_vectors;
>         u8 try_msix = 0;
> +       unsigned int irq_flags = PCI_IRQ_MSIX;
>
>         if (msix_disable == -1 || msix_disable == 0)
>                 try_msix = 1;
> @@ -1972,7 +1966,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>                 goto try_ioapic;
>
>         ioc->reply_queue_count = min_t(int, ioc->cpu_count,
> -           ioc->msix_vector_count);
> +               ioc->msix_vector_count);
>
>         printk(MPT3SAS_FMT "MSI-X vectors supported: %d, no of cores"
>           ": %d, max_msix_vectors: %d\n", ioc->name, ioc->msix_vector_count,
> @@ -1993,46 +1987,42 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>         if (ioc->msix_vector_count < ioc->cpu_count)
>                 smp_affinity_enable = 0;
>
> -       entries = kcalloc(ioc->reply_queue_count, sizeof(struct msix_entry),
> -           GFP_KERNEL);
> -       if (!entries) {
> -               dfailprintk(ioc, pr_info(MPT3SAS_FMT
> -                       "kcalloc failed @ at %s:%d/%s() !!!\n",
> -                       ioc->name, __FILE__, __LINE__, __func__));
> -               goto try_ioapic;
> -       }
> +       if (smp_affinity_enable)
> +               irq_flags |= PCI_IRQ_AFFINITY;
>
> -       for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++)
> -               a->entry = i;
> -
> -       r = pci_enable_msix_exact(ioc->pdev, entries, ioc->reply_queue_count);
> -       if (r) {
> +       r = pci_alloc_irq_vectors(ioc->pdev, 1, ioc->reply_queue_count,
> +                                 irq_flags);
> +       if (r < 0) {
>                 dfailprintk(ioc, pr_info(MPT3SAS_FMT
> -                       "pci_enable_msix_exact failed (r=%d) !!!\n",
> +                       "pci_alloc_irq_vectors failed (r=%d) !!!\n",
>                         ioc->name, r));
> -               kfree(entries);
>                 goto try_ioapic;
>         }
>
>         ioc->msix_enable = 1;
> -       for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++) {
> -               r = _base_request_irq(ioc, i, a->vector);
> +       ioc->msix_vector_count = r;

Here we should also reinitialize ioc->reply_queue_count if
pci_alloc_irq_vectors returns fewer vectors than the requested MSI-X
vector count (i.e. ioc->reply_queue_count).
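
Something along these lines, perhaps (an untested sketch of the
suggestion, based on the hunk quoted above):

	r = pci_alloc_irq_vectors(ioc->pdev, 1, ioc->reply_queue_count,
				  irq_flags);
	if (r < 0)
		goto try_ioapic;
	ioc->msix_vector_count = r;
	/* shrink reply_queue_count to the number of vectors granted */
	ioc->reply_queue_count = r;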

Thanks,
Sreekanth

> +       for (i = 0; i < ioc->msix_vector_count; i++) {
> +               r = _base_request_irq(ioc, i);
>                 if (r) {
>                         _base_free_irq(ioc);
>                         _base_disable_msix(ioc);
> -                       kfree(entries);
>                         goto try_ioapic;
>                 }
>         }
>
> -       kfree(entries);
>         return 0;
>
>  /* failback to io_apic interrupt routing */
>   try_ioapic:
>
>         ioc->reply_queue_count = 1;
> -       r = _base_request_irq(ioc, 0, ioc->pdev->irq);
> +       r = pci_alloc_irq_vectors(ioc->pdev, 1, 1, PCI_IRQ_LEGACY);
> +       if (r < 0) {
> +               dfailprintk(ioc, pr_info(MPT3SAS_FMT
> +                       "pci_alloc_irq_vector(legacy) failed (r=%d) !!!\n",
> +                       ioc->name, r));
> +       } else
> +               r = _base_request_irq(ioc, 0);
>
>         return r;
>  }
> @@ -2203,7 +2193,8 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>         list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
>                 pr_info(MPT3SAS_FMT "%s: IRQ %d\n",
>                     reply_q->name,  ((ioc->msix_enable) ? "PCI-MSI-X enabled" :
> -                   "IO-APIC enabled"), reply_q->vector);
> +                   "IO-APIC enabled"),
> +                   pci_irq_vector(ioc->pdev, reply_q->msix_index));
>
>         pr_info(MPT3SAS_FMT "iomem(0x%016llx), mapped(0x%p), size(%d)\n",
>             ioc->name, (unsigned long long)chip_phys, ioc->chip, memap_sz);
> @@ -5338,7 +5329,8 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>                     sizeof(resource_size_t *), GFP_KERNEL);
>                 if (!ioc->reply_post_host_index) {
>                         dfailprintk(ioc, pr_info(MPT3SAS_FMT "allocation "
> -                               "for cpu_msix_table failed!!!\n", ioc->name));
> +                               "for reply_post_host_index failed!!!\n",
> +                               ioc->name));
>                         r = -ENOMEM;
>                         goto out_free_resources;
>                 }
> diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
> index 394fe13..505574e 100644
> --- a/drivers/scsi/mpt3sas/mpt3sas_base.h
> +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
> @@ -719,12 +719,10 @@ struct _event_ack_list {
>  struct adapter_reply_queue {
>         struct MPT3SAS_ADAPTER  *ioc;
>         u8                      msix_index;
> -       unsigned int            vector;
>         u32                     reply_post_host_index;
>         Mpi2ReplyDescriptorsUnion_t *reply_post_free;
>         char                    name[MPT_NAME_LENGTH];
>         atomic_t                busy;
> -       cpumask_var_t           affinity_hint;
>         struct list_head        list;
>  };
>
> --
> 1.8.5.6
>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* RE: [PATCH 00/10] mpt3sas: full mq support
  2017-02-15 10:05                             ` Hannes Reinecke
@ 2017-02-16  9:48                               ` Kashyap Desai
  2017-02-16 10:18                                 ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Kashyap Desai @ 2017-02-16  9:48 UTC (permalink / raw)
  To: Hannes Reinecke, Sreekanth Reddy
  Cc: Christoph Hellwig, Martin K. Petersen, James Bottomley,
	linux-scsi, Sathya Prakash Veerichetty, PDL-MPT-FUSIONLINUX

> -----Original Message-----
> From: Hannes Reinecke [mailto:hare@suse.de]
> Sent: Wednesday, February 15, 2017 3:35 PM
> To: Kashyap Desai; Sreekanth Reddy
> Cc: Christoph Hellwig; Martin K. Petersen; James Bottomley; linux-
> scsi@vger.kernel.org; Sathya Prakash Veerichetty; PDL-MPT-FUSIONLINUX
> Subject: Re: [PATCH 00/10] mpt3sas: full mq support
>
> On 02/15/2017 10:18 AM, Kashyap Desai wrote:
> >>
> >>
> >> Hannes,
> >>
> >> Result I have posted last time is with merge operation enabled in
> >> block layer. If I disable merge operation then I don't see much
> >> improvement with multiple hw request queues. Here is the result,
> >>
> >> fio results when nr_hw_queues=1,
> >> 4k read when numjobs=24: io=248387MB, bw=1655.1MB/s, iops=423905,
> >> runt=150003msec
> >>
> >> fio results when nr_hw_queues=24,
> >> 4k read when numjobs=24: io=263904MB, bw=1759.4MB/s, iops=450393,
> >> runt=150001msec
> >
> > Hannes -
> >
> >  I worked with Sreekanth and also understand pros/cons of Patch #10.
> > " [PATCH 10/10] mpt3sas: scsi-mq interrupt steering"
> >
> > In above patch, can_queue of HBA is divided based on logic CPU, it
> > means we want to mimic as if mpt3sas HBA support multi queue
> > distributing actual resources which is single Submission H/W Queue.
> > This approach badly impact many performance areas.
> >
> > nr_hw_queues = 1 is what I observe as best performance approach since
> > it never throttle IO if sdev->queue_depth is set to HBA queue depth.
> > In case of nr_hw_queues = "CPUs" throttle IO at SCSI level since we
> > never allow more than "updated can_queue" in LLD.
> >
> True.
> And this was actually one of the things I wanted to demonstrate with this
> patchset :-) ATM blk-mq really works best when having a distinct tag space
> per port/device. As soon as the hardware provides a _shared_ tag space you
> end up with tag starvation issues as blk-mq only allows you to do a static
> split of the available tagspace.
> While this patchset demonstrates that the HBA itself _does_ benefit from
> using block-mq (especially on highly parallel loads), it also demonstrates
> that
> _block-mq_ has issues with singlethreaded loads on this HBA (or, rather,
> type of HBA, as I doubt this issue is affecting mpt3sas only).
>
> > Below code bring actual HBA can_queue very low ( Ea on 96 logical core
> > CPU new can_queue goes to 42, if HBA queue depth is 4K). It means we
> > will see lots of IO throttling in scsi mid layer due to
> > shost->can_queue reach the limit very soon if you have <fio> jobs with
> higher QD.
> >
> > 	if (ioc->shost->nr_hw_queues > 1) {
> > 		ioc->shost->nr_hw_queues = ioc->msix_vector_count;
> > 		ioc->shost->can_queue /= ioc->msix_vector_count;
> > 	}
> > I observe negative performance if I have 8 SSD drives attached to
> > Ventura (latest IT controller). 16 fio jobs at QD=128 gives ~1600K
> > IOPs and the moment I switch to nr_hw_queues = "CPUs", it gave hardly
> > ~850K IOPs. This is mainly because of host_busy stuck at very low ~169
> > on
> my setup.
> >
> Which actually might be an issue with the way scsi is hooked into blk-mq.
> The SCSI stack is using 'can_queue' as a check for 'host_busy', ie if the
> host is
> capable of accepting more commands.
> As we're limiting can_queue (to get the per-queue command depth
> correctly) we should be using the _overall_ command depth for the
> can_queue value itself to make the host_busy check work correctly.
>
> I've attached a patch for that; can you test if it makes a difference?
Hannes -
The attached patch works fine for me. FYI - we need to set the device queue
depth to can_queue, which we are currently not doing in the mpt3sas driver.
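
(A minimal sketch of that, assuming it is done from the driver's
slave_configure callback:)

	/* raise the per-device depth to the host-wide limit */
	scsi_change_queue_depth(sdev, sdev->host->can_queue);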

With the attached patch I see a ~2-3% improvement running multiple jobs;
the single-job profile shows no difference.

So it looks like we can reach full performance with a single hardware queue.

We have some patches to send, so we want to know how to rebase this patch
series, as a few patches are coming from Broadcom. Can we consider the plan
below?

- Patches 1-7 will be reposted. Also, Sreekanth will complete his review of
the existing patches 1-7.
- We need blk_tag support only for nr_hw_queues = 1.

With that said, we will have many code changes/functions without the
"shost_use_blk_mq" check, assuming a <mpt3sas> driver that supports a single
nr_hw_queues.

E.g. the function below can be simplified - just take the tag from
scmd->request, with no need to check shost_use_blk_mq + nr_hw_queues etc.:

u16
mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx,
        struct scsi_cmnd *scmd)
{
        struct scsiio_tracker *request;
        /* blk-mq already assigned a unique tag; the smid is simply tag + 1 */
        u16 smid = scmd->request->tag + 1;
         ...
         return request->smid;
}

- Later we can explore whether more than one hardware queue really adds a
benefit. From current limited testing, I don't see a major performance boost
with nr_hw_queues greater than one.

>
> > May be as Sreekanth mentioned, performance improvement you have
> > observed is due to nomerges=2 is not set and OS will attempt soft
> back/front merge.
> >
> > I debug live machine and understood we never see parallel instance of
> > "scsi_dispatch_cmd" as we expect due to can_queue is less. If we
> > really has
> > *very* large HBA QD, this patch #10 to expose multiple SQ may be useful.
> >
> As mentioned, the above patch might help here.
> The patch actually _reduced_ throughput on my end, as the requests never
> stayed long enough in the queue to be merged. Hence I've refrained from
> posting it.
> But as you're able to test with SSDs this patch really should make a
> difference, and certainly should remove the arbitrary stalls due to
> host_busy.
>
> > For now, we are looking for updated version of patch which will only
> > keep IT HBA in SQ mode (like we are doing in <megaraid_sas> driver)
> > and add interface to use blk_tag in both scsi.mq and !scsi.mq mode.
> > Sreekanth has already started working on it, but we may need to check
> > full performance test run to post the actual patch.
> > May be we can cherry pick few patches from this series and get blk_tag
> > support to improve performance of <mpt3sas> later which will not allow
> > use to choose nr_hw_queue to be tunable.
> >
> Sure, no problem with that.
> I'll be preparing another submission round, and we can discuss how we go
> from there.
>
> Cheers,
>
> Hannes
> > Thanks, Kashyap
> >
> >
> >>
> >> Thanks,
> >> Sreekanth
>
>
> --
> Dr. Hannes Reinecke		   Teamlead Storage & Networking
> hare@suse.de			               +49 911 74053 688
> SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
> GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton HRB 21284
> (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 04/10] mpt3sas: separate out _base_recovery_check()
  2017-01-31  9:25 ` [PATCH 04/10] mpt3sas: separate out _base_recovery_check() Hannes Reinecke
  2017-02-07 13:16   ` Christoph Hellwig
@ 2017-02-16  9:53   ` Sreekanth Reddy
  2017-02-16 10:03     ` Hannes Reinecke
  1 sibling, 1 reply; 59+ messages in thread
From: Sreekanth Reddy @ 2017-02-16  9:53 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
> No functional change.
>
> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>  drivers/scsi/mpt3sas/mpt3sas_base.c | 23 ++++++++++++++---------
>  1 file changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
> index 120b317..dd14596 100644
> --- a/drivers/scsi/mpt3sas/mpt3sas_base.c
> +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
> @@ -2366,6 +2366,19 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>  }
>
>  static void
> +_base_recovery_check(struct MPT3SAS_ADAPTER *ioc)
> +{
> +       /*
> +        * See _wait_for_commands_to_complete() call with regards to this code.
> +        */
> +       if (ioc->shost_recovery && ioc->pending_io_count) {
> +               if (ioc->pending_io_count == 1)
> +                       wake_up(&ioc->reset_wq);
> +               ioc->pending_io_count = 0;

I think here we should decrement pending_io_count by one instead of
setting it to zero, in order to keep the same behavior as before.

Thanks,
Sreekanth

> +       }
> +}
> +
> +static void
>  _dechain_st(struct MPT3SAS_ADAPTER *ioc, struct scsiio_tracker *st)
>  {
>         struct chain_tracker *chain_req;
> @@ -2402,15 +2415,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>                 list_add(&ioc->scsi_lookup[i].tracker_list, &ioc->free_list);
>                 spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
>
> -               /*
> -                * See _wait_for_commands_to_complete() call with regards
> -                * to this code.
> -                */
> -               if (ioc->shost_recovery && ioc->pending_io_count) {
> -                       if (ioc->pending_io_count == 1)
> -                               wake_up(&ioc->reset_wq);
> -                       ioc->pending_io_count--;
> -               }
> +               _base_recovery_check(ioc);
>                 return;
>         } else if (smid < ioc->internal_smid) {
>                 /* hi-priority */
> --
> 1.8.5.6
>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 05/10] mpt3sas: open-code _scsih_scsi_lookup_get()
  2017-01-31  9:25 ` [PATCH 05/10] mpt3sas: open-code _scsih_scsi_lookup_get() Hannes Reinecke
  2017-02-07 13:16   ` Christoph Hellwig
@ 2017-02-16  9:59   ` Sreekanth Reddy
  2017-02-16 10:04     ` Hannes Reinecke
  1 sibling, 1 reply; 59+ messages in thread
From: Sreekanth Reddy @ 2017-02-16  9:59 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
> Just a wrapper around the scsi lookup array and only used
> in one place, so open-code it.

I think this whole series of patches was created against the 14.101.00.00
version of the driver, not the latest 15.100.00.00 driver. So this patch
will conflict with the 15.100.00.00 driver, i.e. with commit
459325c466d278d3c9f51ddc9bb544c014136fd1 ("mpt3sas: Fix for Crusader to
achieve product targets with SAS devices").

Thanks,
Sreekanth

>
> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>  drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 +--------------
>  1 file changed, 1 insertion(+), 14 deletions(-)
>
> diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
> index b5c966e..979f95a 100644
> --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
> +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
> @@ -1061,19 +1061,6 @@ struct _sas_node *
>  }
>
>  /**
> - * _scsih_scsi_lookup_get - returns scmd entry
> - * @ioc: per adapter object
> - * @smid: system request message index
> - *
> - * Returns the smid stored scmd pointer.
> - */
> -static struct scsi_cmnd *
> -_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid)
> -{
> -       return ioc->scsi_lookup[smid - 1].scmd;
> -}
> -
> -/**
>   * _scsih_scsi_lookup_get_clear - returns scmd entry
>   * @ioc: per adapter object
>   * @smid: system request message index
> @@ -6101,7 +6088,7 @@ static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
>         for (smid = 1; smid <= ioc->scsiio_depth; smid++) {
>                 if (ioc->shost_recovery)
>                         goto out;
> -               scmd = _scsih_scsi_lookup_get(ioc, smid);
> +               scmd = ioc->scsi_lookup[smid - 1].scmd;
>                 if (!scmd)
>                         continue;
>                 sdev = scmd->device;
> --
> 1.8.5.6
>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors
  2017-02-16  9:32   ` Sreekanth Reddy
@ 2017-02-16 10:01     ` Hannes Reinecke
  0 siblings, 0 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-16 10:01 UTC (permalink / raw)
  To: Sreekanth Reddy
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On 02/16/2017 10:32 AM, Sreekanth Reddy wrote:
> On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
>> Cleanup the MSI-X handling allowing us to use
>> the PCI-layer provided vector allocation.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>> ---
>>  drivers/scsi/mpt3sas/mpt3sas_base.c | 100 +++++++++++++++++-------------------
>>  drivers/scsi/mpt3sas/mpt3sas_base.h |   2 -
>>  2 files changed, 46 insertions(+), 56 deletions(-)
>>
[ .. ]
>>
>>         ioc->msix_enable = 1;
>> -       for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++) {
>> -               r = _base_request_irq(ioc, i, a->vector);
>> +       ioc->msix_vector_count = r;
> 
> Here we should also reinitialize ioc->reply_queue_count if
> pci_alloc_irq_vectors returns with value less than requested MSIx
> vector count (i.e. ioc->reply_queue_count).
> 
Sigh. The usual confusion between 'reply_queue_count' and
'msix_vector_count'.

I have _tried_ to make 'reply_queue_count' the number of _allocated_
reply queues, and 'msix_vector_count' the number of MSI-x vectors allocated.
As pci_alloc_irq_vectors might return fewer vectors than specified
(but we don't modify the allocation for reply_queue_count in that case),
I considered it an error to modify reply_queue_count, too.

But I'd _very_ much appreciate if we could clear up the confusion and
fix the meaning of 'reply_queue_count' and 'msix_vector_count'.
ATM I don't think we have a consistent usage here.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 04/10] mpt3sas: separate out _base_recovery_check()
  2017-02-16  9:53   ` Sreekanth Reddy
@ 2017-02-16 10:03     ` Hannes Reinecke
  0 siblings, 0 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-16 10:03 UTC (permalink / raw)
  To: Sreekanth Reddy
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On 02/16/2017 10:53 AM, Sreekanth Reddy wrote:
> On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
>> No functional change.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>> ---
>>  drivers/scsi/mpt3sas/mpt3sas_base.c | 23 ++++++++++++++---------
>>  1 file changed, 14 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
>> index 120b317..dd14596 100644
>> --- a/drivers/scsi/mpt3sas/mpt3sas_base.c
>> +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
>> @@ -2366,6 +2366,19 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
>>  }
>>
>>  static void
>> +_base_recovery_check(struct MPT3SAS_ADAPTER *ioc)
>> +{
>> +       /*
>> +        * See _wait_for_commands_to_complete() call with regards to this code.
>> +        */
>> +       if (ioc->shost_recovery && ioc->pending_io_count) {
>> +               if (ioc->pending_io_count == 1)
>> +                       wake_up(&ioc->reset_wq);
>> +               ioc->pending_io_count = 0;
> 
> I think here we should reduce the pending_io_count by one instead of
> assigning it to zero in-order to have same behavior as before.
> 
Ouch, indeed. My fault.

Will be fixing it up.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 05/10] mpt3sas: open-code _scsih_scsi_lookup_get()
  2017-02-16  9:59   ` Sreekanth Reddy
@ 2017-02-16 10:04     ` Hannes Reinecke
  0 siblings, 0 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-16 10:04 UTC (permalink / raw)
  To: Sreekanth Reddy
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On 02/16/2017 10:59 AM, Sreekanth Reddy wrote:
> On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
>> Just a wrapper around the scsi lookup array and only used
>> in one place, so open-code it.
> 
> I think this whole series of patches created over 14.101.00.00 version
> driver not on the latest 15.100.00.00 driver. So this patch will
> conflict with 15.100.00.00 version driver i.e. with commit
> 459325c466d278d3c9f51ddc9bb544c014136fd1(mpt3sas: Fix for Crusader to
> achieve product targets with SAS devices).
> 
Correct, I just noticed it.
Will be rebasing it.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs
  2017-01-31  9:25 ` [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs Hannes Reinecke
  2017-02-07 13:19   ` Christoph Hellwig
@ 2017-02-16 10:09   ` Sreekanth Reddy
  2017-02-16 10:14     ` Hannes Reinecke
  1 sibling, 1 reply; 59+ messages in thread
From: Sreekanth Reddy @ 2017-02-16 10:09 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
> When sending a TMF via the ioctl interface we should be using
> the hi-priority queue instead of the scsi queue to be consistent
> with overall TMF usage.
>
> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>  drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
> index 95f0f24..e952175 100644
> --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
> +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
> @@ -720,7 +720,7 @@ enum block_state {
>                 }
>         } else {
>
> -               smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->ctl_cb_idx, NULL);
> +               smid = mpt3sas_base_get_smid_hpr(ioc, ioc->ctl_cb_idx);

This else branch is not for the TM path; it is for the IO path. For a TM
request we are already using an smid from the hi-priority queue, as shown
below:

if (mpi_request->Function == MPI2_FUNCTION_SCSI_TASK_MGMT) {
                smid = mpt3sas_base_get_smid_hpr(ioc, ioc->ctl_cb_idx);
                if (!smid) {
                        pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n",
                            ioc->name, __func__);
                        ret = -EAGAIN;
                        goto out;
                }
        } else {

                smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->ctl_cb_idx, NULL);

Thanks,
Sreekanth

>                 if (!smid) {
>                         pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n",
>                             ioc->name, __func__);
> --
> 1.8.5.6
>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs
  2017-02-16 10:09   ` Sreekanth Reddy
@ 2017-02-16 10:14     ` Hannes Reinecke
  2017-02-16 10:23       ` Sreekanth Reddy
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-16 10:14 UTC (permalink / raw)
  To: Sreekanth Reddy
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On 02/16/2017 11:09 AM, Sreekanth Reddy wrote:
> On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
>> When sending a TMF via the ioctl interface we should be using
>> the hi-priority queue instead of the scsi queue to be consistent
>> with overall TMF usage.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>> ---
>>  drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
>> index 95f0f24..e952175 100644
>> --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
>> +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
>> @@ -720,7 +720,7 @@ enum block_state {
>>                 }
>>         } else {
>>
>> -               smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->ctl_cb_idx, NULL);
>> +               smid = mpt3sas_base_get_smid_hpr(ioc, ioc->ctl_cb_idx);
> 
> This else part is not for TM path, It is for IO path. For the TM
> request we are already using smid from hpr queue, as shown below
> 
> if (mpi_request->Function == MPI2_FUNCTION_SCSI_TASK_MGMT) {
>                 smid = mpt3sas_base_get_smid_hpr(ioc, ioc->ctl_cb_idx);
>                 if (!smid) {
>                         pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n",
>                             ioc->name, __func__);
>                         ret = -EAGAIN;
>                         goto out;
>                 }
>         } else {
> 
>                 smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->ctl_cb_idx, NULL);
> 
Yes, I know.
But the current method of inserting a SCSI command completely goes
against blk-mq request usage; with blk-mq the tags are managed by the
block layer, so you need to integrate with that to get a correct tag.

Is this _really_ crucial functionality? Can't we just drop it and use
sg/bsg for this?
It would make my life _so_ much easier; the alternative would be to
either implement 'real' reserved command handling or rewrite the above
code-path in terms of 'struct request', packing and unpacking as we go.
Neither is very appealing.
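
For illustration, the 'struct request' variant might look roughly like
this (a hypothetical sketch only, assuming the 4.11-era block API; error
handling and the actual MPI request setup are omitted):

	struct request *rq;

	/* let blk-mq allocate the request, so the tag we use is
	 * valid in its tag space */
	rq = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN,
			     GFP_KERNEL);
	if (IS_ERR(rq))
		return PTR_ERR(rq);
	smid = rq->tag + 1;	/* consistent with the blk-mq tag space */
	/* ... pack and issue the MPI SCSI IO request using this smid ... */
	blk_put_request(rq);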

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 00/10] mpt3sas: full mq support
  2017-02-16  9:48                               ` Kashyap Desai
@ 2017-02-16 10:18                                 ` Hannes Reinecke
  2017-02-16 10:45                                   ` Kashyap Desai
  0 siblings, 1 reply; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-16 10:18 UTC (permalink / raw)
  To: Kashyap Desai, Sreekanth Reddy
  Cc: Christoph Hellwig, Martin K. Petersen, James Bottomley,
	linux-scsi, Sathya Prakash Veerichetty, PDL-MPT-FUSIONLINUX

On 02/16/2017 10:48 AM, Kashyap Desai wrote:
>> -----Original Message-----
>> From: Hannes Reinecke [mailto:hare@suse.de]
>> Sent: Wednesday, February 15, 2017 3:35 PM
>> To: Kashyap Desai; Sreekanth Reddy
>> Cc: Christoph Hellwig; Martin K. Petersen; James Bottomley; linux-
>> scsi@vger.kernel.org; Sathya Prakash Veerichetty; PDL-MPT-FUSIONLINUX
>> Subject: Re: [PATCH 00/10] mpt3sas: full mq support
>>
>> On 02/15/2017 10:18 AM, Kashyap Desai wrote:
>>>>
>>>>
>>>> Hannes,
>>>>
>>>> Result I have posted last time is with merge operation enabled in
>>>> block layer. If I disable merge operation then I don't see much
>>>> improvement with multiple hw request queues. Here is the result,
>>>>
>>>> fio results when nr_hw_queues=1,
>>>> 4k read when numjobs=24: io=248387MB, bw=1655.1MB/s, iops=423905,
>>>> runt=150003msec
>>>>
>>>> fio results when nr_hw_queues=24,
>>>> 4k read when numjobs=24: io=263904MB, bw=1759.4MB/s, iops=450393,
>>>> runt=150001msec
>>>
>>> Hannes -
>>>
>>>  I worked with Sreekanth and also understand pros/cons of Patch #10.
>>> " [PATCH 10/10] mpt3sas: scsi-mq interrupt steering"
>>>
>>> In above patch, can_queue of HBA is divided based on logic CPU, it
>>> means we want to mimic as if mpt3sas HBA support multi queue
>>> distributing actual resources which is single Submission H/W Queue.
>>> This approach badly impact many performance areas.
>>>
>>> nr_hw_queues = 1 is what I observe as best performance approach since
>>> it never throttle IO if sdev->queue_depth is set to HBA queue depth.
>>> In case of nr_hw_queues = "CPUs" throttle IO at SCSI level since we
>>> never allow more than "updated can_queue" in LLD.
>>>
>> True.
>> And this was actually one of the things I wanted to demonstrate with this
>> patchset :-) ATM blk-mq really works best when having a distinct tag space
>> per port/device. As soon as the hardware provides a _shared_ tag space you
>> end up with tag starvation issues as blk-mq only allows you to do a static
>> split of the available tagspace.
>> While this patchset demonstrates that the HBA itself _does_ benefit from
>> using block-mq (especially on highly parallel loads), it also demonstrates
>> that
>> _block-mq_ has issues with singlethreaded loads on this HBA (or, rather,
>> type of HBA, as I doubt this issue is affecting mpt3sas only).
>>
>>> Below code bring actual HBA can_queue very low ( Ea on 96 logical core
>>> CPU new can_queue goes to 42, if HBA queue depth is 4K). It means we
>>> will see lots of IO throttling in scsi mid layer due to
>>> shost->can_queue reach the limit very soon if you have <fio> jobs with
>> higher QD.
>>>
>>> 	if (ioc->shost->nr_hw_queues > 1) {
>>> 		ioc->shost->nr_hw_queues = ioc->msix_vector_count;
>>> 		ioc->shost->can_queue /= ioc->msix_vector_count;
>>> 	}
>>> I observe negative performance if I have 8 SSD drives attached to
>>> Ventura (latest IT controller). 16 fio jobs at QD=128 gives ~1600K
>>> IOPs and the moment I switch to nr_hw_queues = "CPUs", it gave hardly
>>> ~850K IOPs. This is mainly because of host_busy stuck at very low ~169
>>> on
>> my setup.
>>>
>> Which actually might be an issue with the way scsi is hooked into blk-mq.
>> The SCSI stack is using 'can_queue' as a check for 'host_busy', ie if the
>> host is
>> capable of accepting more commands.
>> As we're limiting can_queue (to get the per-queue command depth
>> correctly) we should be using the _overall_ command depth for the
>> can_queue value itself to make the host_busy check work correctly.
>>
>> I've attached a patch for that; can you test if it makes a difference?
> Hannes -
> Attached patch works fine for me. FYI -  We need to set device queue depth
> to can_queue as we are currently not doing in mpt3sas driver.
> 
> With attached patch when I tried, I see ~2-3% improvement running multiple
> jobs. Single job profile no difference.
> 
> So looks like we are good to reach performance with single nr_hw_queues.
> 
Whee, cool.

> We have some patches to be send so want to know how to rebase this patch
> series as few patches coming from Broadcom. Can we consider below as plan ?
> 
Sure, can do.

> - Patches from 1-7 will be reposted. Also Sreekanth will complete review on
> existing patch 1-7.
> - We need blk_tag support only for nr_hw_queue = 1.
> 
> With that say, we will have many code changes/function without "
> shost_use_blk_mq" check and assume it is single nr_hw_queue supported
> <mpt3sas> driver.
> 
> Ea - Below function can be simplify - just refer tag from scmd->request and
> don't need check of shost_use_blk_mq + nr_hw_queue etc..
> 
> u16
> mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx,
>         struct scsi_cmnd *scmd)
> {
>         struct scsiio_tracker *request;
>         u16 smid = scmd->request->tag +  1;
>          ...
>          return request->smid;
> }
> 
> - Later we can explore if nr_hw_queue more than one really add benefit.
> From current limited testing, I don't see major performance boost if we have
> nr_hw_queue more than one.
> 
Well, the _actual_ code to support mq is rather trivial, and really
serves as a good testbed for scsi-mq.
I would prefer to leave it in, and disable it via a module parameter.

But in either case, I can rebase the patches to leave any notions of
'nr_hw_queues' to patch 8 for implementing full mq support.

And we need to discuss how to handle MPI2_FUNCTION_SCSI_IO_REQUEST; the
current method doesn't work with blk-mq.
I really would like to see that go, especially as sg/bsg supports the
same functionality ...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs
  2017-02-16 10:14     ` Hannes Reinecke
@ 2017-02-16 10:23       ` Sreekanth Reddy
  2017-02-16 10:26         ` Hannes Reinecke
  0 siblings, 1 reply; 59+ messages in thread
From: Sreekanth Reddy @ 2017-02-16 10:23 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On Thu, Feb 16, 2017 at 3:44 PM, Hannes Reinecke <hare@suse.de> wrote:
> On 02/16/2017 11:09 AM, Sreekanth Reddy wrote:
>> On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
>>> When sending a TMF via the ioctl interface we should be using
>>> the hi-priority queue instead of the scsi queue to be consistent
>>> with overall TMF usage.
>>>
>>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>>> ---
>>>  drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +-
>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
>>> index 95f0f24..e952175 100644
>>> --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
>>> +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
>>> @@ -720,7 +720,7 @@ enum block_state {
>>>                 }
>>>         } else {
>>>
>>> -               smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->ctl_cb_idx, NULL);
>>> +               smid = mpt3sas_base_get_smid_hpr(ioc, ioc->ctl_cb_idx);
>>
>> This else part is not for the TM path; it is for the IO path. For TM
>> requests we are already using a smid from the hpr queue, as shown below:
>>
>> if (mpi_request->Function == MPI2_FUNCTION_SCSI_TASK_MGMT) {
>>                 smid = mpt3sas_base_get_smid_hpr(ioc, ioc->ctl_cb_idx);
>>                 if (!smid) {
>>                         pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n",
>>                             ioc->name, __func__);
>>                         ret = -EAGAIN;
>>                         goto out;
>>                 }
>>         } else {
>>
>>                 smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->ctl_cb_idx, NULL);
>>
> Yes, I know.
> But the current method of inserting a SCSI command completely goes
> against blk-mq request usage; with blk-mq the tags are managed with the
> block layer, so you need to integrate with that to get a correct tag.
>
> Is this _really_ a crucial functionality? Can't we just drop it and use
> sg/bsg for this?
> It would make my life _so_ much easier; the alternative would be to
> either implement 'real' reserved command handling or rewrite the above
> code-path in terms of 'struct request', packing and unpacking as we go.
> Neither is very appealing.

I think it is better to take one request frame out of the can_queue
count and reserve it for this ioctl SCSI IO, since we only allow one
ioctl command at a time.
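
A rough sketch of that idea (hypothetical code; it assumes the highest
smid is kept out of the block layer's tag space):

	/* expose one command less to the block layer */
	shost->can_queue = ioc->scsiio_depth - 1;

	/* ioctl path: the reserved frame is always the highest smid,
	 * serialized by the existing one-ioctl-at-a-time mutex */
	smid = ioc->scsiio_depth;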

Thanks,
Sreekanth

>
> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke                Teamlead Storage & Networking
> hare@suse.de                                   +49 911 74053 688
> SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
> GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
> HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs
  2017-02-16 10:23       ` Sreekanth Reddy
@ 2017-02-16 10:26         ` Hannes Reinecke
  0 siblings, 0 replies; 59+ messages in thread
From: Hannes Reinecke @ 2017-02-16 10:26 UTC (permalink / raw)
  To: Sreekanth Reddy
  Cc: Martin K. Petersen, James Bottomley, Christoph Hellwig,
	linux-scsi, Sathya Prakash, Kashyap Desai, PDL-MPT-FUSIONLINUX,
	Hannes Reinecke

On 02/16/2017 11:23 AM, Sreekanth Reddy wrote:
> On Thu, Feb 16, 2017 at 3:44 PM, Hannes Reinecke <hare@suse.de> wrote:
>> On 02/16/2017 11:09 AM, Sreekanth Reddy wrote:
>>> On Tue, Jan 31, 2017 at 2:55 PM, Hannes Reinecke <hare@suse.de> wrote:
>>>> When sending a TMF via the ioctl interface we should be using
>>>> the hi-priority queue instead of the scsi queue to be consistent
>>>> with overall TMF usage.
>>>>
>>>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>>>> ---
>>>>  drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +-
>>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
>>>> index 95f0f24..e952175 100644
>>>> --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
>>>> +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
>>>> @@ -720,7 +720,7 @@ enum block_state {
>>>>                 }
>>>>         } else {
>>>>
>>>> -               smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->ctl_cb_idx, NULL);
>>>> +               smid = mpt3sas_base_get_smid_hpr(ioc, ioc->ctl_cb_idx);
>>>
>>> This else part is not for the TM path; it is for the IO path. For TM
>>> requests we are already using a smid from the hpr queue, as shown below:
>>>
>>> if (mpi_request->Function == MPI2_FUNCTION_SCSI_TASK_MGMT) {
>>>                 smid = mpt3sas_base_get_smid_hpr(ioc, ioc->ctl_cb_idx);
>>>                 if (!smid) {
>>>                         pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n",
>>>                             ioc->name, __func__);
>>>                         ret = -EAGAIN;
>>>                         goto out;
>>>                 }
>>>         } else {
>>>
>>>                 smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->ctl_cb_idx, NULL);
>>>
>> Yes, I know.
>> But the current method of inserting a SCSI command completely goes
>> against blk-mq request usage; with blk-mq the tags are managed by the
>> block layer, so you need to integrate with that to get a correct tag.
>>
>> Is this _really_ a crucial functionality? Can't we just drop it and use
>> sg/bsg for this?
>> It would make my life _so_ much easier; the alternative would be to
>> either implement 'real' reserved command handling or rewrite the above
>> code-path in terms of 'struct request', packing and unpacking as we go.
>> Neither is very appealing.
> 
> I think it is better to take one request frame out of the can_queue
> count and reserve it for this ioctl SCSI IO, since we only allow one
> ioctl command at a time.
> 
Ok, that makes things easier. I will update the code accordingly.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* RE: [PATCH 00/10] mpt3sas: full mq support
  2017-02-16 10:18                                 ` Hannes Reinecke
@ 2017-02-16 10:45                                   ` Kashyap Desai
  0 siblings, 0 replies; 59+ messages in thread
From: Kashyap Desai @ 2017-02-16 10:45 UTC (permalink / raw)
  To: Hannes Reinecke, Sreekanth Reddy
  Cc: Christoph Hellwig, Martin K. Petersen, James Bottomley,
	linux-scsi, Sathya Prakash Veerichetty, PDL-MPT-FUSIONLINUX

> > - Later we can explore whether more than one nr_hw_queue really adds any
> > benefit. From current, limited testing I don't see a major performance
> > boost with more than one hardware queue.
> >
> Well, the _actual_ code to support mq is rather trivial, and really serves
> as a good testbed for scsi-mq.
> I would prefer to leave it in and disable it via a module parameter.

I am thinking that adding extra code for more than one nr_hw_queue will add
maintenance and support overhead; especially the IO error handling code
becomes complex in the nr_hw_queues > 1 case. If we really want to see a
performance boost there, we should attempt it and bear the other side
effects.

For the time being my choice would be to drop the nr_hw_queue > 1 support
entirely (not even behind a module parameter).

>
> But in either case, I can rebase the patches to leave any notions of
> 'nr_hw_queues' to patch 8 for implementing full mq support.

Thanks Hannes. It was just a heads-up... We are not sure when we can submit
the upcoming patch set from Broadcom. Maybe we can sync up with you offline
in case any rebase is required.

>
> And we need to discuss how to handle MPI2_FUNCTION_SCSI_IO_REQUEST;
> the current method doesn't work with blk-mq.
> I really would like to see that go, especially as sg/bsg supports the same
> functionality ...
>

^ permalink raw reply	[flat|nested] 59+ messages in thread

end of thread, other threads:[~2017-02-16 10:45 UTC | newest]

Thread overview: 59+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-31  9:25 [PATCH 00/10] mpt3sas: full mq support Hannes Reinecke
2017-01-31  9:25 ` [PATCH 01/10] mpt3sas: switch to pci_alloc_irq_vectors Hannes Reinecke
2017-02-07 13:15   ` Christoph Hellwig
2017-02-16  9:32   ` Sreekanth Reddy
2017-02-16 10:01     ` Hannes Reinecke
2017-01-31  9:25 ` [PATCH 02/10] mpt3sas: set default value for cb_idx Hannes Reinecke
2017-02-07 13:15   ` Christoph Hellwig
2017-01-31  9:25 ` [PATCH 03/10] mpt3sas: implement _dechain_st() Hannes Reinecke
2017-02-07 13:15   ` Christoph Hellwig
2017-02-07 13:18     ` Hannes Reinecke
2017-01-31  9:25 ` [PATCH 04/10] mpt3sas: separate out _base_recovery_check() Hannes Reinecke
2017-02-07 13:16   ` Christoph Hellwig
2017-02-16  9:53   ` Sreekanth Reddy
2017-02-16 10:03     ` Hannes Reinecke
2017-01-31  9:25 ` [PATCH 05/10] mpt3sas: open-code _scsih_scsi_lookup_get() Hannes Reinecke
2017-02-07 13:16   ` Christoph Hellwig
2017-02-16  9:59   ` Sreekanth Reddy
2017-02-16 10:04     ` Hannes Reinecke
2017-01-31  9:25 ` [PATCH 06/10] mpt3sas: Introduce mpt3sas_get_st_from_smid() Hannes Reinecke
2017-02-07 13:17   ` Christoph Hellwig
2017-01-31  9:25 ` [PATCH 07/10] mpt3sas: use hi-priority queue for TMFs Hannes Reinecke
2017-02-07 13:19   ` Christoph Hellwig
2017-02-16 10:09   ` Sreekanth Reddy
2017-02-16 10:14     ` Hannes Reinecke
2017-02-16 10:23       ` Sreekanth Reddy
2017-02-16 10:26         ` Hannes Reinecke
2017-01-31  9:25 ` [PATCH 08/10] mpt3sas: lockless command submission for scsi-mq Hannes Reinecke
2017-01-31 13:22   ` Christoph Hellwig
2017-01-31 13:46     ` Hannes Reinecke
2017-01-31 14:24       ` Christoph Hellwig
2017-01-31  9:25 ` [PATCH 09/10] mpt3sas: Use 'msix_index' as argument for put_smid functions Hannes Reinecke
2017-01-31  9:26 ` [PATCH 10/10] mpt3sas: scsi-mq interrupt steering Hannes Reinecke
2017-01-31 10:05   ` Christoph Hellwig
2017-01-31 10:02 ` [PATCH 00/10] mpt3sas: full mq support Christoph Hellwig
2017-01-31 11:16   ` Hannes Reinecke
2017-01-31 17:54     ` Kashyap Desai
2017-02-01  6:51       ` Hannes Reinecke
2017-02-01  7:07         ` Kashyap Desai
2017-02-01  7:43           ` Hannes Reinecke
2017-02-09 13:03             ` Sreekanth Reddy
2017-02-09 13:12               ` Hannes Reinecke
2017-02-10  4:43                 ` Sreekanth Reddy
2017-02-10  6:59                   ` Hannes Reinecke
2017-02-13  6:15                     ` Sreekanth Reddy
2017-02-13 13:11                       ` Hannes Reinecke
2017-02-15  8:27                         ` Sreekanth Reddy
2017-02-15  9:18                           ` Kashyap Desai
2017-02-15 10:05                             ` Hannes Reinecke
2017-02-16  9:48                               ` Kashyap Desai
2017-02-16 10:18                                 ` Hannes Reinecke
2017-02-16 10:45                                   ` Kashyap Desai
2017-02-07 13:19 ` Christoph Hellwig
2017-02-07 14:38   ` Hannes Reinecke
2017-02-07 15:34     ` Christoph Hellwig
2017-02-07 15:39       ` Hannes Reinecke
2017-02-07 15:40         ` Christoph Hellwig
2017-02-07 15:49           ` Hannes Reinecke
2017-02-15  8:15   ` Christoph Hellwig
2017-02-15  8:19     ` Hannes Reinecke
