* [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
@ 2017-06-28 12:44 ` Max Gurtovoy
  0 siblings, 0 replies; 34+ messages in thread
From: Max Gurtovoy @ 2017-06-28 12:44 UTC (permalink / raw)
  To: axboe, hch, sagi, bvanassche, linux-nvme, linux-block
  Cc: vladimirk, Max Gurtovoy

This patch performs sequential mapping between CPUs and queues.
In case the system has more CPUs than HWQs, there are still
CPUs left to map to HWQs. In a hyperthreaded system, map those
unmapped CPUs and their siblings to the same HWQ.
This actually fixes a bug that left HWQs unmapped in a system with
2 sockets, 18 cores per socket and 2 threads per core (72 CPUs in
total) running NVMEoF (which opens up to a maximum of 64 HWQs).
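
For illustration, here is a minimal userspace sketch of the new mapping
logic on such a box (72 CPUs, 64 HWQs). The sibling pairing below
(CPU i paired with CPU i + 36) is only an assumption for the example;
the kernel derives the real pairing from topology_sibling_cpumask(),
and offline CPUs (which the patch maps to queue 0) are ignored here:

#include <stdio.h>

#define NR_CPUS   72
#define NR_QUEUES 64

/* assumed hyperthread pairing: cpu i <-> cpu i + 36 */
static int first_sibling(int cpu)
{
	return cpu < 36 ? cpu : cpu - 36;
}

int main(void)
{
	int map[NR_CPUS];
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (cpu < NR_QUEUES)
			map[cpu] = cpu % NR_QUEUES;		/* sequential mapping */
		else if (first_sibling(cpu) == cpu)
			map[cpu] = cpu % NR_QUEUES;
		else
			map[cpu] = map[first_sibling(cpu)];	/* share the sibling's HWQ */
	}

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %2d -> hwq %2d\n", cpu, map[cpu]);
	return 0;
}

With 72 CPUs and 64 HWQs no HWQ is left unmapped: CPUs 0-63 cover
queues 0-63 and CPUs 64-71 share the queue of their first sibling.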

Performance results running fio (72 jobs, 128 iodepth)
using null_blk (w/w.o patch):

bs      IOPS(read submit_queues=72)   IOPS(write submit_queues=72)   IOPS(read submit_queues=24)  IOPS(write submit_queues=24)
-----  ----------------------------  ------------------------------ ---------------------------- -----------------------------
512    4890.4K/4723.5K                 4524.7K/4324.2K                   4280.2K/4264.3K               3902.4K/3909.5K
1k     4910.1K/4715.2K                 4535.8K/4309.6K                   4296.7K/4269.1K               3906.8K/3914.9K
2k     4906.3K/4739.7K                 4526.7K/4330.6K                   4301.1K/4262.4K               3890.8K/3900.1K
4k     4918.6K/4730.7K                 4556.1K/4343.6K                   4297.6K/4264.5K               3886.9K/3893.9K
8k     4906.4K/4748.9K                 4550.9K/4346.7K                   4283.2K/4268.8K               3863.4K/3858.2K
16k    4903.8K/4782.6K                 4501.5K/4233.9K                   4292.3K/4282.3K               3773.1K/3773.5K
32k    4885.8K/4782.4K                 4365.9K/4184.2K                   4307.5K/4289.4K               3780.3K/3687.3K
64k    4822.5K/4762.7K                 2752.8K/2675.1K                   4308.8K/4312.3K               2651.5K/2655.7K
128k   2388.5K/2313.8K                 1391.9K/1375.7K                   2142.8K/2152.2K               1395.5K/1374.2K

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---
 block/blk-mq-cpumap.c |   68 ++++++++++++++++---------------------------------
 1 files changed, 22 insertions(+), 46 deletions(-)

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8e61e86..2cca4fc 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -14,10 +14,15 @@
 #include "blk.h"
 #include "blk-mq.h"
 
-static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
-			      const int cpu)
+static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
+			      const struct cpumask *online_mask)
 {
-	return cpu * nr_queues / nr_cpus;
+	/*
+	 * Non online CPU will be mapped to queue index 0.
+	 */
+	if (!cpumask_test_cpu(cpu, online_mask))
+		return 0;
+	return cpu % nr_queues;
 }
 
 static int get_first_sibling(unsigned int cpu)
@@ -36,55 +41,26 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
 	unsigned int *map = set->mq_map;
 	unsigned int nr_queues = set->nr_hw_queues;
 	const struct cpumask *online_mask = cpu_online_mask;
-	unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
-	cpumask_var_t cpus;
-
-	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
-		return -ENOMEM;
-
-	cpumask_clear(cpus);
-	nr_cpus = nr_uniq_cpus = 0;
-	for_each_cpu(i, online_mask) {
-		nr_cpus++;
-		first_sibling = get_first_sibling(i);
-		if (!cpumask_test_cpu(first_sibling, cpus))
-			nr_uniq_cpus++;
-		cpumask_set_cpu(i, cpus);
-	}
-
-	queue = 0;
-	for_each_possible_cpu(i) {
-		if (!cpumask_test_cpu(i, online_mask)) {
-			map[i] = 0;
-			continue;
-		}
+	unsigned int cpu, first_sibling;
 
+	for_each_possible_cpu(cpu) {
 		/*
-		 * Easy case - we have equal or more hardware queues. Or
-		 * there are no thread siblings to take into account. Do
-		 * 1:1 if enough, or sequential mapping if less.
+		 * First do sequential mapping between CPUs and queues.
+		 * In case we still have CPUs to map, and we have some number of
+		 * threads per core, then map sibling threads to the same queue
+		 * for performance optimizations.
 		 */
-		if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
-			map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
-			queue++;
-			continue;
+		if (cpu < nr_queues) {
+			map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+		} else {
+			first_sibling = get_first_sibling(cpu);
+			if (first_sibling == cpu)
+				map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+			else
+				map[cpu] = map[first_sibling];
 		}
-
-		/*
-		 * Less then nr_cpus queues, and we have some number of
-		 * threads per cores. Map sibling threads to the same
-		 * queue.
-		 */
-		first_sibling = get_first_sibling(i);
-		if (first_sibling == i) {
-			map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
-							queue);
-			queue++;
-		} else
-			map[i] = map[first_sibling];
 	}
 
-	free_cpumask_var(cpus);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(blk_mq_map_queues);
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 12:44 ` Max Gurtovoy
@ 2017-06-28 13:07   ` Johannes Thumshirn
  -1 siblings, 0 replies; 34+ messages in thread
From: Johannes Thumshirn @ 2017-06-28 13:07 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: axboe, hch, sagi, bvanassche, linux-nvme, linux-block, vladimirk

On Wed, Jun 28, 2017 at 03:44:40PM +0300, Max Gurtovoy wrote:
> This patch performs sequential mapping between CPUs and queues.
> In case the system has more CPUs than HWQs, there are still
> CPUs left to map to HWQs. In a hyperthreaded system, map those
> unmapped CPUs and their siblings to the same HWQ.
> This actually fixes a bug that left HWQs unmapped in a system with
> 2 sockets, 18 cores per socket and 2 threads per core (72 CPUs in
> total) running NVMEoF (which opens up to a maximum of 64 HWQs).

From a first look at it, it looks good.
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 12:44 ` Max Gurtovoy
@ 2017-06-28 14:15   ` Christoph Hellwig
  -1 siblings, 0 replies; 34+ messages in thread
From: Christoph Hellwig @ 2017-06-28 14:15 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: axboe, hch, sagi, bvanassche, linux-nvme, linux-block, vladimirk

On Wed, Jun 28, 2017 at 03:44:40PM +0300, Max Gurtovoy wrote:
> This patch performs sequential mapping between CPUs and queues.
> In case the system has more CPUs than HWQs, there are still
> CPUs left to map to HWQs. In a hyperthreaded system, map those
> unmapped CPUs and their siblings to the same HWQ.
> This actually fixes a bug that left HWQs unmapped in a system with
> 2 sockets, 18 cores per socket and 2 threads per core (72 CPUs in
> total) running NVMEoF (which opens up to a maximum of 64 HWQs).
> 
> Performance results running fio (72 jobs, 128 iodepth)
> using null_blk (w/w.o patch):

Can you also test with Sagi's series to use the proper IRQ
level mapping?

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 12:44 ` Max Gurtovoy
@ 2017-06-28 14:38   ` Sagi Grimberg
  -1 siblings, 0 replies; 34+ messages in thread
From: Sagi Grimberg @ 2017-06-28 14:38 UTC (permalink / raw)
  To: Max Gurtovoy, axboe, hch, bvanassche, linux-nvme, linux-block; +Cc: vladimirk

Hi Max,

> This patch performs sequential mapping between CPUs and queues.
> In case the system has more CPUs than HWQs, there are still
> CPUs left to map to HWQs. In a hyperthreaded system, map those
> unmapped CPUs and their siblings to the same HWQ.
> This actually fixes a bug that left HWQs unmapped in a system with
> 2 sockets, 18 cores per socket and 2 threads per core (72 CPUs in
> total) running NVMEoF (which opens up to a maximum of 64 HWQs).

The explanation can be a bit clearer...

I still need to take a look at the patch itself, but do note that
ideally we will never get to blk_mq_map_queues since we prefer
to map queues based on MSIX assignments. for nvme-rdma, this is
merely a fallback. And looking ahead, MSIX based mapping should
be the primary mapping logic.

Can you please test with my patchset on converting nvme-rdma to
MSIX based mapping (I assume you are testing with mlx5 yes)?
I'd be very much interested to know if the original problem
exists with this applied.

I'll take a closer look into the patch.
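
The idea, roughly (a simplified sketch with blk_mq_map_queues() kept as
the fallback, not the exact code in the series; the helper name and
ib_get_vector_affinity() usage here are illustrative):

static int example_rdma_map_queues(struct blk_mq_tag_set *set,
				   struct ib_device *dev, int first_vec)
{
	const struct cpumask *mask;
	unsigned int queue, cpu;

	for (queue = 0; queue < set->nr_hw_queues; queue++) {
		/* per-vector affinity as set up by the IRQ layer */
		mask = ib_get_vector_affinity(dev, first_vec + queue);
		if (!mask)
			goto fallback;

		for_each_cpu(cpu, mask)
			set->mq_map[cpu] = queue;
	}
	return 0;

fallback:
	/* the device cannot report affinity, fall back to the CPU mapping */
	return blk_mq_map_queues(set);
}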

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 12:44 ` Max Gurtovoy
@ 2017-06-28 14:43   ` Guilherme G. Piccoli
  -1 siblings, 0 replies; 34+ messages in thread
From: Guilherme G. Piccoli @ 2017-06-28 14:43 UTC (permalink / raw)
  To: Max Gurtovoy, axboe, hch, sagi, bvanassche, linux-nvme, linux-block
  Cc: vladimirk

On 06/28/2017 09:44 AM, Max Gurtovoy wrote:
> This patch performs sequential mapping between CPUs and queues.
> In case the system has more CPUs than HWQs, there are still
> CPUs left to map to HWQs. In a hyperthreaded system, map those
> unmapped CPUs and their siblings to the same HWQ.
> This actually fixes a bug that left HWQs unmapped in a system with
> 2 sockets, 18 cores per socket and 2 threads per core (72 CPUs in
> total) running NVMEoF (which opens up to a maximum of 64 HWQs).
> 
> Performance results running fio (72 jobs, 128 iodepth)
> using null_blk (w/w.o patch):
> 
> bs      IOPS(read submit_queues=72)   IOPS(write submit_queues=72)   IOPS(read submit_queues=24)  IOPS(write submit_queues=24)
> -----  ----------------------------  ------------------------------ ---------------------------- -----------------------------
> 512    4890.4K/4723.5K                 4524.7K/4324.2K                   4280.2K/4264.3K               3902.4K/3909.5K
> 1k     4910.1K/4715.2K                 4535.8K/4309.6K                   4296.7K/4269.1K               3906.8K/3914.9K
> 2k     4906.3K/4739.7K                 4526.7K/4330.6K                   4301.1K/4262.4K               3890.8K/3900.1K
> 4k     4918.6K/4730.7K                 4556.1K/4343.6K                   4297.6K/4264.5K               3886.9K/3893.9K
> 8k     4906.4K/4748.9K                 4550.9K/4346.7K                   4283.2K/4268.8K               3863.4K/3858.2K
> 16k    4903.8K/4782.6K                 4501.5K/4233.9K                   4292.3K/4282.3K               3773.1K/3773.5K
> 32k    4885.8K/4782.4K                 4365.9K/4184.2K                   4307.5K/4289.4K               3780.3K/3687.3K
> 64k    4822.5K/4762.7K                 2752.8K/2675.1K                   4308.8K/4312.3K               2651.5K/2655.7K
> 128k   2388.5K/2313.8K                 1391.9K/1375.7K                   2142.8K/2152.2K               1395.5K/1374.2K
> 

Max, thanks for this patch. I was having the same issue, and was
thinking about how to address it...

Your patch fixes it for me, I just tested it.
I can test any follow-up patch you send, based on the reviews you got on
the list.

Cheers,


Guilherme

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 14:38   ` Sagi Grimberg
@ 2017-06-28 14:55     ` Max Gurtovoy
  -1 siblings, 0 replies; 34+ messages in thread
From: Max Gurtovoy @ 2017-06-28 14:55 UTC (permalink / raw)
  To: Sagi Grimberg, axboe, hch, bvanassche, linux-nvme, linux-block; +Cc: vladimirk



On 6/28/2017 5:38 PM, Sagi Grimberg wrote:
> Hi Max,

Hi Sagi,

>
>> This patch performs sequential mapping between CPUs and queues.
>> In case the system has more CPUs than HWQs, there are still
>> CPUs left to map to HWQs. In a hyperthreaded system, map those
>> unmapped CPUs and their siblings to the same HWQ.
>> This actually fixes a bug that left HWQs unmapped in a system with
>> 2 sockets, 18 cores per socket and 2 threads per core (72 CPUs in
>> total) running NVMEoF (which opens up to a maximum of 64 HWQs).
>
> The explanation can be a bit clearer...
>
> I still need to take a look at the patch itself, but do note that
> ideally we will never get to blk_mq_map_queues since we prefer
> to map queues based on MSIX assignments. for nvme-rdma, this is
> merely a fallback. And looking ahead, MSIX based mapping should
> be the primary mapping logic.

We still have a fallback option in your series, so we surely need some fix
to blk_mq_map_queues (also for stable kernels IMO. Jens/Christoph?).

>
> Can you please test with my patchset on converting nvme-rdma to
> MSIX based mapping (I assume you are testing with mlx5 yes)?

Sure. Is V6 the latest version of the patchset?
I'll test it with a ConnectX-5 adapter and send the results.

> I'd be very much interested to know if the original problem
> exists with this applied.

it will exist in case set->nr_hw_queues > dev->num_comp_vectors.

>
> I'll take a closer look into the patch.

Thanks.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 12:44 ` Max Gurtovoy
@ 2017-06-28 14:58   ` Sagi Grimberg
  -1 siblings, 0 replies; 34+ messages in thread
From: Sagi Grimberg @ 2017-06-28 14:58 UTC (permalink / raw)
  To: Max Gurtovoy, axboe, hch, bvanassche, linux-nvme, linux-block; +Cc: vladimirk


> +static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
> +			      const struct cpumask *online_mask)
>   {
> -	return cpu * nr_queues / nr_cpus;
> +	/*
> +	 * Non online CPU will be mapped to queue index 0.
> +	 */
> +	if (!cpumask_test_cpu(cpu, online_mask))
> +		return 0;

Why not map offline cpus to what they would've mapped to if they were
online?

> +		if (cpu < nr_queues) {
> +			map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
> +		} else {
> +			first_sibling = get_first_sibling(cpu);
> +			if (first_sibling == cpu)
> +				map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);

This seems to be the only reference to online_mask left (on each side of
the if statement). I think you can just not pass it and use
cpu_online_mask in cpu_to_queue_index.
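
Something along these lines, i.e. a sketch of the suggested
simplification (not part of the posted patch):

static int cpu_to_queue_index(unsigned int nr_queues, const int cpu)
{
	/* non-online CPUs are mapped to queue index 0 */
	if (!cpumask_test_cpu(cpu, cpu_online_mask))
		return 0;
	return cpu % nr_queues;
}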

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 14:55     ` Max Gurtovoy
@ 2017-06-28 15:01       ` Sagi Grimberg
  -1 siblings, 0 replies; 34+ messages in thread
From: Sagi Grimberg @ 2017-06-28 15:01 UTC (permalink / raw)
  To: Max Gurtovoy, axboe, hch, bvanassche, linux-nvme, linux-block; +Cc: vladimirk


>> Can you please test with my patchset on converting nvme-rdma to
>> MSIX based mapping (I assume you are testing with mlx5 yes)?
> 
> Sure. Is V6 the latest version of the patchset?
> I'll test it with a ConnectX-5 adapter and send the results.

Yes.

>> I'd be very much interested to know if the original problem
>> exists with this applied.
> 
> it will exist in case set->nr_hw_queues > dev->num_comp_vectors.

We don't ask for more hw queues than num_comp_vectors.
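
That is, the host side caps the queue count by the completion vectors,
roughly like this (a simplified sketch, not the exact driver code):

static unsigned int example_cap_io_queues(unsigned int nr_io_queues,
					  struct ib_device *ibdev)
{
	/* never ask for more I/O queues than completion vectors */
	return min_t(unsigned int, nr_io_queues, ibdev->num_comp_vectors);
}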

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 14:58   ` Sagi Grimberg
@ 2017-06-28 15:09     ` Max Gurtovoy
  -1 siblings, 0 replies; 34+ messages in thread
From: Max Gurtovoy @ 2017-06-28 15:09 UTC (permalink / raw)
  To: Sagi Grimberg, axboe, hch, bvanassche, linux-nvme, linux-block; +Cc: vladimirk



On 6/28/2017 5:58 PM, Sagi Grimberg wrote:
>
>> +static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
>> +                  const struct cpumask *online_mask)
>>   {
>> -    return cpu * nr_queues / nr_cpus;
>> +    /*
>> +     * Non online CPU will be mapped to queue index 0.
>> +     */
>> +    if (!cpumask_test_cpu(cpu, online_mask))
>> +        return 0;
>
> Why not map offline cpus to what they would've mapped to if they were
> online?

I didn't change the logic for offline CPUs.
Should it be done in this patch?

>
>> +        if (cpu < nr_queues) {
>> +            map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
>> +        } else {
>> +            first_sibling = get_first_sibling(cpu);
>> +            if (first_sibling == cpu)
>> +                map[cpu] = cpu_to_queue_index(nr_queues, cpu,
>> online_mask);
>
> This seems to be the only reference to online_mask left (on each side of
> the if statement. I think you can just not pass it and use
> cpu_online_mask in cpu_to_queue_index.

Another user of cpu_to_queue_index may want to pass a different mask.
Currently it's a static function, so I guess we can do it.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 15:01       ` Sagi Grimberg
@ 2017-06-28 17:11         ` Max Gurtovoy
  -1 siblings, 0 replies; 34+ messages in thread
From: Max Gurtovoy @ 2017-06-28 17:11 UTC (permalink / raw)
  To: Sagi Grimberg, axboe, hch, bvanassche, linux-nvme, linux-block; +Cc: vladimirk



On 6/28/2017 6:01 PM, Sagi Grimberg wrote:
>
>>> Can you please test with my patchset on converting nvme-rdma to
>>> MSIX based mapping (I assume you are testing with mlx5 yes)?
>>
>> Sure. Is V6 the latest version of the patchset?
>> I'll test it with a ConnectX-5 adapter and send the results.
>
> Yes.
>
>>> I'd be very much interested to know if the original problem
>>> exists with this applied.
>>
>> it will exist in case set->nr_hw_queues > dev->num_comp_vectors.
>
> We don't ask for more hw queues than num_comp_vectors.


I've tested Sagi's patches and they fix the connection establishment bug 
for NVMEoF.

here are the results:

fio 72 jobs, 128 iodepth.
NVMEoF register_always=N
1 Subsystem, 1 namespace

num_comp_vectors is 60 and therefore num_queues is 60.
I ran a comparison to my original patch with 60 queues and also with 64
queues (possible with my patch because there is no num_comp_vectors limitation).

bs      IOPS(read queues=60(Sagi)/60(Max)/64(Max))
-----  --------------------------------------------
512    3424.9K/3587.8K/3619.2K
1k     3421.8K/3613.5K/3630.6K
2k     3416.4K/3605.7K/3630.2K
4k     2361.6K/2399.9K/2404.1K
8k     1368.7K/1370.7K/1370.6K
16k    692K/691K/692K
32k    345K/348K/348K
64k    175K/174K/174K
128k   88K/87K/87K

bs     IOPS(write queues=60(Sagi)/60(Max)/64(Max))
-----  --------------------------------------------
512    3243.6K/3329.7K/3392.9K
1k     3249.7K/3341.2K/3379.2K
2k     3251.2K/3336.9K/3385.9K
4k     2685.8K/2683.9K/2683.3K
8k     1336.6K/1355.1K/1361.6K
16k    690K/690K/691K
32k    348K/348K/348K
64k    174K/174K/174K
128k   87K/87K/87K

My conclusion is that Sagi's patch is correct (although we see a little
bit less performance: 100K-200K IOPS less for small block sizes), so you can add:

Tested-by: Max Gurtovoy <maxg@mellanox.com>

Nevertheless, we should review and consider pushing my fixes to the 
block layer for other users of this mapping function.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 15:09     ` Max Gurtovoy
@ 2017-06-29  5:33       ` Sagi Grimberg
  -1 siblings, 0 replies; 34+ messages in thread
From: Sagi Grimberg @ 2017-06-29  5:33 UTC (permalink / raw)
  To: Max Gurtovoy, axboe, hch, bvanassche, linux-nvme, linux-block; +Cc: vladimirk


>>> +static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
>>> +                  const struct cpumask *online_mask)
>>>   {
>>> -    return cpu * nr_queues / nr_cpus;
>>> +    /*
>>> +     * Non online CPU will be mapped to queue index 0.
>>> +     */
>>> +    if (!cpumask_test_cpu(cpu, online_mask))
>>> +        return 0;
>>
>> Why not map offline cpus to what they would've mapped to if they were
>> online?
> 
> I didn't change the logic for offline CPUs.
> Should it be done in this patch?

The patch clearly treats offline cpus differently: it maps them
to queue 0.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 17:11         ` Max Gurtovoy
@ 2017-06-29  8:30           ` Johannes Thumshirn
  -1 siblings, 0 replies; 34+ messages in thread
From: Johannes Thumshirn @ 2017-06-29  8:30 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: Sagi Grimberg, axboe, hch, bvanassche, linux-nvme, linux-block,
	vladimirk

On Wed, Jun 28, 2017 at 08:11:23PM +0300, Max Gurtovoy wrote:
> Nevertheless, we should review and consider pushing my fixes to the block
> layer for other users of this mapping function.

Totally agree with it, and it's easier to backport to, let's say, stable kernels.

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 12:44 ` Max Gurtovoy
@ 2017-06-29 14:31   ` Christoph Hellwig
  -1 siblings, 0 replies; 34+ messages in thread
From: Christoph Hellwig @ 2017-06-29 14:31 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: axboe, hch, sagi, bvanassche, linux-nvme, linux-block, vladimirk

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 12:44 ` Max Gurtovoy
@ 2017-07-03 13:38   ` Johannes Thumshirn
  -1 siblings, 0 replies; 34+ messages in thread
From: Johannes Thumshirn @ 2017-07-03 13:38 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: linux-block, bvanassche, vladimirk, linux-nvme, axboe, hch, sagi

So finally:
Tested-by: Johannes Thumshirn <jthumshirn@suse.de>
-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-06-28 12:44 ` Max Gurtovoy
@ 2017-07-05  7:59   ` Johannes Thumshirn
  -1 siblings, 0 replies; 34+ messages in thread
From: Johannes Thumshirn @ 2017-07-05  7:59 UTC (permalink / raw)
  To: hch, sagi, Keith Busch
  Cc: axboe, bvanassche, linux-nvme, linux-block, vladimirk, Max Gurtovoy

On Wed, Jun 28, 2017 at 03:44:40PM +0300, Max Gurtovoy wrote:
> This patch performs sequential mapping between CPUs and queues.
> In case the system has more CPUs than HWQs, there are still
> CPUs left to map to HWQs. In a hyperthreaded system, map those
> unmapped CPUs and their siblings to the same HWQ.
> This actually fixes a bug that left HWQs unmapped in a system with
> 2 sockets, 18 cores per socket and 2 threads per core (72 CPUs in
> total) running NVMEoF (which opens up to a maximum of 64 HWQs).

Christoph/Sagi/Keith,

Any updates on this patch? Without it I'm not able to run NVMf on a box with 44
cores and 88 threads w/o adding -i 44 to the nvme connect statement.

Thanks,
	Johannes

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-07-05  7:59   ` Johannes Thumshirn
@ 2017-07-05  8:11     ` Max Gurtovoy
  -1 siblings, 0 replies; 34+ messages in thread
From: Max Gurtovoy @ 2017-07-05  8:11 UTC (permalink / raw)
  To: Johannes Thumshirn, hch, sagi, Keith Busch
  Cc: axboe, bvanassche, linux-nvme, linux-block, vladimirk



On 7/5/2017 10:59 AM, Johannes Thumshirn wrote:
> On Wed, Jun 28, 2017 at 03:44:40PM +0300, Max Gurtovoy wrote:
>> This patch performs sequential mapping between CPUs and queues.
>> In case the system has more CPUs than HWQs, there are still
>> CPUs left to map to HWQs. In a hyperthreaded system, map those
>> unmapped CPUs and their siblings to the same HWQ.
>> This actually fixes a bug that left HWQs unmapped in a system with
>> 2 sockets, 18 cores per socket and 2 threads per core (72 CPUs in
>> total) running NVMEoF (which opens up to a maximum of 64 HWQs).
>
> Christoph/Sagi/Keith,
>
> Any updates on this patch? Without it I'm not able to run NVMf on a box with 44
> cores and 88 threads w/o adding -i 44 to the nvme connect statement.
>
> Thanks,
> 	Johannes
>

Hi Johannes,
this was already merged to the main tree (Jens added it to his pull request).

Cheers,
	Max.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/1] blk-mq: map all HWQ also in hyperthreaded system
  2017-07-05  8:11     ` Max Gurtovoy
@ 2017-07-05  8:15       ` Johannes Thumshirn
  -1 siblings, 0 replies; 34+ messages in thread
From: Johannes Thumshirn @ 2017-07-05  8:15 UTC (permalink / raw)
  To: Max Gurtovoy
  Cc: hch, sagi, Keith Busch, axboe, bvanassche, linux-nvme,
	linux-block, vladimirk

On Wed, Jul 05, 2017 at 11:11:36AM +0300, Max Gurtovoy wrote:
> Hi Johannes,
> this was already merged to the main tree (Jens added it to his pull request).

Oops, sorry for the noise.

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 34+ messages in thread
