linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] blk-mq: balance mapping between CPUs and queues
@ 2019-07-25  7:56 Ming Lei
  2019-07-25  8:35 ` Bob Liu
  0 siblings, 1 reply; 4+ messages in thread
From: Ming Lei @ 2019-07-25  7:56 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Ming Lei, Yi Zhang

Spread queues among present CPUs first, then building the mapping
on other non-present CPUs.

So we can minimize count of dead queues which are mapped by un-present
CPUs only. Then bad IO performance can be avoided by this unbalanced
mapping between CPUs and queues.

The similar policy has been applied on Managed IRQ affinity.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Cc: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq-cpumap.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index f945621a0e8f..e217f3404dc7 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -15,10 +15,9 @@
 #include "blk.h"
 #include "blk-mq.h"
 
-static int cpu_to_queue_index(struct blk_mq_queue_map *qmap,
-			      unsigned int nr_queues, const int cpu)
+static int queue_index(struct blk_mq_queue_map *qmap, const int q)
 {
-	return qmap->queue_offset + (cpu % nr_queues);
+	return qmap->queue_offset + q;
 }
 
 static int get_first_sibling(unsigned int cpu)
@@ -36,23 +35,36 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap)
 {
 	unsigned int *map = qmap->mq_map;
 	unsigned int nr_queues = qmap->nr_queues;
-	unsigned int cpu, first_sibling;
+	unsigned int cpu, first_sibling, q = 0;
+
+	for_each_possible_cpu(cpu)
+		map[cpu] = -1;
+
+	/*
+	 * Spread queues among present CPUs first for minimizing
+	 * count of dead queues which are mapped by all un-present CPUs
+	 */
+	for_each_present_cpu(cpu) {
+		if (q >= nr_queues)
+			break;
+		map[cpu] = queue_index(qmap, q++);
+	}
 
 	for_each_possible_cpu(cpu) {
+		if (map[cpu] != -1)
+			continue;
 		/*
 		 * First do sequential mapping between CPUs and queues.
 		 * In case we still have CPUs to map, and we have some number of
 		 * threads per cores then map sibling threads to the same queue
 		 * for performance optimizations.
 		 */
-		if (cpu < nr_queues) {
-			map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
+		first_sibling = get_first_sibling(cpu);
+		if (first_sibling == cpu) {
+			map[cpu] = queue_index(qmap, q);
+			q = (q + 1) % nr_queues;
 		} else {
-			first_sibling = get_first_sibling(cpu);
-			if (first_sibling == cpu)
-				map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
-			else
-				map[cpu] = map[first_sibling];
+			map[cpu] = map[first_sibling];
 		}
 	}
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] blk-mq: balance mapping between CPUs and queues
  2019-07-25  7:56 [PATCH] blk-mq: balance mapping between CPUs and queues Ming Lei
@ 2019-07-25  8:35 ` Bob Liu
  2019-07-25  9:18   ` [PATCH] " Ming Lei
  0 siblings, 1 reply; 4+ messages in thread
From: Bob Liu @ 2019-07-25  8:35 UTC (permalink / raw)
  To: Ming Lei, Jens Axboe; +Cc: linux-block, Yi Zhang

On 7/25/19 4:26 PM, Ming Lei wrote:
> Spread queues among present CPUs first, then building the mapping
> on other non-present CPUs.
> 
> So we can minimize count of dead queues which are mapped by un-present
> CPUs only. Then bad IO performance can be avoided by this unbalanced
> mapping between CPUs and queues.
> 
> The similar policy has been applied on Managed IRQ affinity.
> 
> Reported-by: Yi Zhang <yi.zhang@redhat.com>
> Cc: Yi Zhang <yi.zhang@redhat.com>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  block/blk-mq-cpumap.c | 34 +++++++++++++++++++++++-----------
>  1 file changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
> index f945621a0e8f..e217f3404dc7 100644
> --- a/block/blk-mq-cpumap.c
> +++ b/block/blk-mq-cpumap.c
> @@ -15,10 +15,9 @@
>  #include "blk.h"
>  #include "blk-mq.h"
>  
> -static int cpu_to_queue_index(struct blk_mq_queue_map *qmap,
> -			      unsigned int nr_queues, const int cpu)
> +static int queue_index(struct blk_mq_queue_map *qmap, const int q)
>  {
> -	return qmap->queue_offset + (cpu % nr_queues);
> +	return qmap->queue_offset + q;
>  }
>  
>  static int get_first_sibling(unsigned int cpu)
> @@ -36,23 +35,36 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap)
>  {
>  	unsigned int *map = qmap->mq_map;
>  	unsigned int nr_queues = qmap->nr_queues;
> -	unsigned int cpu, first_sibling;
> +	unsigned int cpu, first_sibling, q = 0;
> +
> +	for_each_possible_cpu(cpu)
> +		map[cpu] = -1;
> +
> +	/*
> +	 * Spread queues among present CPUs first for minimizing
> +	 * count of dead queues which are mapped by all un-present CPUs
> +	 */
> +	for_each_present_cpu(cpu) {
> +		if (q >= nr_queues)
> +			break;
> +		map[cpu] = queue_index(qmap, q++);
> +	}
>  
>  	for_each_possible_cpu(cpu) {
> +		if (map[cpu] != -1)
> +			continue;
>  		/*
>  		 * First do sequential mapping between CPUs and queues.
>  		 * In case we still have CPUs to map, and we have some number of
>  		 * threads per cores then map sibling threads to the same queue
>  		 * for performance optimizations.
>  		 */
> -		if (cpu < nr_queues) {
> -			map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);

Why not keep this similarly? 

> +		first_sibling = get_first_sibling(cpu);
> +		if (first_sibling == cpu) {
> +			map[cpu] = queue_index(qmap, q);
> +			q = (q + 1) % nr_queues;
>  		} else {
> -			first_sibling = get_first_sibling(cpu);
> -			if (first_sibling == cpu)
> -				map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
> -			else
> -				map[cpu] = map[first_sibling];
> +			map[cpu] = map[first_sibling];

Then no need to share queue if nr_queues is enough for all possible cpu.

Regards, -Bob


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] blk-mq: balance mapping between CPUs and queues
  2019-07-25  8:35 ` Bob Liu
@ 2019-07-25  9:18   ` Ming Lei
  2019-07-25  9:20     ` Ming Lei
  0 siblings, 1 reply; 4+ messages in thread
From: Ming Lei @ 2019-07-25  9:18 UTC (permalink / raw)
  To: Bob Liu; +Cc: Jens Axboe, linux-block, Yi Zhang

On Thu, Jul 25, 2019 at 04:35:30PM +0800, Bob Liu wrote:
> On 7/25/19 4:26 PM, Ming Lei wrote:
> > Spread queues among present CPUs first, then building the mapping
> > on other non-present CPUs.
> > 
> > So we can minimize count of dead queues which are mapped by un-present
> > CPUs only. Then bad IO performance can be avoided by this unbalanced
> > mapping between CPUs and queues.
> > 
> > The similar policy has been applied on Managed IRQ affinity.
> > 
> > Reported-by: Yi Zhang <yi.zhang@redhat.com>
> > Cc: Yi Zhang <yi.zhang@redhat.com>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >  block/blk-mq-cpumap.c | 34 +++++++++++++++++++++++-----------
> >  1 file changed, 23 insertions(+), 11 deletions(-)
> > 
> > diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
> > index f945621a0e8f..e217f3404dc7 100644
> > --- a/block/blk-mq-cpumap.c
> > +++ b/block/blk-mq-cpumap.c
> > @@ -15,10 +15,9 @@
> >  #include "blk.h"
> >  #include "blk-mq.h"
> >  
> > -static int cpu_to_queue_index(struct blk_mq_queue_map *qmap,
> > -			      unsigned int nr_queues, const int cpu)
> > +static int queue_index(struct blk_mq_queue_map *qmap, const int q)
> >  {
> > -	return qmap->queue_offset + (cpu % nr_queues);
> > +	return qmap->queue_offset + q;
> >  }
> >  
> >  static int get_first_sibling(unsigned int cpu)
> > @@ -36,23 +35,36 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap)
> >  {
> >  	unsigned int *map = qmap->mq_map;
> >  	unsigned int nr_queues = qmap->nr_queues;
> > -	unsigned int cpu, first_sibling;
> > +	unsigned int cpu, first_sibling, q = 0;
> > +
> > +	for_each_possible_cpu(cpu)
> > +		map[cpu] = -1;
> > +
> > +	/*
> > +	 * Spread queues among present CPUs first for minimizing
> > +	 * count of dead queues which are mapped by all un-present CPUs
> > +	 */
> > +	for_each_present_cpu(cpu) {
> > +		if (q >= nr_queues)
> > +			break;
> > +		map[cpu] = queue_index(qmap, q++);
> > +	}
> >  
> >  	for_each_possible_cpu(cpu) {
> > +		if (map[cpu] != -1)
> > +			continue;
> >  		/*
> >  		 * First do sequential mapping between CPUs and queues.
> >  		 * In case we still have CPUs to map, and we have some number of
> >  		 * threads per cores then map sibling threads to the same queue
> >  		 * for performance optimizations.
> >  		 */
> > -		if (cpu < nr_queues) {
> > -			map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
> 
> Why not keep this similarly? 

Because the sequential mapping has been done already among present CPUs.

> 
> > +		first_sibling = get_first_sibling(cpu);
> > +		if (first_sibling == cpu) {
> > +			map[cpu] = queue_index(qmap, q);
> > +			q = (q + 1) % nr_queues;
> >  		} else {
> > -			first_sibling = get_first_sibling(cpu);
> > -			if (first_sibling == cpu)
> > -				map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
> > -			else
> > -				map[cpu] = map[first_sibling];
> > +			map[cpu] = map[first_sibling];
> 
> Then no need to share queue if nr_queues is enough for all possible cpu.

I am not sure I follow your idea. There isn't 'enough' stuff wrt.
nr_queues, which is just usually <= nr_queues.

The valid mapping has to cover all possible CPUs, and each queue's
mapping can't be overlapped with others. That is exactly what
the patch is doing.

If you think somewhere is wrong or not good enough, please point it
out.


thanks, 
Ming

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] blk-mq: balance mapping between CPUs and queues
  2019-07-25  9:18   ` [PATCH] " Ming Lei
@ 2019-07-25  9:20     ` Ming Lei
  0 siblings, 0 replies; 4+ messages in thread
From: Ming Lei @ 2019-07-25  9:20 UTC (permalink / raw)
  To: Bob Liu; +Cc: Jens Axboe, linux-block, Yi Zhang

On Thu, Jul 25, 2019 at 05:18:08PM +0800, Ming Lei wrote:
> On Thu, Jul 25, 2019 at 04:35:30PM +0800, Bob Liu wrote:
> > On 7/25/19 4:26 PM, Ming Lei wrote:
> > > Spread queues among present CPUs first, then building the mapping
> > > on other non-present CPUs.
> > > 
> > > So we can minimize count of dead queues which are mapped by un-present
> > > CPUs only. Then bad IO performance can be avoided by this unbalanced
> > > mapping between CPUs and queues.
> > > 
> > > The similar policy has been applied on Managed IRQ affinity.
> > > 
> > > Reported-by: Yi Zhang <yi.zhang@redhat.com>
> > > Cc: Yi Zhang <yi.zhang@redhat.com>
> > > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > > ---
> > >  block/blk-mq-cpumap.c | 34 +++++++++++++++++++++++-----------
> > >  1 file changed, 23 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
> > > index f945621a0e8f..e217f3404dc7 100644
> > > --- a/block/blk-mq-cpumap.c
> > > +++ b/block/blk-mq-cpumap.c
> > > @@ -15,10 +15,9 @@
> > >  #include "blk.h"
> > >  #include "blk-mq.h"
> > >  
> > > -static int cpu_to_queue_index(struct blk_mq_queue_map *qmap,
> > > -			      unsigned int nr_queues, const int cpu)
> > > +static int queue_index(struct blk_mq_queue_map *qmap, const int q)
> > >  {
> > > -	return qmap->queue_offset + (cpu % nr_queues);
> > > +	return qmap->queue_offset + q;
> > >  }
> > >  
> > >  static int get_first_sibling(unsigned int cpu)
> > > @@ -36,23 +35,36 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap)
> > >  {
> > >  	unsigned int *map = qmap->mq_map;
> > >  	unsigned int nr_queues = qmap->nr_queues;
> > > -	unsigned int cpu, first_sibling;
> > > +	unsigned int cpu, first_sibling, q = 0;
> > > +
> > > +	for_each_possible_cpu(cpu)
> > > +		map[cpu] = -1;
> > > +
> > > +	/*
> > > +	 * Spread queues among present CPUs first for minimizing
> > > +	 * count of dead queues which are mapped by all un-present CPUs
> > > +	 */
> > > +	for_each_present_cpu(cpu) {
> > > +		if (q >= nr_queues)
> > > +			break;
> > > +		map[cpu] = queue_index(qmap, q++);
> > > +	}
> > >  
> > >  	for_each_possible_cpu(cpu) {
> > > +		if (map[cpu] != -1)
> > > +			continue;
> > >  		/*
> > >  		 * First do sequential mapping between CPUs and queues.
> > >  		 * In case we still have CPUs to map, and we have some number of
> > >  		 * threads per cores then map sibling threads to the same queue
> > >  		 * for performance optimizations.
> > >  		 */
> > > -		if (cpu < nr_queues) {
> > > -			map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
> > 
> > Why not keep this similarly? 
> 
> Because the sequential mapping has been done already among present CPUs.
> 
> > 
> > > +		first_sibling = get_first_sibling(cpu);
> > > +		if (first_sibling == cpu) {
> > > +			map[cpu] = queue_index(qmap, q);
> > > +			q = (q + 1) % nr_queues;
> > >  		} else {
> > > -			first_sibling = get_first_sibling(cpu);
> > > -			if (first_sibling == cpu)
> > > -				map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
> > > -			else
> > > -				map[cpu] = map[first_sibling];
> > > +			map[cpu] = map[first_sibling];
> > 
> > Then no need to share queue if nr_queues is enough for all possible cpu.
> 
> I am not sure I follow your idea. There isn't 'enough' stuff wrt.
> nr_queues, which is just usually <= nr_queues.

The 2nd 'nr_queues' should have been 'nr_cpu_ids'.


thanks,
Ming

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-07-25  9:20 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-25  7:56 [PATCH] blk-mq: balance mapping between CPUs and queues Ming Lei
2019-07-25  8:35 ` Bob Liu
2019-07-25  9:18   ` [PATCH] " Ming Lei
2019-07-25  9:20     ` Ming Lei

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).