* [dm-devel] [PATCH 1/1] dm mpath: add IO affinity path selector
@ 2020-10-23  0:27 Mike Christie
  2020-10-27 12:55 ` Mike Snitzer
  0 siblings, 1 reply; 4+ messages in thread
From: Mike Christie @ 2020-10-23  0:27 UTC (permalink / raw)
  To: dm-devel, snitzer

This patch adds a path selector that selects paths based on a
CPU-to-path mapping the user passes in and the CPU the IO is submitted
on. The primary user for this PS is a setup where the app is optimized
to use specific CPUs, so the other PSs undo the app's handiwork, and
the storage and its transport are not a bottleneck.

For these io-affinity PS setups a path's transport/interconnect
performance is not going to fluctuate much and there are no major
differences between paths, so the QL/HST smarts do not help and RR
always disrupts what the app is trying to do.

On a system with 16 cores, where you have a job per CPU:

fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=4k \
--ioengine=libaio --iodepth=128 --numjobs=16

and a dm-multipath device setup where each CPU is mapped to one path:

// When in mq mode I had to set dm_mq_nr_hw_queues=$NUM_PATHS.
// Bio mode also showed similar results.
0 16777216 multipath 0 0 1 1 io-affinity 0 16 1 8:16 1 8:32 2 8:64 4
8:48 8 8:80 10 8:96 20 8:112 40 8:128 80 8:144 100 8:160 200 8:176
400 8:192 800 8:208 1000 8:224 2000 8:240 4000 65:0 8000

we see an IOPS increase of 25%.
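
Each per-path argument above is a hex cpumask, so 1 maps CPU 0 to 8:16,
2 maps CPU 1 to 8:32, 4 maps CPU 2 to 8:64, and so on. As a minimal
sketch of loading such a table by hand (the device numbers and size here
are hypothetical):

# "1" = CPU 0, "2" = CPU 1; "c" would map CPUs 2-3 to a single path
dmsetup create mpath-ioa --table \
  "0 16777216 multipath 0 0 1 1 io-affinity 0 2 1 8:16 1 8:32 2"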

The percent increase depends on the device and interconnect. For a
slower/medium-speed path/device that can do around 180K IOPS per path
when that fio command is run against it directly, we saw a 25% increase
like the above. Slower paths/devices that could do around 90K IOPS per
path showed around a 2 - 5% increase. With something like null_blk or
scsi_debug, which can do multi-million IOPS, hacked up so each device
they export shows up as a path, we saw increases of 50% or more.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/md/Kconfig          |   9 ++
 drivers/md/Makefile         |   1 +
 drivers/md/dm-io-affinity.c | 272 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 282 insertions(+)
 create mode 100644 drivers/md/dm-io-affinity.c

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 30ba357..c82d8b6 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -463,6 +463,15 @@ config DM_MULTIPATH_HST
 
 	  If unsure, say N.
 
+config DM_MULTIPATH_IOA
+	tristate "I/O Path Selector based on CPU submission"
+	depends on DM_MULTIPATH
+	help
+	  This path selector selects the path based on the CPU the IO is
+	  executed on and the CPU-to-path mapping set up at path addition time.
+
+	  If unsure, say N.
+
 config DM_DELAY
 	tristate "I/O delaying target"
 	depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 6d3e234..4f95f33 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_MULTIPATH_QL)	+= dm-queue-length.o
 obj-$(CONFIG_DM_MULTIPATH_ST)	+= dm-service-time.o
 obj-$(CONFIG_DM_MULTIPATH_HST)	+= dm-historical-service-time.o
+obj-$(CONFIG_DM_MULTIPATH_IOA)	+= dm-io-affinity.o
 obj-$(CONFIG_DM_SWITCH)		+= dm-switch.o
 obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_PERSISTENT_DATA)	+= persistent-data/
diff --git a/drivers/md/dm-io-affinity.c b/drivers/md/dm-io-affinity.c
new file mode 100644
index 0000000..7c03ab2
--- /dev/null
+++ b/drivers/md/dm-io-affinity.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Oracle Corporation
+ *
+ * Module Author: Mike Christie
+ */
+#include "dm-path-selector.h"
+
+#include <linux/device-mapper.h>
+#include <linux/module.h>
+
+#define DM_MSG_PREFIX "multipath io-affinity"
+
+struct path_info {
+	struct dm_path *path;
+	cpumask_var_t cpumask;
+	refcount_t refcount;
+	bool failed;
+};
+
+struct selector {
+	struct path_info **path_map;
+	cpumask_var_t path_mask;
+	atomic_t map_misses;
+};
+
+static void ioa_free_path(struct selector *s, unsigned int cpu)
+{
+	struct path_info *pi = s->path_map[cpu];
+
+	if (!pi)
+		return;
+
+	if (refcount_dec_and_test(&pi->refcount)) {
+		cpumask_clear_cpu(cpu, s->path_mask);
+		free_cpumask_var(pi->cpumask);
+		kfree(pi);
+
+		s->path_map[cpu] = NULL;
+	}
+}
+
+static int ioa_add_path(struct path_selector *ps, struct dm_path *path,
+			int argc, char **argv, char **error)
+{
+	struct selector *s = ps->context;
+	struct path_info *pi = NULL;
+	unsigned int cpu;
+	int ret;
+
+	if (argc != 1) {
+		*error = "io-affinity ps: invalid number of arguments";
+		return -EINVAL;
+	}
+
+	pi = kzalloc(sizeof(*pi), GFP_KERNEL);
+	if (!pi) {
+		*error = "io-affinity ps: Error allocating path context";
+		return -ENOMEM;
+	}
+
+	pi->path = path;
+	path->pscontext = pi;
+	refcount_set(&pi->refcount, 1);
+
+	if (!zalloc_cpumask_var(&pi->cpumask, GFP_KERNEL)) {
+		*error = "io-affinity ps: Error allocating cpumask context";
+		ret = -ENOMEM;
+		goto free_pi;
+	}
+
+	ret = cpumask_parse(argv[0], pi->cpumask);
+	if (ret) {
+		*error = "io-affinity ps: invalid cpumask";
+		ret = -EINVAL;
+		goto free_mask;
+	}
+
+	for_each_cpu(cpu, pi->cpumask) {
+		if (cpu >= nr_cpu_ids) {
+			DMWARN_LIMIT("Ignoring mapping for CPU %u. Max CPU is %u",
+				     cpu, nr_cpu_ids);
+			break;
+		}
+
+		if (s->path_map[cpu]) {
+			DMWARN("CPU mapping for %u exists. Ignoring.", cpu);
+			continue;
+		}
+
+		cpumask_set_cpu(cpu, s->path_mask);
+		s->path_map[cpu] = pi;
+		refcount_inc(&pi->refcount);
+	}
+
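+	/*
+	 * Drop the initial reference from allocation. If no CPU in the mask
+	 * claimed this path above, the count drops to zero and the path add
+	 * is rejected.
+	 */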
+	if (refcount_dec_and_test(&pi->refcount)) {
+		*error = "io-affinity ps: No new/valid CPU mapping found";
+		ret = -EINVAL;
+		goto free_mask;
+	}
+
+	return 0;
+
+free_mask:
+	free_cpumask_var(pi->cpumask);
+free_pi:
+	kfree(pi);
+	return ret;
+}
+
+static int ioa_create(struct path_selector *ps, unsigned argc, char **argv)
+{
+	struct selector *s;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *),
+			      GFP_KERNEL);
+	if (!s->path_map)
+		goto free_selector;
+
+	if (!zalloc_cpumask_var(&s->path_mask, GFP_KERNEL))
+		goto free_map;
+
+	atomic_set(&s->map_misses, 0);
+	ps->context = s;
+	return 0;
+
+free_map:
+	kfree(s->path_map);
+free_selector:
+	kfree(s);
+	return -ENOMEM;
+}
+
+static void ioa_destroy(struct path_selector *ps)
+{
+	struct selector *s = ps->context;
+	unsigned cpu;
+
+	for_each_cpu(cpu, s->path_mask)
+		ioa_free_path(s, cpu);
+
+	free_cpumask_var(s->path_mask);
+	kfree(s->path_map);
+	kfree(s);
+
+	ps->context = NULL;
+}
+
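+/*
+ * STATUSTYPE_INFO reports how many times IO was submitted on a CPU that
+ * had no mapped path; STATUSTYPE_TABLE prints each path's cpumask in the
+ * same hex format that was passed in when the path was added.
+ */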
+static int ioa_status(struct path_selector *ps, struct dm_path *path,
+		      status_type_t type, char *result, unsigned int maxlen)
+{
+	struct selector *s = ps->context;
+	struct path_info *pi;
+	int sz = 0;
+
+	if (!path) {
+		DMEMIT("0 ");
+		return sz;
+	}
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		DMEMIT("%d ", atomic_read(&s->map_misses));
+		break;
+	case STATUSTYPE_TABLE:
+		pi = path->pscontext;
+		DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask));
+		break;
+	}
+
+	return sz;
+}
+
+static void ioa_fail_path(struct path_selector *ps, struct dm_path *p)
+{
+	struct path_info *pi = p->pscontext;
+
+	pi->failed = true;
+}
+
+static int ioa_reinstate_path(struct path_selector *ps, struct dm_path *p)
+{
+	struct path_info *pi = p->pscontext;
+
+	pi->failed = false;
+	return 0;
+}
+
+static struct dm_path *ioa_select_path(struct path_selector *ps,
+				       size_t nr_bytes)
+{
+	unsigned int cpu, node;
+	struct selector *s = ps->context;
+	const struct cpumask *cpumask;
+	struct path_info *pi;
+	int i;
+
+	cpu = get_cpu();
+
+	pi = s->path_map[cpu];
+	if (pi && !pi->failed)
+		goto done;
+
+	/*
+	 * Perf is not optimal here, but at least try a path mapped to a CPU
+	 * on the local NUMA node, then fall back to any usable path rather
+	 * than failing the IO.
+	 */
+	if (!pi)
+		atomic_inc(&s->map_misses);
+
+	node = cpu_to_node(cpu);
+	cpumask = cpumask_of_node(node);
+	for_each_cpu(i, cpumask) {
+		pi = s->path_map[i];
+		if (pi && !pi->failed)
+			goto done;
+	}
+
+	for_each_cpu(i, s->path_mask) {
+		pi = s->path_map[i];
+		if (pi && !pi->failed)
+			goto done;
+	}
+	pi = NULL;
+
+done:
+	put_cpu();
+	return pi ? pi->path : NULL;
+}
+
+static struct path_selector_type ioa_ps = {
+	.name		= "io-affinity",
+	.module		= THIS_MODULE,
+	.table_args	= 1,
+	.info_args	= 1,
+	.create		= ioa_create,
+	.destroy	= ioa_destroy,
+	.status		= ioa_status,
+	.add_path	= ioa_add_path,
+	.fail_path	= ioa_fail_path,
+	.reinstate_path	= ioa_reinstate_path,
+	.select_path	= ioa_select_path,
+};
+
+static int __init dm_ioa_init(void)
+{
+	int ret = dm_register_path_selector(&ioa_ps);
+
+	if (ret < 0)
+		DMERR("register failed %d", ret);
+	return ret;
+}
+
+static void __exit dm_ioa_exit(void)
+{
+	int ret = dm_unregister_path_selector(&ioa_ps);
+
+	if (ret < 0)
+		DMERR("unregister failed %d", ret);
+}
+
+module_init(dm_ioa_init);
+module_exit(dm_ioa_exit);
+
+MODULE_DESCRIPTION(DM_NAME " multipath path selector that selects paths based on the CPU the IO is submitted on");
+MODULE_AUTHOR("Mike Christie <michael.christie@oracle.com>");
+MODULE_LICENSE("GPL");
-- 
1.8.3.1


* Re: [dm-devel] [PATCH 1/1] dm mpath: add IO affinity path selector
  2020-10-23  0:27 [dm-devel] [PATCH 1/1] dm mpath: add IO affinity path selector Mike Christie
@ 2020-10-27 12:55 ` Mike Snitzer
  2020-10-27 13:34   ` Mike Snitzer
  2020-10-28 16:01   ` Mike Christie
  0 siblings, 2 replies; 4+ messages in thread
From: Mike Snitzer @ 2020-10-27 12:55 UTC (permalink / raw)
  To: Mike Christie; +Cc: dm-devel

On Thu, Oct 22 2020 at  8:27pm -0400,
Mike Christie <michael.christie@oracle.com> wrote:

> This patch adds a path selector that selects paths based on a
> CPU-to-path mapping the user passes in and the CPU the IO is submitted
> on. The primary user for this PS is a setup where the app is optimized
> to use specific CPUs, so the other PSs undo the app's handiwork, and
> the storage and its transport are not a bottleneck.
> 
> For these io-affinity PS setups a path's transport/interconnect
> performance is not going to fluctuate much and there are no major
> differences between paths, so the QL/HST smarts do not help and RR
> always disrupts what the app is trying to do.
> 
> On a system with 16 cores, where you have a job per CPU:
> 
> fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=4k \
> --ioengine=libaio --iodepth=128 --numjobs=16
> 
> and a dm-multipath device setup where each CPU is mapped to one path:
> 
> // When in mq mode I had to set dm_mq_nr_hw_queues=$NUM_PATHS.

OK, the modparam was/is a means to an end but the default of 1 is very
limiting (especially in that it becomes one-size-fits-all, which isn't
true for all dm-multipath devices in the system).

If you have any ideas for what a sane heuristic would be for
dm_mq_nr_hw_queues I'm open to suggestions.  But DM target <-> DM core
<-> early block core interface coordination is "fun". ;)

> // Bio mode also showed similar results.
> 0 16777216 multipath 0 0 1 1 io-affinity 0 16 1 8:16 1 8:32 2 8:64 4
> 8:48 8 8:80 10 8:96 20 8:112 40 8:128 80 8:144 100 8:160 200 8:176
> 400 8:192 800 8:208 1000 8:224 2000 8:240 4000 65:0 8000
> 
> we see an IOPS increase of 25%.

Great. What utility/code are you using to extract the path:cpu affinity?
Is it array specific?  Which hardware pins IO like this?

Will you, or others, be enhancing multipath-tools to allow passing such
io-affinity DM multipath tables?

> The percent increase depends on the device and interconnect. For a
> slower/medium-speed path/device that can do around 180K IOPS per path
> when that fio command is run against it directly, we saw a 25% increase
> like the above. Slower paths/devices that could do around 90K IOPS per
> path showed around a 2 - 5% increase. With something like null_blk or
> scsi_debug, which can do multi-million IOPS, hacked up so each device
> they export shows up as a path, we saw increases of 50% or more.
> 
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
>  drivers/md/Kconfig          |   9 ++
>  drivers/md/Makefile         |   1 +
>  drivers/md/dm-io-affinity.c | 272 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 282 insertions(+)
>  create mode 100644 drivers/md/dm-io-affinity.c
> 
> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
> index 30ba357..c82d8b6 100644
> --- a/drivers/md/Kconfig
> +++ b/drivers/md/Kconfig
> @@ -463,6 +463,15 @@ config DM_MULTIPATH_HST
>  
>  	  If unsure, say N.
>  
> +config DM_MULTIPATH_IOA
> +	tristate "I/O Path Selector based on CPU submission"
> +	depends on DM_MULTIPATH
> +	help
> +	  This path selector selects the path based on the CPU the IO is
> +	  executed on and the CPU-to-path mapping set up at path addition time.
> +
> +	  If unsure, say N.
> +
>  config DM_DELAY
>  	tristate "I/O delaying target"
>  	depends on BLK_DEV_DM
> diff --git a/drivers/md/Makefile b/drivers/md/Makefile
> index 6d3e234..4f95f33 100644
> --- a/drivers/md/Makefile
> +++ b/drivers/md/Makefile
> @@ -59,6 +59,7 @@ obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipath.o dm-round-robin.o
>  obj-$(CONFIG_DM_MULTIPATH_QL)	+= dm-queue-length.o
>  obj-$(CONFIG_DM_MULTIPATH_ST)	+= dm-service-time.o
>  obj-$(CONFIG_DM_MULTIPATH_HST)	+= dm-historical-service-time.o
> +obj-$(CONFIG_DM_MULTIPATH_IOA)	+= dm-io-affinity.o
>  obj-$(CONFIG_DM_SWITCH)		+= dm-switch.o
>  obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
>  obj-$(CONFIG_DM_PERSISTENT_DATA)	+= persistent-data/

Thinking about renaming all PS files to have a dm-ps prefix...

Fact that we have dm-io.c makes dm-io-affinity.c all the more confusing.

Can you rename to dm-ps-io-affinity.c and post v2?

(Code looks good, pretty simple)

Thanks,
Mike


* Re: [dm-devel] [PATCH 1/1] dm mpath: add IO affinity path selector
  2020-10-27 12:55 ` Mike Snitzer
@ 2020-10-27 13:34   ` Mike Snitzer
  2020-10-28 16:01   ` Mike Christie
  1 sibling, 0 replies; 4+ messages in thread
From: Mike Snitzer @ 2020-10-27 13:34 UTC (permalink / raw)
  To: Mike Christie; +Cc: dm-devel

On Tue, Oct 27 2020 at  8:55am -0400,
Mike Snitzer <snitzer@redhat.com> wrote:

> On Thu, Oct 22 2020 at  8:27pm -0400,
> Mike Christie <michael.christie@oracle.com> wrote:
> 
> > This patch adds a path selector that selects paths based on a
> > CPU-to-path mapping the user passes in and the CPU the IO is submitted
> > on. The primary user for this PS is a setup where the app is optimized
> > to use specific CPUs, so the other PSs undo the app's handiwork, and
> > the storage and its transport are not a bottleneck.
> > 
> > For these io-affinity PS setups a path's transport/interconnect
> > performance is not going to fluctuate much and there are no major
> > differences between paths, so the QL/HST smarts do not help and RR
> > always disrupts what the app is trying to do.
> > 
> > On a system with 16 cores, where you have a job per CPU:
> > 
> > fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=4k \
> > --ioengine=libaio --iodepth=128 --numjobs=16
> > 
> > and a dm-multipath device setup where each CPU is mapped to one path:
> > 
> > // When in mq mode I had to set dm_mq_nr_hw_queues=$NUM_PATHS.
> 
> OK, the modparam was/is a means to an end but the default of 1 is very
> limiting (especially in that it becomes one-size-fits-all, which isn't
> true for all dm-multipath devices in the system).
> 
> If you have any ideas for what a sane heuristic would be for
> dm_mq_nr_hw_queues I'm open to suggestions.  But DM target <-> DM core
> <-> early block core interface coordination is "fun". ;)
> 
> > // Bio mode also showed similar results.
> > 0 16777216 multipath 0 0 1 1 io-affinity 0 16 1 8:16 1 8:32 2 8:64 4
> > 8:48 8 8:80 10 8:96 20 8:112 40 8:128 80 8:144 100 8:160 200 8:176
> > 400 8:192 800 8:208 1000 8:224 2000 8:240 4000 65:0 8000
> > 
> > we see an IOPS increase of 25%.
> 
> Great. What utility/code are you using to extract the path:cpu affinity?
> Is it array specific?  Which hardware pins IO like this?
> 
> Will you, or others, be enhancing multipath-tools to allow passing such
> io-affinity DM multipath tables?
> 
> > The percent increase depends on the device and interconnect. For a
> > slower/medium-speed path/device that can do around 180K IOPS per path
> > when that fio command is run against it directly, we saw a 25% increase
> > like the above. Slower paths/devices that could do around 90K IOPS per
> > path showed around a 2 - 5% increase. With something like null_blk or
> > scsi_debug, which can do multi-million IOPS, hacked up so each device
> > they export shows up as a path, we saw increases of 50% or more.
> > 
> > Signed-off-by: Mike Christie <michael.christie@oracle.com>
> > ---
> >  drivers/md/Kconfig          |   9 ++
> >  drivers/md/Makefile         |   1 +
> >  drivers/md/dm-io-affinity.c | 272 ++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 282 insertions(+)
> >  create mode 100644 drivers/md/dm-io-affinity.c
> > 
> > diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
> > index 30ba357..c82d8b6 100644
> > --- a/drivers/md/Kconfig
> > +++ b/drivers/md/Kconfig
> > @@ -463,6 +463,15 @@ config DM_MULTIPATH_HST
> >  
> >  	  If unsure, say N.
> >  
> > +config DM_MULTIPATH_IOA
> > +	tristate "I/O Path Selector based on CPU submission"
> > +	depends on DM_MULTIPATH
> > +	help
> > +	  This path selector selects the path based on the CPU the IO is
> > +	  executed on and the CPU-to-path mapping set up at path addition time.
> > +
> > +	  If unsure, say N.
> > +
> >  config DM_DELAY
> >  	tristate "I/O delaying target"
> >  	depends on BLK_DEV_DM
> > diff --git a/drivers/md/Makefile b/drivers/md/Makefile
> > index 6d3e234..4f95f33 100644
> > --- a/drivers/md/Makefile
> > +++ b/drivers/md/Makefile
> > @@ -59,6 +59,7 @@ obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipath.o dm-round-robin.o
> >  obj-$(CONFIG_DM_MULTIPATH_QL)	+= dm-queue-length.o
> >  obj-$(CONFIG_DM_MULTIPATH_ST)	+= dm-service-time.o
> >  obj-$(CONFIG_DM_MULTIPATH_HST)	+= dm-historical-service-time.o
> > +obj-$(CONFIG_DM_MULTIPATH_IOA)	+= dm-io-affinity.o
> >  obj-$(CONFIG_DM_SWITCH)		+= dm-switch.o
> >  obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
> >  obj-$(CONFIG_DM_PERSISTENT_DATA)	+= persistent-data/
> 
> Thinking about renaming all PS files to have a dm-ps prefix...
> 
> Fact that we have dm-io.c makes dm-io-affinity.c all the more confusing.
> 
> Can you rename to dm-ps-io-affinity.c and post v2?

You know what, I'll take care of it when I deal with the other path
selectors.

We'll need entries like this in Makefile:

dm-io-affinity-y += dm-ps-io-affinity.o

Otherwise the module won't be named dm-io-affinity and "io-affinity" in a
DM table wouldn't autoload the kernel module.

Alternatively, MODALIAS can be used... might prefer that as it'd reduce
Makefile complexity for each path-selector.
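
For reference, assuming path selectors are still autoloaded via
request_module("dm-%s", <ps name>), a minimal sketch of that alternative
is a single line per renamed path selector, e.g. in dm-ps-io-affinity.c:

	/* keep "io-affinity" autoloading after the file/module rename */
	MODULE_ALIAS("dm-io-affinity");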

Mike


* Re: [dm-devel] [PATCH 1/1] dm mpath: add IO affinity path selector
  2020-10-27 12:55 ` Mike Snitzer
  2020-10-27 13:34   ` Mike Snitzer
@ 2020-10-28 16:01   ` Mike Christie
  1 sibling, 0 replies; 4+ messages in thread
From: Mike Christie @ 2020-10-28 16:01 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel

On 10/27/20 7:55 AM, Mike Snitzer wrote:
> On Thu, Oct 22 2020 at  8:27pm -0400,
> Mike Christie <michael.christie@oracle.com> wrote:
> 
>> This patch adds a path selector that selects paths based on a
>> CPU-to-path mapping the user passes in and the CPU the IO is submitted
>> on. The primary user for this PS is a setup where the app is optimized
>> to use specific CPUs, so the other PSs undo the app's handiwork, and
>> the storage and its transport are not a bottleneck.
>>
>> For these io-affinity PS setups a path's transport/interconnect
>> performance is not going to fluctuate much and there are no major
>> differences between paths, so the QL/HST smarts do not help and RR
>> always disrupts what the app is trying to do.
>>
>> On a system with 16 cores, where you have a job per CPU:
>>
>> fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=4k \
>> --ioengine=libaio --iodepth=128 --numjobs=16
>>
>> and a dm-multipath device setup where each CPU is mapped to one path:
>>
>> // When in mq mode I had to set dm_mq_nr_hw_queues=$NUM_PATHS.
> 
> OK, the modparam was/is a means to an end but the default of 1 is very
> limiting (especially in that it becomes one-size-fits-all, which isn't
> > true for all dm-multipath devices in the system).
> 
> If you have any ideas for what a sane heuristic would be for
> dm_mq_nr_hw_queues I'm open to suggestions.  But DM target <-> DM core
> <-> early block core interface coordination is "fun". ;)
I do not have any good ideas.


> 
>> // Bio mode also showed similar results.
>> 0 16777216 multipath 0 0 1 1 io-affinity 0 16 1 8:16 1 8:32 2 8:64 4
>> 8:48 8 8:80 10 8:96 20 8:112 40 8:128 80 8:144 100 8:160 200 8:176
>> 400 8:192 800 8:208 1000 8:224 2000 8:240 4000 65:0 8000
>>
>> we see an IOPS increase of 25%.
> 
> Great. What utility/code are you using to extract the path:cpu affinity?
> Is it array specific?  Which hardware pins IO like this?

It is not specific to an array.

We use it for iscsi. We have fast networks and arrays, but to better
utilize them you need multiple iscsi sessions (TCP connections/sockets).
So you typically set it up the way nvme/tcp does its connections/queues
by default, where that driver creates a TCP connection per CPU and maps
each connection to a hw queue/ctx. For iscsi, we set the affinity of the
session's IO xmit thread, then set up networking so the socket's IO is
routed to the same CPU. We then create N sessions and do multipath over
them.
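
As a rough sketch of that kind of pinning (the thread PID, NIC name and
rx queue below are hypothetical, and the exact xmit thread depends on
the iscsi transport):

# pin one session's xmit thread to CPU 2
taskset -pc 2 <xmit-thread-pid>
# steer that session's socket RX processing to the same CPU (mask 0x4)
echo 4 > /sys/class/net/eth0/queues/rx-0/rps_cpus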

> 
> Will you, or others, be enhancing multipath-tools to allow passing such
> io-affinity DM multipath tables?

Yeah, I am working on that.

