* [PATCHv2 0/3] nvme: NUMA locality for fabrics
@ 2018-10-26 12:57 Hannes Reinecke
  2018-10-26 12:57 ` [PATCH 1/3] nvme: NUMA locality information " Hannes Reinecke
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Hannes Reinecke @ 2018-10-26 12:57 UTC (permalink / raw)


Hi all,

here's a patchset to leverage NUMA locality information for fabric controllers.
This is the second attempt at doing so; after discussion with hch we came
to the conclusion that the manual configuration proposed in the initial
submission would only lead to more confusion and suboptimal configurations.

So here's now a version with automatic NUMA balancing, where we attempt
to split the submitting CPUs/cores evenly across the available
controllers.

With this patchset I'm seeing a performance increase from
262k IOPS to 344k IOPS, measured against a NetApp AF700.

As usual, comments and reviews are welcome.

Hannes Reinecke (3):
  nvme: NUMA locality information for fabrics
  nvme-multipath: Select paths based on NUMA locality
  nvme-multipath: automatic NUMA path balancing

 drivers/nvme/host/core.c      |  41 +++++++++++++-
 drivers/nvme/host/fc.c        |   5 +-
 drivers/nvme/host/multipath.c | 122 ++++++++++++++++++++++++++++++++++++++++--
 drivers/nvme/host/nvme.h      |   3 ++
 drivers/nvme/host/pci.c       |   2 +-
 drivers/nvme/host/rdma.c      |   6 ++-
 6 files changed, 169 insertions(+), 10 deletions(-)

-- 
2.16.4


* [PATCH 1/3] nvme: NUMA locality information for fabrics
  2018-10-26 12:57 [PATCHv2 0/3] nvme: NUMA locality for fabrics Hannes Reinecke
@ 2018-10-26 12:57 ` Hannes Reinecke
  2018-10-30 18:35   ` Sagi Grimberg
  2018-10-26 12:57 ` [PATCH 2/3] nvme-multipath: Select paths based on NUMA locality Hannes Reinecke
  2018-10-26 12:57 ` [PATCH 3/3] nvme-multipath: automatic NUMA path balancing Hannes Reinecke
  2 siblings, 1 reply; 6+ messages in thread
From: Hannes Reinecke @ 2018-10-26 12:57 UTC (permalink / raw)


From: Hannes Reinecke <hare@suse.com>

Add a new field 'node_id' to the nvme_ctrl structure to hold the
NUMA locality information of the underlying hardware.
With that we can allocate the memory structures on the same NUMA
node as the underlying hardware.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
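A minimal sketch of the pattern applied here (illustrative only; the function
names below are made up and not part of the patch): the NUMA node is taken
from the device behind the transport via dev_to_node() and then fed to
node-aware allocations and the blk-mq tag sets.

#include <linux/device.h>
#include <linux/slab.h>
#include "nvme.h"

/* Illustrative sketch only -- the real hunks follow below. */
static void example_numa_setup(struct nvme_ctrl *ctrl, struct device *hw_dev)
{
	/* dev_to_node() returns NUMA_NO_NODE if the node is unknown */
	ctrl->node_id = dev_to_node(hw_dev);
}

static void *example_node_local_alloc(struct nvme_ctrl *ctrl, size_t size)
{
	/* kzalloc_node() falls back to any node for NUMA_NO_NODE */
	return kzalloc_node(size, GFP_KERNEL, ctrl->node_id);
}
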
 drivers/nvme/host/core.c      | 4 +++-
 drivers/nvme/host/fc.c        | 5 +++--
 drivers/nvme/host/multipath.c | 4 ++--
 drivers/nvme/host/nvme.h      | 1 +
 drivers/nvme/host/rdma.c      | 5 +++--
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 65c42448e904..ad7abeb7442b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2766,6 +2766,7 @@ static ssize_t  field##_show(struct device *dev,				\
 static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
 
 nvme_show_int_function(cntlid);
+nvme_show_int_function(node_id);
 
 static ssize_t nvme_sysfs_delete(struct device *dev,
 				struct device_attribute *attr, const char *buf,
@@ -2845,6 +2846,7 @@ static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_subsysnqn.attr,
 	&dev_attr_address.attr,
 	&dev_attr_state.attr,
+	&dev_attr_node_id.attr,
 	NULL
 };
 
@@ -3055,7 +3057,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	struct gendisk *disk;
 	struct nvme_id_ns *id;
 	char disk_name[DISK_NAME_LEN];
-	int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
+	int node = ctrl->node_id, flags = GENHD_FL_EXT_DEVT;
 
 	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
 	if (!ns)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e52b9d3c0bd6..500d9d6ec7ce 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2433,7 +2433,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 	ctrl->tag_set.ops = &nvme_fc_mq_ops;
 	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
 	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
-	ctrl->tag_set.numa_node = NUMA_NO_NODE;
+	ctrl->tag_set.numa_node = ctrl->ctrl.node_id;
 	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	ctrl->tag_set.cmd_size =
 		struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
@@ -3000,6 +3000,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	ctrl->ctrl.opts = opts;
 	ctrl->ctrl.nr_reconnects = 0;
+	ctrl->ctrl.node_id = dev_to_node(lport->dev);
 	INIT_LIST_HEAD(&ctrl->ctrl_list);
 	ctrl->lport = lport;
 	ctrl->rport = rport;
@@ -3038,7 +3039,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
 	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
-	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+	ctrl->admin_tag_set.numa_node = ctrl->ctrl.node_id;
 	ctrl->admin_tag_set.cmd_size =
 		struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
 			    ctrl->lport->ops->fcprqst_priv_sz);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 5e3cc8c59a39..123292187b40 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -141,7 +141,7 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
 		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
 			continue;
 
-		distance = node_distance(node, dev_to_node(ns->ctrl->dev));
+		distance = node_distance(node, ns->ctrl->node_id);
 
 		switch (ns->ana_state) {
 		case NVME_ANA_OPTIMIZED:
@@ -276,7 +276,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
 		return 0;
 
-	q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
+	q = blk_alloc_queue_node(GFP_KERNEL, ctrl->node_id, NULL);
 	if (!q)
 		goto out;
 	q->queuedata = head;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9fefba039d1e..55347a547d84 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -153,6 +153,7 @@ struct nvme_ctrl {
 	struct request_queue *connect_q;
 	struct device *dev;
 	int instance;
+	int node_id;
 	struct blk_mq_tag_set *tagset;
 	struct blk_mq_tag_set *admin_tagset;
 	struct list_head namespaces;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d181cafedc58..31712d77b372 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -693,7 +693,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->ops = &nvme_rdma_admin_mq_ops;
 		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 		set->reserved_tags = 2; /* connect + keep-alive */
-		set->numa_node = NUMA_NO_NODE;
+		set->numa_node = nctrl->node_id;
 		set->cmd_size = sizeof(struct nvme_rdma_request) +
 			SG_CHUNK_SIZE * sizeof(struct scatterlist);
 		set->driver_data = ctrl;
@@ -706,7 +706,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->ops = &nvme_rdma_mq_ops;
 		set->queue_depth = nctrl->sqsize + 1;
 		set->reserved_tags = 1; /* fabric connect */
-		set->numa_node = NUMA_NO_NODE;
+		set->numa_node = nctrl->node_id;
 		set->flags = BLK_MQ_F_SHOULD_MERGE;
 		set->cmd_size = sizeof(struct nvme_rdma_request) +
 			SG_CHUNK_SIZE * sizeof(struct scatterlist);
@@ -762,6 +762,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		return error;
 
 	ctrl->device = ctrl->queues[0].device;
+	ctrl->ctrl.node_id = dev_to_node(ctrl->device->dev->dma_device);
 
 	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
-- 
2.16.4


* [PATCH 2/3] nvme-multipath: Select paths based on NUMA locality
  2018-10-26 12:57 [PATCHv2 0/3] nvme: NUMA locality for fabrics Hannes Reinecke
  2018-10-26 12:57 ` [PATCH 1/3] nvme: NUMA locality information " Hannes Reinecke
@ 2018-10-26 12:57 ` Hannes Reinecke
  2018-10-30 18:39   ` Sagi Grimberg
  2018-10-26 12:57 ` [PATCH 3/3] nvme-multipath: automatic NUMA path balancing Hannes Reinecke
  2 siblings, 1 reply; 6+ messages in thread
From: Hannes Reinecke @ 2018-10-26 12:57 UTC (permalink / raw)


This patch creates a per-controller map to hold the NUMA locality
information. With that we can route I/O to the controller which is
'nearest' to the issuing CPU and thereby decrease latency.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
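A minimal sketch of the idea (illustrative only; the function name is made
up, not part of the patch): each controller carries node_map[], indexed by
NUMA node and holding its distance to that node, and the path whose
controller is closest to the node issuing the I/O wins. The real
__nvme_find_path() additionally honours the ANA state and keeps a fallback.

/* Illustrative sketch only -- callers hold the RCU read lock. */
static struct nvme_ns *example_pick_path(struct nvme_ns_head *head, int node)
{
	struct nvme_ns *ns, *best = NULL;
	int best_distance = INT_MAX;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		/* precomputed distance; INT_MAX if no map was allocated */
		int distance = ns->ctrl->node_map ?
			ns->ctrl->node_map[node] : INT_MAX;

		if (distance < best_distance) {
			best_distance = distance;
			best = ns;
		}
	}
	return best;
}
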
 drivers/nvme/host/core.c      | 37 ++++++++++++++++++++++++++++++++++++-
 drivers/nvme/host/fc.c        |  2 +-
 drivers/nvme/host/multipath.c |  9 +++++++--
 drivers/nvme/host/nvme.h      |  2 ++
 drivers/nvme/host/pci.c       |  2 +-
 drivers/nvme/host/rdma.c      |  3 ++-
 6 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ad7abeb7442b..02a697c6b983 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2204,6 +2204,21 @@ static int nvme_active_ctrls(struct nvme_subsystem *subsys)
 	return count;
 }
 
+void nvme_set_ctrl_node(struct nvme_ctrl *ctrl, int node_id)
+{
+	int node;
+
+	ctrl->node_id = node_id;
+	if (node_id == NUMA_NO_NODE)
+		return;
+	ctrl->node_map = kzalloc(num_possible_nodes() * sizeof(int),
+				 GFP_KERNEL);
+	if (ctrl->node_map)
+		for_each_node(node)
+			ctrl->node_map[node] = node_distance(node, node_id);
+}
+EXPORT_SYMBOL_GPL(nvme_set_ctrl_node);
+
 static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 {
 	struct nvme_subsystem *subsys, *found;
@@ -2834,6 +2849,23 @@ static ssize_t nvme_sysfs_show_address(struct device *dev,
 }
 static DEVICE_ATTR(address, S_IRUGO, nvme_sysfs_show_address, NULL);
 
+static ssize_t nvme_sysfs_show_node_map(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	int node;
+	ssize_t offset = 0;
+
+	for_each_node(node)
+		offset += snprintf(buf + offset, PAGE_SIZE - offset,
+				   "%d ", ctrl->node_map[node]);
+	offset += snprintf(buf + offset, PAGE_SIZE - offset, "\n");
+
+	return offset;
+}
+static DEVICE_ATTR(node_map, S_IRUGO, nvme_sysfs_show_node_map, NULL);
+
 static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_reset_controller.attr,
 	&dev_attr_rescan_controller.attr,
@@ -2847,6 +2879,7 @@ static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_address.attr,
 	&dev_attr_state.attr,
 	&dev_attr_node_id.attr,
+	&dev_attr_node_map.attr,
 	NULL
 };
 
@@ -2860,7 +2893,8 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
 		return 0;
 	if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
 		return 0;
-
+	if (a == &dev_attr_node_map.attr && !ctrl->node_map)
+		return 0;
 	return a->mode;
 }
 
@@ -3507,6 +3541,7 @@ static void nvme_free_ctrl(struct device *dev)
 
 	ida_simple_remove(&nvme_instance_ida, ctrl->instance);
 	kfree(ctrl->effects);
+	kfree(ctrl->node_map);
 	nvme_mpath_uninit(ctrl);
 
 	if (subsys) {
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 500d9d6ec7ce..ce01140450c4 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3000,7 +3000,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	ctrl->ctrl.opts = opts;
 	ctrl->ctrl.nr_reconnects = 0;
-	ctrl->ctrl.node_id = dev_to_node(lport->dev);
+	nvme_set_ctrl_node(&ctrl->ctrl, dev_to_node(lport->dev));
 	INIT_LIST_HEAD(&ctrl->ctrl_list);
 	ctrl->lport = lport;
 	ctrl->rport = rport;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 123292187b40..a589a1a7b6ce 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -141,7 +141,8 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
 		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
 			continue;
 
-		distance = node_distance(node, ns->ctrl->node_id);
+		distance = ns->ctrl->node_map ?
+			ns->ctrl->node_map[node] : INT_MAX;
 
 		switch (ns->ana_state) {
 		case NVME_ANA_OPTIMIZED:
@@ -163,8 +164,11 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
 
 	if (!found)
 		found = fallback;
-	if (found)
+	if (found) {
+		dev_dbg(disk_to_dev(head->disk), "none: node %d path %s\n",
+			node, found->disk->disk_name);
 		rcu_assign_pointer(head->current_path[node], found);
+	}
 	return found;
 }
 
@@ -528,6 +532,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
 		nvme_mpath_set_live(ns);
 		mutex_unlock(&ns->head->lock);
 	}
+	nvme_mpath_clear_current_path(ns);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 55347a547d84..0027f5d41ff8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -154,6 +154,7 @@ struct nvme_ctrl {
 	struct device *dev;
 	int instance;
 	int node_id;
+	int *node_map;
 	struct blk_mq_tag_set *tagset;
 	struct blk_mq_tag_set *admin_tagset;
 	struct list_head namespaces;
@@ -437,6 +438,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
 void nvme_start_freeze(struct nvme_ctrl *ctrl);
+void nvme_set_ctrl_node(struct nvme_ctrl *ctrl, int node);
 
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4e023cd007e1..461116bd5a03 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2509,7 +2509,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		result = -ENOMEM;
 		goto release_pools;
 	}
-
+	nvme_set_ctrl_node(&dev->ctrl, node);
 	result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
 			quirks);
 	if (result)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 31712d77b372..e9d34a9305b8 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -762,7 +762,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		return error;
 
 	ctrl->device = ctrl->queues[0].device;
-	ctrl->ctrl.node_id = dev_to_node(ctrl->device->dev->dma_device);
+	nvme_set_ctrl_node(&ctrl->ctrl,
+			   dev_to_node(ctrl->device->dev->dma_device));
 
 	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
-- 
2.16.4


* [PATCH 3/3] nvme-multipath: automatic NUMA path balancing
  2018-10-26 12:57 [PATCHv2 0/3] nvme: NUMA locality for fabrics Hannes Reinecke
  2018-10-26 12:57 ` [PATCH 1/3] nvme: NUMA locality information " Hannes Reinecke
  2018-10-26 12:57 ` [PATCH 2/3] nvme-multipath: Select paths based on NUMA locality Hannes Reinecke
@ 2018-10-26 12:57 ` Hannes Reinecke
  2 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2018-10-26 12:57 UTC (permalink / raw)


In order to utilize both paths on dual-ported HBAs we cannot rely
on the NUMA affinity alone, but rather have to distribute the
locality information to get the best possible result.
This patch implements a two-pass algorithm for assigning NUMA
locality information:
1. Distribute the existing locality information so that no node has
more than one 'local' controller.
2. Assign a 'local' controller to each of the remaining nodes,
so that the overall weight (i.e. the sum of all locality information)
per controller is minimal.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
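To illustrate the two passes with hypothetical numbers (assuming the usual
LOCAL_DISTANCE/REMOTE_DISTANCE values of 10/20): take a two-node machine with
two controllers both attached to node 0. The reset step leaves both maps at
[10, 20]. Node 0 then has two 'local' entries, so the second controller gets
redistributed: node 1 has the highest summed distance (20 + 20 = 40), so that
controller's map becomes [20, 10]. The final pass finds that every node now
has exactly one 'local' controller and changes nothing, leaving node 0 I/O on
the first controller and node 1 I/O on the second.
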
 drivers/nvme/host/multipath.c | 111 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a589a1a7b6ce..9e4183401539 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -262,6 +262,115 @@ static void nvme_requeue_work(struct work_struct *work)
 	}
 }
 
+void nvme_mpath_distribute_paths(struct nvme_subsystem *subsys, int num_ctrls,
+				 struct nvme_ctrl *ctrl, int node_id)
+{
+	int node;
+	int found_node = NUMA_NO_NODE;
+	int max = LOCAL_DISTANCE * num_ctrls;
+
+	for_each_node(node) {
+		struct nvme_ctrl *c;
+		int sum = 0;
+
+		list_for_each_entry(c, &subsys->ctrls, subsys_entry)
+			sum += c->node_map[node];
+		if (sum > max) {
+			max = sum;
+			found_node = node;
+		}
+	}
+	if (found_node != NUMA_NO_NODE) {
+		ctrl->node_map[found_node] = LOCAL_DISTANCE;
+		ctrl->node_map[node_id] = REMOTE_DISTANCE;
+	}
+}
+
+void nvme_mpath_balance_node(struct nvme_subsystem *subsys,
+			     int num_ctrls, int node_id)
+{
+	struct nvme_ctrl *found = NULL, *ctrl;
+	int max = LOCAL_DISTANCE * num_ctrls, node;
+
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		int sum = 0;
+
+		for_each_node(node)
+			sum += ctrl->node_map[node];
+		if (sum > max) {
+			max = sum;
+			found = ctrl;
+		}
+	}
+	if (found)
+		found->node_map[node_id] = LOCAL_DISTANCE;
+}
+
+void nvme_mpath_balance_subsys(struct nvme_subsystem *subsys)
+{
+	struct nvme_ctrl *ctrl;
+	int num_ctrls = 0;
+	int node;
+
+	mutex_lock(&subsys->lock);
+
+	/*
+	 * 1. Reset the NUMA distance
+	 *    During creation the NUMA distance is only set
+	 *    per controller, so after connecting the other
+	 *    controllers the NUMA information on the existing
+	 *    ones is incorrect.
+	 */
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		for_each_node(node)
+			ctrl->node_map[node] =
+				node_distance(node, ctrl->node_id);
+		num_ctrls++;
+	}
+
+	/*
+	 * 2. Distribute optimal paths:
+	 *    Only one primary path per node.
+	 *    Additional primary paths are moved to unassigned nodes.
+	 */
+	for_each_node(node) {
+		bool optimal = false;
+
+		list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+			if (ctrl->node_map[node] == LOCAL_DISTANCE) {
+				if (!optimal) {
+					optimal = true;
+					continue;
+				}
+				nvme_mpath_distribute_paths(subsys, num_ctrls,
+							    ctrl, node);
+			}
+		}
+	}
+
+	/*
+	 * 3. Balance unassigned nodes:
+	 *    Each unassigned node should have one primary path;
+	 *    the primary path is assigned to the ctrl with the
+	 *    minimal weight (ie the sum of distances over all nodes)
+	 */
+	for_each_node(node) {
+		bool optimal = false;
+
+		list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+			if (ctrl->node_map[node] == LOCAL_DISTANCE) {
+				optimal = true;
+				break;
+			}
+		}
+		if (optimal)
+			continue;
+		nvme_mpath_balance_node(subsys, num_ctrls, node);
+	}
+
+	mutex_unlock(&subsys->lock);
+}
+
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 {
 	struct request_queue *q;
@@ -553,6 +662,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 {
 	int error;
 
+	nvme_mpath_balance_subsys(ctrl->subsys);
+
 	if (!nvme_ctrl_use_ana(ctrl))
 		return 0;
 
-- 
2.16.4


* [PATCH 1/3] nvme: NUMA locality information for fabrics
  2018-10-26 12:57 ` [PATCH 1/3] nvme: NUMA locality information " Hannes Reinecke
@ 2018-10-30 18:35   ` Sagi Grimberg
  0 siblings, 0 replies; 6+ messages in thread
From: Sagi Grimberg @ 2018-10-30 18:35 UTC (permalink / raw)



>   static ssize_t nvme_sysfs_delete(struct device *dev,
>   				struct device_attribute *attr, const char *buf,
> @@ -2845,6 +2846,7 @@ static struct attribute *nvme_dev_attrs[] = {
>   	&dev_attr_subsysnqn.attr,
>   	&dev_attr_address.attr,
>   	&dev_attr_state.attr,
> +	&dev_attr_node_id.attr,

Maybe call it numa_node to align with other devices' sysfs attributes?

Otherwise looks fine,

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>


* [PATCH 2/3] nvme-multipath: Select paths based on NUMA locality
  2018-10-26 12:57 ` [PATCH 2/3] nvme-multipath: Select paths based on NUMA locality Hannes Reinecke
@ 2018-10-30 18:39   ` Sagi Grimberg
  0 siblings, 0 replies; 6+ messages in thread
From: Sagi Grimberg @ 2018-10-30 18:39 UTC (permalink / raw)



> @@ -163,8 +164,11 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
>   
>   	if (!found)
>   		found = fallback;
> -	if (found)
> +	if (found) {
> +		dev_dbg(disk_to_dev(head->disk), "none: node %d path %s\n",
> +			node, found->disk->disk_name);

none?

>   		rcu_assign_pointer(head->current_path[node], found);
> +	}
>   	return found;
>   }
>   
> @@ -528,6 +532,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
>   		nvme_mpath_set_live(ns);
>   		mutex_unlock(&ns->head->lock);
>   	}
> +	nvme_mpath_clear_current_path(ns);

Didn't I see this in a different patch from keith?

Please document why this is added.

