linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node
@ 2017-02-01 17:53 Shaohua Li
  2017-02-01 17:53 ` [PATCH V2 2/3] PCI: add an API to get node from vector Shaohua Li
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Shaohua Li @ 2017-02-01 17:53 UTC (permalink / raw)
  To: linux-kernel, linux-block; +Cc: bhelgaas, hch, axboe

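The blk_mq_tags and requests of a given hardware queue are mostly used by
specific CPUs, which might not be in the same NUMA node as the disk. For
example, if an NVMe card is in node 0, half of the hardware queues will be
used by node 0 and the other half by node 1. Allocate the tags and requests
on the node that the hardware queue maps to, falling back to
set->numa_node when no node is found.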

Signed-off-by: Shaohua Li <shli@fb.com>
---
 block/blk-mq.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 48df5fd..888077c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1666,16 +1666,20 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 					unsigned int reserved_tags)
 {
 	struct blk_mq_tags *tags;
+	int node;
+
+	node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+	if (node == NUMA_NO_NODE)
+		node = set->numa_node;
 
-	tags = blk_mq_init_tags(nr_tags, reserved_tags,
-				set->numa_node,
+	tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
 				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
 	tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *),
 				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-				 set->numa_node);
+				 node);
 	if (!tags->rqs) {
 		blk_mq_free_tags(tags);
 		return NULL;
@@ -1683,7 +1687,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 
 	tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *),
 				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-				 set->numa_node);
+				 node);
 	if (!tags->static_rqs) {
 		kfree(tags->rqs);
 		blk_mq_free_tags(tags);
@@ -1703,6 +1707,11 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 {
 	unsigned int i, j, entries_per_page, max_order = 4;
 	size_t rq_size, left;
+	int node;
+
+	node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+	if (node == NUMA_NO_NODE)
+		node = set->numa_node;
 
 	INIT_LIST_HEAD(&tags->page_list);
 
@@ -1724,7 +1733,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 			this_order--;
 
 		do {
-			page = alloc_pages_node(set->numa_node,
+			page = alloc_pages_node(node,
 				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
 				this_order);
 			if (page)
@@ -1757,7 +1766,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 			if (set->ops->init_request) {
 				if (set->ops->init_request(set->driver_data,
 						rq, hctx_idx, i,
-						set->numa_node)) {
+						node)) {
 					tags->static_rqs[i] = NULL;
 					goto fail;
 				}
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH V2 2/3] PCI: add an API to get node from vector
  2017-02-01 17:53 [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node Shaohua Li
@ 2017-02-01 17:53 ` Shaohua Li
  2017-02-01 18:11   ` Christoph Hellwig
  2017-02-24 22:29   ` Bjorn Helgaas
  2017-02-01 17:53 ` [PATCH V2 3/3] nvme: allocate nvme_queue in correct node Shaohua Li
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 11+ messages in thread
From: Shaohua Li @ 2017-02-01 17:53 UTC (permalink / raw)
  To: linux-kernel, linux-block; +Cc: bhelgaas, hch, axboe

The next patch will use this API to get the NUMA node of an interrupt
vector for the NVMe device.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/pci/msi.c   | 16 ++++++++++++++++
 include/linux/pci.h |  6 ++++++
 2 files changed, 22 insertions(+)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 50c5003..ab7aee7 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1313,6 +1313,22 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
 }
 EXPORT_SYMBOL(pci_irq_get_affinity);
 
+/**
+ * pci_irq_get_node - return the numa node of a particular msi vector
+ * @pdev:	PCI device to operate on
+ * @vec:	device-relative interrupt vector index (0-based).
+ */
+int pci_irq_get_node(struct pci_dev *pdev, int vec)
+{
+	const struct cpumask *mask;
+
+	mask = pci_irq_get_affinity(pdev, vec);
+	if (mask)
+		return local_memory_node(cpu_to_node(cpumask_first(mask)));
+	return dev_to_node(&pdev->dev);
+}
+EXPORT_SYMBOL(pci_irq_get_node);
+
 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
 {
 	return to_pci_dev(desc->dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e2d1a12..df2c649 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1334,6 +1334,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 void pci_free_irq_vectors(struct pci_dev *dev);
 int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
 const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
+int pci_irq_get_node(struct pci_dev *pdev, int vec);
 
 #else
 static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
@@ -1384,6 +1385,11 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
 {
 	return cpu_possible_mask;
 }
+
+static inline int pci_irq_get_node(struct pci_dev *pdev, int vec)
+{
+	return first_online_node;
+}
 #endif
 
 static inline int
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH V2 3/3] nvme: allocate nvme_queue in correct node
  2017-02-01 17:53 [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node Shaohua Li
  2017-02-01 17:53 ` [PATCH V2 2/3] PCI: add an API to get node from vector Shaohua Li
@ 2017-02-01 17:53 ` Shaohua Li
  2017-02-01 18:14   ` Christoph Hellwig
  2017-02-01 18:11 ` [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests " Christoph Hellwig
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Shaohua Li @ 2017-02-01 17:53 UTC (permalink / raw)
  To: linux-kernel, linux-block; +Cc: bhelgaas, hch, axboe

nvme_queue is (mostly) a per-CPU queue. Allocate it on the node where
blk-mq will use it.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/nvme/host/pci.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 032237c..9733008 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1040,9 +1040,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
-							int depth)
+							int depth, int node)
 {
-	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
+	struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
+							node);
 	if (!nvmeq)
 		return NULL;
 
@@ -1219,7 +1220,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
 	nvmeq = dev->queues[0];
 	if (!nvmeq) {
-		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
+		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+					dev_to_node(dev->dev));
 		if (!nvmeq)
 			return -ENOMEM;
 	}
@@ -1311,7 +1313,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 	int ret = 0;
 
 	for (i = dev->queue_count; i <= dev->max_qid; i++) {
-		if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+		/* vector == qid - 1, match nvme_create_queue */
+		if (!nvme_alloc_queue(dev, i, dev->q_depth,
+		     pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
 			ret = -ENOMEM;
 			break;
 		}
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node
  2017-02-01 17:53 [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node Shaohua Li
  2017-02-01 17:53 ` [PATCH V2 2/3] PCI: add an API to get node from vector Shaohua Li
  2017-02-01 17:53 ` [PATCH V2 3/3] nvme: allocate nvme_queue in correct node Shaohua Li
@ 2017-02-01 18:11 ` Christoph Hellwig
  2017-02-01 19:09 ` Jens Axboe
  2017-02-25  2:54 ` Jens Axboe
  4 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2017-02-01 18:11 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, linux-block, bhelgaas, hch, axboe

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2 2/3] PCI: add an API to get node from vector
  2017-02-01 17:53 ` [PATCH V2 2/3] PCI: add an API to get node from vector Shaohua Li
@ 2017-02-01 18:11   ` Christoph Hellwig
  2017-02-24 22:29   ` Bjorn Helgaas
  1 sibling, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2017-02-01 18:11 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, linux-block, bhelgaas, hch, axboe

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2 3/3] nvme: allocate nvme_queue in correct node
  2017-02-01 17:53 ` [PATCH V2 3/3] nvme: allocate nvme_queue in correct node Shaohua Li
@ 2017-02-01 18:14   ` Christoph Hellwig
  0 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2017-02-01 18:14 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, linux-block, bhelgaas, hch, axboe

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node
  2017-02-01 17:53 [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node Shaohua Li
                   ` (2 preceding siblings ...)
  2017-02-01 18:11 ` [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests " Christoph Hellwig
@ 2017-02-01 19:09 ` Jens Axboe
  2017-02-24 22:23   ` Jens Axboe
  2017-02-25  2:54 ` Jens Axboe
  4 siblings, 1 reply; 11+ messages in thread
From: Jens Axboe @ 2017-02-01 19:09 UTC (permalink / raw)
  To: Shaohua Li, linux-kernel, linux-block; +Cc: bhelgaas, hch

On 02/01/2017 09:53 AM, Shaohua Li wrote:
> blk_mq_tags/requests of specific hardware queue are mostly used in
> specific cpus, which might not be in the same numa node as disk. For
> example, a nvme card is in node 0. half hardware queue will be used by
> node 0, the other node 1.

All three patches look good to me. Bjorn, to avoid complications, if
you can review/ack patch #2, then I will queue it up through the block
tree for 4.11.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node
  2017-02-01 19:09 ` Jens Axboe
@ 2017-02-24 22:23   ` Jens Axboe
  0 siblings, 0 replies; 11+ messages in thread
From: Jens Axboe @ 2017-02-24 22:23 UTC (permalink / raw)
  To: Shaohua Li, linux-kernel, linux-block; +Cc: bhelgaas, hch

On 02/01/2017 12:09 PM, Jens Axboe wrote:
> On 02/01/2017 09:53 AM, Shaohua Li wrote:
>> blk_mq_tags/requests of specific hardware queue are mostly used in
>> specific cpus, which might not be in the same numa node as disk. For
>> example, a nvme card is in node 0. half hardware queue will be used by
>> node 0, the other node 1.
> 
> All three patches look good to me. Bjorn, to avoid complications, if
> you can review/ack patch #2, then I will queue it up through the block
> tree for 4.11.

Bjorn, ping. You were CC'ed on the original patch three weeks ago.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2 2/3] PCI: add an API to get node from vector
  2017-02-01 17:53 ` [PATCH V2 2/3] PCI: add an API to get node from vector Shaohua Li
  2017-02-01 18:11   ` Christoph Hellwig
@ 2017-02-24 22:29   ` Bjorn Helgaas
  2017-02-24 22:34     ` Jens Axboe
  1 sibling, 1 reply; 11+ messages in thread
From: Bjorn Helgaas @ 2017-02-24 22:29 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, linux-block, bhelgaas, hch, axboe

On Wed, Feb 01, 2017 at 09:53:15AM -0800, Shaohua Li wrote:
> Next patch will use the API to get the node from vector for nvme device
> 
> Signed-off-by: Shaohua Li <shli@fb.com>

Acked-by: Bjorn Helgaas <bhelgaas@google.com>

Sorry I missed this; I normally work from the linux-pci patchwork, and
this didn't show up there because it wasn't cc'd to linux-pci.  But I
should have noticed anyway.

> ---
>  drivers/pci/msi.c   | 16 ++++++++++++++++
>  include/linux/pci.h |  6 ++++++
>  2 files changed, 22 insertions(+)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index 50c5003..ab7aee7 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -1313,6 +1313,22 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
>  }
>  EXPORT_SYMBOL(pci_irq_get_affinity);
>  
> +/**
> + * pci_irq_get_node - return the numa node of a particular msi vector
> + * @pdev:	PCI device to operate on
> + * @vec:	device-relative interrupt vector index (0-based).
> + */
> +int pci_irq_get_node(struct pci_dev *pdev, int vec)
> +{
> +	const struct cpumask *mask;
> +
> +	mask = pci_irq_get_affinity(pdev, vec);
> +	if (mask)
> +		return local_memory_node(cpu_to_node(cpumask_first(mask)));
> +	return dev_to_node(&pdev->dev);
> +}
> +EXPORT_SYMBOL(pci_irq_get_node);
> +
>  struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
>  {
>  	return to_pci_dev(desc->dev);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index e2d1a12..df2c649 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1334,6 +1334,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
>  void pci_free_irq_vectors(struct pci_dev *dev);
>  int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
>  const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
> +int pci_irq_get_node(struct pci_dev *pdev, int vec);
>  
>  #else
>  static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
> @@ -1384,6 +1385,11 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
>  {
>  	return cpu_possible_mask;
>  }
> +
> +static inline int pci_irq_get_node(struct pci_dev *pdev, int vec)
> +{
> +	return first_online_node;
> +}
>  #endif
>  
>  static inline int
> -- 
> 2.9.3
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2 2/3] PCI: add an API to get node from vector
  2017-02-24 22:29   ` Bjorn Helgaas
@ 2017-02-24 22:34     ` Jens Axboe
  0 siblings, 0 replies; 11+ messages in thread
From: Jens Axboe @ 2017-02-24 22:34 UTC (permalink / raw)
  To: Bjorn Helgaas, Shaohua Li; +Cc: linux-kernel, linux-block, bhelgaas, hch

On 02/24/2017 03:29 PM, Bjorn Helgaas wrote:
> On Wed, Feb 01, 2017 at 09:53:15AM -0800, Shaohua Li wrote:
>> Next patch will use the API to get the node from vector for nvme device
>>
>> Signed-off-by: Shaohua Li <shli@fb.com>
> 
> Acked-by: Bjorn Helgaas <bhelgaas@google.com>
> 
> Sorry I missed this; I normally work from the linux-pci patchwork, and
> this didn't show up there because it wasn't cc'd to linux-pci.  But I
> should have noticed anyway.

Thanks Bjorn!

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node
  2017-02-01 17:53 [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node Shaohua Li
                   ` (3 preceding siblings ...)
  2017-02-01 19:09 ` Jens Axboe
@ 2017-02-25  2:54 ` Jens Axboe
  4 siblings, 0 replies; 11+ messages in thread
From: Jens Axboe @ 2017-02-25  2:54 UTC (permalink / raw)
  To: Shaohua Li, linux-kernel, linux-block; +Cc: bhelgaas, hch

On 02/01/2017 10:53 AM, Shaohua Li wrote:
> blk_mq_tags/requests of specific hardware queue are mostly used in
> specific cpus, which might not be in the same numa node as disk. For
> example, a nvme card is in node 0. half hardware queue will be used by
> node 0, the other node 1.

Applied 1-3 for this series, thanks Shaohua!

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-02-25  2:54 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-01 17:53 [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests in correct node Shaohua Li
2017-02-01 17:53 ` [PATCH V2 2/3] PCI: add an API to get node from vector Shaohua Li
2017-02-01 18:11   ` Christoph Hellwig
2017-02-24 22:29   ` Bjorn Helgaas
2017-02-24 22:34     ` Jens Axboe
2017-02-01 17:53 ` [PATCH V2 3/3] nvme: allocate nvme_queue in correct node Shaohua Li
2017-02-01 18:14   ` Christoph Hellwig
2017-02-01 18:11 ` [PATCH V2 1/3] blk-mq: allocate blk_mq_tags and requests " Christoph Hellwig
2017-02-01 19:09 ` Jens Axboe
2017-02-24 22:23   ` Jens Axboe
2017-02-25  2:54 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).