linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] PCI: vmd: Fix possible >= MAX_ORDER allocation
@ 2019-10-21 11:47 Jon Derrick
  2019-10-21 11:47 ` [PATCH 1/2] Revert "x86/PCI: VMD: Eliminate index member from IRQ list" Jon Derrick
  2019-10-21 11:47 ` [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists Jon Derrick
  0 siblings, 2 replies; 4+ messages in thread
From: Jon Derrick @ 2019-10-21 11:47 UTC (permalink / raw)
  To: Lorenzo Pieralisi; +Cc: Bjorn Helgaas, Keith Busch, linux-pci, Jon Derrick

Hi Lorenzo,

This set covers a condition where, with many debug options enabled, the
allocation of the vmd irq lists can exceed the max order of the allocator.

The first patch reverts a very old optimization which is no longer valid.
The second patch adds a layer of indirection to the vmd irq lists to
significantly reduce the size per allocation.

Please consider these for v5.5

Jon Derrick (2):
  Revert "x86/PCI: VMD: Eliminate index member from IRQ list"
  PCI: vmd: Add indirection layer to vmd irq lists

 drivers/pci/controller/vmd.c | 47 ++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] Revert "x86/PCI: VMD: Eliminate index member from IRQ list"
  2019-10-21 11:47 [PATCH 0/2] PCI: vmd: Fix possible >= MAX_ORDER allocation Jon Derrick
@ 2019-10-21 11:47 ` Jon Derrick
  2019-10-21 11:47 ` [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists Jon Derrick
  1 sibling, 0 replies; 4+ messages in thread
From: Jon Derrick @ 2019-10-21 11:47 UTC (permalink / raw)
  To: Lorenzo Pieralisi; +Cc: Bjorn Helgaas, Keith Busch, linux-pci, Jon Derrick

This reverts commit b31822277abcd7c83d1c1c0af876da9ccdf3b7d6.

In b3182227, index_from_irqs() was added to calculate the irq list index
from the array of irqs, in order to shrink vmd_irq_list for performance.

Due to the embedded srcu_struct within the vmd_irq_list struct having a
varying size depending on a number of factors, the vmd_irq_list struct
no longer guarantees optimal granularity.

This patch removes this unnecessary complexity and is a prep patch for
adding another layer of indirection to the vmd irq lists.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
---
 drivers/pci/controller/vmd.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index a35d3f3..c4de95a 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -82,6 +82,7 @@ struct vmd_irq_list {
 	struct list_head	irq_list;
 	struct srcu_struct	srcu;
 	unsigned int		count;
+	unsigned int		index;
 };
 
 struct vmd_dev {
@@ -108,12 +109,6 @@ static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
 	return container_of(bus->sysdata, struct vmd_dev, sysdata);
 }
 
-static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
-					   struct vmd_irq_list *irqs)
-{
-	return irqs - vmd->irqs;
-}
-
 /*
  * Drivers managing a device in a VMD domain allocate their own IRQs as before,
  * but the MSI entry for the hardware it's driving will be programmed with a
@@ -126,11 +121,10 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 {
 	struct vmd_irq *vmdirq = data->chip_data;
 	struct vmd_irq_list *irq = vmdirq->irq;
-	struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);
 
 	msg->address_hi = MSI_ADDR_BASE_HI;
 	msg->address_lo = MSI_ADDR_BASE_LO |
-			  MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq));
+			  MSI_ADDR_DEST_ID(irq->index);
 	msg->data = 0;
 }
 
@@ -230,7 +224,7 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
 	struct msi_desc *desc = arg->desc;
 	struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
 	struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
-	unsigned int index, vector;
+	unsigned int vector;
 
 	if (!vmdirq)
 		return -ENOMEM;
@@ -238,8 +232,7 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
 	INIT_LIST_HEAD(&vmdirq->node);
 	vmdirq->irq = vmd_next_irq(vmd, desc);
 	vmdirq->virq = virq;
-	index = index_from_irqs(vmd, vmdirq->irq);
-	vector = pci_irq_vector(vmd->dev, index);
+	vector = pci_irq_vector(vmd->dev, vmdirq->irq->index);
 
 	irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
 			    handle_untracked_irq, vmd, NULL);
@@ -776,6 +769,7 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			return err;
 
 		INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
+		vmd->irqs[i].index = i;
 		err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
 				       vmd_irq, IRQF_NO_THREAD,
 				       "vmd", &vmd->irqs[i]);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists
  2019-10-21 11:47 [PATCH 0/2] PCI: vmd: Fix possible >= MAX_ORDER allocation Jon Derrick
  2019-10-21 11:47 ` [PATCH 1/2] Revert "x86/PCI: VMD: Eliminate index member from IRQ list" Jon Derrick
@ 2019-10-21 11:47 ` Jon Derrick
  2019-10-31 11:24   ` Lorenzo Pieralisi
  1 sibling, 1 reply; 4+ messages in thread
From: Jon Derrick @ 2019-10-21 11:47 UTC (permalink / raw)
  To: Lorenzo Pieralisi; +Cc: Bjorn Helgaas, Keith Busch, linux-pci, Jon Derrick

With CONFIG_MAXSMP and other debugging options enabled, the size of an
srcu_struct can grow quite large. These are embedded in the vmd_irq_list
struct, and an N=64 allocation can exceed MAX_ORDER, violating reclaim
rules.

This patch changes the irq list array into an array of pointers to irq
lists to avoid allocation failures with greater msix counts.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
---
 drivers/pci/controller/vmd.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index c4de95a..096006e 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -92,7 +92,7 @@ struct vmd_dev {
 	char __iomem		*cfgbar;
 
 	int msix_count;
-	struct vmd_irq_list	*irqs;
+	struct vmd_irq_list	**irqs;
 
 	struct pci_sysdata	sysdata;
 	struct resource		resources[3];
@@ -194,7 +194,7 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
 	unsigned long flags;
 
 	if (vmd->msix_count == 1)
-		return &vmd->irqs[0];
+		return vmd->irqs[0];
 
 	/*
 	 * White list for fast-interrupt handlers. All others will share the
@@ -204,17 +204,17 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
 	case PCI_CLASS_STORAGE_EXPRESS:
 		break;
 	default:
-		return &vmd->irqs[0];
+		return vmd->irqs[0];
 	}
 
 	raw_spin_lock_irqsave(&list_lock, flags);
 	for (i = 1; i < vmd->msix_count; i++)
-		if (vmd->irqs[i].count < vmd->irqs[best].count)
+		if (vmd->irqs[i]->count < vmd->irqs[best]->count)
 			best = i;
-	vmd->irqs[best].count++;
+	vmd->irqs[best]->count++;
 	raw_spin_unlock_irqrestore(&list_lock, flags);
 
-	return &vmd->irqs[best];
+	return vmd->irqs[best];
 }
 
 static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
@@ -764,15 +764,22 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		return -ENOMEM;
 
 	for (i = 0; i < vmd->msix_count; i++) {
-		err = init_srcu_struct(&vmd->irqs[i].srcu);
+		vmd->irqs[i] = devm_kcalloc(&dev->dev, 1, sizeof(**vmd->irqs),
+					    GFP_KERNEL);
+		if (!vmd->irqs[i])
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < vmd->msix_count; i++) {
+		err = init_srcu_struct(&vmd->irqs[i]->srcu);
 		if (err)
 			return err;
 
-		INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
-		vmd->irqs[i].index = i;
+		INIT_LIST_HEAD(&vmd->irqs[i]->irq_list);
+		vmd->irqs[i]->index = i;
 		err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
 				       vmd_irq, IRQF_NO_THREAD,
-				       "vmd", &vmd->irqs[i]);
+				       "vmd", vmd->irqs[i]);
 		if (err)
 			return err;
 	}
@@ -793,7 +800,7 @@ static void vmd_cleanup_srcu(struct vmd_dev *vmd)
 	int i;
 
 	for (i = 0; i < vmd->msix_count; i++)
-		cleanup_srcu_struct(&vmd->irqs[i].srcu);
+		cleanup_srcu_struct(&vmd->irqs[i]->srcu);
 }
 
 static void vmd_remove(struct pci_dev *dev)
@@ -817,7 +824,7 @@ static int vmd_suspend(struct device *dev)
 	int i;
 
 	for (i = 0; i < vmd->msix_count; i++)
-                devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);
+                devm_free_irq(dev, pci_irq_vector(pdev, i), vmd->irqs[i]);
 
 	pci_save_state(pdev);
 	return 0;
@@ -832,7 +839,7 @@ static int vmd_resume(struct device *dev)
 	for (i = 0; i < vmd->msix_count; i++) {
 		err = devm_request_irq(dev, pci_irq_vector(pdev, i),
 				       vmd_irq, IRQF_NO_THREAD,
-				       "vmd", &vmd->irqs[i]);
+				       "vmd", vmd->irqs[i]);
 		if (err)
 			return err;
 	}
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists
  2019-10-21 11:47 ` [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists Jon Derrick
@ 2019-10-31 11:24   ` Lorenzo Pieralisi
  0 siblings, 0 replies; 4+ messages in thread
From: Lorenzo Pieralisi @ 2019-10-31 11:24 UTC (permalink / raw)
  To: Jon Derrick; +Cc: Bjorn Helgaas, Keith Busch, linux-pci

On Mon, Oct 21, 2019 at 05:47:39AM -0600, Jon Derrick wrote:
> With CONFIG_MAXSMP and other debugging options enabled, the size of an
> srcu_struct can grow quite large. These are embedded in the vmd_irq_list
> struct, and a N=64 allocation can exceed MAX_ORDER, violating reclaim
> rules.
> 
> This patch changes the irq list array into an array of pointers to irq
> lists to avoid allocation failures with greater msix counts.
> 
> Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
> ---
>  drivers/pci/controller/vmd.c | 33 ++++++++++++++++++++-------------
>  1 file changed, 20 insertions(+), 13 deletions(-)

Hi Jon,

I think that for bisectability reasons these two patches should
be squashed together. Also, if you can provide more fine-grained details
of what we are fixing in the commit log I think that would be
beneficial.

Thanks,
Lorenzo

> diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> index c4de95a..096006e 100644
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
> @@ -92,7 +92,7 @@ struct vmd_dev {
>  	char __iomem		*cfgbar;
>  
>  	int msix_count;
> -	struct vmd_irq_list	*irqs;
> +	struct vmd_irq_list	**irqs;
>  
>  	struct pci_sysdata	sysdata;
>  	struct resource		resources[3];
> @@ -194,7 +194,7 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
>  	unsigned long flags;
>  
>  	if (vmd->msix_count == 1)
> -		return &vmd->irqs[0];
> +		return vmd->irqs[0];
>  
>  	/*
>  	 * White list for fast-interrupt handlers. All others will share the
> @@ -204,17 +204,17 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
>  	case PCI_CLASS_STORAGE_EXPRESS:
>  		break;
>  	default:
> -		return &vmd->irqs[0];
> +		return vmd->irqs[0];
>  	}
>  
>  	raw_spin_lock_irqsave(&list_lock, flags);
>  	for (i = 1; i < vmd->msix_count; i++)
> -		if (vmd->irqs[i].count < vmd->irqs[best].count)
> +		if (vmd->irqs[i]->count < vmd->irqs[best]->count)
>  			best = i;
> -	vmd->irqs[best].count++;
> +	vmd->irqs[best]->count++;
>  	raw_spin_unlock_irqrestore(&list_lock, flags);
>  
> -	return &vmd->irqs[best];
> +	return vmd->irqs[best];
>  }
>  
>  static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
> @@ -764,15 +764,22 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
>  		return -ENOMEM;
>  
>  	for (i = 0; i < vmd->msix_count; i++) {
> -		err = init_srcu_struct(&vmd->irqs[i].srcu);
> +		vmd->irqs[i] = devm_kcalloc(&dev->dev, 1, sizeof(**vmd->irqs),
> +					    GFP_KERNEL);
> +		if (!vmd->irqs[i])
> +			return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < vmd->msix_count; i++) {
> +		err = init_srcu_struct(&vmd->irqs[i]->srcu);
>  		if (err)
>  			return err;
>  
> -		INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
> -		vmd->irqs[i].index = i;
> +		INIT_LIST_HEAD(&vmd->irqs[i]->irq_list);
> +		vmd->irqs[i]->index = i;
>  		err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
>  				       vmd_irq, IRQF_NO_THREAD,
> -				       "vmd", &vmd->irqs[i]);
> +				       "vmd", vmd->irqs[i]);
>  		if (err)
>  			return err;
>  	}
> @@ -793,7 +800,7 @@ static void vmd_cleanup_srcu(struct vmd_dev *vmd)
>  	int i;
>  
>  	for (i = 0; i < vmd->msix_count; i++)
> -		cleanup_srcu_struct(&vmd->irqs[i].srcu);
> +		cleanup_srcu_struct(&vmd->irqs[i]->srcu);
>  }
>  
>  static void vmd_remove(struct pci_dev *dev)
> @@ -817,7 +824,7 @@ static int vmd_suspend(struct device *dev)
>  	int i;
>  
>  	for (i = 0; i < vmd->msix_count; i++)
> -                devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);
> +                devm_free_irq(dev, pci_irq_vector(pdev, i), vmd->irqs[i]);
>  
>  	pci_save_state(pdev);
>  	return 0;
> @@ -832,7 +839,7 @@ static int vmd_resume(struct device *dev)
>  	for (i = 0; i < vmd->msix_count; i++) {
>  		err = devm_request_irq(dev, pci_irq_vector(pdev, i),
>  				       vmd_irq, IRQF_NO_THREAD,
> -				       "vmd", &vmd->irqs[i]);
> +				       "vmd", vmd->irqs[i]);
>  		if (err)
>  			return err;
>  	}
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-10-31 11:24 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-21 11:47 [PATCH 0/2] PCI: vmd: Fix possible >= MAX_ORDER allocation Jon Derrick
2019-10-21 11:47 ` [PATCH 1/2] Revert "x86/PCI: VMD: Eliminate index member from IRQ list" Jon Derrick
2019-10-21 11:47 ` [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists Jon Derrick
2019-10-31 11:24   ` Lorenzo Pieralisi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).