* [PATCH 0/2] PCI: vmd: Fix possible >= MAX_ORDER allocation
@ 2019-10-21 11:47 Jon Derrick
2019-10-21 11:47 ` [PATCH 1/2] Revert "x86/PCI: VMD: Eliminate index member from IRQ list" Jon Derrick
2019-10-21 11:47 ` [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists Jon Derrick
0 siblings, 2 replies; 4+ messages in thread
From: Jon Derrick @ 2019-10-21 11:47 UTC (permalink / raw)
To: Lorenzo Pieralisi; +Cc: Bjorn Helgaas, Keith Busch, linux-pci, Jon Derrick
Hi Lorenzo,
This set covers a condition where, with many debug options enabled, the
allocation of the vmd irq lists can exceed the max order of the allocator.
The first patch reverts a very old optimization which is no longer valid.
The second patch adds a layer of indirection to the vmd irq lists to
significantly reduce the size per allocation.
Please consider these for v5.5
Jon Derrick (2):
Revert "x86/PCI: VMD: Eliminate index member from IRQ list"
PCI: vmd: Add indirection layer to vmd irq lists
drivers/pci/controller/vmd.c | 47 ++++++++++++++++++++++----------------------
1 file changed, 24 insertions(+), 23 deletions(-)
--
1.8.3.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] Revert "x86/PCI: VMD: Eliminate index member from IRQ list"
2019-10-21 11:47 [PATCH 0/2] PCI: vmd: Fix possible >= MAX_ORDER allocation Jon Derrick
@ 2019-10-21 11:47 ` Jon Derrick
2019-10-21 11:47 ` [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists Jon Derrick
1 sibling, 0 replies; 4+ messages in thread
From: Jon Derrick @ 2019-10-21 11:47 UTC (permalink / raw)
To: Lorenzo Pieralisi; +Cc: Bjorn Helgaas, Keith Busch, linux-pci, Jon Derrick
This reverts commit b31822277abcd7c83d1c1c0af876da9ccdf3b7d6.
In b3182227, index_from_irqs() was added to calculate the irq list index
from the array of irqs, in order to shrink vmd_irq_list for performance.
Due to the embedded srcu_struct within the vmd_irq_list struct having a
varying size depending on a number of factors, the vmd_irq_list struct
no longer guarantees optimal granularity.
This patch removes this unnecessary complexity and is a prep patch for
adding another layer of indirection to the vmd irq lists.
Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
---
drivers/pci/controller/vmd.c | 16 +++++-----------
1 file changed, 5 insertions(+), 11 deletions(-)
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index a35d3f3..c4de95a 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -82,6 +82,7 @@ struct vmd_irq_list {
struct list_head irq_list;
struct srcu_struct srcu;
unsigned int count;
+ unsigned int index;
};
struct vmd_dev {
@@ -108,12 +109,6 @@ static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
return container_of(bus->sysdata, struct vmd_dev, sysdata);
}
-static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
- struct vmd_irq_list *irqs)
-{
- return irqs - vmd->irqs;
-}
-
/*
* Drivers managing a device in a VMD domain allocate their own IRQs as before,
* but the MSI entry for the hardware it's driving will be programmed with a
@@ -126,11 +121,10 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
struct vmd_irq *vmdirq = data->chip_data;
struct vmd_irq_list *irq = vmdirq->irq;
- struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);
msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo = MSI_ADDR_BASE_LO |
- MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq));
+ MSI_ADDR_DEST_ID(irq->index);
msg->data = 0;
}
@@ -230,7 +224,7 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
struct msi_desc *desc = arg->desc;
struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
- unsigned int index, vector;
+ unsigned int vector;
if (!vmdirq)
return -ENOMEM;
@@ -238,8 +232,7 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
INIT_LIST_HEAD(&vmdirq->node);
vmdirq->irq = vmd_next_irq(vmd, desc);
vmdirq->virq = virq;
- index = index_from_irqs(vmd, vmdirq->irq);
- vector = pci_irq_vector(vmd->dev, index);
+ vector = pci_irq_vector(vmd->dev, vmdirq->irq->index);
irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
handle_untracked_irq, vmd, NULL);
@@ -776,6 +769,7 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
return err;
INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
+ vmd->irqs[i].index = i;
err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
vmd_irq, IRQF_NO_THREAD,
"vmd", &vmd->irqs[i]);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists
2019-10-21 11:47 [PATCH 0/2] PCI: vmd: Fix possible >= MAX_ORDER allocation Jon Derrick
2019-10-21 11:47 ` [PATCH 1/2] Revert "x86/PCI: VMD: Eliminate index member from IRQ list" Jon Derrick
@ 2019-10-21 11:47 ` Jon Derrick
2019-10-31 11:24 ` Lorenzo Pieralisi
1 sibling, 1 reply; 4+ messages in thread
From: Jon Derrick @ 2019-10-21 11:47 UTC (permalink / raw)
To: Lorenzo Pieralisi; +Cc: Bjorn Helgaas, Keith Busch, linux-pci, Jon Derrick
With CONFIG_MAXSMP and other debugging options enabled, the size of an
srcu_struct can grow quite large. These are embedded in the vmd_irq_list
struct, and a N=64 allocation can exceed MAX_ORDER, violating reclaim
rules.
This patch changes the irq list array into an array of pointers to irq
lists to avoid allocation failures with greater msix counts.
Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
---
drivers/pci/controller/vmd.c | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index c4de95a..096006e 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -92,7 +92,7 @@ struct vmd_dev {
char __iomem *cfgbar;
int msix_count;
- struct vmd_irq_list *irqs;
+ struct vmd_irq_list **irqs;
struct pci_sysdata sysdata;
struct resource resources[3];
@@ -194,7 +194,7 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
unsigned long flags;
if (vmd->msix_count == 1)
- return &vmd->irqs[0];
+ return vmd->irqs[0];
/*
* White list for fast-interrupt handlers. All others will share the
@@ -204,17 +204,17 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
case PCI_CLASS_STORAGE_EXPRESS:
break;
default:
- return &vmd->irqs[0];
+ return vmd->irqs[0];
}
raw_spin_lock_irqsave(&list_lock, flags);
for (i = 1; i < vmd->msix_count; i++)
- if (vmd->irqs[i].count < vmd->irqs[best].count)
+ if (vmd->irqs[i]->count < vmd->irqs[best]->count)
best = i;
- vmd->irqs[best].count++;
+ vmd->irqs[best]->count++;
raw_spin_unlock_irqrestore(&list_lock, flags);
- return &vmd->irqs[best];
+ return vmd->irqs[best];
}
static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
@@ -764,15 +764,22 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
return -ENOMEM;
for (i = 0; i < vmd->msix_count; i++) {
- err = init_srcu_struct(&vmd->irqs[i].srcu);
+ vmd->irqs[i] = devm_kcalloc(&dev->dev, 1, sizeof(**vmd->irqs),
+ GFP_KERNEL);
+ if (!vmd->irqs[i])
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < vmd->msix_count; i++) {
+ err = init_srcu_struct(&vmd->irqs[i]->srcu);
if (err)
return err;
- INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
- vmd->irqs[i].index = i;
+ INIT_LIST_HEAD(&vmd->irqs[i]->irq_list);
+ vmd->irqs[i]->index = i;
err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
vmd_irq, IRQF_NO_THREAD,
- "vmd", &vmd->irqs[i]);
+ "vmd", vmd->irqs[i]);
if (err)
return err;
}
@@ -793,7 +800,7 @@ static void vmd_cleanup_srcu(struct vmd_dev *vmd)
int i;
for (i = 0; i < vmd->msix_count; i++)
- cleanup_srcu_struct(&vmd->irqs[i].srcu);
+ cleanup_srcu_struct(&vmd->irqs[i]->srcu);
}
static void vmd_remove(struct pci_dev *dev)
@@ -817,7 +824,7 @@ static int vmd_suspend(struct device *dev)
int i;
for (i = 0; i < vmd->msix_count; i++)
- devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);
+ devm_free_irq(dev, pci_irq_vector(pdev, i), vmd->irqs[i]);
pci_save_state(pdev);
return 0;
@@ -832,7 +839,7 @@ static int vmd_resume(struct device *dev)
for (i = 0; i < vmd->msix_count; i++) {
err = devm_request_irq(dev, pci_irq_vector(pdev, i),
vmd_irq, IRQF_NO_THREAD,
- "vmd", &vmd->irqs[i]);
+ "vmd", vmd->irqs[i]);
if (err)
return err;
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists
2019-10-21 11:47 ` [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists Jon Derrick
@ 2019-10-31 11:24 ` Lorenzo Pieralisi
0 siblings, 0 replies; 4+ messages in thread
From: Lorenzo Pieralisi @ 2019-10-31 11:24 UTC (permalink / raw)
To: Jon Derrick; +Cc: Bjorn Helgaas, Keith Busch, linux-pci
On Mon, Oct 21, 2019 at 05:47:39AM -0600, Jon Derrick wrote:
> With CONFIG_MAXSMP and other debugging options enabled, the size of an
> srcu_struct can grow quite large. These are embedded in the vmd_irq_list
> struct, and a N=64 allocation can exceed MAX_ORDER, violating reclaim
> rules.
>
> This patch changes the irq list array into an array of pointers to irq
> lists to avoid allocation failures with greater msix counts.
>
> Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
> ---
> drivers/pci/controller/vmd.c | 33 ++++++++++++++++++++-------------
> 1 file changed, 20 insertions(+), 13 deletions(-)
Hi Jon,
I think that for bisectability reasons these two patches should
be squashed together. Also, if you can provide more fine-grained details
of what we are fixing in the commit log, I think that would be
beneficial.
Thanks,
Lorenzo
> diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> index c4de95a..096006e 100644
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
> @@ -92,7 +92,7 @@ struct vmd_dev {
> char __iomem *cfgbar;
>
> int msix_count;
> - struct vmd_irq_list *irqs;
> + struct vmd_irq_list **irqs;
>
> struct pci_sysdata sysdata;
> struct resource resources[3];
> @@ -194,7 +194,7 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
> unsigned long flags;
>
> if (vmd->msix_count == 1)
> - return &vmd->irqs[0];
> + return vmd->irqs[0];
>
> /*
> * White list for fast-interrupt handlers. All others will share the
> @@ -204,17 +204,17 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
> case PCI_CLASS_STORAGE_EXPRESS:
> break;
> default:
> - return &vmd->irqs[0];
> + return vmd->irqs[0];
> }
>
> raw_spin_lock_irqsave(&list_lock, flags);
> for (i = 1; i < vmd->msix_count; i++)
> - if (vmd->irqs[i].count < vmd->irqs[best].count)
> + if (vmd->irqs[i]->count < vmd->irqs[best]->count)
> best = i;
> - vmd->irqs[best].count++;
> + vmd->irqs[best]->count++;
> raw_spin_unlock_irqrestore(&list_lock, flags);
>
> - return &vmd->irqs[best];
> + return vmd->irqs[best];
> }
>
> static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
> @@ -764,15 +764,22 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
> return -ENOMEM;
>
> for (i = 0; i < vmd->msix_count; i++) {
> - err = init_srcu_struct(&vmd->irqs[i].srcu);
> + vmd->irqs[i] = devm_kcalloc(&dev->dev, 1, sizeof(**vmd->irqs),
> + GFP_KERNEL);
> + if (!vmd->irqs[i])
> + return -ENOMEM;
> + }
> +
> + for (i = 0; i < vmd->msix_count; i++) {
> + err = init_srcu_struct(&vmd->irqs[i]->srcu);
> if (err)
> return err;
>
> - INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
> - vmd->irqs[i].index = i;
> + INIT_LIST_HEAD(&vmd->irqs[i]->irq_list);
> + vmd->irqs[i]->index = i;
> err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
> vmd_irq, IRQF_NO_THREAD,
> - "vmd", &vmd->irqs[i]);
> + "vmd", vmd->irqs[i]);
> if (err)
> return err;
> }
> @@ -793,7 +800,7 @@ static void vmd_cleanup_srcu(struct vmd_dev *vmd)
> int i;
>
> for (i = 0; i < vmd->msix_count; i++)
> - cleanup_srcu_struct(&vmd->irqs[i].srcu);
> + cleanup_srcu_struct(&vmd->irqs[i]->srcu);
> }
>
> static void vmd_remove(struct pci_dev *dev)
> @@ -817,7 +824,7 @@ static int vmd_suspend(struct device *dev)
> int i;
>
> for (i = 0; i < vmd->msix_count; i++)
> - devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);
> + devm_free_irq(dev, pci_irq_vector(pdev, i), vmd->irqs[i]);
>
> pci_save_state(pdev);
> return 0;
> @@ -832,7 +839,7 @@ static int vmd_resume(struct device *dev)
> for (i = 0; i < vmd->msix_count; i++) {
> err = devm_request_irq(dev, pci_irq_vector(pdev, i),
> vmd_irq, IRQF_NO_THREAD,
> - "vmd", &vmd->irqs[i]);
> + "vmd", vmd->irqs[i]);
> if (err)
> return err;
> }
> --
> 1.8.3.1
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2019-10-31 11:24 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-21 11:47 [PATCH 0/2] PCI: vmd: Fix possible >= MAX_ORDER allocation Jon Derrick
2019-10-21 11:47 ` [PATCH 1/2] Revert "x86/PCI: VMD: Eliminate index member from IRQ list" Jon Derrick
2019-10-21 11:47 ` [PATCH 2/2] PCI: vmd: Add indirection layer to vmd irq lists Jon Derrick
2019-10-31 11:24 ` Lorenzo Pieralisi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.