All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] PCI/MSI: Export all remapped MSIs to sysfs attributes
@ 2015-09-23 23:31 Romain Bezut
  2015-10-09 22:33 ` Bjorn Helgaas
  2015-10-15 17:23 ` Bjorn Helgaas
  0 siblings, 2 replies; 6+ messages in thread
From: Romain Bezut @ 2015-09-23 23:31 UTC (permalink / raw)
  To: linux-pci, bhelgaas; +Cc: Romain Bezut

irqbalance uses these attributes to populate its internal database, which is
then used to bind the irq on the appropriate NUMA node.

On a device accepting multiple MSIs and with interrupt remapping enabled,
only the first irq entry is exported to the msi_irqs directory.
This results in irqbalance having no clue of the NUMA affinity for the extra
irqs and starting to bind them on random nodes.

This patch exports all MSI interrupts as sysfs attributes when relevant.

Signed-off-by: Romain Bezut <rbezut@gmail.com>
---
 drivers/pci/msi.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index d449714..324a164 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -475,10 +475,11 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
 	int ret = -ENOMEM;
 	int num_msi = 0;
 	int count = 0;
+	int i;
 
 	/* Determine how many msi entries we have */
 	for_each_pci_msi_entry(entry, pdev)
-		++num_msi;
+		num_msi += entry->nvec_used;
 	if (!num_msi)
 		return 0;
 
@@ -487,19 +488,21 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
 	if (!msi_attrs)
 		return -ENOMEM;
 	for_each_pci_msi_entry(entry, pdev) {
-		msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
-		if (!msi_dev_attr)
-			goto error_attrs;
-		msi_attrs[count] = &msi_dev_attr->attr;
-
-		sysfs_attr_init(&msi_dev_attr->attr);
-		msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
-						    entry->irq);
-		if (!msi_dev_attr->attr.name)
-			goto error_attrs;
-		msi_dev_attr->attr.mode = S_IRUGO;
-		msi_dev_attr->show = msi_mode_show;
-		++count;
+		for (i = 0; i < entry->nvec_used; i++) {
+			msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
+			if (!msi_dev_attr)
+				goto error_attrs;
+			msi_attrs[count] = &msi_dev_attr->attr;
+
+			sysfs_attr_init(&msi_dev_attr->attr);
+			msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
+							    entry->irq + i);
+			if (!msi_dev_attr->attr.name)
+				goto error_attrs;
+			msi_dev_attr->attr.mode = S_IRUGO;
+			msi_dev_attr->show = msi_mode_show;
+			++count;
+		}
 	}
 
 	msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
-- 
2.4.9


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI/MSI: Export all remapped MSIs to sysfs attributes
  2015-09-23 23:31 [PATCH] PCI/MSI: Export all remapped MSIs to sysfs attributes Romain Bezut
@ 2015-10-09 22:33 ` Bjorn Helgaas
  2015-10-11  9:49   ` Thomas Gleixner
  2015-10-15 17:23 ` Bjorn Helgaas
  1 sibling, 1 reply; 6+ messages in thread
From: Bjorn Helgaas @ 2015-10-09 22:33 UTC (permalink / raw)
  To: Romain Bezut
  Cc: linux-pci, bhelgaas, linux-kernel, Thomas Gleixner, Neil Horman

[+cc Neil, Thomas, linux-kernel]

Hi Romain,

On Thu, Sep 24, 2015 at 01:31:16AM +0200, Romain Bezut wrote:
> irqbalance uses these attributes to populate its internal database, which is
> then used to bind the irq on the appropriate NUMA node.
> 
> On a device accepting multiple MSIs and with interrupt remapping enabled,
> only the first irq entry is exported to msi_irqs directory.
> This results in irqbalance having no clue of the NUMA affinity for the extra
> irqs and starting to bind them on random nodes.
> 
> This patch exports all MSI interrupts as sysfs attributes when relevant.
> 
> Signed-off-by: Romain Bezut <rbezut@gmail.com>

This seems like it makes sense to me.  Do you have any bug reports
related to this problem?  If so, I'll mention them in the changelog.

Added Neil, Thomas, and linux-kernel since this is of interest to folks
beyond the usual linux-pci audience.

> ---
>  drivers/pci/msi.c | 31 +++++++++++++++++--------------
>  1 file changed, 17 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index d449714..324a164 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -475,10 +475,11 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
>  	int ret = -ENOMEM;
>  	int num_msi = 0;
>  	int count = 0;
> +	int i;
>  
>  	/* Determine how many msi entries we have */
>  	for_each_pci_msi_entry(entry, pdev)
> -		++num_msi;
> +		num_msi += entry->nvec_used;
>  	if (!num_msi)
>  		return 0;
>  
> @@ -487,19 +488,21 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
>  	if (!msi_attrs)
>  		return -ENOMEM;
>  	for_each_pci_msi_entry(entry, pdev) {
> -		msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
> -		if (!msi_dev_attr)
> -			goto error_attrs;
> -		msi_attrs[count] = &msi_dev_attr->attr;
> -
> -		sysfs_attr_init(&msi_dev_attr->attr);
> -		msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
> -						    entry->irq);
> -		if (!msi_dev_attr->attr.name)
> -			goto error_attrs;
> -		msi_dev_attr->attr.mode = S_IRUGO;
> -		msi_dev_attr->show = msi_mode_show;
> -		++count;
> +		for (i = 0; i < entry->nvec_used; i++) {
> +			msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
> +			if (!msi_dev_attr)
> +				goto error_attrs;
> +			msi_attrs[count] = &msi_dev_attr->attr;
> +
> +			sysfs_attr_init(&msi_dev_attr->attr);
> +			msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
> +							    entry->irq + i);
> +			if (!msi_dev_attr->attr.name)
> +				goto error_attrs;
> +			msi_dev_attr->attr.mode = S_IRUGO;
> +			msi_dev_attr->show = msi_mode_show;
> +			++count;
> +		}
>  	}
>  
>  	msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
> -- 
> 2.4.9
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI/MSI: Export all remapped MSIs to sysfs attributes
  2015-10-09 22:33 ` Bjorn Helgaas
@ 2015-10-11  9:49   ` Thomas Gleixner
  0 siblings, 0 replies; 6+ messages in thread
From: Thomas Gleixner @ 2015-10-11  9:49 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Romain Bezut, linux-pci, bhelgaas, linux-kernel, Neil Horman

On Fri, 9 Oct 2015, Bjorn Helgaas wrote:
> On Thu, Sep 24, 2015 at 01:31:16AM +0200, Romain Bezut wrote:
> > irqbalance uses these attributes to populate its internal database, which is
> > then used to bind the irq on the appropriate NUMA node.
> > 
> > On a device accepting multiple MSIs and with interrupt remapping enabled,
> > only the first irq entry is exported to msi_irqs directory.
> > This results in irqbalance having no clue of the NUMA affinity for the extra
> > irqs and starting to bind them on random nodes.
> > 
> > This patch exports all MSI interrupts as sysfs attributes when relevant.
> > 
> > Signed-off-by: Romain Bezut <rbezut@gmail.com>
> 
> This seems like it makes sense to me.  Do you have any bug reports
> related to this problem?  If so, I'll mention them in the changelog.
> 
> Added Neil, Thomas, and linux-kernel since this is of interest to folks
> beyond the usual linux-pci audience.

Acked-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI/MSI: Export all remapped MSIs to sysfs attributes
  2015-09-23 23:31 [PATCH] PCI/MSI: Export all remapped MSIs to sysfs attributes Romain Bezut
  2015-10-09 22:33 ` Bjorn Helgaas
@ 2015-10-15 17:23 ` Bjorn Helgaas
  2017-01-23 20:57   ` Myron Stowe
  1 sibling, 1 reply; 6+ messages in thread
From: Bjorn Helgaas @ 2015-10-15 17:23 UTC (permalink / raw)
  To: Romain Bezut; +Cc: linux-pci, bhelgaas

On Thu, Sep 24, 2015 at 01:31:16AM +0200, Romain Bezut wrote:
> irqbalance uses these attributes to populate its internal database, which is
> then used to bind the irq on the appropriate NUMA node.
> 
> On a device accepting multiple MSIs and with interrupt remapping enabled,
> only the first irq entry is exported to msi_irqs directory.
> This results in irqbalance having no clue of the NUMA affinity for the extra
> irqs and starting to bind them on random nodes.
> 
> This patch exports all MSI interrupts as sysfs attributes when relevant.
> 
> Signed-off-by: Romain Bezut <rbezut@gmail.com>

Applied with Thomas' ack to pci/msi for v4.4, thanks, Romain!
> ---
>  drivers/pci/msi.c | 31 +++++++++++++++++--------------
>  1 file changed, 17 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index d449714..324a164 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -475,10 +475,11 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
>  	int ret = -ENOMEM;
>  	int num_msi = 0;
>  	int count = 0;
> +	int i;
>  
>  	/* Determine how many msi entries we have */
>  	for_each_pci_msi_entry(entry, pdev)
> -		++num_msi;
> +		num_msi += entry->nvec_used;
>  	if (!num_msi)
>  		return 0;
>  
> @@ -487,19 +488,21 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
>  	if (!msi_attrs)
>  		return -ENOMEM;
>  	for_each_pci_msi_entry(entry, pdev) {
> -		msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
> -		if (!msi_dev_attr)
> -			goto error_attrs;
> -		msi_attrs[count] = &msi_dev_attr->attr;
> -
> -		sysfs_attr_init(&msi_dev_attr->attr);
> -		msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
> -						    entry->irq);
> -		if (!msi_dev_attr->attr.name)
> -			goto error_attrs;
> -		msi_dev_attr->attr.mode = S_IRUGO;
> -		msi_dev_attr->show = msi_mode_show;
> -		++count;
> +		for (i = 0; i < entry->nvec_used; i++) {
> +			msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
> +			if (!msi_dev_attr)
> +				goto error_attrs;
> +			msi_attrs[count] = &msi_dev_attr->attr;
> +
> +			sysfs_attr_init(&msi_dev_attr->attr);
> +			msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
> +							    entry->irq + i);
> +			if (!msi_dev_attr->attr.name)
> +				goto error_attrs;
> +			msi_dev_attr->attr.mode = S_IRUGO;
> +			msi_dev_attr->show = msi_mode_show;
> +			++count;
> +		}
>  	}
>  
>  	msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
> -- 
> 2.4.9
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI/MSI: Export all remapped MSIs to sysfs attributes
  2015-10-15 17:23 ` Bjorn Helgaas
@ 2017-01-23 20:57   ` Myron Stowe
  2017-01-23 21:49     ` Neil Horman
  0 siblings, 1 reply; 6+ messages in thread
From: Myron Stowe @ 2017-01-23 20:57 UTC (permalink / raw)
  To: Bjorn Helgaas, rbezut
  Cc: linux-pci, Bjorn Helgaas, LKML, Thomas Gleixner, Neil Horman

On Thu, Oct 15, 2015 at 11:23 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Thu, Sep 24, 2015 at 01:31:16AM +0200, Romain Bezut wrote:
>> irqbalance uses these attributes to populate its internal database, which is
>> then used to bind the irq on the appropriate NUMA node.
>>
>> On a device accepting multiple MSIs and with interrupt remapping enabled,
>> only the first irq entry is exported to msi_irqs directory.
>> This results in irqbalance having no clue of the NUMA affinity for the extra
>> irqs and starting to bind them on random nodes.
>>
>> This patch exports all MSI interrupts as sysfs attributes when relevant.
>>
>> Signed-off-by: Romain Bezut <rbezut@gmail.com>
>
> Applied with Thomas' ack to pci/msi for v4.4, thanks, Romain!

Internal testing with netperf - network performance between two
machines with 10Gb Intel NICs running 24 instances of the netperf tool
in parallel (to utilize all CPU cores) - shows a roughly 20%
performance degradation.  Bisection showed the offending commit to be
this patch: commit a86760664f4 ("PCI/MSI: Export all remapped MSIs to
sysfs attributes").

  Prior: 9.62 +-0.00 gbits/sec
  After: 7.77 +-0.17 gbits/sec

>> ---
>>  drivers/pci/msi.c | 31 +++++++++++++++++--------------
>>  1 file changed, 17 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>> index d449714..324a164 100644
>> --- a/drivers/pci/msi.c
>> +++ b/drivers/pci/msi.c
>> @@ -475,10 +475,11 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
>>       int ret = -ENOMEM;
>>       int num_msi = 0;
>>       int count = 0;
>> +     int i;
>>
>>       /* Determine how many msi entries we have */
>>       for_each_pci_msi_entry(entry, pdev)
>> -             ++num_msi;
>> +             num_msi += entry->nvec_used;
>>       if (!num_msi)
>>               return 0;
>>
>> @@ -487,19 +488,21 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
>>       if (!msi_attrs)
>>               return -ENOMEM;
>>       for_each_pci_msi_entry(entry, pdev) {
>> -             msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
>> -             if (!msi_dev_attr)
>> -                     goto error_attrs;
>> -             msi_attrs[count] = &msi_dev_attr->attr;
>> -
>> -             sysfs_attr_init(&msi_dev_attr->attr);
>> -             msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
>> -                                                 entry->irq);
>> -             if (!msi_dev_attr->attr.name)
>> -                     goto error_attrs;
>> -             msi_dev_attr->attr.mode = S_IRUGO;
>> -             msi_dev_attr->show = msi_mode_show;
>> -             ++count;
>> +             for (i = 0; i < entry->nvec_used; i++) {
>> +                     msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
>> +                     if (!msi_dev_attr)
>> +                             goto error_attrs;
>> +                     msi_attrs[count] = &msi_dev_attr->attr;
>> +
>> +                     sysfs_attr_init(&msi_dev_attr->attr);
>> +                     msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
>> +                                                         entry->irq + i);
>> +                     if (!msi_dev_attr->attr.name)
>> +                             goto error_attrs;
>> +                     msi_dev_attr->attr.mode = S_IRUGO;
>> +                     msi_dev_attr->show = msi_mode_show;
>> +                     ++count;
>> +             }
>>       }
>>
>>       msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
>> --
>> 2.4.9
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI/MSI: Export all remapped MSIs to sysfs attributes
  2017-01-23 20:57   ` Myron Stowe
@ 2017-01-23 21:49     ` Neil Horman
  0 siblings, 0 replies; 6+ messages in thread
From: Neil Horman @ 2017-01-23 21:49 UTC (permalink / raw)
  To: Myron Stowe
  Cc: Bjorn Helgaas, rbezut, linux-pci, Bjorn Helgaas, LKML, Thomas Gleixner

On Mon, Jan 23, 2017 at 01:57:28PM -0700, Myron Stowe wrote:
> On Thu, Oct 15, 2015 at 11:23 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Thu, Sep 24, 2015 at 01:31:16AM +0200, Romain Bezut wrote:
> >> irqbalance uses these attributes to populate its internal database, which is
> >> then used to bind the irq on the appropriate NUMA node.
> >>
> >> On a device accepting multiple MSIs and with interrupt remapping enabled,
> >> only the first irq entry is exported to msi_irqs directory.
> >> This results in irqbalance having no clue of the NUMA affinity for the extra
> >> irqs and starting to bind them on random nodes.
> >>
> >> This patch exports all MSI interrupts as sysfs attributes when relevant.
> >>
> >> Signed-off-by: Romain Bezut <rbezut@gmail.com>
> >
> > Applied with Thomas' ack to pci/msi for v4.4, thanks, Romain!
> 
> Internal testing with netperf - network performance between two
> machines with 10Gb Intel NICs running 24 instances of the netperf tool
> in parallel (to utilize all CPU cores) - shows a roughly 20%
> performance degradation.  Bi-section showed the offending commit to be
> this patch: commit a86760664f4 ("PCI/MSI: Export all remapped MSIs to
> sysfs attributes").
> 
>   Prior: 9.62 +-0.00 gbits/sec
>   After: 7.77 +-0.17 gbits/sec
> 


Hmm, any idea what's leading to the performance degradation?  Seems odd that
exposing that info should lead to reduced performance.  Is irqbalance placing
the interrupts on a node that is inappropriate (i.e. making the wrong decision
based on the info exposed)?

Neil
 
> >> ---
> >>  drivers/pci/msi.c | 31 +++++++++++++++++--------------
> >>  1 file changed, 17 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> >> index d449714..324a164 100644
> >> --- a/drivers/pci/msi.c
> >> +++ b/drivers/pci/msi.c
> >> @@ -475,10 +475,11 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
> >>       int ret = -ENOMEM;
> >>       int num_msi = 0;
> >>       int count = 0;
> >> +     int i;
> >>
> >>       /* Determine how many msi entries we have */
> >>       for_each_pci_msi_entry(entry, pdev)
> >> -             ++num_msi;
> >> +             num_msi += entry->nvec_used;
> >>       if (!num_msi)
> >>               return 0;
> >>
> >> @@ -487,19 +488,21 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
> >>       if (!msi_attrs)
> >>               return -ENOMEM;
> >>       for_each_pci_msi_entry(entry, pdev) {
> >> -             msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
> >> -             if (!msi_dev_attr)
> >> -                     goto error_attrs;
> >> -             msi_attrs[count] = &msi_dev_attr->attr;
> >> -
> >> -             sysfs_attr_init(&msi_dev_attr->attr);
> >> -             msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
> >> -                                                 entry->irq);
> >> -             if (!msi_dev_attr->attr.name)
> >> -                     goto error_attrs;
> >> -             msi_dev_attr->attr.mode = S_IRUGO;
> >> -             msi_dev_attr->show = msi_mode_show;
> >> -             ++count;
> >> +             for (i = 0; i < entry->nvec_used; i++) {
> >> +                     msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
> >> +                     if (!msi_dev_attr)
> >> +                             goto error_attrs;
> >> +                     msi_attrs[count] = &msi_dev_attr->attr;
> >> +
> >> +                     sysfs_attr_init(&msi_dev_attr->attr);
> >> +                     msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
> >> +                                                         entry->irq + i);
> >> +                     if (!msi_dev_attr->attr.name)
> >> +                             goto error_attrs;
> >> +                     msi_dev_attr->attr.mode = S_IRUGO;
> >> +                     msi_dev_attr->show = msi_mode_show;
> >> +                     ++count;
> >> +             }
> >>       }
> >>
> >>       msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
> >> --
> >> 2.4.9
> >>
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> >> the body of a message to majordomo@vger.kernel.org
> >> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-01-23 21:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-23 23:31 [PATCH] PCI/MSI: Export all remapped MSIs to sysfs attributes Romain Bezut
2015-10-09 22:33 ` Bjorn Helgaas
2015-10-11  9:49   ` Thomas Gleixner
2015-10-15 17:23 ` Bjorn Helgaas
2017-01-23 20:57   ` Myron Stowe
2017-01-23 21:49     ` Neil Horman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.