genirq/msi: Activate Multi-MSI early when MSI_FLAG_ACTIVATE_EARLY is set
diff mbox series

Message ID 20210123122759.1781359-1-maz@kernel.org
State Accepted
Commit 4c457e8cb75eda91906a4f89fc39bde3f9a43922
Headers show
Series
  • genirq/msi: Activate Multi-MSI early when MSI_FLAG_ACTIVATE_EARLY is set
Related show

Commit Message

Marc Zyngier Jan. 23, 2021, 12:27 p.m. UTC
When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI),
we perform the activation of the interrupt (which in the case of
PCI results in the endpoint being programmed) as soon as the
interrupt is allocated.

But it appears that this is only done for the first vector,
introducing an inconsistent behaviour for PCI Multi-MSI.

Fix it by iterating over the number of vectors allocated to
each MSI descriptor. This is easily achieved by introducing
a new "for_each_msi_vector" iterator, together with a tiny
bit of refactoring.

Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early")
Reported-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org
---
 include/linux/msi.h |  6 ++++++
 kernel/irq/msi.c    | 44 ++++++++++++++++++++------------------------
 2 files changed, 26 insertions(+), 24 deletions(-)

Comments

Shameerali Kolothum Thodi Jan. 25, 2021, 2:39 p.m. UTC | #1
> -----Original Message-----
> From: Marc Zyngier [mailto:maz@kernel.org]
> Sent: 23 January 2021 12:28
> To: linux-kernel@vger.kernel.org
> Cc: Thomas Gleixner <tglx@linutronix.de>; Bjorn Helgaas
> <bhelgaas@google.com>; Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; stable@vger.kernel.org
> Subject: [PATCH] genirq/msi: Activate Multi-MSI early when
> MSI_FLAG_ACTIVATE_EARLY is set
> 
> When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI),
> we perform the activation of the interrupt (which in the case of
> PCI results in the endpoint being programmed) as soon as the
> interrupt is allocated.
> 
> But it appears that this is only done for the first vector,
> introducing an inconsistent behaviour for PCI Multi-MSI.
> 
> Fix it by iterating over the number of vectors allocated to
> each MSI descriptor. This is easily achieved by introducing
> a new "for_each_msi_vector" iterator, together with a tiny
> bit of refactoring.
> 
> Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early")
> Reported-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> Cc: stable@vger.kernel.org
> ---
>  include/linux/msi.h |  6 ++++++
>  kernel/irq/msi.c    | 44 ++++++++++++++++++++------------------------
>  2 files changed, 26 insertions(+), 24 deletions(-)
> 
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 360a0a7e7341..aef35fd1cf11 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -178,6 +178,12 @@ struct msi_desc {
>  	list_for_each_entry((desc), dev_to_msi_list((dev)), list)
>  #define for_each_msi_entry_safe(desc, tmp, dev)	\
>  	list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list)
> +#define for_each_msi_vector(desc, __irq, dev)				\
> +	for_each_msi_entry((desc), (dev))				\
> +		if ((desc)->irq)					\
> +			for (__irq = (desc)->irq;			\
> +			     __irq < ((desc)->irq + (desc)->nvec_used);	\
> +			     __irq++)
> 
>  #ifdef CONFIG_IRQ_MSI_IOMMU
>  static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc)
> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
> index 2c0c4d6d0f83..d924676c8781 100644
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -436,22 +436,22 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
> 
>  	can_reserve = msi_check_reservation_mode(domain, info, dev);
> 
> -	for_each_msi_entry(desc, dev) {
> -		virq = desc->irq;
> -		if (desc->nvec_used == 1)
> -			dev_dbg(dev, "irq %d for MSI\n", virq);
> -		else
> +	/*
> +	 * This flag is set by the PCI layer as we need to activate
> +	 * the MSI entries before the PCI layer enables MSI in the
> +	 * card. Otherwise the card latches a random msi message.
> +	 */
> +	if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
> +		goto skip_activate;

This changes the dev_dbg() print behaviour. Judging from commit f3b0946d629c,
the dev_dbg() below was also emitted in the !MSI_FLAG_ACTIVATE_EARLY case,
whereas with this patch it is skipped in that case. Not sure how much this
matters, though.

Thanks,
Shameer

> +
> +	for_each_msi_vector(desc, i, dev) {
> +		if (desc->irq == i) {
> +			virq = desc->irq;
>  			dev_dbg(dev, "irq [%d-%d] for MSI\n",
>  				virq, virq + desc->nvec_used - 1);
> -		/*
> -		 * This flag is set by the PCI layer as we need to activate
> -		 * the MSI entries before the PCI layer enables MSI in the
> -		 * card. Otherwise the card latches a random msi message.
> -		 */
> -		if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
> -			continue;
> +		}
> 
> -		irq_data = irq_domain_get_irq_data(domain, desc->irq);
> +		irq_data = irq_domain_get_irq_data(domain, i);
>  		if (!can_reserve) {
>  			irqd_clr_can_reserve(irq_data);
>  			if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
> @@ -462,28 +462,24 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
>  			goto cleanup;
>  	}
> 
> +skip_activate:
>  	/*
>  	 * If these interrupts use reservation mode, clear the activated bit
>  	 * so request_irq() will assign the final vector.
>  	 */
>  	if (can_reserve) {
> -		for_each_msi_entry(desc, dev) {
> -			irq_data = irq_domain_get_irq_data(domain, desc->irq);
> +		for_each_msi_vector(desc, i, dev) {
> +			irq_data = irq_domain_get_irq_data(domain, i);
>  			irqd_clr_activated(irq_data);
>  		}
>  	}
>  	return 0;
> 
>  cleanup:
> -	for_each_msi_entry(desc, dev) {
> -		struct irq_data *irqd;
> -
> -		if (desc->irq == virq)
> -			break;
> -
> -		irqd = irq_domain_get_irq_data(domain, desc->irq);
> -		if (irqd_is_activated(irqd))
> -			irq_domain_deactivate_irq(irqd);
> +	for_each_msi_vector(desc, i, dev) {
> +		irq_data = irq_domain_get_irq_data(domain, i);
> +		if (irqd_is_activated(irq_data))
> +			irq_domain_deactivate_irq(irq_data);
>  	}
>  	msi_domain_free_irqs(domain, dev);
>  	return ret;
> --
> 2.29.2
Marc Zyngier Jan. 25, 2021, 2:48 p.m. UTC | #2
On 2021-01-25 14:39, Shameerali Kolothum Thodi wrote:
>> -----Original Message-----
>> From: Marc Zyngier [mailto:maz@kernel.org]
>> Sent: 23 January 2021 12:28
>> To: linux-kernel@vger.kernel.org
>> Cc: Thomas Gleixner <tglx@linutronix.de>; Bjorn Helgaas
>> <bhelgaas@google.com>; Shameerali Kolothum Thodi
>> <shameerali.kolothum.thodi@huawei.com>; stable@vger.kernel.org
>> Subject: [PATCH] genirq/msi: Activate Multi-MSI early when
>> MSI_FLAG_ACTIVATE_EARLY is set
>> 
>> When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI),
>> we perform the activation of the interrupt (which in the case of
>> PCI results in the endpoint being programmed) as soon as the
>> interrupt is allocated.
>> 
>> But it appears that this is only done for the first vector,
>> introducing an inconsistent behaviour for PCI Multi-MSI.
>> 
>> Fix it by iterating over the number of vectors allocated to
>> each MSI descriptor. This is easily achieved by introducing
>> a new "for_each_msi_vector" iterator, together with a tiny
>> bit of refactoring.
>> 
>> Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated 
>> early")
>> Reported-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>> Cc: stable@vger.kernel.org
>> ---
>>  include/linux/msi.h |  6 ++++++
>>  kernel/irq/msi.c    | 44 ++++++++++++++++++++------------------------
>>  2 files changed, 26 insertions(+), 24 deletions(-)
>> 
>> diff --git a/include/linux/msi.h b/include/linux/msi.h
>> index 360a0a7e7341..aef35fd1cf11 100644
>> --- a/include/linux/msi.h
>> +++ b/include/linux/msi.h
>> @@ -178,6 +178,12 @@ struct msi_desc {
>>  	list_for_each_entry((desc), dev_to_msi_list((dev)), list)
>>  #define for_each_msi_entry_safe(desc, tmp, dev)	\
>>  	list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), 
>> list)
>> +#define for_each_msi_vector(desc, __irq, dev)				\
>> +	for_each_msi_entry((desc), (dev))				\
>> +		if ((desc)->irq)					\
>> +			for (__irq = (desc)->irq;			\
>> +			     __irq < ((desc)->irq + (desc)->nvec_used);	\
>> +			     __irq++)
>> 
>>  #ifdef CONFIG_IRQ_MSI_IOMMU
>>  static inline const void *msi_desc_get_iommu_cookie(struct msi_desc 
>> *desc)
>> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
>> index 2c0c4d6d0f83..d924676c8781 100644
>> --- a/kernel/irq/msi.c
>> +++ b/kernel/irq/msi.c
>> @@ -436,22 +436,22 @@ int __msi_domain_alloc_irqs(struct irq_domain
>> *domain, struct device *dev,
>> 
>>  	can_reserve = msi_check_reservation_mode(domain, info, dev);
>> 
>> -	for_each_msi_entry(desc, dev) {
>> -		virq = desc->irq;
>> -		if (desc->nvec_used == 1)
>> -			dev_dbg(dev, "irq %d for MSI\n", virq);
>> -		else
>> +	/*
>> +	 * This flag is set by the PCI layer as we need to activate
>> +	 * the MSI entries before the PCI layer enables MSI in the
>> +	 * card. Otherwise the card latches a random msi message.
>> +	 */
>> +	if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
>> +		goto skip_activate;
> 
> This will change the dbg print behavior. From the commit f3b0946d629c,
> it looks like the below dev_dbg() code was there for 
> !MSI_FLAG_ACTIVATE_EARLY
> case as well. Not sure how much this matters though.

I'm not sure this matters either. We may have relied on these statements
some 6/7 years ago, as the whole hierarchy stuff was brand new, but we
now have a much better debug infrastructure thanks to Thomas. I'd be
totally in favour of dropping it.

Thanks,

         M.
Shameerali Kolothum Thodi Jan. 26, 2021, 9:28 a.m. UTC | #3
> -----Original Message-----
> From: Marc Zyngier [mailto:maz@kernel.org]
> Sent: 25 January 2021 14:49
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: linux-kernel@vger.kernel.org; Thomas Gleixner <tglx@linutronix.de>; Bjorn
> Helgaas <bhelgaas@google.com>; stable@vger.kernel.org
> Subject: Re: [PATCH] genirq/msi: Activate Multi-MSI early when
> MSI_FLAG_ACTIVATE_EARLY is set
> 
> On 2021-01-25 14:39, Shameerali Kolothum Thodi wrote:
> >> -----Original Message-----
> >> From: Marc Zyngier [mailto:maz@kernel.org]
> >> Sent: 23 January 2021 12:28
> >> To: linux-kernel@vger.kernel.org
> >> Cc: Thomas Gleixner <tglx@linutronix.de>; Bjorn Helgaas
> >> <bhelgaas@google.com>; Shameerali Kolothum Thodi
> >> <shameerali.kolothum.thodi@huawei.com>; stable@vger.kernel.org
> >> Subject: [PATCH] genirq/msi: Activate Multi-MSI early when
> >> MSI_FLAG_ACTIVATE_EARLY is set
> >>
> >> When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI),
> >> we perform the activation of the interrupt (which in the case of
> >> PCI results in the endpoint being programmed) as soon as the
> >> interrupt is allocated.
> >>
> >> But it appears that this is only done for the first vector,
> >> introducing an inconsistent behaviour for PCI Multi-MSI.
> >>
> >> Fix it by iterating over the number of vectors allocated to
> >> each MSI descriptor. This is easily achieved by introducing
> >> a new "for_each_msi_vector" iterator, together with a tiny
> >> bit of refactoring.
> >>
> >> Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated
> >> early")
> >> Reported-by: Shameer Kolothum
> <shameerali.kolothum.thodi@huawei.com>
> >> Signed-off-by: Marc Zyngier <maz@kernel.org>
> >> Cc: stable@vger.kernel.org
> >> ---
> >>  include/linux/msi.h |  6 ++++++
> >>  kernel/irq/msi.c    | 44 ++++++++++++++++++++------------------------
> >>  2 files changed, 26 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/include/linux/msi.h b/include/linux/msi.h
> >> index 360a0a7e7341..aef35fd1cf11 100644
> >> --- a/include/linux/msi.h
> >> +++ b/include/linux/msi.h
> >> @@ -178,6 +178,12 @@ struct msi_desc {
> >>  	list_for_each_entry((desc), dev_to_msi_list((dev)), list)
> >>  #define for_each_msi_entry_safe(desc, tmp, dev)	\
> >>  	list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)),
> >> list)
> >> +#define for_each_msi_vector(desc, __irq, dev)				\
> >> +	for_each_msi_entry((desc), (dev))				\
> >> +		if ((desc)->irq)					\
> >> +			for (__irq = (desc)->irq;			\
> >> +			     __irq < ((desc)->irq + (desc)->nvec_used);	\
> >> +			     __irq++)
> >>
> >>  #ifdef CONFIG_IRQ_MSI_IOMMU
> >>  static inline const void *msi_desc_get_iommu_cookie(struct msi_desc
> >> *desc)
> >> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
> >> index 2c0c4d6d0f83..d924676c8781 100644
> >> --- a/kernel/irq/msi.c
> >> +++ b/kernel/irq/msi.c
> >> @@ -436,22 +436,22 @@ int __msi_domain_alloc_irqs(struct irq_domain
> >> *domain, struct device *dev,
> >>
> >>  	can_reserve = msi_check_reservation_mode(domain, info, dev);
> >>
> >> -	for_each_msi_entry(desc, dev) {
> >> -		virq = desc->irq;
> >> -		if (desc->nvec_used == 1)
> >> -			dev_dbg(dev, "irq %d for MSI\n", virq);
> >> -		else
> >> +	/*
> >> +	 * This flag is set by the PCI layer as we need to activate
> >> +	 * the MSI entries before the PCI layer enables MSI in the
> >> +	 * card. Otherwise the card latches a random msi message.
> >> +	 */
> >> +	if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
> >> +		goto skip_activate;
> >
> > This will change the dbg print behavior. From the commit f3b0946d629c,
> > it looks like the below dev_dbg() code was there for
> > !MSI_FLAG_ACTIVATE_EARLY
> > case as well. Not sure how much this matters though.
> 
> I'm not sure this matters either. We may have relied on these statements
> some 6/7 years ago, as the whole hierarchy stuff was brand new, but we
> now have a much better debug infrastructure thanks to Thomas. I'd be
> totally in favour of dropping it.
> 
Ok.

Tested on D06 with gicv4 enabled and Guest MSI dev works fine.

FWIW,
   Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>

Thanks,
Shameer

Patch
diff mbox series

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 360a0a7e7341..aef35fd1cf11 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -178,6 +178,12 @@  struct msi_desc {
 	list_for_each_entry((desc), dev_to_msi_list((dev)), list)
 #define for_each_msi_entry_safe(desc, tmp, dev)	\
 	list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list)
+#define for_each_msi_vector(desc, __irq, dev)				\
+	for_each_msi_entry((desc), (dev))				\
+		if ((desc)->irq)					\
+			for (__irq = (desc)->irq;			\
+			     __irq < ((desc)->irq + (desc)->nvec_used);	\
+			     __irq++)
 
 #ifdef CONFIG_IRQ_MSI_IOMMU
 static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc)
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2c0c4d6d0f83..d924676c8781 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -436,22 +436,22 @@  int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 
 	can_reserve = msi_check_reservation_mode(domain, info, dev);
 
-	for_each_msi_entry(desc, dev) {
-		virq = desc->irq;
-		if (desc->nvec_used == 1)
-			dev_dbg(dev, "irq %d for MSI\n", virq);
-		else
+	/*
+	 * This flag is set by the PCI layer as we need to activate
+	 * the MSI entries before the PCI layer enables MSI in the
+	 * card. Otherwise the card latches a random msi message.
+	 */
+	if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
+		goto skip_activate;
+
+	for_each_msi_vector(desc, i, dev) {
+		if (desc->irq == i) {
+			virq = desc->irq;
 			dev_dbg(dev, "irq [%d-%d] for MSI\n",
 				virq, virq + desc->nvec_used - 1);
-		/*
-		 * This flag is set by the PCI layer as we need to activate
-		 * the MSI entries before the PCI layer enables MSI in the
-		 * card. Otherwise the card latches a random msi message.
-		 */
-		if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
-			continue;
+		}
 
-		irq_data = irq_domain_get_irq_data(domain, desc->irq);
+		irq_data = irq_domain_get_irq_data(domain, i);
 		if (!can_reserve) {
 			irqd_clr_can_reserve(irq_data);
 			if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
@@ -462,28 +462,24 @@  int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 			goto cleanup;
 	}
 
+skip_activate:
 	/*
 	 * If these interrupts use reservation mode, clear the activated bit
 	 * so request_irq() will assign the final vector.
 	 */
 	if (can_reserve) {
-		for_each_msi_entry(desc, dev) {
-			irq_data = irq_domain_get_irq_data(domain, desc->irq);
+		for_each_msi_vector(desc, i, dev) {
+			irq_data = irq_domain_get_irq_data(domain, i);
 			irqd_clr_activated(irq_data);
 		}
 	}
 	return 0;
 
 cleanup:
-	for_each_msi_entry(desc, dev) {
-		struct irq_data *irqd;
-
-		if (desc->irq == virq)
-			break;
-
-		irqd = irq_domain_get_irq_data(domain, desc->irq);
-		if (irqd_is_activated(irqd))
-			irq_domain_deactivate_irq(irqd);
+	for_each_msi_vector(desc, i, dev) {
+		irq_data = irq_domain_get_irq_data(domain, i);
+		if (irqd_is_activated(irq_data))
+			irq_domain_deactivate_irq(irq_data);
 	}
 	msi_domain_free_irqs(domain, dev);
 	return ret;