All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
@ 2007-03-26 23:18 Mitch Williams
  2007-03-27  0:09 ` Greg KH
  2007-03-27 15:25 ` Grant Grundler
  0 siblings, 2 replies; 14+ messages in thread
From: Mitch Williams @ 2007-03-26 23:18 UTC (permalink / raw)
  To: linux-pci; +Cc: gregkh, ebiederm, linux-kernel, akpm, auke-jan.h.kok

This patch fixes a kernel bug which is triggered when using the
irqbalance daemon with MSI-X hardware.

Because both MSI-X interrupt messages and MSI-X table writes are posted,
it's possible for them to cross while in-flight.  This results in
interrupts being received long after the kernel thinks they're disabled,
and in interrupts being sent to stale vectors after rebalancing.

This patch performs a read flush after writes to the MSI-X table for
enable/disable and rebalancing operations.  Because this is an expensive
operation, we do not perform the read flush after mask/unmask
operations.  Hardware which supports MSI-X typically also supports some
sort of interrupt moderation, so a read-flush is not necessary for
mask/unmask operations.

This patch has been validated with (unreleased) network hardware which
uses MSI-X.

Generated from 2.6.21-rc4; applies cleanly to 2.6.21-rc5.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>

diff -urpN -X dontdiff linux-2.6.21-rc4-clean/arch/i386/kernel/io_apic.c linux-2.6.21-rc4/arch/i386/kernel/io_apic.c
--- linux-2.6.21-rc4-clean/arch/i386/kernel/io_apic.c	2007-03-19 16:16:30.000000000 -0700
+++ linux-2.6.21-rc4/arch/i386/kernel/io_apic.c	2007-03-19 16:24:05.000000000 -0700
@@ -2594,6 +2594,8 @@ static void set_msi_irq_affinity(unsigne
  */
 static struct irq_chip msi_chip = {
 	.name		= "PCI-MSI",
+	.enable		= enable_msi_irq,
+	.disable	= disable_msi_irq,
 	.unmask		= unmask_msi_irq,
 	.mask		= mask_msi_irq,
 	.ack		= ack_ioapic_irq,
diff -urpN -X dontdiff linux-2.6.21-rc4-clean/arch/ia64/kernel/msi_ia64.c linux-2.6.21-rc4/arch/ia64/kernel/msi_ia64.c
--- linux-2.6.21-rc4-clean/arch/ia64/kernel/msi_ia64.c	2007-03-19 16:16:30.000000000 -0700
+++ linux-2.6.21-rc4/arch/ia64/kernel/msi_ia64.c	2007-03-19 16:24:05.000000000 -0700
@@ -121,6 +121,8 @@ static int ia64_msi_retrigger_irq(unsign
  */
 static struct irq_chip ia64_msi_chip = {
 	.name		= "PCI-MSI",
+	.enable		= enable_msi_irq,
+	.disable	= disable_msi_irq,
 	.mask		= mask_msi_irq,
 	.unmask		= unmask_msi_irq,
 	.ack		= ia64_ack_msi_irq,
diff -urpN -X dontdiff linux-2.6.21-rc4-clean/arch/ia64/sn/kernel/msi_sn.c linux-2.6.21-rc4/arch/ia64/sn/kernel/msi_sn.c
--- linux-2.6.21-rc4-clean/arch/ia64/sn/kernel/msi_sn.c	2007-03-19 16:16:30.000000000 -0700
+++ linux-2.6.21-rc4/arch/ia64/sn/kernel/msi_sn.c	2007-03-19 16:24:05.000000000 -0700
@@ -224,6 +224,8 @@ static int sn_msi_retrigger_irq(unsigned
 
 static struct irq_chip sn_msi_chip = {
 	.name		= "PCI-MSI",
+	.enable		= enable_msi_irq,
+	.disable	= disable_msi_irq,
 	.mask		= mask_msi_irq,
 	.unmask		= unmask_msi_irq,
 	.ack		= sn_ack_msi_irq,
diff -urpN -X dontdiff linux-2.6.21-rc4-clean/arch/x86_64/kernel/io_apic.c linux-2.6.21-rc4/arch/x86_64/kernel/io_apic.c
--- linux-2.6.21-rc4-clean/arch/x86_64/kernel/io_apic.c	2007-03-19 16:16:31.000000000 -0700
+++ linux-2.6.21-rc4/arch/x86_64/kernel/io_apic.c	2007-03-21 12:44:51.000000000 -0700
@@ -1942,6 +1942,7 @@ static void set_msi_irq_affinity(unsigne
 	if (cpus_empty(tmp))
 		return;
 
+	msix_flush_writes(irq);
 	if (assign_irq_vector(irq, mask))
 		return;
 
@@ -1956,6 +1957,7 @@ static void set_msi_irq_affinity(unsigne
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
+	msix_flush_writes(irq);
 	irq_desc[irq].affinity = mask;
 }
 #endif /* CONFIG_SMP */
@@ -1966,6 +1968,8 @@ static void set_msi_irq_affinity(unsigne
  */
 static struct irq_chip msi_chip = {
 	.name		= "PCI-MSI",
+	.enable		= enable_msi_irq,
+	.disable	= disable_msi_irq,
 	.unmask		= unmask_msi_irq,
 	.mask		= mask_msi_irq,
 	.ack		= ack_apic_edge,
diff -urpN -X dontdiff linux-2.6.21-rc4-clean/drivers/pci/msi.c linux-2.6.21-rc4/drivers/pci/msi.c
--- linux-2.6.21-rc4-clean/drivers/pci/msi.c	2007-03-19 16:16:32.000000000 -0700
+++ linux-2.6.21-rc4/drivers/pci/msi.c	2007-03-21 12:44:51.000000000 -0700
@@ -68,6 +68,29 @@ static void msix_set_enable(struct pci_d
 	}
 }
 
+void msix_flush_writes(unsigned int irq)
+{
+	struct msi_desc *entry;
+
+	entry = get_irq_msi(irq);
+	BUG_ON(!entry || !entry->dev);
+	switch (entry->msi_attrib.type) {
+	case PCI_CAP_ID_MSI:
+		/* nothing to do */
+		break;
+	case PCI_CAP_ID_MSIX:
+	{
+		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+		readl(entry->mask_base + offset);
+		break;
+	}
+	default:
+		BUG();
+		break;
+	}
+}
+
 static void msi_set_mask_bit(unsigned int irq, int flag)
 {
 	struct msi_desc *entry;
@@ -193,6 +216,17 @@ void unmask_msi_irq(unsigned int irq)
 	msi_set_mask_bit(irq, 0);
 }
 
+void disable_msi_irq(unsigned int irq)
+{
+	msi_set_mask_bit(irq, 1);
+	msix_flush_writes(irq);
+}
+
+void enable_msi_irq(unsigned int irq)
+{
+	msi_set_mask_bit(irq, 0);
+	msix_flush_writes(irq);
+}
 static int msi_free_irq(struct pci_dev* dev, int irq);
 
 static int msi_init(void)
diff -urpN -X dontdiff linux-2.6.21-rc4-clean/include/linux/msi.h linux-2.6.21-rc4/include/linux/msi.h
--- linux-2.6.21-rc4-clean/include/linux/msi.h	2007-03-19 16:16:34.000000000 -0700
+++ linux-2.6.21-rc4/include/linux/msi.h	2007-03-21 12:44:51.000000000 -0700
@@ -10,8 +10,11 @@ struct msi_msg {
 /* Helper functions */
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
+extern void disable_msi_irq(unsigned int irq);
+extern void enable_msi_irq(unsigned int irq);
 extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
 extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
+extern void msix_flush_writes(unsigned int irq);
 
 struct msi_desc {
 	struct {

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-26 23:18 [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table Mitch Williams
@ 2007-03-27  0:09 ` Greg KH
  2007-03-27  2:26   ` Eric W. Biederman
  2007-03-27 13:23   ` Eric W. Biederman
  2007-03-27 15:25 ` Grant Grundler
  1 sibling, 2 replies; 14+ messages in thread
From: Greg KH @ 2007-03-27  0:09 UTC (permalink / raw)
  To: Mitch Williams, Michael Ellerman
  Cc: linux-pci, gregkh, ebiederm, linux-kernel, akpm, auke-jan.h.kok

On Mon, Mar 26, 2007 at 04:18:22PM -0700, Mitch Williams wrote:
> This patch fixes a kernel bug which is triggered when using the
> irqbalance daemon with MSI-X hardware.
> 
> Because both MSI-X interrupt messages and MSI-X table writes are posted,
> it's possible for them to cross while in-flight.  This results in
> interrupts being received long after the kernel thinks they're disabled,
> and in interrupts being sent to stale vectors after rebalancing.
> 
> This patch performs a read flush after writes to the MSI-X table for
> enable/disable and rebalancing operations.  Because this is an expensive
> operation, we do not perform the read flush after mask/unmask
> operations.  Hardware which supports MSI-X typically also supports some
> sort of interrupt moderation, so a read-flush is not necessary for
> mask/unmask operations.
> 
> This patch has been validated with (unreleased) network hardware which
> uses MSI-X.

How well does this play with the MSI core changes that Michael Ellerman
has proposed on the linux-pci mailing list?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27  0:09 ` Greg KH
@ 2007-03-27  2:26   ` Eric W. Biederman
  2007-03-27 13:23   ` Eric W. Biederman
  1 sibling, 0 replies; 14+ messages in thread
From: Eric W. Biederman @ 2007-03-27  2:26 UTC (permalink / raw)
  To: Greg KH
  Cc: Mitch Williams, Michael Ellerman, linux-pci, gregkh,
	linux-kernel, akpm, auke-jan.h.kok

Greg KH <greg@kroah.com> writes:

> How well does this play with the MSI core changes that Michael Ellerman
> has proposed on the linux-pci mailing list?

The patch looks reasonable and it is independent of those changes.

This just modifies the helper code for using the msi capability itself as
an interrupt controller.   Other architectures that mask/unmask the interrupt
at an architecture specific interrupt controller  won't be affected.

Eric

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27  0:09 ` Greg KH
  2007-03-27  2:26   ` Eric W. Biederman
@ 2007-03-27 13:23   ` Eric W. Biederman
  2007-03-27 14:55     ` Grant Grundler
  1 sibling, 1 reply; 14+ messages in thread
From: Eric W. Biederman @ 2007-03-27 13:23 UTC (permalink / raw)
  To: Greg KH
  Cc: Mitch Williams, Michael Ellerman, linux-pci, gregkh,
	linux-kernel, akpm, auke-jan.h.kok

Greg KH <greg@kroah.com> writes:

> On Mon, Mar 26, 2007 at 04:18:22PM -0700, Mitch Williams wrote:
>> This patch fixes a kernel bug which is triggered when using the
>> irqbalance daemon with MSI-X hardware.
>> 
>> Because both MSI-X interrupt messages and MSI-X table writes are posted,
>> it's possible for them to cross while in-flight.  This results in
>> interrupts being received long after the kernel thinks they're disabled,
>> and in interrupts being sent to stale vectors after rebalancing.
>> 
>> This patch performs a read flush after writes to the MSI-X table for
>> enable/disable and rebalancing operations.  Because this is an expensive
>> operation, we do not perform the read flush after mask/unmask
>> operations.  Hardware which supports MSI-X typically also supports some
>> sort of interrupt moderation, so a read-flush is not necessary for
>> mask/unmask operations.
>> 
>> This patch has been validated with (unreleased) network hardware which
>> uses MSI-X.
>
> How well does this play with the MSI core changes that Michael Ellerman
> has proposed on the linux-pci mailing list?

I guess I should add that I'm not certain that the code is exactly correct
there are weird differences between enable/disable and mask.  Where generally
the mask/unmask methods do the work and enable/disable do some weird software
thing.  Having them different and enable/disable not doing some software
thing concerns me a little.  I think mask/unmask may been overoptimized
in this case.

So I expect someone will wind up refactor this code at some point.

However the code is clearly better than what we have now, and it can't
affect anything else.

Eric

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27 13:23   ` Eric W. Biederman
@ 2007-03-27 14:55     ` Grant Grundler
  2007-03-27 15:15       ` Eric W. Biederman
  0 siblings, 1 reply; 14+ messages in thread
From: Grant Grundler @ 2007-03-27 14:55 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Greg KH, Mitch Williams, Michael Ellerman, linux-pci, gregkh,
	linux-kernel, akpm, auke-jan.h.kok

On Tue, Mar 27, 2007 at 07:23:16AM -0600, Eric W. Biederman wrote:
> I guess I should add that I'm not certain that the code is exactly correct
> there are weird differences between enable/disable and mask.

My understanding was "enable" would clear (or ignore) pending interrupts
and "unmask" would deliver pending interrupts. Disable and mask could
in many implementations be the same thing as long as the enable/unmask
difference was supported.

thanks,
grnat

>   Where generally
> the mask/unmask methods do the work and enable/disable do some weird software
> thing.  Having them different and enable/disable not doing some software
> thing concerns me a little.  I think mask/unmask may been overoptimized
> in this case.
> 
> So I expect someone will wind up refactor this code at some point.
> 
> However the code is clearly better than what we have now, and it can't
> affect anything else.
> 
> Eric

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27 14:55     ` Grant Grundler
@ 2007-03-27 15:15       ` Eric W. Biederman
  0 siblings, 0 replies; 14+ messages in thread
From: Eric W. Biederman @ 2007-03-27 15:15 UTC (permalink / raw)
  To: Grant Grundler
  Cc: Greg KH, Mitch Williams, Michael Ellerman, linux-pci, gregkh,
	linux-kernel, akpm, auke-jan.h.kok

Grant Grundler <grundler@parisc-linux.org> writes:

> On Tue, Mar 27, 2007 at 07:23:16AM -0600, Eric W. Biederman wrote:
>> I guess I should add that I'm not certain that the code is exactly correct
>> there are weird differences between enable/disable and mask.
>
> My understanding was "enable" would clear (or ignore) pending interrupts
> and "unmask" would deliver pending interrupts. Disable and mask could
> in many implementations be the same thing as long as the enable/unmask
> difference was supported.

enable/disable are what are available to drivers.  When the call
enable_irq or  disable_irq.  Frequently we have code to make up
for deficiencies in the hardware irq controller implementations
here.

mask/unmask are helper functions only used by the internals of the
irq implementation, and actually are required to touch the hardware.

The problem on x86 is that an ioapic will drop interrupts that come
in while it is masked.    Thus we have to play software games not
to loose pending interrupts (i.e. leave the irq enabled until we
get a pending interrupt).

So I think you had the general drift although you had a couple of
details confused.

Eric

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-26 23:18 [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table Mitch Williams
  2007-03-27  0:09 ` Greg KH
@ 2007-03-27 15:25 ` Grant Grundler
  2007-03-27 15:31   ` Williams, Mitch A
  1 sibling, 1 reply; 14+ messages in thread
From: Grant Grundler @ 2007-03-27 15:25 UTC (permalink / raw)
  To: Mitch Williams
  Cc: linux-pci, gregkh, ebiederm, linux-kernel, akpm, auke-jan.h.kok

On Mon, Mar 26, 2007 at 04:18:22PM -0700, Mitch Williams wrote:
> This patch fixes a kernel bug which is triggered when using the
> irqbalance daemon with MSI-X hardware.
> 
> Because both MSI-X interrupt messages and MSI-X table writes are posted,
> it's possible for them to cross while in-flight.  This results in
> interrupts being received long after the kernel thinks they're disabled,
> and in interrupts being sent to stale vectors after rebalancing.
>
> This patch performs a read flush after writes to the MSI-X table for
> enable/disable and rebalancing operations.

Why wouldn't MSI have the same problems as MSI-X?

...
> diff -urpN -X dontdiff linux-2.6.21-rc4-clean/drivers/pci/msi.c linux-2.6.21-rc4/drivers/pci/msi.c
> --- linux-2.6.21-rc4-clean/drivers/pci/msi.c	2007-03-19 16:16:32.000000000 -0700
> +++ linux-2.6.21-rc4/drivers/pci/msi.c	2007-03-21 12:44:51.000000000 -0700
> @@ -68,6 +68,29 @@ static void msix_set_enable(struct pci_d
>  	}
>  }
>  
> +void msix_flush_writes(unsigned int irq)
> +{
> +	struct msi_desc *entry;
> +
> +	entry = get_irq_msi(irq);
> +	BUG_ON(!entry || !entry->dev);
> +	switch (entry->msi_attrib.type) {
> +	case PCI_CAP_ID_MSI:
> +		/* nothing to do */
> +		break;
> +	case PCI_CAP_ID_MSIX:
> +	{
> +		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
> +			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
> +		readl(entry->mask_base + offset);
> +		break;
> +	}
> +	default:
> +		BUG();
> +		break;
> +	}
> +}

PCI_CAP_ID_MSI case seems wrong to me.
I was expecting a readl() in that case as well.

thanks,
grant

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27 15:25 ` Grant Grundler
@ 2007-03-27 15:31   ` Williams, Mitch A
  2007-03-27 16:01     ` Eric W. Biederman
  0 siblings, 1 reply; 14+ messages in thread
From: Williams, Mitch A @ 2007-03-27 15:31 UTC (permalink / raw)
  To: Grant Grundler
  Cc: linux-pci, gregkh, ebiederm, linux-kernel, akpm, Kok, Auke-jan H

Grant Grundler wrote: 

>Why wouldn't MSI have the same problems as MSI-X?
>

Because enabling and disabling the MSI interrupt is done through
config space, and config space writes are not posted.  So we won't
see the problem that we do with MSI-X.

-Mitch

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27 15:31   ` Williams, Mitch A
@ 2007-03-27 16:01     ` Eric W. Biederman
  2007-03-27 17:15       ` Williams, Mitch A
  0 siblings, 1 reply; 14+ messages in thread
From: Eric W. Biederman @ 2007-03-27 16:01 UTC (permalink / raw)
  To: Williams, Mitch A
  Cc: Grant Grundler, linux-pci, gregkh, linux-kernel, akpm, Kok, Auke-jan H

"Williams, Mitch A" <mitch.a.williams@intel.com> writes:

> Grant Grundler wrote: 
>
>>Why wouldn't MSI have the same problems as MSI-X?
>>
>
> Because enabling and disabling the MSI interrupt is done through
> config space, and config space writes are not posted.  So we won't
> see the problem that we do with MSI-X.

Normally this is true.  However when we have  memory mapped pci config
space the writes could very easily be posted.  Even if there aren't
any Intel platforms that do it today other architectures might.

The more I think about this the more I think we should make mask and
unmask do the read and set enable/disable to mask/unmask in the msi
case.

Ugh.  Looking closer the function should really be called msi_flush
or msi_flush_writes not msix_flush_writes.

William do you think you can fix any of that up?
That is name the function msi_flush_writes and call it from
mask_msi_irq/unmask_msi_irq.

I think being a little more paranoid will result in slightly simpler
more maintainable code, and not measurably affect performance.  I
don't expect you are migrating irqs in a fast path.

Eric

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27 16:01     ` Eric W. Biederman
@ 2007-03-27 17:15       ` Williams, Mitch A
  2007-03-27 21:13         ` Eric W. Biederman
  0 siblings, 1 reply; 14+ messages in thread
From: Williams, Mitch A @ 2007-03-27 17:15 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Grant Grundler, linux-pci, gregkh, linux-kernel, akpm, Kok, Auke-jan H

Eric W. Biederman wrote: 
>
>> Because enabling and disabling the MSI interrupt is done through
>> config space, and config space writes are not posted.  So we won't
>> see the problem that we do with MSI-X.
>
>Normally this is true.  However when we have  memory mapped pci config
>space the writes could very easily be posted.  Even if there aren't
>any Intel platforms that do it today other architectures might.

>From what I read in the spec, configuration writes are not ever posted.
They are a completely separate transaction type from memory writes, and
only memory writes are posted.  So there's no need to read-flush these.

>The more I think about this the more I think we should make mask and
>unmask do the read and set enable/disable to mask/unmask in the msi
>case.
>
>I think being a little more paranoid will result in slightly simpler
>more maintainable code, and not measurably affect performance.  I
>don't expect you are migrating irqs in a fast path.

Migrating IRQs happens in the fast path, but infrequently (every 10 
seconds or so).  However the mask function is called at EVERY interrupt,
so this change would be VERY expensive.

If the driver really needs to disable the interrupt, then it can call
irq_disable().  Otherwise, mask (as-is) should be fine -- it masks
the interrupt at the APIC, and the device's interrupt moderation
should take care of keeping it from generating interrupts right away.

-Mitch

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27 17:15       ` Williams, Mitch A
@ 2007-03-27 21:13         ` Eric W. Biederman
  2007-03-27 21:43           ` Williams, Mitch A
  0 siblings, 1 reply; 14+ messages in thread
From: Eric W. Biederman @ 2007-03-27 21:13 UTC (permalink / raw)
  To: Williams, Mitch A
  Cc: Grant Grundler, linux-pci, gregkh, linux-kernel, akpm, Kok, Auke-jan H

"Williams, Mitch A" <mitch.a.williams@intel.com> writes:

> Eric W. Biederman wrote: 
>>
>>> Because enabling and disabling the MSI interrupt is done through
>>> config space, and config space writes are not posted.  So we won't
>>> see the problem that we do with MSI-X.
>>
>>Normally this is true.  However when we have  memory mapped pci config
>>space the writes could very easily be posted.  Even if there aren't
>>any Intel platforms that do it today other architectures might.
>
>>From what I read in the spec, configuration writes are not ever posted.
> They are a completely separate transaction type from memory writes, and
> only memory writes are posted.  So there's no need to read-flush these.

You may have a point there.  It isn't important at this point, and
it is easy enough to fix at this point.

>>The more I think about this the more I think we should make mask and
>>unmask do the read and set enable/disable to mask/unmask in the msi
>>case.
>>
>>I think being a little more paranoid will result in slightly simpler
>>more maintainable code, and not measurably affect performance.  I
>>don't expect you are migrating irqs in a fast path.
>
> Migrating IRQs happens in the fast path, but infrequently (every 10 
> seconds or so).  

Migrating IRQS gets called from the irq handler.  So I guess in that
sense it qualifies as a fast path operation.   The actual irq migration
itself is time consuming even if the individual operations aren't.  In
large part because ioapics don't appear to obey pci ordering rules so
flushing pending irqs is black magic :(

The comparative low frequency is important.

I guess what I meant is as soon as the test to for IRQ_MOVE_PENDING
is true we need to migrate an irq so we branch off of the fast path,
and irq handling path, where we do what we have to make things
work and the cost is what it takes to be reliable.

> However the mask function is called at EVERY interrupt,
> so this change would be VERY expensive.

If true I think that would be bad.  However I don't see it.
Where in handle_edge_irq do we mask the interrupt?
The only place I see us calling ->mask is from move_native_irq
and that only if IRQ_MOVE_PENDING is true.

All I can see is us routinely calling is ack_APIC_edge.

> If the driver really needs to disable the interrupt, then it can call
> irq_disable().

Agreed.  However the driver should not have any access to the mask method
in any shape or form.  That is an internal irq implementation helper.

>  Otherwise, mask (as-is) should be fine -- it masks
> the interrupt at the APIC, and the device's interrupt moderation
> should take care of keeping it from generating interrupts right away.

Please point at code because you are reading the code quite differently
from myself, and I think I have been over that code enough times to see
how it operates.

Eric

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27 21:13         ` Eric W. Biederman
@ 2007-03-27 21:43           ` Williams, Mitch A
  2007-03-28  1:14             ` Eric W. Biederman
  0 siblings, 1 reply; 14+ messages in thread
From: Williams, Mitch A @ 2007-03-27 21:43 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Grant Grundler, linux-pci, gregkh, linux-kernel, akpm, Kok, Auke-jan H

Eric W. Biederman wrote:

>> However the mask function is called at EVERY interrupt,
>> so this change would be VERY expensive.
>
>If true I think that would be bad.  However I don't see it.
>Where in handle_edge_irq do we mask the interrupt?
>The only place I see us calling ->mask is from move_native_irq
>and that only if IRQ_MOVE_PENDING is true.
>
>All I can see is us routinely calling is ack_APIC_edge.

Doh!  I was reading the code wrong.  We only mask if we're still
handling a previous interrupt on the same vector.  My bad.

However, I can't really see where mask() is used outside of that
instance.  Which then leads us back to the question:  do we need
a read flush on mask/unmask or just enable/disable?


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-27 21:43           ` Williams, Mitch A
@ 2007-03-28  1:14             ` Eric W. Biederman
  2007-03-28 15:37               ` Williams, Mitch A
  0 siblings, 1 reply; 14+ messages in thread
From: Eric W. Biederman @ 2007-03-28  1:14 UTC (permalink / raw)
  To: Williams, Mitch A
  Cc: Grant Grundler, linux-pci, gregkh, linux-kernel, akpm, Kok, Auke-jan H

"Williams, Mitch A" <mitch.a.williams@intel.com> writes:

> Doh!  I was reading the code wrong.  We only mask if we're still
> handling a previous interrupt on the same vector.  My bad.
>
> However, I can't really see where mask() is used outside of that
> instance.  Which then leads us back to the question:  do we need
> a read flush on mask/unmask or just enable/disable?

I'm not even certain we need the read flush in the enable.
However having it in there makes the code easier to reason
about.  Which is a big plus.

Generally if the interrupt controller hardware is sane 
mask/unmask and enable/disable should be the same function.

If we need to work around something in the hardware enable/disable
should do that and mask/unmask should poke the hardware.

Since MSI is specified as properly handle pending interrupts
I would put the write flush in mask.  It makes the code easier
to understand and comprehend.

The practical question in my book is do we set the enable/disable
methods to the same functions as the mask/unmask methods or
do we let them default to the crazy delayed disable scenario.

Given that we do have a tiny race where we need to ensure the
MSI is disabled before we unregister it, we don't know of any
MSI implementation problems that will result in a screaming IRQ.
I would say set enable/disable to the mask/unmask methods.

This will fix the tiny freeing bug mentioned above, and not play
games with drivers that are using MSI irqs.

If at some point we need a lesser form someone can change the msi
enable/disable methods to something else.

Eric

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table
  2007-03-28  1:14             ` Eric W. Biederman
@ 2007-03-28 15:37               ` Williams, Mitch A
  0 siblings, 0 replies; 14+ messages in thread
From: Williams, Mitch A @ 2007-03-28 15:37 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Grant Grundler, linux-pci, gregkh, linux-kernel, akpm, Kok, Auke-jan H

Eric W. Biederman wrote:
>The practical question in my book is do we set the enable/disable
>methods to the same functions as the mask/unmask methods or
>do we let them default to the crazy delayed disable scenario.
>
>Given that we do have a tiny race where we need to ensure the
>MSI is disabled before we unregister it, we don't know of any
>MSI implementation problems that will result in a screaming IRQ.
>I would say set enable/disable to the mask/unmask methods.
>

OK, that's easy.  I'll whip up a patch today, test it overnight,
and post it tomorrow.

Thanks for looking at this.

-Mitch

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2007-03-28 15:37 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-26 23:18 [PATCH 2.6.21-rc5] MSI: read-flush MSI-X table Mitch Williams
2007-03-27  0:09 ` Greg KH
2007-03-27  2:26   ` Eric W. Biederman
2007-03-27 13:23   ` Eric W. Biederman
2007-03-27 14:55     ` Grant Grundler
2007-03-27 15:15       ` Eric W. Biederman
2007-03-27 15:25 ` Grant Grundler
2007-03-27 15:31   ` Williams, Mitch A
2007-03-27 16:01     ` Eric W. Biederman
2007-03-27 17:15       ` Williams, Mitch A
2007-03-27 21:13         ` Eric W. Biederman
2007-03-27 21:43           ` Williams, Mitch A
2007-03-28  1:14             ` Eric W. Biederman
2007-03-28 15:37               ` Williams, Mitch A

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.