From mboxrd@z Thu Jan 1 00:00:00 1970 From: Ross Lagerwall Subject: [PATCH v3] x86: Fixup IRQs when CPUs go down during shutdown Date: Fri, 4 Dec 2015 14:01:40 +0000 Message-ID: <1449237700-10602-1-git-send-email-ross.lagerwall@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel@lists.xen.org Cc: Ross Lagerwall , Jan Beulich , Andrew Cooper List-Id: xen-devel@lists.xenproject.org Commit fc0c3fa2ad5c ("x86/IO-APIC: fix setup of Xen internally used IRQs (take 2)") introduced a regression on some hardware where Xen would hang during shutdown, repeating the following message: APIC error on CPU0: 08(08), Receive accept error This appears to be because an interrupt (in this case from the serial console) destined for a CPU other than the boot CPU is left unhandled so an APIC error on CPU 0 is generated instead. To fix this, before taking down the non-boot CPUs, call fixup_irqs() with a CPU mask of only the boot CPU to reset the IRQ affinities correctly. Signed-off-by: Ross Lagerwall --- v3: * Don't call fixup_eoi() in shutdown path * Use cpumask_of * Update comments * Reduce code churn xen/arch/x86/irq.c | 28 ++++++++++++++++++---------- xen/arch/x86/smp.c | 4 ++++ xen/arch/x86/smpboot.c | 3 ++- xen/include/asm-x86/irq.h | 5 +++-- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c index 5f515a0..1228568 100644 --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -2328,14 +2328,12 @@ static int __init setup_dump_irqs(void) } __initcall(setup_dump_irqs); -/* A cpu has been removed from cpu_online_mask. Re-set irq affinities. */ -void fixup_irqs(void) +/* Reset irq affinities to match the given CPU mask. */ +void fixup_irqs(const cpumask_t *mask, bool_t verbose) { - unsigned int irq, sp; + unsigned int irq; static int warned; struct irq_desc *desc; - irq_guest_action_t *action; - struct pending_eoi *peoi; for ( irq = 0; irq < nr_irqs; irq++ ) { @@ -2355,21 +2353,20 @@ void fixup_irqs(void) vector = irq_to_vector(irq); if ( vector >= FIRST_HIPRIORITY_VECTOR && vector <= LAST_HIPRIORITY_VECTOR ) - cpumask_and(desc->arch.cpu_mask, desc->arch.cpu_mask, - &cpu_online_map); + cpumask_and(desc->arch.cpu_mask, desc->arch.cpu_mask, mask); cpumask_copy(&affinity, desc->affinity); - if ( !desc->action || cpumask_subset(&affinity, &cpu_online_map) ) + if ( !desc->action || cpumask_subset(&affinity, mask) ) { spin_unlock(&desc->lock); continue; } - cpumask_and(&affinity, &affinity, &cpu_online_map); + cpumask_and(&affinity, &affinity, mask); if ( cpumask_empty(&affinity) ) { break_affinity = 1; - cpumask_copy(&affinity, &cpu_online_map); + cpumask_copy(&affinity, mask); } if ( desc->handler->disable ) @@ -2385,6 +2382,9 @@ void fixup_irqs(void) spin_unlock(&desc->lock); + if ( !verbose ) + continue; + if ( break_affinity && set_affinity ) printk("Broke affinity for irq %i\n", irq); else if ( !set_affinity ) @@ -2395,6 +2395,14 @@ void fixup_irqs(void) local_irq_enable(); mdelay(1); local_irq_disable(); +} + +void fixup_eoi(void) +{ + unsigned int irq, sp; + struct irq_desc *desc; + irq_guest_action_t *action; + struct pending_eoi *peoi; /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */ for ( irq = 0; irq < nr_irqs; irq++ ) diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c index 50ff6c2..988b9c2 100644 --- a/xen/arch/x86/smp.c +++ b/xen/arch/x86/smp.c @@ -299,6 +299,10 @@ void smp_send_stop(void) { int timeout = 10; + local_irq_disable(); + fixup_irqs(cpumask_of(smp_processor_id()), 0); + local_irq_enable(); + smp_call_function(stop_this_cpu, NULL, 0); /* Wait 10ms for all other CPUs to go offline. */ diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 5d48bac..1eb16cb 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -922,7 +922,8 @@ void __cpu_disable(void) /* It's now safe to remove this processor from the online map */ cpumask_clear_cpu(cpu, &cpu_online_map); - fixup_irqs(); + fixup_irqs(&cpu_online_map, 1); + fixup_eoi(); if ( cpu_disable_scheduler(cpu) ) BUG(); diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h index fcf37a3..7efdd37 100644 --- a/xen/include/asm-x86/irq.h +++ b/xen/include/asm-x86/irq.h @@ -148,8 +148,9 @@ int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq); int unmap_domain_pirq_emuirq(struct domain *d, int pirq); bool_t hvm_domain_use_pirq(const struct domain *, const struct pirq *); -/* A cpu has been removed from cpu_online_mask. Re-set irq affinities. */ -void fixup_irqs(void); +/* Reset irq affinities to match the given CPU mask. */ +void fixup_irqs(const cpumask_t *mask, bool_t verbose); +void fixup_eoi(void); int init_irq_data(void); -- 2.4.3