* [patch 0/5, 2.6.9-rc3] generic irq subsystem, description
@ 2004-10-01 20:45 Ingo Molnar
  2004-10-01 20:46 ` [patch 1/5, 2.6.9-rc3] generic irq subsystem, core Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2004-10-01 20:45 UTC (permalink / raw)
  To: linux-kernel; +Cc: Christoph Hellwig, Andrew Morton


this is a new release of the generic irq handling subsystem, against
2.6.9-rc3. This snapshot includes new ppc and ppc64 support written and
tested by Christoph Hellwig.

the core bits were reworked quite significantly. Changes:

 - split up kernel/hardirq.c into individual files in kernel/irq/

 - 80-90% of each converted architecture's irq.c file is now eliminated
   by this patch. The most dramatic one is x64's irq.c - a mere 108
   lines remained. The total effect on the 4 architectures covered so
   far: more than 3000 lines of irq.c code removed while kernel/irq/*.c
   remains around 1000 lines of code.

 - more irq.h, interrupt.h, asm/hardirq.h and linux/hardirq.h
   consolidation from Christoph and me.

 - thorough cleanup of the generic irq code.

 - compile-testing of the following architectures: x86, x64, ppc64, ppc, 
   ia64, s390, s390x. The first four were boot-tested as well.

there are 5 patches that will follow in the next 5 mails:

  generic-hardirqs-core.patch
  generic-hardirqs-x86.patch
  generic-hardirqs-x64.patch
  generic-hardirqs-ppc.patch
  generic-hardirqs-ppc64.patch

note that it is still a goal of the patchset not to break the build of
any non-CONFIG_GENERIC_HARDIRQS platform - 3 such platforms were
compile-tested.

comments welcome,

	Ingo


* [patch 1/5, 2.6.9-rc3] generic irq subsystem, core
  2004-10-01 20:45 [patch 0/5, 2.6.9-rc3] generic irq subsystem, description Ingo Molnar
@ 2004-10-01 20:46 ` Ingo Molnar
  2004-10-01 20:47   ` [patch 2/5, 2.6.9-rc3] generic irq subsystem, x86 port Ingo Molnar
       [not found]   ` <20041001143332.7e3a5aba.akpm@osdl.org>
  0 siblings, 2 replies; 15+ messages in thread
From: Ingo Molnar @ 2004-10-01 20:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: Christoph Hellwig, Andrew Morton


core bits of generic hardirqs.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Christoph Hellwig <hch@lst.de>

--- linux/kernel/irq/proc.c.orig
+++ linux/kernel/irq/proc.c
@@ -0,0 +1,183 @@
+/*
+ * linux/kernel/irq/proc.c
+ *
+ * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the /proc/irq/ handling code.
+ */
+
+#include <linux/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/interrupt.h>
+
+unsigned long prof_cpu_mask = -1;
+
+static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
+
+#ifdef CONFIG_SMP
+
+/*
+ * The /proc/irq/<irq>/smp_affinity values:
+ */
+static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
+
+cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
+
+static int irq_affinity_read_proc(char *page, char **start, off_t off,
+				  int count, int *eof, void *data)
+{
+	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
+
+	if (count - len < 2)
+		return -EINVAL;
+	len += sprintf(page + len, "\n");
+	return len;
+}
+
+static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
+				   unsigned long count, void *data)
+{
+	unsigned int irq = (int)(long)data, full_count = count, err;
+	cpumask_t new_value, tmp;
+
+	if (!irq_desc[irq].handler->set_affinity)
+		return -EIO;
+
+	err = cpumask_parse(buffer, count, new_value);
+	if (err)
+		return err;
+
+	/*
+	 * Do not allow disabling IRQs completely - it's a too easy
+	 * way to make the system unusable accidentally :-) At least
+	 * one online CPU still has to be targeted.
+	 */
+	cpus_and(tmp, new_value, cpu_online_map);
+	if (cpus_empty(tmp))
+		return -EINVAL;
+
+	irq_affinity[irq] = new_value;
+	irq_desc[irq].handler->set_affinity(irq,
+					cpumask_of_cpu(first_cpu(new_value)));
+
+	return full_count;
+}
+
+#endif
+
+static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
+				   int count, int *eof, void *data)
+{
+	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+
+	if (count - len < 2)
+		return -EINVAL;
+	len += sprintf(page + len, "\n");
+	return len;
+}
+
+static int prof_cpu_mask_write_proc(struct file *file,
+				    const char __user *buffer,
+				    unsigned long count, void *data)
+{
+	unsigned long full_count = count, err;
+	cpumask_t *mask = (cpumask_t *)data;
+	cpumask_t new_value;
+
+	err = cpumask_parse(buffer, count, new_value);
+	if (err)
+		return err;
+
+	*mask = new_value;
+
+	return full_count;
+}
+
+#define MAX_NAMELEN 128
+
+void register_handler_proc(unsigned int irq, struct irqaction *action)
+{
+	char name [MAX_NAMELEN];
+
+	if (!irq_dir[irq] || action->dir || !action->name)
+		return;
+
+	memset(name, 0, MAX_NAMELEN);
+	snprintf(name, MAX_NAMELEN, "%s", action->name);
+
+	/* create /proc/irq/1234/handler/ */
+	action->dir = proc_mkdir(name, irq_dir[irq]);
+}
+
+#undef MAX_NAMELEN
+
+#define MAX_NAMELEN 10
+
+void register_irq_proc(unsigned int irq)
+{
+	char name [MAX_NAMELEN];
+
+	if (!root_irq_dir ||
+		(irq_desc[irq].handler == &no_irq_type) ||
+			irq_dir[irq])
+		return;
+
+	memset(name, 0, MAX_NAMELEN);
+	sprintf(name, "%d", irq);
+
+	/* create /proc/irq/1234 */
+	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+#ifdef CONFIG_SMP
+	{
+		struct proc_dir_entry *entry;
+
+		/* create /proc/irq/<irq>/smp_affinity */
+		entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+		if (entry) {
+			entry->nlink = 1;
+			entry->data = (void *)(long)irq;
+			entry->read_proc = irq_affinity_read_proc;
+			entry->write_proc = irq_affinity_write_proc;
+		}
+		smp_affinity_entry[irq] = entry;
+	}
+#endif
+}
+
+#undef MAX_NAMELEN
+
+void unregister_handler_proc(unsigned int irq, struct irqaction *action)
+{
+	if (action->dir)
+		remove_proc_entry(action->dir->name, irq_dir[irq]);
+}
+
+void init_irq_proc(void)
+{
+	struct proc_dir_entry *entry;
+	int i;
+
+	/* create /proc/irq */
+	root_irq_dir = proc_mkdir("irq", NULL);
+	if (!root_irq_dir)
+		return;
+
+	/* create /proc/irq/prof_cpu_mask */
+	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+	if (!entry)
+		return;
+
+	entry->nlink = 1;
+	entry->data = (void *)&prof_cpu_mask;
+	entry->read_proc = prof_cpu_mask_read_proc;
+	entry->write_proc = prof_cpu_mask_write_proc;
+
+	/*
+	 * Create entries for all existing IRQs.
+	 */
+	for (i = 0; i < NR_IRQS; i++)
+		register_irq_proc(i);
+}
+
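
[ Editorial illustration - not part of the patch. The smp_affinity file
  created above takes a hex cpumask; a minimal userspace sketch (run as
  root), assuming a hypothetical IRQ 19 on an SMP kernel: ]

	/* pin IRQ 19 to CPUs 0 and 1 (mask 0x3) */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/irq/19/smp_affinity", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "%x\n", 0x3);
		return fclose(f) ? 1 : 0;
	}

[ Note that irq_affinity_write_proc() above rejects a mask containing no
  online CPU (-EINVAL) and returns -EIO if the irq controller has no
  ->set_affinity hook. ]
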
--- linux/kernel/irq/manage.c.orig
+++ linux/kernel/irq/manage.c
@@ -0,0 +1,347 @@
+/*
+ * linux/kernel/irq/manage.c
+ *
+ * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains driver APIs to the irq subsystem.
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/interrupt.h>
+
+#include "internals.h"
+
+#ifdef CONFIG_SMP
+
+/**
+ *	synchronize_irq - wait for pending IRQ handlers (on other CPUs)
+ *
+ *	This function waits for any pending IRQ handlers for this interrupt
+ *	to complete before returning. If you use this function while
+ *	holding a resource the IRQ handler may need you will deadlock.
+ *
+ *	This function may be called - with care - from IRQ context.
+ */
+void synchronize_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_desc + irq;
+
+	while (desc->status & IRQ_INPROGRESS)
+		cpu_relax();
+}
+
+EXPORT_SYMBOL(synchronize_irq);
+
+#endif
+
+/**
+ *	disable_irq_nosync - disable an irq without waiting
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line.  Disables and Enables are
+ *	nested.
+ *	Unlike disable_irq(), this function does not ensure existing
+ *	instances of the IRQ handler have completed before returning.
+ *
+ *	This function may be called from IRQ context.
+ */
+void disable_irq_nosync(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	if (!desc->depth++) {
+		desc->status |= IRQ_DISABLED;
+		desc->handler->disable(irq);
+	}
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+EXPORT_SYMBOL(disable_irq_nosync);
+
+/**
+ *	disable_irq - disable an irq and wait for completion
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line.  Enables and Disables are
+ *	nested.
+ *	This function waits for any pending IRQ handlers for this interrupt
+ *	to complete before returning. If you use this function while
+ *	holding a resource the IRQ handler may need you will deadlock.
+ *
+ *	This function may be called - with care - from IRQ context.
+ */
+void disable_irq(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+
+	disable_irq_nosync(irq);
+	if (desc->action)
+		synchronize_irq(irq);
+}
+
+EXPORT_SYMBOL(disable_irq);
+
+/**
+ *	enable_irq - enable handling of an irq
+ *	@irq: Interrupt to enable
+ *
+ *	Undoes the effect of one call to disable_irq().  If this
+ *	matches the last disable, processing of interrupts on this
+ *	IRQ line is re-enabled.
+ *
+ *	This function may be called from IRQ context.
+ */
+void enable_irq(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	switch (desc->depth) {
+	case 0:
+		WARN_ON(1);
+		break;
+	case 1: {
+		unsigned int status = desc->status & ~IRQ_DISABLED;
+
+		desc->status = status;
+		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+			desc->status = status | IRQ_REPLAY;
+			hw_resend_irq(desc->handler,irq);
+		}
+		desc->handler->enable(irq);
+		/* fall-through */
+	}
+	default:
+		desc->depth--;
+	}
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+EXPORT_SYMBOL(enable_irq);
+
+/*
+ * Internal function that tells the architecture code whether a
+ * particular irq has been exclusively allocated or is available
+ * for driver use.
+ */
+int can_request_irq(unsigned int irq, unsigned long irqflags)
+{
+	struct irqaction *action;
+
+	if (irq >= NR_IRQS)
+		return 0;
+
+	action = irq_desc[irq].action;
+	if (action)
+		if (irqflags & action->flags & SA_SHIRQ)
+			action = NULL;
+
+	return !action;
+}
+
+/*
+ * Internal function to register an irqaction - typically used to
+ * allocate special interrupts that are part of the architecture.
+ */
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+	struct irq_desc *desc = irq_desc + irq;
+	struct irqaction *old, **p;
+	unsigned long flags;
+	int shared = 0;
+
+	if (desc->handler == &no_irq_type)
+		return -ENOSYS;
+	/*
+	 * Some drivers like serial.c use request_irq() heavily,
+	 * so we have to be careful not to interfere with a
+	 * running system.
+	 */
+	if (new->flags & SA_SAMPLE_RANDOM) {
+		/*
+		 * This function might sleep; we want to call it first,
+		 * outside of the atomic block.
+		 * Yes, this might clear the entropy pool if an attempt
+		 * is made to load the wrong driver, without actually
+		 * installing a new handler - but is this really a
+		 * problem? Only the sysadmin is able to do this.
+		 */
+		rand_initialize_irq(irq);
+	}
+
+	/*
+	 * The following block of code has to be executed atomically
+	 */
+	spin_lock_irqsave(&desc->lock,flags);
+	p = &desc->action;
+	if ((old = *p) != NULL) {
+		/* Can't share interrupts unless both agree to */
+		if (!(old->flags & new->flags & SA_SHIRQ)) {
+			spin_unlock_irqrestore(&desc->lock,flags);
+			return -EBUSY;
+		}
+
+		/* add new interrupt at end of irq queue */
+		do {
+			p = &old->next;
+			old = *p;
+		} while (old);
+		shared = 1;
+	}
+
+	*p = new;
+
+	if (!shared) {
+		desc->depth = 0;
+		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT |
+				  IRQ_WAITING | IRQ_INPROGRESS);
+		if (desc->handler->startup)
+			desc->handler->startup(irq);
+		else
+			desc->handler->enable(irq);
+	}
+	spin_unlock_irqrestore(&desc->lock,flags);
+
+	new->irq = irq;
+	register_irq_proc(irq);
+	new->dir = NULL;
+	register_handler_proc(irq, new);
+
+	return 0;
+}
+
+/**
+ *	free_irq - free an interrupt
+ *	@irq: Interrupt line to free
+ *	@dev_id: Device identity to free
+ *
+ *	Remove an interrupt handler. The handler is removed and if the
+ *	interrupt line is no longer in use by any driver it is disabled.
+ *	On a shared IRQ the caller must ensure the interrupt is disabled
+ *	on the card it drives before calling this function. The function
+ *	does not return until any executing interrupts for this IRQ
+ *	have completed.
+ *
+ *	This function must not be called from interrupt context.
+ */
+void free_irq(unsigned int irq, void *dev_id)
+{
+	struct irq_desc *desc;
+	struct irqaction **p;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS)
+		return;
+
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&desc->lock,flags);
+	p = &desc->action;
+	for (;;) {
+		struct irqaction * action = *p;
+
+		if (action) {
+			struct irqaction **pp = p;
+
+			p = &action->next;
+			if (action->dev_id != dev_id)
+				continue;
+
+			/* Found it - now remove it from the list of entries */
+			*pp = action->next;
+			if (!desc->action) {
+				desc->status |= IRQ_DISABLED;
+				if (desc->handler->shutdown)
+					desc->handler->shutdown(irq);
+				else
+					desc->handler->disable(irq);
+			}
+			spin_unlock_irqrestore(&desc->lock,flags);
+			unregister_handler_proc(irq, action);
+
+			/* Make sure it's not being used on another CPU */
+			synchronize_irq(irq);
+			kfree(action);
+			return;
+		}
+		printk(KERN_ERR "Trying to free already-free IRQ%d\n", irq);
+		spin_unlock_irqrestore(&desc->lock,flags);
+		return;
+	}
+}
+
+EXPORT_SYMBOL(free_irq);
+
+/**
+ *	request_irq - allocate an interrupt line
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs
+ *	@irqflags: Interrupt type flags
+ *	@devname: An ascii name for the claiming device
+ *	@dev_id: A cookie passed back to the handler function
+ *
+ *	This call allocates interrupt resources and enables the
+ *	interrupt line and IRQ handling. From the point this
+ *	call is made your handler function may be invoked. Since
+ *	your handler function must clear any interrupt the board
+ *	raises, you must take care both to initialise your hardware
+ *	and to set up the interrupt handler in the right order.
+ *
+ *	Dev_id must be globally unique. Normally the address of the
+ *	device data structure is used as the cookie. Since the handler
+ *	receives this value it makes sense to use it.
+ *
+ *	If your interrupt is shared you must pass a non NULL dev_id
+ *	as this is required when freeing the interrupt.
+ *
+ *	Flags:
+ *
+ *	SA_SHIRQ		Interrupt is shared
+ *	SA_INTERRUPT		Disable local interrupts while processing
+ *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
+ *
+ */
+int request_irq(unsigned int irq,
+		irqreturn_t (*handler)(int, void *, struct pt_regs *),
+		unsigned long irqflags, const char * devname, void *dev_id)
+{
+	struct irqaction * action;
+	int retval;
+
+	/*
+	 * Sanity-check: shared interrupts must pass in a real dev-ID,
+	 * otherwise we'll have trouble later trying to figure out
+	 * which interrupt is which (messes up the interrupt freeing
+	 * logic etc).
+	 */
+	if ((irqflags & SA_SHIRQ) && !dev_id)
+		return -EINVAL;
+	if (irq >= NR_IRQS)
+		return -EINVAL;
+	if (!handler)
+		return -EINVAL;
+
+	action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
+	if (!action)
+		return -ENOMEM;
+
+	action->handler = handler;
+	action->flags = irqflags;
+	cpus_clear(action->mask);
+	action->name = devname;
+	action->next = NULL;
+	action->dev_id = dev_id;
+
+	retval = setup_irq(irq, action);
+	if (retval)
+		kfree(action);
+
+	return retval;
+}
+
+EXPORT_SYMBOL(request_irq);
+
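
[ Editorial illustration - not part of the patch. The driver-side view of
  the two exports above; a minimal sketch for a hypothetical shared-IRQ
  device - my_dev, my_handler, MY_IRQ and device_raised_irq() are made-up
  names, the handler signature is the 2.6 one declared in interrupt.h: ]

	#include <linux/interrupt.h>

	struct my_dev;				/* hypothetical device state */

	static irqreturn_t my_handler(int irq, void *dev_id, struct pt_regs *regs)
	{
		struct my_dev *dev = dev_id;

		if (!device_raised_irq(dev))	/* hypothetical status check */
			return IRQ_NONE;	/* not ours - the line is shared */
		/* ... service and ack the device here ... */
		return IRQ_HANDLED;
	}

	static int my_attach(struct my_dev *dev)
	{
		/* SA_SHIRQ requires a non-NULL dev_id (see the sanity check) */
		return request_irq(MY_IRQ, my_handler, SA_SHIRQ, "my_dev", dev);
	}

	static void my_detach(struct my_dev *dev)
	{
		free_irq(MY_IRQ, dev);	/* blocks until running handlers finish */
	}

[ Returning IRQ_NONE for foreign interrupts is what lets note_interrupt()
  (next file) distinguish a healthy shared line from a stuck one. ]
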
--- linux/kernel/irq/spurious.c.orig
+++ linux/kernel/irq/spurious.c
@@ -0,0 +1,96 @@
+/*
+ * linux/kernel/irq/spurious.c
+ *
+ * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains spurious interrupt handling.
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+
+/*
+ * If 99,900 of the previous 100,000 interrupts have not been handled
+ * then assume that the IRQ is stuck in some manner. Drop a diagnostic
+ * and try to turn the IRQ off.
+ *
+ * (The other 100-of-100,000 interrupts may have been a correctly
+ *  functioning device sharing an IRQ with the failing one)
+ *
+ * Called under desc->lock
+ */
+
+static void
+__report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+	struct irqaction *action;
+
+	if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
+		printk(KERN_ERR "irq event %d: bogus return value %x\n",
+				irq, action_ret);
+	} else {
+		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
+	}
+	dump_stack();
+	printk(KERN_ERR "handlers:\n");
+	action = desc->action;
+	while (action) {
+		printk(KERN_ERR "[<%p>]", action->handler);
+		print_symbol(" (%s)",
+			(unsigned long)action->handler);
+		printk("\n");
+		action = action->next;
+	}
+}
+
+void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+	static int count = 100;
+
+	if (count > 0) {
+		count--;
+		__report_bad_irq(irq, desc, action_ret);
+	}
+}
+
+void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+	if (action_ret != IRQ_HANDLED) {
+		desc->irqs_unhandled++;
+		if (action_ret != IRQ_NONE)
+			report_bad_irq(irq, desc, action_ret);
+	}
+
+	desc->irq_count++;
+	if (desc->irq_count < 100000)
+		return;
+
+	desc->irq_count = 0;
+	if (desc->irqs_unhandled > 99900) {
+		/*
+		 * The interrupt is stuck
+		 */
+		__report_bad_irq(irq, desc, action_ret);
+		/*
+		 * Now kill the IRQ
+		 */
+		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
+		desc->status |= IRQ_DISABLED;
+		desc->handler->disable(irq);
+	}
+	desc->irqs_unhandled = 0;
+}
+
+int noirqdebug;
+
+static int __init noirqdebug_setup(char *str)
+{
+	noirqdebug = 1;
+	printk(KERN_INFO "IRQ lockup detection disabled\n");
+	return 1;
+}
+
+__setup("noirqdebug", noirqdebug_setup);
+
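
[ Editorial illustration - not part of the patch. The stuck-IRQ heuristic
  above, redone as a standalone userspace program (reporting elided) so
  the arithmetic can be checked in isolation: ]

	#include <stdio.h>

	#define IRQ_NONE	0
	#define IRQ_HANDLED	1

	static int irq_count, irqs_unhandled;

	/* same bookkeeping as note_interrupt(); returns 1 when the
	   IRQ would be disabled */
	static int note(int action_ret)
	{
		int stuck = 0;

		if (action_ret != IRQ_HANDLED)
			irqs_unhandled++;
		if (++irq_count < 100000)
			return 0;
		irq_count = 0;
		if (irqs_unhandled > 99900)
			stuck = 1;
		irqs_unhandled = 0;
		return stuck;
	}

	int main(void)
	{
		int i;

		/* a screaming interrupt that nobody handles trips the
		   detector exactly at the 100,000-event boundary */
		for (i = 1; i <= 100000; i++)
			if (note(IRQ_NONE))
				printf("disabled after %d events\n", i);
		return 0;
	}
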
--- linux/kernel/irq/handle.c.orig
+++ linux/kernel/irq/handle.c
@@ -0,0 +1,204 @@
+/*
+ * linux/kernel/irq/handle.c
+ *
+ * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the core interrupt handling code.
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include "internals.h"
+
+/*
+ * Linux has a controller-independent interrupt architecture.
+ * Every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * The code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic or
+ * having to touch the generic code.
+ *
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
+	[0 ... NR_IRQS-1] = {
+		.handler = &no_irq_type,
+		.lock = SPIN_LOCK_UNLOCKED
+	}
+};
+
+/*
+ * Generic 'no controller' code
+ */
+static void end_none(unsigned int irq) { }
+static void enable_none(unsigned int irq) { }
+static void disable_none(unsigned int irq) { }
+static void shutdown_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+
+static void ack_none(unsigned int irq)
+{
+	/*
+	 * 'what should we do if we get a hw irq event on an illegal vector'.
+	 * Each architecture has to answer this itself.
+	 */
+	ack_bad_irq(irq);
+}
+
+struct hw_interrupt_type no_irq_type = {
+	typename:	"none",
+	startup:	startup_none,
+	shutdown:	shutdown_none,
+	enable:		enable_none,
+	disable:	disable_none,
+	ack:		ack_none,
+	end:		end_none,
+	set_affinity:	NULL
+};
+
+/*
+ * Special, empty irq handler:
+ */
+irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
+{
+	return IRQ_NONE;
+}
+
+/*
+ * Exit an interrupt context. Process softirqs if needed and possible:
+ */
+void irq_exit(void)
+{
+	preempt_count() -= IRQ_EXIT_OFFSET;
+	if (!in_interrupt() && local_softirq_pending())
+		do_softirq();
+	preempt_enable_no_resched();
+}
+
+/*
+ * Have got an event to handle:
+ */
+asmlinkage int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
+				struct irqaction *action)
+{
+	int ret, retval = 0, status = 0;
+
+	if (!(action->flags & SA_INTERRUPT))
+		local_irq_enable();
+
+	do {
+		ret = action->handler(irq, action->dev_id, regs);
+		if (ret == IRQ_HANDLED)
+			status |= action->flags;
+		retval |= ret;
+		action = action->next;
+	} while (action);
+
+	if (status & SA_SAMPLE_RANDOM)
+		add_interrupt_randomness(irq);
+	local_irq_disable();
+
+	return retval;
+}
+
+/*
+ * do_IRQ handles all normal device IRQs (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+asmlinkage unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	struct irqaction * action;
+	unsigned int status;
+
+	kstat_this_cpu.irqs[irq]++;
+	if (desc->status & IRQ_PER_CPU) {
+		irqreturn_t action_ret;
+
+		/*
+		 * No locking required for CPU-local interrupts:
+		 */
+		desc->handler->ack(irq);
+		action_ret = handle_IRQ_event(irq, regs, desc->action);
+		if (!noirqdebug)
+			note_interrupt(irq, desc, action_ret);
+		desc->handler->end(irq);
+		return 1;
+	}
+
+	spin_lock(&desc->lock);
+	desc->handler->ack(irq);
+	/*
+	 * REPLAY is when Linux resends an IRQ that was dropped earlier
+	 * WAITING is used by probe to mark irqs that are being tested
+	 */
+	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+	status |= IRQ_PENDING; /* we _want_ to handle it */
+
+	/*
+	 * If the IRQ is disabled for whatever reason, we cannot
+	 * use the action we have.
+	 */
+	action = NULL;
+	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
+		action = desc->action;
+		status &= ~IRQ_PENDING; /* we commit to handling */
+		status |= IRQ_INPROGRESS; /* we are handling it */
+	}
+	desc->status = status;
+
+	/*
+	 * If there is no IRQ handler or it was disabled, exit early.
+	 * Since we set PENDING, if another processor is handling
+	 * a different instance of this same irq, the other processor
+	 * will take care of it.
+	 */
+	if (unlikely(!action))
+		goto out;
+
+	/*
+	 * Edge triggered interrupts need to remember
+	 * pending events.
+	 * This applies to any hw interrupts that allow a second
+	 * instance of the same irq to arrive while we are in do_IRQ
+	 * or in the handler. But the code here only handles the _second_
+	 * instance of the irq, not the third or fourth. So it is mostly
+	 * useful for irq hardware that does not mask cleanly in an
+	 * SMP environment.
+	 */
+	for (;;) {
+		irqreturn_t action_ret;
+
+		spin_unlock(&desc->lock);
+
+		action_ret = handle_IRQ_event(irq, regs, action);
+
+		spin_lock(&desc->lock);
+		if (!noirqdebug)
+			note_interrupt(irq, desc, action_ret);
+		if (likely(!(desc->status & IRQ_PENDING)))
+			break;
+		desc->status &= ~IRQ_PENDING;
+	}
+	desc->status &= ~IRQ_INPROGRESS;
+
+out:
+	/*
+	 * The ->end() handler has to deal with interrupts which got
+	 * disabled while the handler was running.
+	 */
+	desc->handler->end(irq);
+	spin_unlock(&desc->lock);
+
+	return 1;
+}
+
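
[ Editorial illustration - not part of the patch. What a real
  'controller-template' looks like from the generic code's point of view;
  a minimal sketch for a hypothetical memory-mapped PIC - the my_pic_*
  names and the three registers are invented: ]

	static void my_pic_enable(unsigned int irq)
	{
		writel(1 << irq, MY_PIC_MASK_CLR);	/* unmask the line */
	}

	static void my_pic_disable(unsigned int irq)
	{
		writel(1 << irq, MY_PIC_MASK_SET);	/* mask the line */
	}

	static void my_pic_ack(unsigned int irq)
	{
		writel(1 << irq, MY_PIC_ACK);		/* clear pending bit */
	}

	static unsigned int my_pic_startup(unsigned int irq)
	{
		my_pic_enable(irq);
		return 0;	/* no pending interrupt was found */
	}

	static void my_pic_end(unsigned int irq)
	{
		/* ->end() must cope with irqs disabled while handling */
		if (!(irq_desc[irq].status & IRQ_DISABLED))
			my_pic_enable(irq);
	}

	static struct hw_interrupt_type my_pic_type = {
		.typename	= "MY-PIC",
		.startup	= my_pic_startup,
		.shutdown	= my_pic_disable,
		.enable		= my_pic_enable,
		.disable	= my_pic_disable,
		.ack		= my_pic_ack,
		.end		= my_pic_end,
	};

[ Wiring irq_desc[i].handler to &my_pic_type at init time is all the
  generic __do_IRQ() flow above needs. ]
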
--- linux/kernel/irq/Makefile.orig
+++ linux/kernel/irq/Makefile
@@ -0,0 +1,4 @@
+
+obj-y := autoprobe.o handle.o manage.o spurious.o
+obj-$(CONFIG_PROC_FS) += proc.o
+
--- linux/kernel/irq/autoprobe.c.orig
+++ linux/kernel/irq/autoprobe.c
@@ -0,0 +1,188 @@
+/*
+ * linux/kernel/irq/autoprobe.c
+ *
+ * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the interrupt probing code and driver APIs.
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+
+/*
+ * Autodetection depends on the fact that any interrupt that
+ * comes in on an unassigned handler will get stuck with
+ * "IRQ_WAITING" cleared and the interrupt disabled.
+ */
+static DECLARE_MUTEX(probe_sem);
+
+/**
+ *	probe_irq_on	- begin an interrupt autodetect
+ *
+ *	Commence probing for an interrupt. The interrupts are scanned
+ *	and a mask of potential interrupt lines is returned.
+ *
+ */
+unsigned long probe_irq_on(void)
+{
+	unsigned long val, delay;
+	irq_desc_t *desc;
+	unsigned int i;
+
+	down(&probe_sem);
+	/*
+	 * something may have generated an irq long ago and we want to
+	 * flush such a longstanding irq before considering it as spurious.
+	 */
+	for (i = NR_IRQS-1; i > 0; i--) {
+		desc = irq_desc + i;
+
+		spin_lock_irq(&desc->lock);
+		if (!irq_desc[i].action)
+			irq_desc[i].handler->startup(i);
+		spin_unlock_irq(&desc->lock);
+	}
+
+	/* Wait for longstanding interrupts to trigger. */
+	for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+		/* about 20ms delay */ barrier();
+
+	/*
+	 * enable any unassigned irqs
+	 * (we must startup again here because if a longstanding irq
+	 * happened in the previous stage, it may have masked itself)
+	 */
+	for (i = NR_IRQS-1; i > 0; i--) {
+		desc = irq_desc + i;
+
+		spin_lock_irq(&desc->lock);
+		if (!desc->action) {
+			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+			if (desc->handler->startup(i))
+				desc->status |= IRQ_PENDING;
+		}
+		spin_unlock_irq(&desc->lock);
+	}
+
+	/*
+	 * Wait for spurious interrupts to trigger
+	 */
+	for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+		/* about 100ms delay */ barrier();
+
+	/*
+	 * Now filter out any obviously spurious interrupts
+	 */
+	val = 0;
+	for (i = 0; i < NR_IRQS; i++) {
+		irq_desc_t *desc = irq_desc + i;
+		unsigned int status;
+
+		spin_lock_irq(&desc->lock);
+		status = desc->status;
+
+		if (status & IRQ_AUTODETECT) {
+			/* It triggered already - consider it spurious. */
+			if (!(status & IRQ_WAITING)) {
+				desc->status = status & ~IRQ_AUTODETECT;
+				desc->handler->shutdown(i);
+			} else
+				if (i < 32)
+					val |= 1 << i;
+		}
+		spin_unlock_irq(&desc->lock);
+	}
+
+	return val;
+}
+
+EXPORT_SYMBOL(probe_irq_on);
+
+/**
+ *	probe_irq_mask - scan a bitmap of interrupt lines
+ *	@val:	mask of interrupts to consider
+ *
+ *	Scan the interrupt lines and return a bitmap of active
+ *	autodetect interrupts. The interrupt probe logic state
+ *	is then returned to its previous value.
+ *
+ *	Note: we need to scan all the irqs even though we will
+ *	only return autodetect irq numbers - just so that we reset
+ *	them all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+	unsigned int mask;
+	int i;
+
+	mask = 0;
+	for (i = 0; i < NR_IRQS; i++) {
+		irq_desc_t *desc = irq_desc + i;
+		unsigned int status;
+
+		spin_lock_irq(&desc->lock);
+		status = desc->status;
+
+		if (status & IRQ_AUTODETECT) {
+			if (i < 16 && !(status & IRQ_WAITING))
+				mask |= 1 << i;
+
+			desc->status = status & ~IRQ_AUTODETECT;
+			desc->handler->shutdown(i);
+		}
+		spin_unlock_irq(&desc->lock);
+	}
+	up(&probe_sem);
+
+	return mask & val;
+}
+
+/**
+ *	probe_irq_off	- end an interrupt autodetect
+ *	@val: mask of potential interrupts (unused)
+ *
+ *	Scans the unused interrupt lines and returns the line which
+ *	appears to have triggered the interrupt. If no interrupt was
+ *	found then zero is returned. If more than one interrupt is
+ *	found then minus the first candidate is returned to indicate
+ *	there is doubt.
+ *
+ *	The interrupt probe logic state is returned to its previous
+ *	value.
+ *
+ *	BUGS: When used in a module (which arguably shouldn't happen)
+ *	nothing prevents two IRQ probe callers from overlapping. The
+ *	results of this are non-optimal.
+ */
+int probe_irq_off(unsigned long val)
+{
+	int i, irq_found = 0, nr_irqs = 0;
+
+	for (i = 0; i < NR_IRQS; i++) {
+		irq_desc_t *desc = irq_desc + i;
+		unsigned int status;
+
+		spin_lock_irq(&desc->lock);
+		status = desc->status;
+
+		if (status & IRQ_AUTODETECT) {
+			if (!(status & IRQ_WAITING)) {
+				if (!nr_irqs)
+					irq_found = i;
+				nr_irqs++;
+			}
+			desc->status = status & ~IRQ_AUTODETECT;
+			desc->handler->shutdown(i);
+		}
+		spin_unlock_irq(&desc->lock);
+	}
+	up(&probe_sem);
+
+	if (nr_irqs > 1)
+		irq_found = -irq_found;
+	return irq_found;
+}
+
+EXPORT_SYMBOL(probe_irq_off);
+
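
[ Editorial illustration - not part of the patch. The canonical probing
  sequence for the three exports above; trigger_my_irq() stands in for
  whatever makes the hypothetical device raise its interrupt once: ]

	unsigned long mask;
	int irq;

	mask = probe_irq_on();		/* arms autodetect, takes probe_sem */
	trigger_my_irq();		/* device fires exactly one interrupt */
	mdelay(20);			/* give it time to arrive */
	irq = probe_irq_off(mask);	/* drops probe_sem */

	if (irq > 0)
		printk("device is on IRQ %d\n", irq);
	else if (irq < 0)
		printk("ambiguous - first candidate was IRQ %d\n", -irq);
	else
		printk("no interrupt detected\n");
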
--- linux/kernel/irq/internals.h.orig
+++ linux/kernel/irq/internals.h
@@ -0,0 +1,18 @@
+/*
+ * IRQ subsystem internal functions and variables:
+ */
+
+extern int noirqdebug;
+
+#ifdef CONFIG_PROC_FS
+extern void register_irq_proc(unsigned int irq);
+extern void register_handler_proc(unsigned int irq, struct irqaction *action);
+extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
+#else
+static inline void register_irq_proc(unsigned int irq) { }
+static inline void register_handler_proc(unsigned int irq,
+					 struct irqaction *action) { }
+static inline void unregister_handler_proc(unsigned int irq,
+					   struct irqaction *action) { }
+#endif
+
--- linux/kernel/Makefile.orig
+++ linux/kernel/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_STOP_MACHINE) += stop_machi
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
--- linux/include/linux/irq.h.orig
+++ linux/include/linux/irq.h
@@ -13,6 +13,7 @@
 
 #if !defined(CONFIG_ARCH_S390)
 
+#include <linux/linkage.h>
 #include <linux/cache.h>
 #include <linux/spinlock.h>
 #include <linux/cpumask.h>
@@ -71,7 +72,19 @@ extern irq_desc_t irq_desc [NR_IRQS];
 
 #include <asm/hw_irq.h> /* the arch dependent stuff */
 
-extern int setup_irq(unsigned int , struct irqaction * );
+#ifdef CONFIG_GENERIC_HARDIRQS
+extern cpumask_t irq_affinity[NR_IRQS];
+
+extern asmlinkage int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
+				       struct irqaction *action);
+extern asmlinkage unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
+extern void note_interrupt(unsigned int irq, irq_desc_t *desc, int action_ret);
+extern void report_bad_irq(unsigned int irq, irq_desc_t *desc, int action_ret);
+extern int can_request_irq(unsigned int irq, unsigned long irqflags);
+extern int setup_irq(unsigned int irq, struct irqaction * new);
+
+extern void init_irq_proc(void);
+#endif
 
 extern hw_irq_controller no_irq_type;  /* needed in every arch ? */
 
--- linux/include/linux/interrupt.h.orig
+++ linux/include/linux/interrupt.h
@@ -40,6 +40,8 @@ struct irqaction {
 	const char *name;
 	void *dev_id;
 	struct irqaction *next;
+	int irq;
+	struct proc_dir_entry *dir;
 };
 
 extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs);
@@ -48,6 +50,13 @@ extern int request_irq(unsigned int,
 		       unsigned long, const char *, void *);
 extern void free_irq(unsigned int, void *);
 
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+extern void disable_irq_nosync(unsigned int irq);
+extern void disable_irq(unsigned int irq);
+extern void enable_irq(unsigned int irq);
+#endif
+
 /*
  * Temporary defines for UP kernels, until all code gets fixed.
  */
--- linux/include/linux/hardirq.h.orig
+++ linux/include/linux/hardirq.h
@@ -5,6 +5,40 @@
 #include <linux/smp_lock.h>
 #include <asm/hardirq.h>
 
+#ifdef CONFIG_GENERIC_HARDIRQS
+/*
+ * We put the hardirq and softirq counter into the preemption
+ * counter. The bitmask has the following meaning:
+ *
+ * - bits 0-7 are the preemption count (max preemption depth: 256)
+ * - bits 8-15 are the softirq count (max # of softirqs: 256)
+ * - bits 16-27 are the hardirq count (max # of hardirqs: 4096)
+ *
+ * - ( bit 26 is the PREEMPT_ACTIVE flag. )
+ *
+ * PREEMPT_MASK: 0x000000ff
+ * SOFTIRQ_MASK: 0x0000ff00
+ * HARDIRQ_MASK: 0x0fff0000
+ */
+
+#define PREEMPT_BITS	8
+#define SOFTIRQ_BITS	8
+#define HARDIRQ_BITS	12
+
+#define PREEMPT_SHIFT	0
+#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+
+/*
+ * The hardirq mask has to be large enough to have
+ * space for potentially all IRQ sources in the system
+ * nesting on a single CPU:
+ */
+#if (1 << HARDIRQ_BITS) < NR_IRQS
+# error HARDIRQ_BITS is too low!
+#endif
+#endif /* CONFIG_GENERIC_HARDIRQS */
+
 #define __IRQ_MASK(x)	((1UL << (x))-1)
 
 #define PREEMPT_MASK	(__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
@@ -43,4 +77,12 @@ extern void synchronize_irq(unsigned int
 # define synchronize_irq(irq)	barrier()
 #endif
 
+#ifdef CONFIG_GENERIC_HARDIRQS
+#define nmi_enter()		(preempt_count() += HARDIRQ_OFFSET)
+#define nmi_exit()		(preempt_count() -= HARDIRQ_OFFSET)
+
+#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
+extern void irq_exit(void);
+#endif
+
 #endif /* LINUX_HARDIRQ_H */
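
[ Editorial illustration - not part of the patch. The three masks in the
  comment above follow mechanically from the BITS/SHIFT definitions; a
  throwaway userspace check: ]

	#include <stdio.h>

	#define PREEMPT_BITS	8
	#define SOFTIRQ_BITS	8
	#define HARDIRQ_BITS	12

	#define PREEMPT_SHIFT	0
	#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
	#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)

	#define __IRQ_MASK(x)	((1UL << (x))-1)

	int main(void)
	{
		printf("PREEMPT_MASK: 0x%08lx\n",
		       __IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT);
		printf("SOFTIRQ_MASK: 0x%08lx\n",
		       __IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT);
		printf("HARDIRQ_MASK: 0x%08lx\n",
		       __IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT);
		return 0;
	}

[ This prints 0x000000ff, 0x0000ff00 and 0x0fff0000, matching the comment,
  and makes the build check above easy to reason about: 12 hardirq bits
  allow up to 4096 nested hardirqs per CPU, which must cover NR_IRQS. ]
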


* [patch 2/5, 2.6.9-rc3] generic irq subsystem, x86 port
  2004-10-01 20:46 ` [patch 1/5, 2.6.9-rc3] generic irq subsystem, core Ingo Molnar
@ 2004-10-01 20:47   ` Ingo Molnar
  2004-10-01 20:48     ` [patch 3/5, 2.6.9-rc3] generic irq subsystem, x64 port Ingo Molnar
       [not found]   ` <20041001143332.7e3a5aba.akpm@osdl.org>
  1 sibling, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2004-10-01 20:47 UTC (permalink / raw)
  To: linux-kernel; +Cc: Christoph Hellwig, Andrew Morton


x86 port of generic hardirq handling.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Christoph Hellwig <hch@lst.de>
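
[ Editorial illustration - not part of the patch. Stripped of the
  CONFIG_4KSTACKS stack-switching below, the entire remaining x86 entry
  path reduces to a thin wrapper around the generic code - roughly: ]

	asmlinkage unsigned int do_IRQ(struct pt_regs regs)
	{
		int irq = regs.orig_eax & 0xff;	/* decode the vector */

		irq_enter();			/* bump the hardirq count */
		__do_IRQ(irq, &regs);		/* generic flow + handlers */
		irq_exit();			/* may run softirqs */
		return 1;
	}
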

--- linux/arch/i386/Kconfig.orig
+++ linux/arch/i386/Kconfig
@@ -1194,6 +1194,13 @@ source "crypto/Kconfig"
 
 source "lib/Kconfig"
 
+#
+# Use the generic interrupt handling code in kernel/hardirq.c:
+#
+config GENERIC_HARDIRQS
+	bool
+	default y
+
 config X86_SMP
 	bool
 	depends on SMP && !X86_VOYAGER
--- linux/arch/i386/kernel/vm86.c.orig
+++ linux/arch/i386/kernel/vm86.c
@@ -741,7 +741,7 @@ static inline void free_vm86_irq(int irq
 	spin_unlock_irqrestore(&irqbits_lock, flags);	
 }
 
-void release_x86_irqs(struct task_struct *task)
+void release_vm86_irqs(struct task_struct *task)
 {
 	int i;
 	for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++)
--- linux/arch/i386/kernel/io_apic.c.orig
+++ linux/arch/i386/kernel/io_apic.c
@@ -255,8 +255,6 @@ static void set_ioapic_affinity_irq(unsi
 #  define Dprintk(x...) 
 # endif
 
-extern cpumask_t irq_affinity[NR_IRQS];
-
 cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
 
 #define IRQBALANCE_CHECK_ARCH -999
--- linux/arch/i386/kernel/i386_ksyms.c.orig
+++ linux/arch/i386/kernel/i386_ksyms.c
@@ -76,9 +76,6 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(ioremap_nocache);
 EXPORT_SYMBOL(iounmap);
-EXPORT_SYMBOL(enable_irq);
-EXPORT_SYMBOL(disable_irq);
-EXPORT_SYMBOL(disable_irq_nosync);
 EXPORT_SYMBOL(probe_irq_mask);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(pm_idle);
@@ -146,7 +143,6 @@ EXPORT_SYMBOL(__write_lock_failed);
 EXPORT_SYMBOL(__read_lock_failed);
 
 /* Global SMP stuff */
-EXPORT_SYMBOL(synchronize_irq);
 EXPORT_SYMBOL(smp_call_function);
 
 /* TLB flushing */
--- linux/arch/i386/kernel/process.c.orig
+++ linux/arch/i386/kernel/process.c
@@ -344,7 +344,7 @@ void release_thread(struct task_struct *
 		}
 	}
 
-	release_x86_irqs(dead_task);
+	release_vm86_irqs(dead_task);
 }
 
 /*
--- linux/arch/i386/kernel/irq.c.orig
+++ linux/arch/i386/kernel/irq.c
@@ -3,437 +3,47 @@
  *
  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  *
- * This file contains the code used by various IRQ handling routines:
- * asking for different IRQ's should be done through these routines
- * instead of just grabbing them. Thus setups with different IRQ numbers
- * shouldn't result in any weird surprises, and installing new handlers
- * should be easier.
+ * This file contains the lowest level x86-specific interrupt
+ * entry, irq-stacks and irq statistics code. All the remaining
+ * irq logic is done by the generic kernel/irq/ code and
+ * by the x86-specific irq controller code (e.g. i8259.c and
+ * io_apic.c).
  */
 
-/*
- * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
- *
- * IRQs are in fact implemented a bit like signal handlers for the kernel.
- * Naturally it's not a 1:1 relation, but there are similarities.
- */
-
-#include <linux/config.h>
-#include <linux/errno.h>
+#include <asm/uaccess.h>
 #include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
+#include <linux/seq_file.h>
 #include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/smp_lock.h>
-#include <linux/init.h>
 #include <linux/kernel_stat.h>
-#include <linux/irq.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/kallsyms.h>
-
-#include <asm/atomic.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
-#include <asm/uaccess.h>
-#include <asm/delay.h>
-#include <asm/desc.h>
-#include <asm/irq.h>
-
-/*
- * Linux has a controller-independent x86 interrupt architecture.
- * every controller has a 'controller-template', that is used
- * by the main code to do the right thing. Each driver-visible
- * interrupt source is transparently wired to the apropriate
- * controller. Thus drivers need not be aware of the
- * interrupt-controller.
- *
- * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
- * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
- * (IO-APICs assumed to be messaging to Pentium local-APICs)
- *
- * the code is designed to be easily extended with new/different
- * interrupt controllers, without having to do assembly magic.
- */
 
-/*
- * Controller mappings for all interrupt sources:
- */
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
-	[0 ... NR_IRQS-1] = {
-		.handler = &no_irq_type,
-		.lock = SPIN_LOCK_UNLOCKED
-	}
-};
-
-static void register_irq_proc (unsigned int irq);
-
-/*
- * per-CPU IRQ handling stacks
- */
 #ifdef CONFIG_4KSTACKS
-union irq_ctx *hardirq_ctx[NR_CPUS];
-union irq_ctx *softirq_ctx[NR_CPUS];
-#endif
-
-/*
- * Special irq handlers.
- */
-
-irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
-{ return IRQ_NONE; }
-
 /*
- * Generic no controller code
+ * per-CPU IRQ handling contexts (thread information and stack)
  */
-
-static void enable_none(unsigned int irq) { }
-static unsigned int startup_none(unsigned int irq) { return 0; }
-static void disable_none(unsigned int irq) { }
-static void ack_none(unsigned int irq)
-{
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves, it doesn't deserve
- * a generic callback i think.
- */
-#ifdef CONFIG_X86
-	printk("unexpected IRQ trap at vector %02x\n", irq);
-#ifdef CONFIG_X86_LOCAL_APIC
-	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 */
-	ack_APIC_irq();
-#endif
-#endif
-}
-
-/* startup is the same as "enable", shutdown is same as "disable" */
-#define shutdown_none	disable_none
-#define end_none	enable_none
-
-struct hw_interrupt_type no_irq_type = {
-	"none",
-	startup_none,
-	shutdown_none,
-	enable_none,
-	disable_none,
-	ack_none,
-	end_none
+union irq_ctx {
+	struct thread_info      tinfo;
+	u32                     stack[THREAD_SIZE/sizeof(u32)];
 };
 
-atomic_t irq_err_count;
-#if defined(CONFIG_X86_IO_APIC) && defined(APIC_MISMATCH_DEBUG)
-atomic_t irq_mis_count;
-#endif
-
-/*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
-
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
-		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
-		action = irq_desc[i].action;
-		if (!action) 
-			goto skip;
-		seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
-		seq_printf(p, " %14s", irq_desc[i].handler->typename);
-		seq_printf(p, "  %s", action->name);
-
-		for (action=action->next; action; action = action->next)
-			seq_printf(p, ", %s", action->name);
-
-		seq_putc(p, '\n');
-skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
-		seq_printf(p, "NMI: ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", nmi_count(j));
-		seq_putc(p, '\n');
-#ifdef CONFIG_X86_LOCAL_APIC
-		seq_printf(p, "LOC: ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
-		seq_putc(p, '\n');
-#endif
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC) && defined(APIC_MISMATCH_DEBUG)
-		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-	}
-	return 0;
-}
-
-
-
-
-#ifdef CONFIG_SMP
-inline void synchronize_irq(unsigned int irq)
-{
-	while (irq_desc[irq].status & IRQ_INPROGRESS)
-		cpu_relax();
-}
+static union irq_ctx *hardirq_ctx[NR_CPUS];
+static union irq_ctx *softirq_ctx[NR_CPUS];
 #endif
 
 /*
- * This should really return information about whether
- * we should do bottom half handling etc. Right now we
- * end up _always_ checking the bottom half, which is a
- * waste of time and is not what some drivers would
- * prefer.
- */
-asmlinkage int handle_IRQ_event(unsigned int irq,
-		struct pt_regs *regs, struct irqaction *action)
-{
-	int status = 1;	/* Force the "do bottom halves" bit */
-	int ret, retval = 0;
-
-	if (!(action->flags & SA_INTERRUPT))
-		local_irq_enable();
-
-	do {
-		ret = action->handler(irq, action->dev_id, regs);
-		if (ret == IRQ_HANDLED)
-			status |= action->flags;
-		retval |= ret;
-		action = action->next;
-	} while (action);
-	if (status & SA_SAMPLE_RANDOM)
-		add_interrupt_randomness(irq);
-	local_irq_disable();
-	return retval;
-}
-
-static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	struct irqaction *action;
-
-	if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
-		printk(KERN_ERR "irq event %d: bogus return value %x\n",
-				irq, action_ret);
-	} else {
-		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
-	}
-	dump_stack();
-	printk(KERN_ERR "handlers:\n");
-	action = desc->action;
-	do {
-		printk(KERN_ERR "[<%p>]", action->handler);
-		print_symbol(" (%s)",
-			(unsigned long)action->handler);
-		printk("\n");
-		action = action->next;
-	} while (action);
-}
-
-static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	static int count = 100;
-
-	if (count) {
-		count--;
-		__report_bad_irq(irq, desc, action_ret);
-	}
-}
-
-static int noirqdebug;
-
-static int __init noirqdebug_setup(char *str)
-{
-	noirqdebug = 1;
-	printk("IRQ lockup detection disabled\n");
-	return 1;
-}
-
-__setup("noirqdebug", noirqdebug_setup);
-
-/*
- * If 99,900 of the previous 100,000 interrupts have not been handled then
- * assume that the IRQ is stuck in some manner.  Drop a diagnostic and try to
- * turn the IRQ off.
- *
- * (The other 100-of-100,000 interrupts may have been a correctly-functioning
- *  device sharing an IRQ with the failing one)
- *
- * Called under desc->lock
- */
-static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	if (action_ret != IRQ_HANDLED) {
-		desc->irqs_unhandled++;
-		if (action_ret != IRQ_NONE)
-			report_bad_irq(irq, desc, action_ret);
-	}
-
-	desc->irq_count++;
-	if (desc->irq_count < 100000)
-		return;
-
-	desc->irq_count = 0;
-	if (desc->irqs_unhandled > 99900) {
-		/*
-		 * The interrupt is stuck
-		 */
-		__report_bad_irq(irq, desc, action_ret);
-		/*
-		 * Now kill the IRQ
-		 */
-		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
-		desc->status |= IRQ_DISABLED;
-		desc->handler->disable(irq);
-	}
-	desc->irqs_unhandled = 0;
-}
-
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock. 
- */
- 
-/**
- *	disable_irq_nosync - disable an irq without waiting
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line.  Disables and Enables are
- *	nested.
- *	Unlike disable_irq(), this function does not ensure existing
- *	instances of the IRQ handler have completed before returning.
- *
- *	This function may be called from IRQ context.
- */
- 
-inline void disable_irq_nosync(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		desc->status |= IRQ_DISABLED;
-		desc->handler->disable(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-/**
- *	disable_irq - disable an irq and wait for completion
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line.  Enables and Disables are
- *	nested.
- *	This function waits for any pending IRQ handlers for this interrupt
- *	to complete before returning. If you use this function while
- *	holding a resource the IRQ handler may need you will deadlock.
- *
- *	This function may be called - with care - from IRQ context.
- */
- 
-void disable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	disable_irq_nosync(irq);
-	if (desc->action)
-		synchronize_irq(irq);
-}
-
-/**
- *	enable_irq - enable handling of an irq
- *	@irq: Interrupt to enable
- *
- *	Undoes the effect of one call to disable_irq().  If this
- *	matches the last disable, processing of interrupts on this
- *	IRQ line is re-enabled.
- *
- *	This function may be called from IRQ context.
- */
- 
-void enable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-		desc->status = status;
-		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
-			desc->status = status | IRQ_REPLAY;
-			hw_resend_irq(desc->handler,irq);
-		}
-		desc->handler->enable(irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-		break;
-	case 0:
-		printk("enable_irq(%u) unbalanced from %p\n", irq,
-		       __builtin_return_address(0));
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-/*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
  * handlers).
  */
 asmlinkage unsigned int do_IRQ(struct pt_regs regs)
 {	
-	/* 
-	 * We ack quickly, we don't want the irq controller
-	 * thinking we're snobs just because some other CPU has
-	 * disabled global interrupts (we have already done the
-	 * INT_ACK cycles, it's too late to try to pretend to the
-	 * controller that we aren't taking the interrupt).
-	 *
-	 * 0 return value means that this irq is already being
-	 * handled by some other CPU. (or is disabled)
-	 */
-	int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code  */
-	irq_desc_t *desc = irq_desc + irq;
-	struct irqaction * action;
-	unsigned int status;
+	/* high bits used in ret_from_ code */
+	int irq = regs.orig_eax & 0xff;
+#ifdef CONFIG_4KSTACKS
+	union irq_ctx *curctx, *irqctx;
+	u32 *isp;
+#endif
 
 	irq_enter();
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	/* Debugging check for stack overflow: is there less than 1KB free? */
 	{
@@ -448,642 +58,54 @@ asmlinkage unsigned int do_IRQ(struct pt
 		}
 	}
 #endif
-	kstat_this_cpu.irqs[irq]++;
-	spin_lock(&desc->lock);
-	desc->handler->ack(irq);
-	/*
-	   REPLAY is when Linux resends an IRQ that was dropped earlier
-	   WAITING is used by probe to mark irqs that are being tested
-	   */
-	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
-	status |= IRQ_PENDING; /* we _want_ to handle it */
 
-	/*
-	 * If the IRQ is disabled for whatever reason, we cannot
-	 * use the action we have.
-	 */
-	action = NULL;
-	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
-		action = desc->action;
-		status &= ~IRQ_PENDING; /* we commit to handling */
-		status |= IRQ_INPROGRESS; /* we are handling it */
-	}
-	desc->status = status;
+#ifdef CONFIG_4KSTACKS
 
-	/*
-	 * If there is no IRQ handler or it was disabled, exit early.
-	   Since we set PENDING, if another processor is handling
-	   a different instance of this same irq, the other processor
-	   will take care of it.
-	 */
-	if (unlikely(!action))
-		goto out;
+	curctx = (union irq_ctx *) current_thread_info();
+	irqctx = hardirq_ctx[smp_processor_id()];
 
 	/*
-	 * Edge triggered interrupts need to remember
-	 * pending events.
-	 * This applies to any hw interrupts that allow a second
-	 * instance of the same irq to arrive while we are in do_IRQ
-	 * or in the handler. But the code here only handles the _second_
-	 * instance of the irq, not the third or fourth. So it is mostly
-	 * useful for irq hardware that does not mask cleanly in an
-	 * SMP environment.
+	 * this is where we switch to the IRQ stack. However, if we are
+	 * already using the IRQ stack (because we interrupted a hardirq
+	 * handler) we can't do that and just have to keep using the
+	 * current stack (which is the irq stack already after all)
 	 */
-#ifdef CONFIG_4KSTACKS
-
-	for (;;) {
-		irqreturn_t action_ret;
-		u32 *isp;
-		union irq_ctx * curctx;
-		union irq_ctx * irqctx;
-
-		curctx = (union irq_ctx *) current_thread_info();
-		irqctx = hardirq_ctx[smp_processor_id()];
-
-		spin_unlock(&desc->lock);
-
-		/*
-		 * this is where we switch to the IRQ stack. However, if we are already using
-		 * the IRQ stack (because we interrupted a hardirq handler) we can't do that
-		 * and just have to keep using the current stack (which is the irq stack already
-		 * after all)
-		 */
-
-		if (curctx == irqctx)
-			action_ret = handle_IRQ_event(irq, &regs, action);
-		else {
-			/* build the stack frame on the IRQ stack */
-			isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
-			irqctx->tinfo.task = curctx->tinfo.task;
-			irqctx->tinfo.previous_esp = current_stack_pointer();
-
-			*--isp = (u32) action;
-			*--isp = (u32) &regs;
-			*--isp = (u32) irq;
-
-			asm volatile(
-				"       xchgl   %%ebx,%%esp     \n"
-				"       call    handle_IRQ_event \n"
-				"       xchgl   %%ebx,%%esp     \n"
-				: "=a"(action_ret)
-				: "b"(isp)
-				: "memory", "cc", "edx", "ecx"
-			);
-
-
-		}
-		spin_lock(&desc->lock);
-		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
-		if (curctx != irqctx)
-			irqctx->tinfo.task = NULL;
-		if (likely(!(desc->status & IRQ_PENDING)))
-			break;
-		desc->status &= ~IRQ_PENDING;
-	}
-
-#else
-
-	for (;;) {
-		irqreturn_t action_ret;
-
-		spin_unlock(&desc->lock);
+	if (curctx != irqctx) {
+		/* build the stack frame on the IRQ stack */
+		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+		irqctx->tinfo.task = curctx->tinfo.task;
+		irqctx->tinfo.previous_esp = current_stack_pointer();
 
-		action_ret = handle_IRQ_event(irq, &regs, action);
+		*--isp = (u32) &regs;
+		*--isp = (u32) irq;
 
-		spin_lock(&desc->lock);
-		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
-		if (likely(!(desc->status & IRQ_PENDING)))
-			break;
-		desc->status &= ~IRQ_PENDING;
-	}
+		asm volatile(
+			"       xchgl   %%ebx,%%esp      \n"
+			"       call    __do_IRQ         \n"
+			"       xchgl   %%ebx,%%esp      \n"
+			: : "b"(isp)
+			: "memory", "cc", "eax", "edx", "ecx"
+		);
+	} else
 #endif
-	desc->status &= ~IRQ_INPROGRESS;
-
-out:
-	/*
-	 * The ->end() handler has to deal with interrupts which got
-	 * disabled while the handler was running.
-	 */
-	desc->handler->end(irq);
-	spin_unlock(&desc->lock);
+		__do_IRQ(irq, &regs);
 
 	irq_exit();
 
 	return 1;
 }
 
-int can_request_irq(unsigned int irq, unsigned long irqflags)
-{
-	struct irqaction *action;
-
-	if (irq >= NR_IRQS)
-		return 0;
-	action = irq_desc[irq].action;
-	if (action) {
-		if (irqflags & action->flags & SA_SHIRQ)
-			action = NULL;
-	}
-	return !action;
-}
-
-/**
- *	request_irq - allocate an interrupt line
- *	@irq: Interrupt line to allocate
- *	@handler: Function to be called when the IRQ occurs
- *	@irqflags: Interrupt type flags
- *	@devname: An ascii name for the claiming device
- *	@dev_id: A cookie passed back to the handler function
- *
- *	This call allocates interrupt resources and enables the
- *	interrupt line and IRQ handling. From the point this
- *	call is made your handler function may be invoked. Since
- *	your handler function must clear any interrupt the board 
- *	raises, you must take care both to initialise your hardware
- *	and to set up the interrupt handler in the right order.
- *
- *	Dev_id must be globally unique. Normally the address of the
- *	device data structure is used as the cookie. Since the handler
- *	receives this value it makes sense to use it.
- *
- *	If your interrupt is shared you must pass a non NULL dev_id
- *	as this is required when freeing the interrupt.
- *
- *	Flags:
- *
- *	SA_SHIRQ		Interrupt is shared
- *
- *	SA_INTERRUPT		Disable local interrupts while processing
- *
- *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
- *
- */
- 
-int request_irq(unsigned int irq, 
-		irqreturn_t (*handler)(int, void *, struct pt_regs *),
-		unsigned long irqflags, 
-		const char * devname,
-		void *dev_id)
-{
-	int retval;
-	struct irqaction * action;
-
-#if 1
-	/*
-	 * Sanity-check: shared interrupts should REALLY pass in
-	 * a real dev-ID, otherwise we'll have trouble later trying
-	 * to figure out which interrupt is which (messes up the
-	 * interrupt freeing logic etc).
-	 */
-	if (irqflags & SA_SHIRQ) {
-		if (!dev_id)
-			printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]);
-	}
-#endif
-
-	if (irq >= NR_IRQS)
-		return -EINVAL;
-	if (!handler)
-		return -EINVAL;
-
-	action = (struct irqaction *)
-			kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-	if (!action)
-		return -ENOMEM;
-
-	action->handler = handler;
-	action->flags = irqflags;
-	cpus_clear(action->mask);
-	action->name = devname;
-	action->next = NULL;
-	action->dev_id = dev_id;
-
-	retval = setup_irq(irq, action);
-	if (retval)
-		kfree(action);
-	return retval;
-}
-
-EXPORT_SYMBOL(request_irq);
-
-/**
- *	free_irq - free an interrupt
- *	@irq: Interrupt line to free
- *	@dev_id: Device identity to free
- *
- *	Remove an interrupt handler. The handler is removed and if the
- *	interrupt line is no longer in use by any driver it is disabled.
- *	On a shared IRQ the caller must ensure the interrupt is disabled
- *	on the card it drives before calling this function. The function
- *	does not return until any executing interrupts for this IRQ
- *	have completed.
- *
- *	This function must not be called from interrupt context. 
- */
- 
-void free_irq(unsigned int irq, void *dev_id)
-{
-	irq_desc_t *desc;
-	struct irqaction **p;
-	unsigned long flags;
-
-	if (irq >= NR_IRQS)
-		return;
-
-	desc = irq_desc + irq;
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	for (;;) {
-		struct irqaction * action = *p;
-		if (action) {
-			struct irqaction **pp = p;
-			p = &action->next;
-			if (action->dev_id != dev_id)
-				continue;
-
-			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
-			if (!desc->action) {
-				desc->status |= IRQ_DISABLED;
-				desc->handler->shutdown(irq);
-			}
-			spin_unlock_irqrestore(&desc->lock,flags);
-
-			/* Wait to make sure it's not being used on another CPU */
-			synchronize_irq(irq);
-			kfree(action);
-			return;
-		}
-		printk("Trying to free free IRQ%d\n",irq);
-		spin_unlock_irqrestore(&desc->lock,flags);
-		return;
-	}
-}
-
-EXPORT_SYMBOL(free_irq);
-
-/*
- * IRQ autodetection code..
- *
- * This depends on the fact that any interrupt that
- * comes in on to an unassigned handler will get stuck
- * with "IRQ_WAITING" cleared and the interrupt
- * disabled.
- */
-
-static DECLARE_MUTEX(probe_sem);
-
-/**
- *	probe_irq_on	- begin an interrupt autodetect
- *
- *	Commence probing for an interrupt. The interrupts are scanned
- *	and a mask of potential interrupt lines is returned.
- *
- */
- 
-unsigned long probe_irq_on(void)
-{
-	unsigned int i;
-	irq_desc_t *desc;
-	unsigned long val;
-	unsigned long delay;
-
-	down(&probe_sem);
-	/* 
-	 * something may have generated an irq long ago and we want to
-	 * flush such a longstanding irq before considering it as spurious. 
-	 */
-	for (i = NR_IRQS-1; i > 0; i--)  {
-		desc = irq_desc + i;
-
-		spin_lock_irq(&desc->lock);
-		if (!irq_desc[i].action) 
-			irq_desc[i].handler->startup(i);
-		spin_unlock_irq(&desc->lock);
-	}
-
-	/* Wait for longstanding interrupts to trigger. */
-	for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
-		/* about 20ms delay */ barrier();
-
-	/*
-	 * enable any unassigned irqs
-	 * (we must startup again here because if a longstanding irq
-	 * happened in the previous stage, it may have masked itself)
-	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
-		desc = irq_desc + i;
-
-		spin_lock_irq(&desc->lock);
-		if (!desc->action) {
-			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
-			if (desc->handler->startup(i))
-				desc->status |= IRQ_PENDING;
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-
-	/*
-	 * Wait for spurious interrupts to trigger
-	 */
-	for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
-		/* about 100ms delay */ barrier();
-
-	/*
-	 * Now filter out any obviously spurious interrupts
-	 */
-	val = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc_t *desc = irq_desc + i;
-		unsigned int status;
-
-		spin_lock_irq(&desc->lock);
-		status = desc->status;
-
-		if (status & IRQ_AUTODETECT) {
-			/* It triggered already - consider it spurious. */
-			if (!(status & IRQ_WAITING)) {
-				desc->status = status & ~IRQ_AUTODETECT;
-				desc->handler->shutdown(i);
-			} else
-				if (i < 32)
-					val |= 1 << i;
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-
-	return val;
-}
-
-EXPORT_SYMBOL(probe_irq_on);
-
-/*
- * Return a mask of triggered interrupts (this
- * can handle only legacy ISA interrupts).
- */
- 
-/**
- *	probe_irq_mask - scan a bitmap of interrupt lines
- *	@val:	mask of interrupts to consider
- *
- *	Scan the ISA bus interrupt lines and return a bitmap of
- *	active interrupts. The interrupt probe logic state is then
- *	returned to its previous value.
- *
- *	Note: we need to scan all the irq's even though we will
- *	only return ISA irq numbers - just so that we reset them
- *	all to a known state.
- */
-unsigned int probe_irq_mask(unsigned long val)
-{
-	int i;
-	unsigned int mask;
-
-	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc_t *desc = irq_desc + i;
-		unsigned int status;
-
-		spin_lock_irq(&desc->lock);
-		status = desc->status;
-
-		if (status & IRQ_AUTODETECT) {
-			if (i < 16 && !(status & IRQ_WAITING))
-				mask |= 1 << i;
-
-			desc->status = status & ~IRQ_AUTODETECT;
-			desc->handler->shutdown(i);
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-	up(&probe_sem);
-
-	return mask & val;
-}
-
-/*
- * Return the one interrupt that triggered (this can
- * handle any interrupt source).
- */
-
-/**
- *	probe_irq_off	- end an interrupt autodetect
- *	@val: mask of potential interrupts (unused)
- *
- *	Scans the unused interrupt lines and returns the line which
- *	appears to have triggered the interrupt. If no interrupt was
- *	found then zero is returned. If more than one interrupt is
- *	found then minus the first candidate is returned to indicate
- *	there is doubt.
- *
- *	The interrupt probe logic state is returned to its previous
- *	value.
- *
- *	BUGS: When used in a module (which arguably shouldn't happen)
- *	nothing prevents two IRQ probe callers from overlapping. The
- *	results of this are non-optimal.
- */
- 
-int probe_irq_off(unsigned long val)
-{
-	int i, irq_found, nr_irqs;
-
-	nr_irqs = 0;
-	irq_found = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc_t *desc = irq_desc + i;
-		unsigned int status;
-
-		spin_lock_irq(&desc->lock);
-		status = desc->status;
-
-		if (status & IRQ_AUTODETECT) {
-			if (!(status & IRQ_WAITING)) {
-				if (!nr_irqs)
-					irq_found = i;
-				nr_irqs++;
-			}
-			desc->status = status & ~IRQ_AUTODETECT;
-			desc->handler->shutdown(i);
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-	up(&probe_sem);
-
-	if (nr_irqs > 1)
-		irq_found = -irq_found;
-	return irq_found;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-
-/* this was setup_x86_irq but it seems pretty generic */
-int setup_irq(unsigned int irq, struct irqaction * new)
-{
-	int shared = 0;
-	unsigned long flags;
-	struct irqaction *old, **p;
-	irq_desc_t *desc = irq_desc + irq;
-
-	if (desc->handler == &no_irq_type)
-		return -ENOSYS;
-	/*
-	 * Some drivers like serial.c use request_irq() heavily,
-	 * so we have to be careful not to interfere with a
-	 * running system.
-	 */
-	if (new->flags & SA_SAMPLE_RANDOM) {
-		/*
-		 * This function might sleep, we want to call it first,
-		 * outside of the atomic block.
-		 * Yes, this might clear the entropy pool if the wrong
-		 * driver is attempted to be loaded, without actually
-		 * installing a new handler, but is this really a problem,
-		 * only the sysadmin is able to do this.
-		 */
-		rand_initialize_irq(irq);
-	}
-
-	/*
-	 * The following block of code has to be executed atomically
-	 */
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	if ((old = *p) != NULL) {
-		/* Can't share interrupts unless both agree to */
-		if (!(old->flags & new->flags & SA_SHIRQ)) {
-			spin_unlock_irqrestore(&desc->lock,flags);
-			return -EBUSY;
-		}
-
-		/* add new interrupt at end of irq queue */
-		do {
-			p = &old->next;
-			old = *p;
-		} while (old);
-		shared = 1;
-	}
-
-	*p = new;
-
-	if (!shared) {
-		desc->depth = 0;
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
-		desc->handler->startup(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock,flags);
-
-	register_irq_proc(irq);
-	return 0;
-}
-
-static struct proc_dir_entry * root_irq_dir;
-static struct proc_dir_entry * irq_dir [NR_IRQS];
-
-#ifdef CONFIG_SMP
-
-static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
-
-cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
-
-static int irq_affinity_read_proc(char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	int irq = (long)data, full_count = count, err;
-	cpumask_t new_value, tmp;
-
-	if (!irq_desc[irq].handler->set_affinity)
-		return -EIO;
-
-	err = cpumask_parse(buffer, count, new_value);
-	if (err)
-		return err;
-
-	/*
-	 * Do not allow disabling IRQs completely - it's a too easy
-	 * way to make the system unusable accidentally :-) At least
-	 * one online CPU still has to be targeted.
-	 */
-	cpus_and(tmp, new_value, cpu_online_map);
-	if (cpus_empty(tmp))
-		return -EINVAL;
-
-	irq_affinity[irq] = new_value;
-	irq_desc[irq].handler->set_affinity(irq,
-					cpumask_of_cpu(first_cpu(new_value)));
-
-	return full_count;
-}
-
-#endif
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
-	char name [MAX_NAMELEN];
-
-	if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) ||
-			irq_dir[irq])
-		return;
-
-	memset(name, 0, MAX_NAMELEN);
-	sprintf(name, "%d", irq);
-
-	/* create /proc/irq/1234 */
-	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-
-#ifdef CONFIG_SMP
-	{
-		struct proc_dir_entry *entry;
-
-		/* create /proc/irq/1234/smp_affinity */
-		entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
-		if (entry) {
-			entry->nlink = 1;
-			entry->data = (void *)(long)irq;
-			entry->read_proc = irq_affinity_read_proc;
-			entry->write_proc = irq_affinity_write_proc;
-		}
-
-		smp_affinity_entry[irq] = entry;
-	}
-#endif
-}
-
-void init_irq_proc (void)
-{
-	int i;
-
-	/* create /proc/irq */
-	root_irq_dir = proc_mkdir("irq", NULL);
-	create_prof_cpu_mask(root_irq_dir);
-	/*
-	 * Create entries for all existing IRQs.
-	 */
-	for (i = 0; i < NR_IRQS; i++)
-		register_irq_proc(i);
-}
-
-
 #ifdef CONFIG_4KSTACKS
+
 /*
  * These should really be __section__(".bss.page_aligned") as well, but
  * gcc's 3.0 and earlier don't handle that correctly.
  */
-static char softirq_stack[NR_CPUS * THREAD_SIZE]  __attribute__((__aligned__(THREAD_SIZE)));
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]  __attribute__((__aligned__(THREAD_SIZE)));
+static char softirq_stack[NR_CPUS * THREAD_SIZE]
+		__attribute__((__aligned__(THREAD_SIZE)));
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE]
+		__attribute__((__aligned__(THREAD_SIZE)));
 
 /*
  * allocate per-cpu stacks for hardirq and for softirq processing
@@ -1140,7 +162,6 @@ asmlinkage void do_softirq(void)
 		/* build the stack frame on the softirq stack */
 		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
 
-
 		asm volatile(
 			"       xchgl   %%ebx,%%esp     \n"
 			"       call    __do_softirq    \n"
@@ -1156,3 +177,74 @@ asmlinkage void do_softirq(void)
 
 EXPORT_SYMBOL(do_softirq);
 #endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+#if defined(CONFIG_X86_IO_APIC) && defined(APIC_MISMATCH_DEBUG)
+atomic_t irq_mis_count;
+#endif
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int i = *(loff_t *) v, j;
+	struct irqaction * action;
+	unsigned long flags;
+
+	if (i == 0) {
+		seq_printf(p, "           ");
+		for (j=0; j<NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "CPU%d       ",j);
+		seq_putc(p, '\n');
+	}
+
+	if (i < NR_IRQS) {
+		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		action = irq_desc[i].action;
+		if (!action) 
+			goto skip;
+		seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+		seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+		seq_printf(p, " %14s", irq_desc[i].handler->typename);
+		seq_printf(p, "  %s", action->name);
+
+		for (action=action->next; action; action = action->next)
+			seq_printf(p, ", %s", action->name);
+
+		seq_putc(p, '\n');
+skip:
+		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+	} else if (i == NR_IRQS) {
+		seq_printf(p, "NMI: ");
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ", nmi_count(j));
+		seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+		seq_printf(p, "LOC: ");
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ",
+					irq_stat[j].apic_timer_irqs);
+		seq_putc(p, '\n');
+#endif
+		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC) && defined(APIC_MISMATCH_DEBUG)
+		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+	}
+	return 0;
+}
--- linux/include/asm-i386/irq.h.orig
+++ linux/include/asm-i386/irq.h
@@ -21,38 +21,17 @@ static __inline__ int irq_canonicalize(i
 	return ((irq == 2) ? 9 : irq);
 }
 
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-extern void release_x86_irqs(struct task_struct *);
-extern int can_request_irq(unsigned int, unsigned long flags);
+extern void release_vm86_irqs(struct task_struct *);
 
 #ifdef CONFIG_X86_LOCAL_APIC
-#define ARCH_HAS_NMI_WATCHDOG		/* See include/linux/nmi.h */
+# define ARCH_HAS_NMI_WATCHDOG		/* See include/linux/nmi.h */
 #endif
 
 #ifdef CONFIG_4KSTACKS
-/*
- * per-CPU IRQ handling contexts (thread information and stack)
- */
-union irq_ctx {
-	struct thread_info      tinfo;
-	u32                     stack[THREAD_SIZE/sizeof(u32)];
-};
-
-extern union irq_ctx *hardirq_ctx[NR_CPUS];
-extern union irq_ctx *softirq_ctx[NR_CPUS];
-
-extern void irq_ctx_init(int cpu);
-
-#define __ARCH_HAS_DO_SOFTIRQ
+  extern void irq_ctx_init(int cpu);
+# define __ARCH_HAS_DO_SOFTIRQ
 #else
-#define irq_ctx_init(cpu) do { ; } while (0)
+# define irq_ctx_init(cpu) do { } while (0)
 #endif
 
-struct irqaction;
-struct pt_regs;
-asmlinkage int handle_IRQ_event(unsigned int, struct pt_regs *,
-				struct irqaction *);
-
 #endif /* _ASM_IRQ_H */
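
(__ARCH_HAS_DO_SOFTIRQ works as before: when an arch defines it, the
generic entry point in kernel/softirq.c compiles out in favour of the
stack-switching do_softirq() above. Roughly - a sketch of the generic
side, not part of this patch:

	#ifndef __ARCH_HAS_DO_SOFTIRQ
	asmlinkage void do_softirq(void)
	{
		unsigned long flags;

		if (in_interrupt())
			return;
		local_irq_save(flags);
		if (local_softirq_pending())
			__do_softirq();
		local_irq_restore(flags);
	}
	#endif

so a 4KSTACKS kernel keeps its separate per-cpu softirq stack.)
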
--- linux/include/asm-i386/hardirq.h.orig
+++ linux/include/asm-i386/hardirq.h
@@ -15,47 +15,25 @@ typedef struct {
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
 
 /*
- * We put the hardirq and softirq counter into the preemption
- * counter. The bitmask has the following meaning:
- *
- * - bits 0-7 are the preemption count (max preemption depth: 256)
- * - bits 8-15 are the softirq count (max # of softirqs: 256)
- * - bits 16-23 are the hardirq count (max # of hardirqs: 256)
- *
- * - ( bit 26 is the PREEMPT_ACTIVE flag. )
- *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x00ff0000
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this itself.
  */
-
-#define PREEMPT_BITS	8
-#define SOFTIRQ_BITS	8
-#define HARDIRQ_BITS	8
-
-#define PREEMPT_SHIFT	0
-#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
+static inline void ack_bad_irq(unsigned int irq)
+{
+#ifdef CONFIG_X86
+	printk("unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 */
+	ack_APIC_irq();
 #endif
-
-#define nmi_enter()		(irq_enter())
-#define nmi_exit()		(preempt_count() -= HARDIRQ_OFFSET)
-
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
-#define irq_exit()							\
-do {									\
-		preempt_count() -= IRQ_EXIT_OFFSET;			\
-		if (!in_interrupt() && softirq_pending(smp_processor_id())) \
-			do_softirq();					\
-		preempt_enable_no_resched();				\
-} while (0)
+#endif
+}
 
 #endif /* __ASM_HARDIRQ_H */
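
(the only irq detail left in asm-i386/hardirq.h is ack_bad_irq()
above; the generic code is expected to call it from its no-controller
'ack' handler, roughly like this sketch - the real thing lives in the
core patch:

	static void ack_none(unsigned int irq)
	{
		/* the arch decides what a stray vector means: */
		ack_bad_irq(irq);
	}

which is how the old x86-only ack_none() logic survives the move to
generic code.)
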
--- linux/include/asm-i386/hw_irq.h.orig
+++ linux/include/asm-i386/hw_irq.h
@@ -45,8 +45,6 @@ asmlinkage void thermal_interrupt(struct
 #define platform_legacy_irq(irq)	((irq) < 16)
 #endif
 
-void mask_irq(unsigned int irq);
-void unmask_irq(unsigned int irq);
 void disable_8259A_irq(unsigned int irq);
 void enable_8259A_irq(unsigned int irq);
 int i8259A_irq_pending(unsigned int irq);
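
for completeness: the driver-visible API is untouched by all this. A
minimal user of the request_irq()/free_irq() interface documented
above would look like the sketch below (the foo_* names and helpers
are made up for illustration):

	static irqreturn_t foo_interrupt(int irq, void *dev_id,
					 struct pt_regs *regs)
	{
		struct foo_device *foo = dev_id;

		if (!foo_irq_pending(foo))
			return IRQ_NONE;	/* shared line, not us */
		foo_ack_hw(foo);
		return IRQ_HANDLED;
	}

	...
	/* shared irq: a unique, non-NULL dev_id is mandatory */
	if (request_irq(foo->irq, foo_interrupt, SA_SHIRQ, "foo", foo))
		goto err_out;
	...
	free_irq(foo->irq, foo);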

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 3/5, 2.6.9-rc3] generic irq subsystem, x64 port
  2004-10-01 20:47   ` [patch 2/5, 2.6.9-rc3] generic irq subsystem, x86 port Ingo Molnar
@ 2004-10-01 20:48     ` Ingo Molnar
  2004-10-01 20:48       ` [patch 4/5, 2.6.9-rc3] generic irq subsystem, ppc port Ingo Molnar
  2004-10-01 20:49       ` [patch 5/5, 2.6.9-rc3] generic irq subsystem, ppc64 port Ingo Molnar
  0 siblings, 2 replies; 15+ messages in thread
From: Ingo Molnar @ 2004-10-01 20:48 UTC (permalink / raw)
  To: linux-kernel; +Cc: Christoph Hellwig, Andrew Morton


x64 port of generic hardirq handling.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Christoph Hellwig <hch@lst.de>

--- linux/arch/x86_64/Kconfig.orig
+++ linux/arch/x86_64/Kconfig
@@ -329,6 +329,12 @@ config X86_MCE
 
 endmenu
 
+#
+# Use the generic interrupt handling code in kernel/hardirq.c:
+#
+config GENERIC_HARDIRQS
+	bool
+	default y
 
 menu "Power management options"
 
--- linux/arch/x86_64/kernel/irq.c.orig
+++ linux/arch/x86_64/kernel/irq.c
@@ -3,127 +3,19 @@
  *
  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  *
- * This file contains the code used by various IRQ handling routines:
- * asking for different IRQ's should be done through these routines
- * instead of just grabbing them. Thus setups with different IRQ numbers
- * shouldn't result in any weird surprises, and installing new handlers
- * should be easier.
+ * This file contains the lowest level x86_64-specific interrupt
+ * entry and irq statistics code. All the remaining irq logic is
+ * done by the generic kernel/hardirq.c code and by the
+ * x86_64-specific irq controller code (e.g. i8259.c and
+ * io_apic.c).
  */
 
-/*
- * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
- *
- * IRQs are in fact implemented a bit like signal handlers for the kernel.
- * Naturally it's not a 1:1 relation, but there are similarities.
- */
-
-#include <linux/config.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/smp_lock.h>
-#include <linux/init.h>
 #include <linux/kernel_stat.h>
-#include <linux/irq.h>
-#include <linux/proc_fs.h>
+#include <linux/interrupt.h>
 #include <linux/seq_file.h>
-
-#include <asm/atomic.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
+#include <linux/module.h>
 #include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/delay.h>
-#include <asm/desc.h>
-#include <asm/irq.h>
-
-
-
-/*
- * Linux has a controller-independent x86 interrupt architecture.
- * every controller has a 'controller-template', that is used
- * by the main code to do the right thing. Each driver-visible
- * interrupt source is transparently wired to the appropriate
- * controller. Thus drivers need not be aware of the
- * interrupt-controller.
- *
- * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
- * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
- * (IO-APICs assumed to be messaging to Pentium local-APICs)
- *
- * the code is designed to be easily extended with new/different
- * interrupt controllers, without having to do assembly magic.
- */
-
-/*
- * Controller mappings for all interrupt sources:
- */
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
-	[0 ... NR_IRQS-1] = {
-		.handler = &no_irq_type,
-		.lock = SPIN_LOCK_UNLOCKED
-	}
-};
-
-static void register_irq_proc (unsigned int irq);
-
-/*
- * Special irq handlers.
- */
-
-irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) { return IRQ_NONE; }
-
-/*
- * Generic no controller code
- */
-
-static void enable_none(unsigned int irq) { }
-static unsigned int startup_none(unsigned int irq) { return 0; }
-static void disable_none(unsigned int irq) { }
-static void ack_none(unsigned int irq)
-{
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves, it doesn't deserve
- * a generic callback i think.
- */
-#ifdef CONFIG_X86
-	printk("unexpected IRQ trap at vector %02x\n", irq);
-#ifdef CONFIG_X86_LOCAL_APIC
-	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 */
-	ack_APIC_irq();
-#endif
-#endif
-}
-
-/* startup is the same as "enable", shutdown is same as "disable" */
-#define shutdown_none	disable_none
-#define end_none	enable_none
-
-struct hw_interrupt_type no_irq_type = {
-	"none",
-	startup_none,
-	shutdown_none,
-	enable_none,
-	disable_none,
-	ack_none,
-	end_none
-};
+#include <asm/io_apic.h>
 
 atomic_t irq_err_count;
 #ifdef CONFIG_X86_IO_APIC
@@ -195,131 +87,6 @@ skip:
 	return 0;
 }
 
-#ifdef CONFIG_SMP
-inline void synchronize_irq(unsigned int irq)
-{
-	while (irq_desc[irq].status & IRQ_INPROGRESS)
-		cpu_relax();
-}
-#endif
-
-/*
- * This should really return information about whether
- * we should do bottom half handling etc. Right now we
- * end up _always_ checking the bottom half, which is a
- * waste of time and is not what some drivers would
- * prefer.
- */
-int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
-{
-	int status = 1; /* Force the "do bottom halves" bit */
-	int ret;
-
-	if (!(action->flags & SA_INTERRUPT))
-		local_irq_enable();
-
-	do {
-		ret = action->handler(irq, action->dev_id, regs);
-		if (ret == IRQ_HANDLED)
-			status |= action->flags;
-		action = action->next;
-	} while (action);
-	if (status & SA_SAMPLE_RANDOM)
-		add_interrupt_randomness(irq);
-	local_irq_disable();
-
-	return status;
-}
-
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock. 
- */
- 
-/**
- *	disable_irq_nosync - disable an irq without waiting
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line.  Disables and Enables are
- *	nested.
- *	Unlike disable_irq(), this function does not ensure existing
- *	instances of the IRQ handler have completed before returning.
- *
- *	This function must not be called from IRQ context.
- */
- 
-inline void disable_irq_nosync(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		desc->status |= IRQ_DISABLED;
-		desc->handler->disable(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-/**
- *	disable_irq - disable an irq and wait for completion
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line.  Enables and Disables are
- *	nested.
- *	This function waits for any pending IRQ handlers for this interrupt
- *	to complete before returning. If you use this function while
- *	holding a resource the IRQ handler may need you will deadlock.
- *
- *	This function may be called - with care - from IRQ context.
- */
- 
-void disable_irq(unsigned int irq)
-{
-	disable_irq_nosync(irq);
-	synchronize_irq(irq);
-}
-
-/**
- *	enable_irq - enable handling of an irq
- *	@irq: Interrupt to enable
- *
- *	Undoes the effect of one call to disable_irq().  If this
- *	matches the last disable, processing of interrupts on this
- *	IRQ line is re-enabled.
- *
- *	This function may be called from IRQ context.
- */
- 
-void enable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-		desc->status = status;
-		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
-			desc->status = status | IRQ_REPLAY;
-			hw_resend_irq(desc->handler,irq);
-		}
-		desc->handler->enable(irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-		break;
-	case 0:
-		printk("enable_irq(%u) unbalanced from %p\n", irq,
-		       __builtin_return_address(0));
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
 /*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
@@ -327,591 +94,15 @@ void enable_irq(unsigned int irq)
  */
 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 {	
-	/* 
-	 * We ack quickly, we don't want the irq controller
-	 * thinking we're snobs just because some other CPU has
-	 * disabled global interrupts (we have already done the
-	 * INT_ACK cycles, it's too late to try to pretend to the
-	 * controller that we aren't taking the interrupt).
-	 *
-	 * 0 return value means that this irq is already being
-	 * handled by some other CPU. (or is disabled)
-	 */
-	unsigned irq = regs->orig_rax & 0xff; /* high bits used in ret_from_ code  */
-	int cpu = smp_processor_id();
-	irq_desc_t *desc = irq_desc + irq;
-	struct irqaction * action;
-	unsigned int status;
-
-	if (irq > 256) BUG();
+	/* high bits used in ret_from_ code  */
+	unsigned irq = regs->orig_rax & 0xff;
 
-	irq_enter(); 
-	kstat_cpu(cpu).irqs[irq]++;
-	spin_lock(&desc->lock);
-	desc->handler->ack(irq);
-	/*
-	   REPLAY is when Linux resends an IRQ that was dropped earlier
-	   WAITING is used by probe to mark irqs that are being tested
-	   */
-	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
-	status |= IRQ_PENDING; /* we _want_ to handle it */
-
-	/*
-	 * If the IRQ is disabled for whatever reason, we cannot
-	 * use the action we have.
-	 */
-	action = NULL;
-	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
-		action = desc->action;
-		status &= ~IRQ_PENDING; /* we commit to handling */
-		status |= IRQ_INPROGRESS; /* we are handling it */
-	}
-	desc->status = status;
-
-	/*
-	 * If there is no IRQ handler or it was disabled, exit early.
-	   Since we set PENDING, if another processor is handling
-	   a different instance of this same irq, the other processor
-	   will take care of it.
-	 */
-	if (unlikely(!action))
-		goto out;
-
-	/*
-	 * Edge triggered interrupts need to remember
-	 * pending events.
-	 * This applies to any hw interrupts that allow a second
-	 * instance of the same irq to arrive while we are in do_IRQ
-	 * or in the handler. But the code here only handles the _second_
-	 * instance of the irq, not the third or fourth. So it is mostly
-	 * useful for irq hardware that does not mask cleanly in an
-	 * SMP environment.
-	 */
-	for (;;) {
-		spin_unlock(&desc->lock);
-		handle_IRQ_event(irq, regs, action);
-		spin_lock(&desc->lock);
-		
-		if (unlikely(!(desc->status & IRQ_PENDING)))
-			break;
-		desc->status &= ~IRQ_PENDING;
-	}
-	desc->status &= ~IRQ_INPROGRESS;
-out:
-	/*
-	 * The ->end() handler has to deal with interrupts which got
-	 * disabled while the handler was running.
-	 */
-	if (irq > 256) BUG();
-	desc->handler->end(irq);
-	spin_unlock(&desc->lock);
+	irq_enter();
+	BUG_ON(irq > 256);
 
+	__do_IRQ(irq, regs);
 	irq_exit();
-	return 1;
-}
-
-int can_request_irq(unsigned int irq, unsigned long irqflags)
-{
-	struct irqaction *action;
-
-	if (irq >= NR_IRQS)
-		return 0;
-	action = irq_desc[irq].action;
-	if (action) {
-		if (irqflags & action->flags & SA_SHIRQ)
-			action = NULL;
-	}
-	return !action;
-}
-
-/**
- *	request_irq - allocate an interrupt line
- *	@irq: Interrupt line to allocate
- *	@handler: Function to be called when the IRQ occurs
- *	@irqflags: Interrupt type flags
- *	@devname: An ascii name for the claiming device
- *	@dev_id: A cookie passed back to the handler function
- *
- *	This call allocates interrupt resources and enables the
- *	interrupt line and IRQ handling. From the point this
- *	call is made your handler function may be invoked. Since
- *	your handler function must clear any interrupt the board 
- *	raises, you must take care both to initialise your hardware
- *	and to set up the interrupt handler in the right order.
- *
- *	Dev_id must be globally unique. Normally the address of the
- *	device data structure is used as the cookie. Since the handler
- *	receives this value it makes sense to use it.
- *
- *	If your interrupt is shared you must pass a non NULL dev_id
- *	as this is required when freeing the interrupt.
- *
- *	Flags:
- *
- *	SA_SHIRQ		Interrupt is shared
- *
- *	SA_INTERRUPT		Disable local interrupts while processing
- *
- *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
- *
- */
- 
-int request_irq(unsigned int irq, 
-		irqreturn_t (*handler)(int, void *, struct pt_regs *),
-		unsigned long irqflags, 
-		const char * devname,
-		void *dev_id)
-{
-	int retval;
-	struct irqaction * action;
-
-#if 1
-	/*
-	 * Sanity-check: shared interrupts should REALLY pass in
-	 * a real dev-ID, otherwise we'll have trouble later trying
-	 * to figure out which interrupt is which (messes up the
-	 * interrupt freeing logic etc).
-	 */
-	if (irqflags & SA_SHIRQ) {
-		if (!dev_id)
-			printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]);
-	}
-#endif
-
-	if (irq >= NR_IRQS)
-		return -EINVAL;
-	if (!handler)
-		return -EINVAL;
-
-	action = (struct irqaction *)
-			kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-	if (!action)
-		return -ENOMEM;
-
-	action->handler = handler;
-	action->flags = irqflags;
-	cpus_clear(action->mask);
-	action->name = devname;
-	action->next = NULL;
-	action->dev_id = dev_id;
-
-	retval = setup_irq(irq, action);
-	if (retval)
-		kfree(action);
-	return retval;
-}
-
-EXPORT_SYMBOL(request_irq);
-
-/**
- *	free_irq - free an interrupt
- *	@irq: Interrupt line to free
- *	@dev_id: Device identity to free
- *
- *	Remove an interrupt handler. The handler is removed and if the
- *	interrupt line is no longer in use by any driver it is disabled.
- *	On a shared IRQ the caller must ensure the interrupt is disabled
- *	on the card it drives before calling this function. The function
- *	does not return until any executing interrupts for this IRQ
- *	have completed.
- *
- *	This function may be called from interrupt context. 
- *
- *	Bugs: Attempting to free an irq in a handler for the same irq hangs
- *	      the machine.
- */
- 
-void free_irq(unsigned int irq, void *dev_id)
-{
-	irq_desc_t *desc;
-	struct irqaction **p;
-	unsigned long flags;
-
-	if (irq >= NR_IRQS)
-		return;
-
-	desc = irq_desc + irq;
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	for (;;) {
-		struct irqaction * action = *p;
-		if (action) {
-			struct irqaction **pp = p;
-			p = &action->next;
-			if (action->dev_id != dev_id)
-				continue;
-
-			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
-			if (!desc->action) {
-				desc->status |= IRQ_DISABLED;
-				desc->handler->shutdown(irq);
-			}
-			spin_unlock_irqrestore(&desc->lock,flags);
-
-			synchronize_irq(irq);
-			kfree(action);
-			return;
-		}
-		printk("Trying to free free IRQ%d\n",irq);
-		spin_unlock_irqrestore(&desc->lock,flags);
-		return;
-	}
-}
-
-EXPORT_SYMBOL(free_irq);
-
-/*
- * IRQ autodetection code..
- *
- * This depends on the fact that any interrupt that
- * comes in on to an unassigned handler will get stuck
- * with "IRQ_WAITING" cleared and the interrupt
- * disabled.
- */
-
-static DECLARE_MUTEX(probe_sem);
-
-/**
- *	probe_irq_on	- begin an interrupt autodetect
- *
- *	Commence probing for an interrupt. The interrupts are scanned
- *	and a mask of potential interrupt lines is returned.
- *
- */
- 
-unsigned long probe_irq_on(void)
-{
-	unsigned int i;
-	irq_desc_t *desc;
-	unsigned long val;
-	unsigned long delay;
-
-	down(&probe_sem);
-	/* 
-	 * something may have generated an irq long ago and we want to
-	 * flush such a longstanding irq before considering it as spurious. 
-	 */
-	for (i = NR_IRQS-1; i > 0; i--)  {
-		desc = irq_desc + i;
-
-		spin_lock_irq(&desc->lock);
-		if (!irq_desc[i].action) 
-			irq_desc[i].handler->startup(i);
-		spin_unlock_irq(&desc->lock);
-	}
-
-	/* Wait for longstanding interrupts to trigger. */
-	for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
-		/* about 20ms delay */ barrier();
-
-	/*
-	 * enable any unassigned irqs
-	 * (we must startup again here because if a longstanding irq
-	 * happened in the previous stage, it may have masked itself)
-	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
-		desc = irq_desc + i;
-
-		spin_lock_irq(&desc->lock);
-		if (!desc->action) {
-			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
-			if (desc->handler->startup(i))
-				desc->status |= IRQ_PENDING;
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-
-	/*
-	 * Wait for spurious interrupts to trigger
-	 */
-	for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
-		/* about 100ms delay */ barrier();
-
-	/*
-	 * Now filter out any obviously spurious interrupts
-	 */
-	val = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc_t *desc = irq_desc + i;
-		unsigned int status;
-
-		spin_lock_irq(&desc->lock);
-		status = desc->status;
-
-		if (status & IRQ_AUTODETECT) {
-			/* It triggered already - consider it spurious. */
-			if (!(status & IRQ_WAITING)) {
-				desc->status = status & ~IRQ_AUTODETECT;
-				desc->handler->shutdown(i);
-			} else
-				if (i < 32)
-					val |= 1 << i;
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-
-	return val;
-}
-
-EXPORT_SYMBOL(probe_irq_on);
-
-/*
- * Return a mask of triggered interrupts (this
- * can handle only legacy ISA interrupts).
- */
- 
-/**
- *	probe_irq_mask - scan a bitmap of interrupt lines
- *	@val:	mask of interrupts to consider
- *
- *	Scan the ISA bus interrupt lines and return a bitmap of
- *	active interrupts. The interrupt probe logic state is then
- *	returned to its previous value.
- *
- *	Note: we need to scan all the irq's even though we will
- *	only return ISA irq numbers - just so that we reset them
- *	all to a known state.
- */
-unsigned int probe_irq_mask(unsigned long val)
-{
-	int i;
-	unsigned int mask;
-
-	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc_t *desc = irq_desc + i;
-		unsigned int status;
-
-		spin_lock_irq(&desc->lock);
-		status = desc->status;
-
-		if (status & IRQ_AUTODETECT) {
-			if (i < 16 && !(status & IRQ_WAITING))
-				mask |= 1 << i;
-
-			desc->status = status & ~IRQ_AUTODETECT;
-			desc->handler->shutdown(i);
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-	up(&probe_sem);
-
-	return mask & val;
-}
-
-/*
- * Return the one interrupt that triggered (this can
- * handle any interrupt source).
- */
-
-/**
- *	probe_irq_off	- end an interrupt autodetect
- *	@val: mask of potential interrupts (unused)
- *
- *	Scans the unused interrupt lines and returns the line which
- *	appears to have triggered the interrupt. If no interrupt was
- *	found then zero is returned. If more than one interrupt is
- *	found then minus the first candidate is returned to indicate
- *	there is doubt.
- *
- *	The interrupt probe logic state is returned to its previous
- *	value.
- *
- *	BUGS: When used in a module (which arguably shouldn't happen)
- *	nothing prevents two IRQ probe callers from overlapping. The
- *	results of this are non-optimal.
- */
- 
-int probe_irq_off(unsigned long val)
-{
-	int i, irq_found, nr_irqs;
 
-	nr_irqs = 0;
-	irq_found = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc_t *desc = irq_desc + i;
-		unsigned int status;
-
-		spin_lock_irq(&desc->lock);
-		status = desc->status;
-
-		if (status & IRQ_AUTODETECT) {
-			if (!(status & IRQ_WAITING)) {
-				if (!nr_irqs)
-					irq_found = i;
-				nr_irqs++;
-			}
-			desc->status = status & ~IRQ_AUTODETECT;
-			desc->handler->shutdown(i);
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-	up(&probe_sem);
-
-	if (nr_irqs > 1)
-		irq_found = -irq_found;
-	return irq_found;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-
-/* this was setup_x86_irq but it seems pretty generic */
-int setup_irq(unsigned int irq, struct irqaction * new)
-{
-	int shared = 0;
-	unsigned long flags;
-	struct irqaction *old, **p;
-	irq_desc_t *desc = irq_desc + irq;
-
-	if (desc->handler == &no_irq_type)
-		return -ENOSYS;
-
-	/*
-	 * Some drivers like serial.c use request_irq() heavily,
-	 * so we have to be careful not to interfere with a
-	 * running system.
-	 */
-	if (new->flags & SA_SAMPLE_RANDOM) {
-		/*
-		 * This function might sleep, we want to call it first,
-		 * outside of the atomic block.
-		 * Yes, this might clear the entropy pool if the wrong
-		 * driver is attempted to be loaded, without actually
-		 * installing a new handler, but is this really a problem,
-		 * only the sysadmin is able to do this.
-		 */
-		rand_initialize_irq(irq);
-	}
-
-	/*
-	 * The following block of code has to be executed atomically
-	 */
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	if ((old = *p) != NULL) {
-		/* Can't share interrupts unless both agree to */
-		if (!(old->flags & new->flags & SA_SHIRQ)) {
-			spin_unlock_irqrestore(&desc->lock,flags);
-			return -EBUSY;
-		}
-
-		/* add new interrupt at end of irq queue */
-		do {
-			p = &old->next;
-			old = *p;
-		} while (old);
-		shared = 1;
-	}
-
-	*p = new;
-
-	if (!shared) {
-		desc->depth = 0;
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
-		desc->handler->startup(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock,flags);
-
-	register_irq_proc(irq);
-	return 0;
-}
-
-static struct proc_dir_entry * root_irq_dir;
-static struct proc_dir_entry * irq_dir [NR_IRQS];
-
-#ifdef CONFIG_SMP
-
-static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
-
-static cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
-static int irq_affinity_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int irq_affinity_write_proc (struct file *file,
-					const char __user *buffer,
-					unsigned long count, void *data)
-{
-	int irq = (long) data, full_count = count, err;
-	cpumask_t tmp, new_value;
-
-	if (!irq_desc[irq].handler->set_affinity)
-		return -EIO;
-
-	err = cpumask_parse(buffer, count, new_value);
-
-	/*
-	 * Do not allow disabling IRQs completely - it's a too easy
-	 * way to make the system unusable accidentally :-) At least
-	 * one online CPU still has to be targeted.
-	 */
-	cpus_and(tmp, new_value, cpu_online_map);
-	if (cpus_empty(tmp))
-		return -EINVAL;
-
-	irq_affinity[irq] = new_value;
-	irq_desc[irq].handler->set_affinity(irq, new_value);
-
-	return full_count;
-}
-
-#endif
-
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
-	char name [MAX_NAMELEN];
-
-	if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) ||
-			irq_dir[irq])
-		return;
-
-	memset(name, 0, MAX_NAMELEN);
-	sprintf(name, "%d", irq);
-
-	/* create /proc/irq/1234 */
-	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-
-#ifdef CONFIG_SMP
-	{
-		struct proc_dir_entry *entry;
-
-		/* create /proc/irq/1234/smp_affinity */
-		entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
-		if (entry) {
-			entry->nlink = 1;
-			entry->data = (void *)(long)irq;
-			entry->read_proc = irq_affinity_read_proc;
-			entry->write_proc = irq_affinity_write_proc;
-		}
-
-		smp_affinity_entry[irq] = entry;
-	}
-#endif
+	return 1;
 }
 
-void init_irq_proc (void)
-{
-	int i;
-
-	/* create /proc/irq */
-	root_irq_dir = proc_mkdir("irq", NULL);
-
-	/* create /proc/irq/prof_cpu_mask */
-	create_prof_cpu_mask(root_irq_dir);
-
-	/*
-	 * Create entries for all existing IRQs.
-	 */
-	for (i = 0; i < NR_IRQS; i++)
-		register_irq_proc(i);
-}
--- linux/include/asm-x86_64/irq.h.orig
+++ linux/include/asm-x86_64/irq.h
@@ -44,11 +44,6 @@ static __inline__ int irq_canonicalize(i
 	return ((irq == 2) ? 9 : irq);
 }
 
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-extern int can_request_irq(unsigned int, unsigned long flags);
-
 #ifdef CONFIG_X86_LOCAL_APIC
 #define ARCH_HAS_NMI_WATCHDOG		/* See include/linux/nmi.h */
 #endif
--- linux/include/asm-x86_64/hardirq.h.orig
+++ linux/include/asm-x86_64/hardirq.h
@@ -5,6 +5,7 @@
 #include <linux/threads.h>
 #include <linux/irq.h>
 #include <asm/pda.h>
+#include <asm/apic.h>
 
 #define __ARCH_IRQ_STAT 1
 
@@ -15,47 +16,24 @@
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
 
 /*
- * We put the hardirq and softirq counter into the preemption
- * counter. The bitmask has the following meaning:
- *
- * - bits 0-7 are the preemption count (max preemption depth: 256)
- * - bits 8-15 are the softirq count (max # of softirqs: 256)
- * - bits 16-23 are the hardirq count (max # of hardirqs: 256)
- *
- * - ( bit 26 is the PREEMPT_ACTIVE flag. )
- *
- * PREEMPT_MASK: 0x000000ff
- * HARDIRQ_MASK: 0x0000ff00
- * SOFTIRQ_MASK: 0x00ff0000
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this itself.
  */
-
-#define PREEMPT_BITS	8
-#define SOFTIRQ_BITS	8
-#define HARDIRQ_BITS	8
-
-#define PREEMPT_SHIFT	0
-#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
+static inline void ack_bad_irq(unsigned int irq)
+{
+#ifdef CONFIG_X86
+	printk("unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 */
+	ack_APIC_irq();
 #endif
-
-#define nmi_enter()		(irq_enter())
-#define nmi_exit()		(preempt_count() -= HARDIRQ_OFFSET)
-
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
-#define irq_exit()							\
-do {									\
-		preempt_count() -= IRQ_EXIT_OFFSET;			\
-		if (!in_interrupt() && softirq_pending(smp_processor_id())) \
-			do_softirq();					\
-		preempt_enable_no_resched();				\
-} while (0)
-
+#endif
+}
 #endif /* __ASM_HARDIRQ_H */
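
what remains arch-specific after this is the irq controller code. A
controller plugs into the generic layer via hw_interrupt_type (the
fields can be read off the no_irq_type removal above) - a hypothetical
PIC driver, as a sketch:

	static void foo_pic_mask(unsigned int irq)
	{
		/* set the mask bit in the (imaginary) PIC's register */
	}

	static void foo_pic_unmask(unsigned int irq)
	{
		/* clear the mask bit */
	}

	static unsigned int foo_pic_startup(unsigned int irq)
	{
		foo_pic_unmask(irq);
		return 0;	/* no pending edge to replay */
	}

	static struct hw_interrupt_type foo_pic = {
		.typename	= "FOO-PIC",
		.startup	= foo_pic_startup,
		.shutdown	= foo_pic_mask,
		.enable		= foo_pic_unmask,
		.disable	= foo_pic_mask,
		.ack		= foo_pic_mask,	/* mask+ack for edge irqs */
		.end		= foo_pic_unmask,
	};

the platform then wires it up with irq_desc[irq].handler = &foo_pic,
the same way pmac_pic.c does in the next patch - drivers themselves
never see any of this and keep using request_irq().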

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 4/5, 2.6.9-rc3] generic irq subsystem, ppc port
  2004-10-01 20:48     ` [patch 3/5, 2.6.9-rc3] generic irq subsystem, x64 port Ingo Molnar
@ 2004-10-01 20:48       ` Ingo Molnar
  2004-10-01 20:49       ` [patch 5/5, 2.6.9-rc3] generic irq subsystem, ppc64 port Ingo Molnar
  1 sibling, 0 replies; 15+ messages in thread
From: Ingo Molnar @ 2004-10-01 20:48 UTC (permalink / raw)
  To: linux-kernel; +Cc: Christoph Hellwig, Andrew Morton


ppc32 port of generic hardirq handling.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Christoph Hellwig <hch@lst.de>

--- linux/arch/ppc/Kconfig.orig
+++ linux/arch/ppc/Kconfig
@@ -11,6 +11,10 @@ config MMU
 config UID16
 	bool
 
+config GENERIC_HARDIRQS
+	bool
+	default y
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 
--- linux/arch/ppc/syslib/ppc8xx_pic.c.orig
+++ linux/arch/ppc/syslib/ppc8xx_pic.c
@@ -105,7 +105,7 @@ m8xx_do_IRQ(struct pt_regs *regs,
 		ppc_spurious_interrupts++;
 	}
 	else {
-                ppc_irq_dispatch_handler( regs, irq );
+                __do_IRQ(irq, regs);
 	}
 
 }
@@ -161,7 +161,7 @@ void mbx_i8259_action(int cpl, void *dev
 	}
 	bits = 1UL << irq;
 	irq += i8259_pic.irq_offset;
-	ppc_irq_dispatch_handler( regs, irq );
+	__do_IRQ(irq, regs);
 }
 #endif
 
--- linux/arch/ppc/kernel/ppc_ksyms.c.orig
+++ linux/arch/ppc/kernel/ppc_ksyms.c
@@ -84,10 +84,6 @@ EXPORT_SYMBOL(SingleStepException);
 EXPORT_SYMBOL(sys_sigreturn);
 EXPORT_SYMBOL(ppc_n_lost_interrupts);
 EXPORT_SYMBOL(ppc_lost_interrupts);
-EXPORT_SYMBOL(enable_irq);
-EXPORT_SYMBOL(disable_irq);
-EXPORT_SYMBOL(disable_irq_nosync);
-EXPORT_SYMBOL(probe_irq_mask);
 
 EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
 EXPORT_SYMBOL(DMA_MODE_READ);
@@ -205,7 +201,6 @@ EXPORT_SYMBOL(giveup_spe);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(smp_call_function);
 EXPORT_SYMBOL(smp_hw_index);
-EXPORT_SYMBOL(synchronize_irq);
 #endif
 
 EXPORT_SYMBOL(ppc_md);
@@ -292,8 +287,6 @@ EXPORT_SYMBOL(local_irq_restore_end);
 #endif
 EXPORT_SYMBOL(timer_interrupt);
 EXPORT_SYMBOL(irq_desc);
-void ppc_irq_dispatch_handler(struct pt_regs *, int);
-EXPORT_SYMBOL(ppc_irq_dispatch_handler);
 EXPORT_SYMBOL(tb_ticks_per_jiffy);
 EXPORT_SYMBOL(get_wchan);
 EXPORT_SYMBOL(console_drivers);
--- linux/arch/ppc/kernel/irq.c.orig
+++ linux/arch/ppc/kernel/irq.c
@@ -62,296 +62,15 @@
 
 extern atomic_t ipi_recv;
 extern atomic_t ipi_sent;
-void enable_irq(unsigned int irq_nr);
-void disable_irq(unsigned int irq_nr);
-
-static void register_irq_proc (unsigned int irq);
 
 #define MAXCOUNT 10000000
 
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
-	[0 ... NR_IRQS-1] = {
-		.lock = SPIN_LOCK_UNLOCKED
-	}
-};
-
 int ppc_spurious_interrupts = 0;
 struct irqaction *ppc_irq_action[NR_IRQS];
 unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
 unsigned long ppc_lost_interrupts[NR_MASK_WORDS];
 atomic_t ppc_n_lost_interrupts;
 
-/* nasty hack for shared irq's since we need to do kmalloc calls but
- * can't very early in the boot when we need to do a request irq.
- * this needs to be removed.
- * -- Cort
- */
-#define IRQ_KMALLOC_ENTRIES 8
-static int cache_bitmask = 0;
-static struct irqaction malloc_cache[IRQ_KMALLOC_ENTRIES];
-extern int mem_init_done;
-
-#if defined(CONFIG_TAU_INT)
-extern int tau_interrupts(unsigned long cpu);
-extern int tau_initialized;
-#endif
-
-void *irq_kmalloc(size_t size, int pri)
-{
-	unsigned int i;
-	if ( mem_init_done )
-		return kmalloc(size,pri);
-	for ( i = 0; i < IRQ_KMALLOC_ENTRIES ; i++ )
-		if ( ! ( cache_bitmask & (1<<i) ) )
-		{
-			cache_bitmask |= (1<<i);
-			return (void *)(&malloc_cache[i]);
-		}
-	return NULL;
-}
-
-void irq_kfree(void *ptr)
-{
-	unsigned int i;
-	for ( i = 0 ; i < IRQ_KMALLOC_ENTRIES ; i++ )
-		if ( ptr == &malloc_cache[i] )
-		{
-			cache_bitmask &= ~(1<<i);
-			return;
-		}
-	kfree(ptr);
-}
-
-int
-setup_irq(unsigned int irq, struct irqaction * new)
-{
-	int shared = 0;
-	unsigned long flags;
-	struct irqaction *old, **p;
-	irq_desc_t *desc = irq_desc + irq;
-
-	/*
-	 * Some drivers like serial.c use request_irq() heavily,
-	 * so we have to be careful not to interfere with a
-	 * running system.
-	 */
-	if (new->flags & SA_SAMPLE_RANDOM) {
-		/*
-		 * This function might sleep, we want to call it first,
-		 * outside of the atomic block.
-		 * Yes, this might clear the entropy pool if the wrong
-		 * driver is attempted to be loaded, without actually
-		 * installing a new handler, but is this really a problem,
-		 * only the sysadmin is able to do this.
-		 */
-		rand_initialize_irq(irq);
-	}
-
-	/*
-	 * The following block of code has to be executed atomically
-	 */
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	if ((old = *p) != NULL) {
-		/* Can't share interrupts unless both agree to */
-		if (!(old->flags & new->flags & SA_SHIRQ)) {
-			spin_unlock_irqrestore(&desc->lock,flags);
-			return -EBUSY;
-		}
-
-		/* add new interrupt at end of irq queue */
-		do {
-			p = &old->next;
-			old = *p;
-		} while (old);
-		shared = 1;
-	}
-
-	*p = new;
-
-	if (!shared) {
-		desc->depth = 0;
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
-		if (desc->handler) {
-			if (desc->handler->startup)
-				desc->handler->startup(irq);
-			else if (desc->handler->enable)
-				desc->handler->enable(irq);
-		}
-	}
-	spin_unlock_irqrestore(&desc->lock,flags);
-
-	register_irq_proc(irq);
-	return 0;
-}
-
-void free_irq(unsigned int irq, void* dev_id)
-{
-	irq_desc_t *desc;
-	struct irqaction **p;
-	unsigned long flags;
-
-	desc = irq_desc + irq;
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	for (;;) {
-		struct irqaction * action = *p;
-		if (action) {
-			struct irqaction **pp = p;
-			p = &action->next;
-			if (action->dev_id != dev_id)
-				continue;
-
-			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
-			if (!desc->action) {
-				desc->status |= IRQ_DISABLED;
-				mask_irq(irq);
-			}
-			spin_unlock_irqrestore(&desc->lock,flags);
-
-			synchronize_irq(irq);
-			irq_kfree(action);
-			return;
-		}
-		printk("Trying to free free IRQ%d\n",irq);
-		spin_unlock_irqrestore(&desc->lock,flags);
-		break;
-	}
-	return;
-}
-
-EXPORT_SYMBOL(free_irq);
-
-int request_irq(unsigned int irq,
-	irqreturn_t (*handler)(int, void *, struct pt_regs *),
-	unsigned long irqflags, const char * devname, void *dev_id)
-{
-	struct irqaction *action;
-	int retval;
-
-	if (irq >= NR_IRQS)
-		return -EINVAL;
-	if (!handler) {
-		printk(KERN_ERR "request_irq called with NULL handler!\n");
-		dump_stack();
-		return 0;
-	}
-
-	action = (struct irqaction *)
-		irq_kmalloc(sizeof(struct irqaction), GFP_KERNEL);
-	if (!action) {
-		printk(KERN_ERR "irq_kmalloc() failed for irq %d !\n", irq);
-		return -ENOMEM;
-	}
-
-	action->handler = handler;
-	action->flags = irqflags;			
-	cpus_clear(action->mask);
-	action->name = devname;
-	action->dev_id = dev_id;
-	action->next = NULL;
-
-	retval = setup_irq(irq, action);
-	if (retval) {
-		kfree(action);
-		return retval;
-	}
-
-	return 0;
-}
-
-EXPORT_SYMBOL(request_irq);
-
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock.
- */
-
-/**
- *	disable_irq_nosync - disable an irq without waiting
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. Unlike disable_irq(), this function does not ensure existing
- *	instances of the IRQ handler have completed before returning.
- *
- *	This function may be called from IRQ context.
- */
-
-void disable_irq_nosync(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		if (!(desc->status & IRQ_PER_CPU))
-			desc->status |= IRQ_DISABLED;
-		mask_irq(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-/**
- *	disable_irq - disable an irq and wait for completion
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. That is for two disables you need two enables. This
- *	function waits for any pending IRQ handlers for this interrupt
- *	to complete before returning. If you use this function while
- *	holding a resource the IRQ handler may need you will deadlock.
- *
- *	This function may be called - with care - from IRQ context.
- */
-
-void disable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	disable_irq_nosync(irq);
-	if (desc->action)
-		synchronize_irq(irq);
-}
-
-/**
- *	enable_irq - enable interrupt handling on an irq
- *	@irq: Interrupt to enable
- *
- *	Re-enables the processing of interrupts on this IRQ line
- *	providing no disable_irq calls are now in effect.
- *
- *	This function may be called from IRQ context.
- */
-
-void enable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-		desc->status = status;
-		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
-			desc->status = status | IRQ_REPLAY;
-			hw_resend_irq(desc->handler,irq);
-		}
-		unmask_irq(irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-		break;
-	case 0:
-		printk("enable_irq(%u) unbalanced\n", irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
 int show_interrupts(struct seq_file *p, void *v)
 {
 	int i = *(loff_t *) v, j;
@@ -411,115 +130,6 @@ skip:
 	return 0;
 }
 
-static inline void
-handle_irq_event(int irq, struct pt_regs *regs, struct irqaction *action)
-{
-	int status = 0;
-	int ret;
-
-	if (!(action->flags & SA_INTERRUPT))
-		local_irq_enable();
-
-	do {
-		ret = action->handler(irq, action->dev_id, regs);
-		if (ret == IRQ_HANDLED)
-			status |= action->flags;
-		action = action->next;
-	} while (action);
-	if (status & SA_SAMPLE_RANDOM)
-		add_interrupt_randomness(irq);
-	local_irq_disable();
-}
-
-/*
- * Eventually, this should take an array of interrupts and an array size
- * so it can dispatch multiple interrupts.
- */
-void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq)
-{
-	int status;
-	struct irqaction *action;
-	irq_desc_t *desc = irq_desc + irq;
-
-	kstat_this_cpu.irqs[irq]++;
-	spin_lock(&desc->lock);
-	ack_irq(irq);
-	/*
-	   REPLAY is when Linux resends an IRQ that was dropped earlier
-	   WAITING is used by probe to mark irqs that are being tested
-	   */
-	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
-	if (!(status & IRQ_PER_CPU))
-		status |= IRQ_PENDING; /* we _want_ to handle it */
-
-	/*
-	 * If the IRQ is disabled for whatever reason, we cannot
-	 * use the action we have.
-	 */
-	action = NULL;
-	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
-		action = desc->action;
-		if (!action || !action->handler) {
-			ppc_spurious_interrupts++;
-			printk(KERN_DEBUG "Unhandled interrupt %x, disabled\n", irq);
-			/* We can't call disable_irq here, it would deadlock */
-			++desc->depth;
-			desc->status |= IRQ_DISABLED;
-			mask_irq(irq);
-			/* This is a real interrupt, we have to eoi it,
-			   so we jump to out */
-			goto out;
-		}
-		status &= ~IRQ_PENDING; /* we commit to handling */
-		if (!(status & IRQ_PER_CPU))
-			status |= IRQ_INPROGRESS; /* we are handling it */
-	}
-	desc->status = status;
-
-	/*
-	 * If there is no IRQ handler or it was disabled, exit early.
-	   Since we set PENDING, if another processor is handling
-	   a different instance of this same irq, the other processor
-	   will take care of it.
-	 */
-	if (unlikely(!action))
-		goto out;
-
-
-	/*
-	 * Edge triggered interrupts need to remember
-	 * pending events.
-	 * This applies to any hw interrupts that allow a second
-	 * instance of the same irq to arrive while we are in do_IRQ
-	 * or in the handler. But the code here only handles the _second_
-	 * instance of the irq, not the third or fourth. So it is mostly
-	 * useful for irq hardware that does not mask cleanly in an
-	 * SMP environment.
-	 */
-	for (;;) {
-		spin_unlock(&desc->lock);
-		handle_irq_event(irq, regs, action);
-		spin_lock(&desc->lock);
-
-		if (likely(!(desc->status & IRQ_PENDING)))
-			break;
-		desc->status &= ~IRQ_PENDING;
-	}
-out:
-	desc->status &= ~IRQ_INPROGRESS;
-	/*
-	 * The ->end() handler has to deal with interrupts which got
-	 * disabled while the handler was running.
-	 */
-	if (irq_desc[irq].handler) {
-		if (irq_desc[irq].handler->end)
-			irq_desc[irq].handler->end(irq);
-		else if (irq_desc[irq].handler->enable)
-			irq_desc[irq].handler->enable(irq);
-	}
-	spin_unlock(&desc->lock);
-}
-
 void do_IRQ(struct pt_regs *regs)
 {
 	int irq, first = 1;
@@ -534,7 +144,7 @@ void do_IRQ(struct pt_regs *regs)
 	 * has already been handled. -- Tom
 	 */
 	while ((irq = ppc_md.get_irq(regs)) >= 0) {
-		ppc_irq_dispatch_handler(regs, irq);
+		__do_IRQ(irq, regs);
 		first = 0;
 	}
 	if (irq != -2 && first)
@@ -543,143 +153,7 @@ void do_IRQ(struct pt_regs *regs)
         irq_exit();
 }
 
-unsigned long probe_irq_on (void)
-{
-	return 0;
-}
-
-EXPORT_SYMBOL(probe_irq_on);
-
-int probe_irq_off (unsigned long irqs)
-{
-	return 0;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-
-unsigned int probe_irq_mask(unsigned long irqs)
-{
-	return 0;
-}
-
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
-	while (irq_desc[irq].status & IRQ_INPROGRESS)
-		barrier();
-}
-#endif /* CONFIG_SMP */
-
-static struct proc_dir_entry *root_irq_dir;
-static struct proc_dir_entry *irq_dir[NR_IRQS];
-static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
-
-#ifdef CONFIG_IRQ_ALL_CPUS
-#define DEFAULT_CPU_AFFINITY CPU_MASK_ALL
-#else
-#define DEFAULT_CPU_AFFINITY cpumask_of_cpu(0)
-#endif
-
-cpumask_t irq_affinity [NR_IRQS];
-
-static int irq_affinity_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int irq_affinity_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	int irq = (int) data, full_count = count, err;
-	cpumask_t new_value, tmp;
-
-	if (!irq_desc[irq].handler->set_affinity)
-		return -EIO;
-
-	err = cpumask_parse(buffer, count, new_value);
-
-	/*
-	 * Do not allow disabling IRQs completely - it's a too easy
-	 * way to make the system unusable accidentally :-) At least
-	 * one online CPU still has to be targeted.
-	 *
-	 * We assume a 1-1 logical<->physical cpu mapping here.  If
-	 * we assume that the cpu indices in /proc/irq/../smp_affinity
-	 * are actually logical cpu #'s then we have no problem.
-	 *  -- Cort <cort@fsmlabs.com>
-	 */
-	cpus_and(tmp, new_value, cpu_online_map);
-	if (cpus_empty(tmp))
-		return -EINVAL;
-
-	irq_affinity[irq] = new_value;
-	irq_desc[irq].handler->set_affinity(irq, new_value);
-
-	return full_count;
-}
-
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
-	struct proc_dir_entry *entry;
-	char name [MAX_NAMELEN];
-
-	if (!root_irq_dir || (irq_desc[irq].handler == NULL) || irq_dir[irq])
-		return;
-
-	memset(name, 0, MAX_NAMELEN);
-	sprintf(name, "%d", irq);
-
-	/* create /proc/irq/1234 */
-	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-
-	/* create /proc/irq/1234/smp_affinity */
-	entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
-	entry->nlink = 1;
-	entry->data = (void *)irq;
-	entry->read_proc = irq_affinity_read_proc;
-	entry->write_proc = irq_affinity_write_proc;
-
-	smp_affinity_entry[irq] = entry;
-}
-
-void init_irq_proc (void)
-{
-	int i;
-
-	/* create /proc/irq */
-	root_irq_dir = proc_mkdir("irq", NULL);
-	/* create /proc/irq/prof_cpu_mask */
-	create_prof_cpu_mask(root_irq_dir);
-
-	/*
-	 * Create entries for all existing IRQs.
-	 */
-	for (i = 0; i < NR_IRQS; i++) {
-		if (irq_desc[i].handler == NULL)
-			continue;
-		register_irq_proc(i);
-	}
-}
-
-irqreturn_t no_action(int irq, void *dev, struct pt_regs *regs)
-{
-	return IRQ_NONE;
-}
-
 void __init init_IRQ(void)
 {
-	int i;
-
-	for (i = 0; i < NR_IRQS; ++i)
-		irq_affinity[i] = DEFAULT_CPU_AFFINITY;
-
 	ppc_md.init_IRQ();
 }
--- linux/arch/ppc/platforms/pmac_pic.c.orig
+++ linux/arch/ppc/platforms/pmac_pic.c
@@ -214,7 +214,7 @@ static irqreturn_t gatwick_action(int cp
 		if (bits == 0)
 			continue;
 		irq += __ilog2(bits);
-		ppc_irq_dispatch_handler(regs, irq);
+		__do_IRQ(irq, regs);
 		return IRQ_HANDLED;
 	}
 	printk("gatwick irq not from gatwick pic\n");
@@ -383,11 +383,34 @@ static irqreturn_t k2u3_action(int cpl, 
 
 	irq = openpic2_get_irq(regs);
 	if (irq != -1)
-		ppc_irq_dispatch_handler(regs, irq);
+		__do_IRQ(irq, regs);
 	return IRQ_HANDLED;
 }
+
+static struct irqaction k2u3_cascade_action = {
+	.handler	= k2u3_action,
+	.flags		= 0,
+	.mask		= CPU_MASK_NONE,
+	.name		= "U3->K2 Cascade",
+};
 #endif /* CONFIG_POWER4 */
 
+#ifdef CONFIG_XMON
+static struct irqaction xmon_action = {
+	.handler	= xmon_irq,
+	.flags		= 0,
+	.mask		= CPU_MASK_NONE,
+	.name		= "NMI - XMON"
+};
+#endif
+
+static struct irqaction gatwick_cascade_action = {
+	.handler	= gatwick_action,
+	.flags		= SA_INTERRUPT,
+	.mask		= CPU_MASK_NONE,
+	.name		= "cascade",
+};
+
 void __init pmac_pic_init(void)
 {
         int i;
@@ -440,8 +463,9 @@ void __init pmac_pic_init(void)
 				OpenPIC_InitSenses = senses;
 				OpenPIC_NumInitSenses = 128;
 				openpic2_init(PMAC_OPENPIC2_OFFSET);
-				if (request_irq(irqctrler2->intrs[0].line, k2u3_action, 0,
-						"U3->K2 Cascade", NULL))
+
+				if (setup_irq(irqctrler2->intrs[0].line,
+					      &k2u3_cascade_action))
 					printk("Unable to get OpenPIC IRQ for cascade\n");
 			}
 #endif /* CONFIG_POWER4 */
@@ -455,8 +479,7 @@ void __init pmac_pic_init(void)
 				if (pswitch && pswitch->n_intrs) {
 					nmi_irq = pswitch->intrs[0].line;
 					openpic_init_nmi_irq(nmi_irq);
-					request_irq(nmi_irq, xmon_irq, 0,
-						    "NMI - XMON", NULL);
+					setup_irq(nmi_irq, &xmon_action);
 				}
 			}
 #endif	/* CONFIG_XMON */
@@ -553,8 +576,7 @@ void __init pmac_pic_init(void)
 			(int)irq_cascade);
 		for ( i = max_real_irqs ; i < max_irqs ; i++ )
 			irq_desc[i].handler = &gatwick_pic;
-		request_irq( irq_cascade, gatwick_action, SA_INTERRUPT,
-			     "cascade", NULL );
+		setup_irq(irq_cascade, &gatwick_cascade_action);
 	}
 	printk("System has %d possible interrupts\n", max_irqs);
 	if (max_irqs != max_real_irqs)
@@ -562,7 +584,7 @@ void __init pmac_pic_init(void)
 			max_real_irqs);
 
 #ifdef CONFIG_XMON
-	request_irq(20, xmon_irq, 0, "NMI - XMON", NULL);
+	setup_irq(20, &xmon_action);
 #endif	/* CONFIG_XMON */
 }
 
--- linux/arch/ppc/platforms/85xx/mpc85xx_cds_common.c.orig
+++ linux/arch/ppc/platforms/85xx/mpc85xx_cds_common.c
@@ -191,9 +191,7 @@ mpc85xx_cds_show_cpuinfo(struct seq_file
 static void cpm2_cascade(int irq, void *dev_id, struct pt_regs *regs)
 {
 	while((irq = cpm2_get_irq(regs)) >= 0)
-	{
-		ppc_irq_dispatch_handler(regs,irq);
-	}
+		__do_IRQ(irq, regs);
 }
 #endif /* CONFIG_CPM2 */
 
--- linux/arch/ppc/platforms/85xx/mpc8560_ads.c.orig
+++ linux/arch/ppc/platforms/85xx/mpc8560_ads.c
@@ -145,9 +145,8 @@ mpc8560ads_setup_arch(void)
 
 static irqreturn_t cpm2_cascade(int irq, void *dev_id, struct pt_regs *regs)
 {
-	while ((irq = cpm2_get_irq(regs)) >= 0) {
-		ppc_irq_dispatch_handler(regs, irq);
-	}
+	while ((irq = cpm2_get_irq(regs)) >= 0)
+		__do_IRQ(irq, regs);
 	return IRQ_HANDLED;
 }
 
--- linux/arch/ppc/platforms/adir_pic.c.orig
+++ linux/arch/ppc/platforms/adir_pic.c
@@ -11,7 +11,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/pci.h>
-#include <linux/irq.h>
+#include <linux/interrupt.h>
 
 #include <asm/io.h>
 #include <asm/i8259.h>
@@ -20,11 +20,6 @@
 static void adir_onboard_pic_enable(unsigned int irq);
 static void adir_onboard_pic_disable(unsigned int irq);
 
-static void
-no_action(int cpl, void *dev_id, struct pt_regs *regs)
-{
-}
-
 __init static void
 adir_onboard_pic_init(void)
 {
@@ -88,6 +83,13 @@ static struct hw_interrupt_type adir_onb
 	NULL
 };
 
+static struct irqaction noop_action = {
+	.handler	= no_action,
+	.flags          = SA_INTERRUPT,
+	.mask           = CPU_MASK_NONE,
+	.name           = "82c59 primary cascade",
+};
+
 /*
  * Linux interrupt values are assigned as follows:
  *
@@ -110,11 +112,7 @@ adir_init_IRQ(void)
 	adir_onboard_pic_init();
 
 	/* Enable 8259 interrupt cascade */
-	request_irq(ADIR_IRQ_VT82C686_INTR,
-			no_action,
-			SA_INTERRUPT,
-			"82c59 primary cascade",
-			NULL);
+	setup_irq(ADIR_IRQ_VT82C686_INTR, &noop_action);
 }
 
 int
--- linux/arch/ppc/platforms/sbc82xx.c.orig
+++ linux/arch/ppc/platforms/sbc82xx.c
@@ -142,7 +142,7 @@ static irqreturn_t sbc82xx_i8259_demux(i
 			return IRQ_HANDLED;
 		}
 	}
-	ppc_irq_dispatch_handler(regs, NR_SIU_INTS + irq);
+	__do_IRQ(NR_SIU_INTS + irq, regs);
 	return IRQ_HANDLED;
 }
 
--- linux/include/asm-ppc/irq.h.orig
+++ linux/include/asm-ppc/irq.h
@@ -6,10 +6,6 @@
 #include <asm/machdep.h>		/* ppc_md */
 #include <asm/atomic.h>
 
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-
 /*
  * These constants are used for passing information about interrupt
  * signal polarity and level/edge sensing to the low-level PIC chip
--- linux/include/asm-ppc/hardirq.h.orig
+++ linux/include/asm-ppc/hardirq.h
@@ -21,46 +21,11 @@ typedef struct {
 
 #define last_jiffy_stamp(cpu) __IRQ_STAT((cpu), __last_jiffy_stamp)
 
-/*
- * We put the hardirq and softirq counter into the preemption
- * counter. The bitmask has the following meaning:
- *
- * - bits 0-7 are the preemption count (max preemption depth: 256)
- * - bits 8-15 are the softirq count (max # of softirqs: 256)
- * - bits 16-23 are the hardirq count (max # of hardirqs: 256)
- *
- * - ( bit 26 is the PREEMPT_ACTIVE flag. )
- *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x00ff0000
- */
-
-#define PREEMPT_BITS	8
-#define SOFTIRQ_BITS	8
-#define HARDIRQ_BITS	8
-
-#define PREEMPT_SHIFT	0
-#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
-#define irq_exit()							\
-do {									\
-	preempt_count() -= IRQ_EXIT_OFFSET;				\
-	if (!in_interrupt() && softirq_pending(smp_processor_id()))	\
-		do_softirq();						\
-	preempt_enable_no_resched();					\
-} while (0)
+static inline void ack_bad_irq(int irq)
+{
+	printk(KERN_CRIT "illegal vector %d received!\n", irq);
+	BUG();
+}
 
 #endif /* __ASM_HARDIRQ_H */
 #endif /* __KERNEL__ */
--- linux/include/asm-ppc/hw_irq.h.orig
+++ linux/include/asm-ppc/hw_irq.h
@@ -9,7 +9,6 @@
 #include <asm/reg.h>
 
 extern void timer_interrupt(struct pt_regs *);
-extern void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq);
 
 #define INLINE_IRQS
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 5/5, 2.6.9-rc3] generic irq subsystem, ppc64 port
  2004-10-01 20:48     ` [patch 3/5, 2.6.9-rc3] generic irq subsystem, x64 port Ingo Molnar
  2004-10-01 20:48       ` [patch 4/5, 2.6.9-rc3] generic irq subsystem, ppc port Ingo Molnar
@ 2004-10-01 20:49       ` Ingo Molnar
  1 sibling, 0 replies; 15+ messages in thread
From: Ingo Molnar @ 2004-10-01 20:49 UTC (permalink / raw)
  To: linux-kernel; +Cc: Christoph Hellwig, Andrew Morton


ppc64 port of generic hardirq handling.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Christoph Hellwig <hch@lst.de>
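
In outline, a port to the generic layer needs three pieces: the
CONFIG_GENERIC_HARDIRQS opt-in in Kconfig, an ack_bad_irq() helper for
stray vectors, and a dispatch loop that feeds decoded interrupts to the
generic __do_IRQ() flow handler. A rough sketch of that last piece
(arch_get_irq() is a placeholder name for the platform's vector decode,
not a function in this patch):

	void do_IRQ(struct pt_regs *regs)
	{
		int irq;

		irq_enter();
		/* arch_get_irq(): placeholder for the platform's decode,
		 * e.g. ppc_md.get_irq() on ppc */
		while ((irq = arch_get_irq(regs)) >= 0)
			__do_IRQ(irq, regs);	/* generic ack/dispatch/end */
		irq_exit();
	}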

--- linux/arch/ppc64/Kconfig.orig
+++ linux/arch/ppc64/Kconfig
@@ -207,6 +207,13 @@ config PREEMPT
 	  Say Y here if you are building a kernel for a desktop, embedded
 	  or real-time system.  Say N if you are unsure.
 
+#
+# Use the generic interrupt handling code in kernel/hardirq.c:
+#
+config GENERIC_HARDIRQS
+	bool
+	default y
+
 config MSCHUNKS
 	bool
 	depends on PPC_ISERIES
--- linux/arch/ppc64/kernel/misc.S.orig
+++ linux/arch/ppc64/kernel/misc.S
@@ -115,12 +115,12 @@ _GLOBAL(call_do_softirq)
 	mtlr	r0
 	blr
 
-_GLOBAL(call_handle_irq_event)
+_GLOBAL(call_handle_IRQ_event)
 	mflr	r0
 	std	r0,16(r1)
 	stdu	r1,THREAD_SIZE-112(r6)
 	mr	r1,r6
-	bl	.handle_irq_event
+	bl	.handle_IRQ_event
 	ld	r1,0(r1)
 	ld	r0,16(r1)
 	mtlr	r0
--- linux/arch/ppc64/kernel/irq.c.orig
+++ linux/arch/ppc64/kernel/irq.c
@@ -60,259 +60,12 @@
 extern void iSeries_smp_message_recv( struct pt_regs * );
 #endif
 
-static void register_irq_proc (unsigned int irq);
-
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
-	[0 ... NR_IRQS-1] = {
-		.lock = SPIN_LOCK_UNLOCKED
-	}
-};
+extern irq_desc_t irq_desc[NR_IRQS];
 
 int __irq_offset_value;
 int ppc_spurious_interrupts;
 unsigned long lpevent_count;
 
-int
-setup_irq(unsigned int irq, struct irqaction * new)
-{
-	int shared = 0;
-	unsigned long flags;
-	struct irqaction *old, **p;
-	irq_desc_t *desc = get_irq_desc(irq);
-
-	/*
-	 * Some drivers like serial.c use request_irq() heavily,
-	 * so we have to be careful not to interfere with a
-	 * running system.
-	 */
-	if (new->flags & SA_SAMPLE_RANDOM) {
-		/*
-		 * This function might sleep, we want to call it first,
-		 * outside of the atomic block.
-		 * Yes, this might clear the entropy pool if the wrong
-		 * driver is attempted to be loaded, without actually
-		 * installing a new handler, but is this really a problem,
-		 * only the sysadmin is able to do this.
-		 */
-		rand_initialize_irq(irq);
-	}
-
-	/*
-	 * The following block of code has to be executed atomically
-	 */
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	if ((old = *p) != NULL) {
-		/* Can't share interrupts unless both agree to */
-		if (!(old->flags & new->flags & SA_SHIRQ)) {
-			spin_unlock_irqrestore(&desc->lock,flags);
-			return -EBUSY;
-		}
-
-		/* add new interrupt at end of irq queue */
-		do {
-			p = &old->next;
-			old = *p;
-		} while (old);
-		shared = 1;
-	}
-
-	*p = new;
-
-	if (!shared) {
-		desc->depth = 0;
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
-		if (desc->handler && desc->handler->startup)
-			desc->handler->startup(irq);
-		unmask_irq(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock,flags);
-
-	register_irq_proc(irq);
-	return 0;
-}
-
-#ifdef CONFIG_SMP
-
-inline void synchronize_irq(unsigned int irq)
-{
-	while (get_irq_desc(irq)->status & IRQ_INPROGRESS)
-		cpu_relax();
-}
-
-EXPORT_SYMBOL(synchronize_irq);
-
-#endif /* CONFIG_SMP */
-
-int request_irq(unsigned int irq,
-	irqreturn_t (*handler)(int, void *, struct pt_regs *),
-	unsigned long irqflags, const char * devname, void *dev_id)
-{
-	struct irqaction *action;
-	int retval;
-
-	if (irq >= NR_IRQS)
-		return -EINVAL;
-	if (!handler)
-		return -EINVAL;
-
-	action = (struct irqaction *)
-		kmalloc(sizeof(struct irqaction), GFP_KERNEL);
-	if (!action) {
-		printk(KERN_ERR "kmalloc() failed for irq %d !\n", irq);
-		return -ENOMEM;
-	}
-
-	action->handler = handler;
-	action->flags = irqflags;
-	cpus_clear(action->mask);
-	action->name = devname;
-	action->dev_id = dev_id;
-	action->next = NULL;
-
-	retval = setup_irq(irq, action);
-	if (retval)
-		kfree(action);
-
-	return 0;
-}
-
-EXPORT_SYMBOL(request_irq);
-
-void free_irq(unsigned int irq, void *dev_id)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	struct irqaction **p;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	for (;;) {
-		struct irqaction * action = *p;
-		if (action) {
-			struct irqaction **pp = p;
-			p = &action->next;
-			if (action->dev_id != dev_id)
-				continue;
-
-			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
-			if (!desc->action) {
-				desc->status |= IRQ_DISABLED;
-				mask_irq(irq);
-			}
-			spin_unlock_irqrestore(&desc->lock,flags);
-
-			/* Wait to make sure it's not being used on another CPU */
-			synchronize_irq(irq);
-			kfree(action);
-			return;
-		}
-		printk("Trying to free free IRQ%d\n",irq);
-		spin_unlock_irqrestore(&desc->lock,flags);
-		break;
-	}
-	return;
-}
-
-EXPORT_SYMBOL(free_irq);
-
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock. 
- */
- 
-/**
- *	disable_irq_nosync - disable an irq without waiting
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. Unlike disable_irq(), this function does not ensure existing
- *	instances of the IRQ handler have completed before returning.
- *
- *	This function may be called from IRQ context.
- */
- 
-inline void disable_irq_nosync(unsigned int irq)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		if (!(desc->status & IRQ_PER_CPU))
-			desc->status |= IRQ_DISABLED;
-		mask_irq(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-EXPORT_SYMBOL(disable_irq_nosync);
-
-/**
- *	disable_irq - disable an irq and wait for completion
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. That is for two disables you need two enables. This
- *	function waits for any pending IRQ handlers for this interrupt
- *	to complete before returning. If you use this function while
- *	holding a resource the IRQ handler may need you will deadlock.
- *
- *	This function may be called - with care - from IRQ context.
- */
- 
-void disable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	disable_irq_nosync(irq);
-	if (desc->action)
-		synchronize_irq(irq);
-}
-
-EXPORT_SYMBOL(disable_irq);
-
-/**
- *	enable_irq - enable interrupt handling on an irq
- *	@irq: Interrupt to enable
- *
- *	Re-enables the processing of interrupts on this IRQ line
- *	providing no disable_irq calls are now in effect.
- *
- *	This function may be called from IRQ context.
- */
- 
-void enable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-		desc->status = status;
-		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
-			desc->status = status | IRQ_REPLAY;
-			hw_resend_irq(desc->handler,irq);
-		}
-		unmask_irq(irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-		break;
-	case 0:
-		printk("enable_irq(%u) unbalanced from %p\n", irq,
-		       __builtin_return_address(0));
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-EXPORT_SYMBOL(enable_irq);
-
 int show_interrupts(struct seq_file *p, void *v)
 {
 	int i = *(loff_t *) v, j;
@@ -360,107 +113,7 @@ skip:
 	return 0;
 }
 
-int handle_irq_event(int irq, struct pt_regs *regs, struct irqaction *action)
-{
-	int status = 0;
-	int ret, retval = 0;
-
-	if (!(action->flags & SA_INTERRUPT))
-		local_irq_enable();
-
-	do {
-		ret = action->handler(irq, action->dev_id, regs);
-		if (ret == IRQ_HANDLED)
-			status |= action->flags;
-		retval |= ret;
-		action = action->next;
-	} while (action);
-	if (status & SA_SAMPLE_RANDOM)
-		add_interrupt_randomness(irq);
-	local_irq_disable();
-	return retval;
-}
-
-static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	struct irqaction *action;
-
-	if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
-		printk(KERN_ERR "irq event %d: bogus return value %x\n",
-				irq, action_ret);
-	} else {
-		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
-	}
-	dump_stack();
-	printk(KERN_ERR "handlers:\n");
-	action = desc->action;
-	do {
-		printk(KERN_ERR "[<%p>]", action->handler);
-		print_symbol(" (%s)",
-			(unsigned long)action->handler);
-		printk("\n");
-		action = action->next;
-	} while (action);
-}
-
-static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	static int count = 100;
-
-	if (count) {
-		count--;
-		__report_bad_irq(irq, desc, action_ret);
-	}
-}
-
-static int noirqdebug;
-
-static int __init noirqdebug_setup(char *str)
-{
-	noirqdebug = 1;
-	printk("IRQ lockup detection disabled\n");
-	return 1;
-}
-
-__setup("noirqdebug", noirqdebug_setup);
-
-/*
- * If 99,900 of the previous 100,000 interrupts have not been handled then
- * assume that the IRQ is stuck in some manner.  Drop a diagnostic and try to
- * turn the IRQ off.
- *
- * (The other 100-of-100,000 interrupts may have been a correctly-functioning
- *  device sharing an IRQ with the failing one)
- *
- * Called under desc->lock
- */
-static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	if (action_ret != IRQ_HANDLED) {
-		desc->irqs_unhandled++;
-		if (action_ret != IRQ_NONE)
-			report_bad_irq(irq, desc, action_ret);
-	}
-
-	desc->irq_count++;
-	if (desc->irq_count < 100000)
-		return;
-
-	desc->irq_count = 0;
-	if (desc->irqs_unhandled > 99900) {
-		/*
-		 * The interrupt is stuck
-		 */
-		__report_bad_irq(irq, desc, action_ret);
-		/*
-		 * Now kill the IRQ
-		 */
-		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
-		desc->status |= IRQ_DISABLED;
-		desc->handler->disable(irq);
-	}
-	desc->irqs_unhandled = 0;
-}
+extern int noirqdebug;
 
 /*
  * Eventually, this should take an array of interrupts and an array size
@@ -482,7 +135,7 @@ void ppc_irq_dispatch_handler(struct pt_
 	if (desc->status & IRQ_PER_CPU) {
 		/* no locking required for CPU-local interrupts: */
 		ack_irq(irq);
-		action_ret = handle_irq_event(irq, regs, desc->action);
+		action_ret = handle_IRQ_event(irq, regs, desc->action);
 		desc->handler->end(irq);
 		return;
 	}
@@ -550,13 +203,13 @@ void ppc_irq_dispatch_handler(struct pt_
 		if (curtp != irqtp) {
 			irqtp->task = curtp->task;
 			irqtp->flags = 0;
-			action_ret = call_handle_irq_event(irq, regs, action, irqtp);
+			action_ret = call_handle_IRQ_event(irq, regs, action, irqtp);
 			irqtp->task = NULL;
 			if (irqtp->flags)
 				set_bits(irqtp->flags, &curtp->flags);
 		} else
 #endif
-			action_ret = handle_irq_event(irq, regs, action);
+			action_ret = handle_IRQ_event(irq, regs, action);
 
 		spin_lock(&desc->lock);
 		if (!noirqdebug)
@@ -658,27 +311,6 @@ void do_IRQ(struct pt_regs *regs)
 }
 #endif	/* CONFIG_PPC_ISERIES */
 
-unsigned long probe_irq_on (void)
-{
-	return 0;
-}
-
-EXPORT_SYMBOL(probe_irq_on);
-
-int probe_irq_off (unsigned long irqs)
-{
-	return 0;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-
-unsigned int probe_irq_mask(unsigned long irqs)
-{
-	return 0;
-}
-
-EXPORT_SYMBOL(probe_irq_mask);
-
 void __init init_IRQ(void)
 {
 	static int once = 0;
@@ -692,130 +324,6 @@ void __init init_IRQ(void)
 	irq_ctx_init();
 }
 
-static struct proc_dir_entry * root_irq_dir;
-static struct proc_dir_entry * irq_dir [NR_IRQS];
-static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
-
-/* Protected by get_irq_desc(irq)->lock. */
-#ifdef CONFIG_IRQ_ALL_CPUS
-cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
-#else  /* CONFIG_IRQ_ALL_CPUS */
-cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_NONE };
-#endif /* CONFIG_IRQ_ALL_CPUS */
-
-static int irq_affinity_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int irq_affinity_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	unsigned int irq = (long)data;
-	irq_desc_t *desc = get_irq_desc(irq);
-	int ret;
-	cpumask_t new_value, tmp;
-
-	if (!desc->handler->set_affinity)
-		return -EIO;
-
-	ret = cpumask_parse(buffer, count, new_value);
-	if (ret != 0)
-		return ret;
-
-	/*
-	 * We check for CPU_MASK_ALL in xics to send irqs to all cpus.
-	 * In some cases CPU_MASK_ALL is smaller than the cpumask (eg
-	 * NR_CPUS == 32 and cpumask is a long), so we mask it here to
-	 * be consistent.
-	 */
-	cpus_and(new_value, new_value, CPU_MASK_ALL);
-
-	/*
-	 * Grab lock here so cpu_online_map can't change, and also
-	 * protect irq_affinity[].
-	 */
-	spin_lock(&desc->lock);
-
-	/*
-	 * Do not allow disabling IRQs completely - it's a too easy
-	 * way to make the system unusable accidentally :-) At least
-	 * one online CPU still has to be targeted.
-	 */
-	cpus_and(tmp, new_value, cpu_online_map);
-	if (cpus_empty(tmp)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	irq_affinity[irq] = new_value;
-	desc->handler->set_affinity(irq, new_value);
-	ret = count;
-
-out:
-	spin_unlock(&desc->lock);
-	return ret;
-}
-
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
-	struct proc_dir_entry *entry;
-	char name [MAX_NAMELEN];
-
-	if (!root_irq_dir || (irq_desc[irq].handler == NULL) || irq_dir[irq])
-		return;
-
-	memset(name, 0, MAX_NAMELEN);
-	sprintf(name, "%d", irq);
-
-	/* create /proc/irq/1234 */
-	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-
-	/* create /proc/irq/1234/smp_affinity */
-	entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
-	if (entry) {
-		entry->nlink = 1;
-		entry->data = (void *)(long)irq;
-		entry->read_proc = irq_affinity_read_proc;
-		entry->write_proc = irq_affinity_write_proc;
-	}
-
-	smp_affinity_entry[irq] = entry;
-}
-
-void init_irq_proc (void)
-{
-	int i;
-
-	/* create /proc/irq */
-	root_irq_dir = proc_mkdir("irq", NULL);
-
-	/* create /proc/irq/prof_cpu_mask */
-	create_prof_cpu_mask(root_irq_dir);
-
-	/*
-	 * Create entries for all existing IRQs.
-	 */
-	for_each_irq(i) {
-		if (get_irq_desc(i)->handler == NULL)
-			continue;
-		register_irq_proc(i);
-	}
-}
-
-irqreturn_t no_action(int irq, void *dev, struct pt_regs *regs)
-{
-	return IRQ_NONE;
-}
-
 #ifndef CONFIG_PPC_ISERIES
 /*
  * Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
--- linux/include/asm-ppc64/irq.h.orig
+++ linux/include/asm-ppc64/irq.h
@@ -17,10 +17,6 @@
  */
 #define NR_IRQS		512
 
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-
 /* this number is used when no interrupt has been assigned */
 #define NO_IRQ			(-1)
 
@@ -80,7 +76,6 @@ static __inline__ int irq_canonicalize(i
 
 struct irqaction;
 struct pt_regs;
-int handle_irq_event(int, struct pt_regs *, struct irqaction *);
 
 #ifdef CONFIG_IRQSTACKS
 /*
@@ -91,7 +86,7 @@ extern struct thread_info *softirq_ctx[N
 
 extern void irq_ctx_init(void);
 extern void call_do_softirq(struct thread_info *tp);
-extern int call_handle_irq_event(int irq, struct pt_regs *regs,
+extern int call_handle_IRQ_event(int irq, struct pt_regs *regs,
 			struct irqaction *action, struct thread_info *tp);
 
 #define __ARCH_HAS_DO_SOFTIRQ
--- linux/include/asm-ppc64/hardirq.h.orig
+++ linux/include/asm-ppc64/hardirq.h
@@ -19,45 +19,10 @@ typedef struct {
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
 
-/*
- * We put the hardirq and softirq counter into the preemption
- * counter. The bitmask has the following meaning:
- *
- * - bits 0-7 are the preemption count (max preemption depth: 256)
- * - bits 8-15 are the softirq count (max # of softirqs: 256)
- * - bits 16-24 are the hardirq count (max # of hardirqs: 512)
- *
- * - ( bit 26 is the PREEMPT_ACTIVE flag. )
- *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x01ff0000
- */
-
-#define PREEMPT_BITS	8
-#define SOFTIRQ_BITS	8
-#define HARDIRQ_BITS	9
-
-#define PREEMPT_SHIFT	0
-#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
-#define irq_exit()							\
-do {									\
-		preempt_count() -= IRQ_EXIT_OFFSET;			\
-		if (!in_interrupt() && softirq_pending(smp_processor_id())) \
-			do_softirq();					\
-		preempt_enable_no_resched();				\
-} while (0)
+static inline void ack_bad_irq(int irq)
+{
+	printk(KERN_CRIT "illegal vector %d received!\n", irq);
+	BUG();
+}
 
 #endif /* __ASM_HARDIRQ_H */
--- linux/include/asm-ppc64/smp.h.orig
+++ linux/include/asm-ppc64/smp.h
@@ -52,8 +52,6 @@ extern cpumask_t cpu_sibling_map[NR_CPUS
 #endif
 #define PPC_MSG_DEBUGGER_BREAK  3
 
-extern cpumask_t irq_affinity[];
-
 void smp_init_iSeries(void);
 void smp_init_pSeries(void);
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] i386 CPU hotplug updated for -mm
       [not found]     ` <Pine.LNX.4.61.0410091550300.2870@musoma.fsmlabs.com>
@ 2004-10-11  8:19       ` Zwane Mwaikambo
  2004-10-12  5:59         ` Nathan Lynch
  2004-10-18 15:10         ` Pavel Machek
  0 siblings, 2 replies; 15+ messages in thread
From: Zwane Mwaikambo @ 2004-10-11  8:19 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Ingo Molnar, Nathan Lynch, Linux Kernel, Rusty Russell

Hi Andrew,
	Find attached the i386 cpu hotplug patch updated for Ingo's latest 
round of goodies. In order to avoid dumping cpu hotplug code into 
kernel/irq/* I dropped the cpu_online check in do_IRQ() by modifying 
fixup_irqs(). The difference is that on cpu offline, fixup_irqs() is 
called before we clear the cpu from cpu_online_map, followed by a long 
delay to ensure that we never have any queued external interrupts on 
the APICs. Due to my usual test victims being in boxes a continent away 
this hasn't been tested, but I'll cover bug reports (nudge, Nathan! ;)
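
The resulting offline sequence, condensed from __cpu_disable() and
fixup_irqs() in the patch below (a sketch of the ordering, not the
literal code):

	int __cpu_disable(void)
	{
		cpumask_t map = cpu_online_map;
		int cpu = smp_processor_id();

		cpu_clear(cpu, map);
		fixup_irqs(map);	/* retarget IRQs away from us; its
					 * tail runs irqs-on for ~1ms to
					 * drain anything already queued */
		cpu_clear(cpu, cpu_online_map);	/* only now go offline */
		return 0;
	}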

1) Add CONFIG_HOTPLUG_CPU
2) disable local APIC timer on dead cpus.
3) Disable preempt around irq balancing to prevent CPUs going down.
4) Print irq stats for all possible cpus.
5) Debugging check for interrupts on offline cpus.
6) Hacky fixup_irqs() to redirect irqs when cpus go off/online.
7) play_dead() for offline cpus to spin inside.
8) Handle offline cpus set in flush_tlb_others().
9) Grab lock earlier in smp_call_function() to prevent CPUs going down.
10) Implement __cpu_disable() and __cpu_die().
11) Enable local interrupts in cpu_enable() after fixup_irqs()
12) Don't fiddle with NMI on dead cpu, but leave intact on other cpus.
13) Program IRQ affinity whilst cpu is still in cpu_online_map on offline.

Signed-off-by: Zwane Mwaikambo <zwane@linuxpower.ca>

 arch/i386/Kconfig          |    9 ++++
 arch/i386/kernel/apic.c    |    3 -
 arch/i386/kernel/io_apic.c |    2
 arch/i386/kernel/irq.c     |   66 +++++++++++++++++++++++++------
 arch/i386/kernel/msr.c     |    2
 arch/i386/kernel/process.c |   34 ++++++++++++++++
 arch/i386/kernel/smp.c     |   25 +++++++----
 arch/i386/kernel/smpboot.c |   94 ++++++++++++++++++++++++++++++++++++++++++---
 arch/i386/kernel/traps.c   |    8 +++
 arch/ia64/kernel/smpboot.c |    3 -
 arch/ppc64/kernel/smp.c    |    4 +
 arch/s390/kernel/smp.c     |    4 +
 include/asm-i386/cpu.h     |    2
 include/asm-i386/irq.h     |    4 +
 include/asm-i386/smp.h     |    3 +
 kernel/cpu.c               |   14 ++----
 16 files changed, 236 insertions(+), 41 deletions(-)

Index: linux-2.6.9-rc3-mm3/arch/i386/Kconfig
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/i386/Kconfig,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 Kconfig
--- linux-2.6.9-rc3-mm3/arch/i386/Kconfig	8 Oct 2004 10:56:05 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/i386/Kconfig	10 Oct 2004 18:31:21 -0000
@@ -1218,6 +1218,15 @@ config SCx200
 	  This support is also available as a module.  If compiled as a
 	  module, it will be called scx200.
 
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+	depends on SMP && HOTPLUG && EXPERIMENTAL
+	---help---
+	  Say Y here to experiment with turning CPUs off and on.  CPUs
+	  can be controlled through /sys/devices/system/cpu.
+
+	  Say N.
+
 source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
Index: linux-2.6.9-rc3-mm3/arch/i386/kernel/apic.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/i386/kernel/apic.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 apic.c
--- linux-2.6.9-rc3-mm3/arch/i386/kernel/apic.c	8 Oct 2004 10:56:07 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/i386/kernel/apic.c	10 Oct 2004 18:31:21 -0000
@@ -26,6 +26,7 @@
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/cpu.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -1055,7 +1056,7 @@ void __init setup_secondary_APIC_clock(v
 	local_irq_enable();
 }
 
-void __init disable_APIC_timer(void)
+void __devinit disable_APIC_timer(void)
 {
 	if (using_apic_timer) {
 		unsigned long v;
Index: linux-2.6.9-rc3-mm3/arch/i386/kernel/io_apic.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/i386/kernel/io_apic.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 io_apic.c
--- linux-2.6.9-rc3-mm3/arch/i386/kernel/io_apic.c	8 Oct 2004 10:56:07 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/i386/kernel/io_apic.c	10 Oct 2004 18:31:21 -0000
@@ -574,9 +574,11 @@ static int balanced_irq(void *unused)
 		time_remaining = schedule_timeout(time_remaining);
 		if (time_after(jiffies,
 				prev_balance_time+balanced_irq_interval)) {
+			preempt_disable();
 			do_irq_balance();
 			prev_balance_time = jiffies;
 			time_remaining = balanced_irq_interval;
+			preempt_enable();
 		}
 	}
 	return 0;
Index: linux-2.6.9-rc3-mm3/arch/i386/kernel/irq.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/i386/kernel/irq.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 irq.c
--- linux-2.6.9-rc3-mm3/arch/i386/kernel/irq.c	8 Oct 2004 10:56:08 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/i386/kernel/irq.c	10 Oct 2004 20:18:02 -0000
@@ -14,6 +14,9 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 #include <asm/hardirq.h>
@@ -225,9 +228,8 @@ int show_interrupts(struct seq_file *p, 
 
 	if (i == 0) {
 		seq_printf(p, "           ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
+		for_each_cpu(j)
+			seq_printf(p, "CPU%d       ",j);
 		seq_putc(p, '\n');
 	}
 
@@ -240,9 +242,8 @@ int show_interrupts(struct seq_file *p, 
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+		for_each_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 		seq_printf(p, " %14s", irq_desc[i].handler->typename);
 		seq_printf(p, "  %s", action->name);
@@ -255,16 +256,13 @@ skip:
 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS) {
 		seq_printf(p, "NMI: ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", nmi_count(j));
+		for_each_cpu(j)
+			seq_printf(p, "%10u ", nmi_count(j));
 		seq_putc(p, '\n');
 #ifdef CONFIG_X86_LOCAL_APIC
 		seq_printf(p, "LOC: ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ",
-					irq_stat[j].apic_timer_irqs);
+		for_each_cpu(j)
+			seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
 		seq_putc(p, '\n');
 #endif
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -274,3 +272,45 @@ skip:
 	}
 	return 0;
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <mach_apic.h>
+
+void fixup_irqs(cpumask_t map)
+{
+	unsigned int irq;
+	static int warned;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		cpumask_t mask;
+		if (irq == 2)
+			continue;
+
+		cpus_and(mask, irq_affinity[irq], map);
+		if (any_online_cpu(mask) == NR_CPUS) {
+			printk("Breaking affinity for irq %i\n", irq);
+			mask = map;
+		}
+		if (irq_desc[irq].handler->set_affinity)
+			irq_desc[irq].handler->set_affinity(irq, mask);
+		else if (irq_desc[irq].action && !(warned++))
+			printk("Cannot set affinity for irq %i\n", irq);
+	}
+
+#if 0
+	barrier();
+	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
+	   [note the nop - the interrupt-enable boundary on x86 is two
+	   instructions from sti] - to flush out pending hardirqs and
+	   IPIs. After this point nothing is supposed to reach this CPU." */
+	__asm__ __volatile__("sti; nop; cli");
+	barrier();
+#else
+	/* That doesn't seem sufficient.  Give it 1ms. */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+#endif
+}
+#endif
+
Index: linux-2.6.9-rc3-mm3/arch/i386/kernel/msr.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/i386/kernel/msr.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 msr.c
--- linux-2.6.9-rc3-mm3/arch/i386/kernel/msr.c	8 Oct 2004 10:56:08 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/i386/kernel/msr.c	10 Oct 2004 18:31:21 -0000
@@ -260,7 +260,7 @@ static struct file_operations msr_fops =
 	.open = msr_open,
 };
 
-static int msr_class_simple_device_add(int i)
+static int __devinit msr_class_simple_device_add(int i)
 {
 	int err = 0;
 	struct class_device *class_err;
Index: linux-2.6.9-rc3-mm3/arch/i386/kernel/process.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/i386/kernel/process.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 process.c
--- linux-2.6.9-rc3-mm3/arch/i386/kernel/process.c	8 Oct 2004 10:56:08 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/i386/kernel/process.c	10 Oct 2004 18:31:21 -0000
@@ -13,6 +13,7 @@
 
 #include <stdarg.h>
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
@@ -54,6 +55,9 @@
 #include <linux/irq.h>
 #include <linux/err.h>
 
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 int hlt_counter;
@@ -132,6 +136,34 @@ static void poll_idle (void)
 	}
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+	/* Ack it */
+	__get_cpu_var(cpu_state) = CPU_DEAD;
+
+	/* We shouldn't have to disable interrupts while dead, but
+	 * some interrupts just don't seem to go away, and this makes
+	 * it "work" for testing purposes. */
+	/* Death loop */
+	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+		cpu_relax();
+
+	local_irq_disable();
+	__flush_tlb_all();
+	cpu_set(smp_processor_id(), cpu_online_map);
+	enable_APIC_timer();
+	local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
@@ -155,6 +187,8 @@ void cpu_idle (void)
 			if (!idle)
 				idle = default_idle;
 
+			if (cpu_is_offline(smp_processor_id()))
+				play_dead();
 			irq_stat[smp_processor_id()].idle_timestamp = jiffies;
 			idle();
 			rcu_read_unlock();
Index: linux-2.6.9-rc3-mm3/arch/i386/kernel/smp.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/i386/kernel/smp.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 smp.c
--- linux-2.6.9-rc3-mm3/arch/i386/kernel/smp.c	8 Oct 2004 10:56:08 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/i386/kernel/smp.c	10 Oct 2004 18:31:21 -0000
@@ -19,6 +19,7 @@
 #include <linux/mc146818rtc.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
+#include <linux/cpu.h>
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
@@ -166,7 +167,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
 	unsigned long flags;
 
 	local_irq_save(flags);
-		
+	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
 	/*
 	 * Wait for idle.
 	 */
@@ -351,21 +352,21 @@ out:
 static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 						unsigned long va)
 {
-	cpumask_t tmp;
 	/*
 	 * A couple of (to be removed) sanity checks:
 	 *
-	 * - we do not send IPIs to not-yet booted CPUs.
 	 * - current CPU must not be in mask
 	 * - mask must exist :)
 	 */
 	BUG_ON(cpus_empty(cpumask));
-
-	cpus_and(tmp, cpumask, cpu_online_map);
-	BUG_ON(!cpus_equal(cpumask, tmp));
 	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
 	BUG_ON(!mm);
 
+	/* If a CPU which we ran on has gone down, OK. */
+	cpus_and(cpumask, cpumask, cpu_online_map);
+	if (cpus_empty(cpumask))
+		return;
+
 	/*
 	 * i'm not happy about this global shared spinlock in the
 	 * MM hot path, but we'll see how contended it is.
@@ -490,6 +491,7 @@ void smp_send_nmi_allbutself(void)
  */
 void smp_send_reschedule(int cpu)
 {
+	WARN_ON(cpu_is_offline(cpu));
 	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
 }
 
@@ -535,10 +537,16 @@ int smp_call_function (void (*func) (voi
  */
 {
 	struct call_data_struct data;
-	int cpus = num_online_cpus()-1;
+	int cpus;
 
-	if (!cpus)
+	/* Holding any lock stops cpus from going down. */
+	spin_lock(&call_lock);
+	cpus = num_online_cpus()-1;
+
+	if (!cpus) {
+		spin_unlock(&call_lock);
 		return 0;
+	}
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
@@ -550,7 +558,6 @@ int smp_call_function (void (*func) (voi
 	if (wait)
 		atomic_set(&data.finished, 0);
 
-	spin_lock(&call_lock);
 	call_data = &data;
 	mb();
 	
Index: linux-2.6.9-rc3-mm3/arch/i386/kernel/smpboot.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/i386/kernel/smpboot.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 smpboot.c
--- linux-2.6.9-rc3-mm3/arch/i386/kernel/smpboot.c	8 Oct 2004 10:56:08 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/i386/kernel/smpboot.c	10 Oct 2004 20:15:58 -0000
@@ -44,6 +44,9 @@
 #include <linux/smp_lock.h>
 #include <linux/irq.h>
 #include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -88,6 +91,9 @@ extern unsigned char trampoline_end  [];
 static unsigned char *trampoline_base;
 static int trampoline_exec;
 
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
 /*
  * Currently trivial. Write the real->protected mode
  * bootstrap into the page concerned. The caller
@@ -1094,6 +1100,9 @@ static void __init smp_boot_cpus(unsigne
    who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
+	smp_commenced_mask = cpumask_of_cpu(0);
+	cpu_callin_map = cpumask_of_cpu(0);
+	mb();
 	smp_boot_cpus(max_cpus);
 }
 
@@ -1103,20 +1112,99 @@ void __devinit smp_prepare_boot_cpu(void
 	cpu_set(smp_processor_id(), cpu_callout_map);
 }
 
-int __devinit __cpu_up(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* must be called with the cpucontrol mutex held */
+static int __devinit cpu_enable(unsigned int cpu)
 {
-	/* This only works at boot for x86.  See "rewrite" above. */
-	if (cpu_isset(cpu, smp_commenced_mask)) {
-		local_irq_enable();
-		return -ENOSYS;
+	/* get the target out of its holding state */
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+	wmb();
+
+	/* wait for the processor to ack it. timeout? */
+	while (!cpu_online(cpu))
+		cpu_relax();
+
+	fixup_irqs(cpu_online_map);
+	/* counter the disable in fixup_irqs() */
+	local_irq_enable();
+	return 0;
+}
+
+int __cpu_disable(void)
+{
+	cpumask_t map = cpu_online_map;
+	int cpu = smp_processor_id();
+
+	/*
+	 * Perhaps use cpufreq to drop frequency, but that could go
+	 * into generic code.
+ 	 *
+	 * We won't take down the boot processor on i386 due to some
+	 * interrupts only being able to be serviced by the BSP.
+	 * Especially so if we're not using an IOAPIC	-zwane
+	 */
+	if (cpu == 0)
+		return -EBUSY;
+
+	/* We enable the timer again on the exit path of the death loop */
+	disable_APIC_timer();
+	/* Allow any queued timer interrupts to get serviced */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+
+	cpu_clear(cpu, map);
+	fixup_irqs(map);
+	/* It's now safe to remove this processor from the online map */
+	cpu_clear(cpu, cpu_online_map);
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	/* We don't do anything here: idle task is faking death itself. */
+	unsigned int i;
+
+	for (i = 0; i < 10; i++) {
+		/* They ack this in play_dead by setting CPU_DEAD */
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+			return;
+		current->state = TASK_UNINTERRUPTIBLE;
+		schedule_timeout(HZ/10);
 	}
+ 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+	return -ENOSYS;
+}
 
+void __cpu_die(unsigned int cpu)
+{
+	/* We said "no" in __cpu_disable */
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+int __devinit __cpu_up(unsigned int cpu)
+{
 	/* In case one didn't come up */
 	if (!cpu_isset(cpu, cpu_callin_map)) {
+		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
 		local_irq_enable();
 		return -EIO;
 	}
 
+#ifdef CONFIG_HOTPLUG_CPU
+	/* Already up, and in cpu_quiescent now? */
+	if (cpu_isset(cpu, smp_commenced_mask)) {
+		cpu_enable(cpu);
+		return 0;
+	}
+#endif
+
 	local_irq_enable();
 	/* Unleash the CPU! */
 	cpu_set(cpu, smp_commenced_mask);
Index: linux-2.6.9-rc3-mm3/arch/i386/kernel/traps.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/i386/kernel/traps.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 traps.c
--- linux-2.6.9-rc3-mm3/arch/i386/kernel/traps.c	8 Oct 2004 10:56:08 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/i386/kernel/traps.c	10 Oct 2004 18:31:21 -0000
@@ -692,6 +692,14 @@ asmlinkage void do_nmi(struct pt_regs * 
 	nmi_enter();
 
 	cpu = smp_processor_id();
+
+#ifdef CONFIG_HOTPLUG_CPU
+	if (!cpu_online(cpu)) {
+		nmi_exit();
+		return;
+	}
+#endif
+
 	++nmi_count(cpu);
 
 	if (!nmi_callback(regs, cpu))
Index: linux-2.6.9-rc3-mm3/arch/ia64/kernel/smpboot.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/ia64/kernel/smpboot.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 smpboot.c
--- linux-2.6.9-rc3-mm3/arch/ia64/kernel/smpboot.c	8 Oct 2004 10:55:28 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/ia64/kernel/smpboot.c	10 Oct 2004 18:31:21 -0000
@@ -591,9 +591,10 @@ int __cpu_disable(void)
 	if (cpu == 0)
 		return -EBUSY;
 
+	cpu_clear(cpu, cpu_online_map);
 	fixup_irqs();
 	local_flush_tlb_all();
-	printk ("Disabled cpu %u\n", smp_processor_id());
+	printk("Disabled cpu %u\n", cpu);
 	return 0;
 }
 
Index: linux-2.6.9-rc3-mm3/arch/ppc64/kernel/smp.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/ppc64/kernel/smp.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 smp.c
--- linux-2.6.9-rc3-mm3/arch/ppc64/kernel/smp.c	8 Oct 2004 10:56:09 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/ppc64/kernel/smp.c	10 Oct 2004 18:31:21 -0000
@@ -273,11 +273,13 @@ int __cpu_disable(void)
 {
 	/* FIXME: go put this in a header somewhere */
 	extern void xics_migrate_irqs_away(void);
+	int cpu = smp_processor_id();
 
+	cpu_clear(cpu, cpu_online_map);
 	systemcfg->processorCount--;
 
 	/*fix boot_cpuid here*/
-	if (smp_processor_id() == boot_cpuid)
+	if (cpu == boot_cpuid)
 		boot_cpuid = any_online_cpu(cpu_online_map);
 
 	/* FIXME: abstract this to not be platform specific later on */
Index: linux-2.6.9-rc3-mm3/arch/s390/kernel/smp.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/arch/s390/kernel/smp.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 smp.c
--- linux-2.6.9-rc3-mm3/arch/s390/kernel/smp.c	8 Oct 2004 10:55:45 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/arch/s390/kernel/smp.c	10 Oct 2004 18:31:21 -0000
@@ -688,13 +688,15 @@ __cpu_disable(void)
 {
 	unsigned long flags;
 	ec_creg_mask_parms cr_parms;
+	int cpu = smp_processor_id();
 
 	spin_lock_irqsave(&smp_reserve_lock, flags);
-	if (smp_cpu_reserved[smp_processor_id()] != 0) {
+	if (smp_cpu_reserved[cpu] != 0) {
 		spin_unlock_irqrestore(&smp_reserve_lock, flags);
 		return -EBUSY;
 	}
 
+	cpu_clear(cpu, cpu_online_map);
 	/* disable all external interrupts */
 
 	cr_parms.start_ctl = 0;
Index: linux-2.6.9-rc3-mm3/include/asm-i386/cpu.h
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/include/asm-i386/cpu.h,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 cpu.h
--- linux-2.6.9-rc3-mm3/include/asm-i386/cpu.h	8 Oct 2004 10:53:52 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/include/asm-i386/cpu.h	10 Oct 2004 18:31:21 -0000
@@ -5,6 +5,7 @@
 #include <linux/cpu.h>
 #include <linux/topology.h>
 #include <linux/nodemask.h>
+#include <linux/percpu.h>
 
 #include <asm/node.h>
 
@@ -26,4 +27,5 @@ static inline int arch_register_cpu(int 
 	return register_cpu(&cpu_devices[num].cpu, num, parent);
 }
 
+DECLARE_PER_CPU(int, cpu_state);
 #endif /* _ASM_I386_CPU_H_ */
Index: linux-2.6.9-rc3-mm3/include/asm-i386/irq.h
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/include/asm-i386/irq.h,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 irq.h
--- linux-2.6.9-rc3-mm3/include/asm-i386/irq.h	8 Oct 2004 10:53:52 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/include/asm-i386/irq.h	10 Oct 2004 20:34:06 -0000
@@ -34,4 +34,8 @@ extern void release_vm86_irqs(struct tas
 # define irq_ctx_init(cpu) do { } while (0)
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+extern void fixup_irqs(cpumask_t map);
+#endif
+
 #endif /* _ASM_IRQ_H */
Index: linux-2.6.9-rc3-mm3/include/asm-i386/smp.h
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/include/asm-i386/smp.h,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 smp.h
--- linux-2.6.9-rc3-mm3/include/asm-i386/smp.h	8 Oct 2004 10:53:52 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/include/asm-i386/smp.h	10 Oct 2004 18:31:21 -0000
@@ -85,6 +85,9 @@ static __inline int logical_smp_processo
 }
 
 #endif
+
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
 #endif /* !__ASSEMBLY__ */
 
 #define NO_PROC_ID		0xFF		/* No processor magic marker */
Index: linux-2.6.9-rc3-mm3/kernel/cpu.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.9-rc3-mm3/kernel/cpu.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 cpu.c
--- linux-2.6.9-rc3-mm3/kernel/cpu.c	8 Oct 2004 10:56:10 -0000	1.1.1.1
+++ linux-2.6.9-rc3-mm3/kernel/cpu.c	10 Oct 2004 18:31:21 -0000
@@ -62,19 +62,15 @@ static int take_cpu_down(void *unused)
 {
 	int err;
 
-	/* Take offline: makes arch_cpu_down somewhat easier. */
-	cpu_clear(smp_processor_id(), cpu_online_map);
-
 	/* Ensure this CPU doesn't handle any more interrupts. */
 	err = __cpu_disable();
 	if (err < 0)
-		cpu_set(smp_processor_id(), cpu_online_map);
-	else
-		/* Force idle task to run as soon as we yield: it should
-		   immediately notice cpu is offline and die quickly. */
-		sched_idle_next();
+		return err;
 
-	return err;
+	/* Force idle task to run as soon as we yield: it should
+	   immediately notice cpu is offline and die quickly. */
+	sched_idle_next();
+	return 0;
 }
 
 int cpu_down(unsigned int cpu)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i386 CPU hotplug updated for -mm
  2004-10-11  8:19       ` [PATCH] i386 CPU hotplug updated for -mm Zwane Mwaikambo
@ 2004-10-12  5:59         ` Nathan Lynch
  2004-10-12  6:04           ` Ingo Molnar
                             ` (2 more replies)
  2004-10-18 15:10         ` Pavel Machek
  1 sibling, 3 replies; 15+ messages in thread
From: Nathan Lynch @ 2004-10-12  5:59 UTC (permalink / raw)
  To: Zwane Mwaikambo; +Cc: Andrew Morton, Ingo Molnar, Linux Kernel, Rusty Russell

On Mon, 2004-10-11 at 03:19, Zwane Mwaikambo wrote:
> Hi Andrew,
> 	Find attached the i386 cpu hotplug patch updated for Ingo's latest 
> round of goodies. In order to avoid dumping cpu hotplug code into 
> kernel/irq/* I dropped the cpu_online check in do_IRQ() by modifying 
> fixup_irqs(). The difference is that on cpu offline, fixup_irqs() is 
> called before we clear the cpu from cpu_online_map, followed by a long 
> delay to ensure that we never have any queued external interrupts on 
> the APICs. Due to my usual test victims being in boxes a continent away 
> this hasn't been tested, but I'll cover bug reports (nudge, Nathan! ;)

Had to apply the patch to 2.6.9-rc4-mm1 (2.6.9-rc3-mm3 doesn't detect my
scsi controller).  Tested on a dual Pentium 3.  Simple offline/online
tests seem ok, except I see these warnings when taking a cpu down:

using smp_processor_id() in preemptible code: bash/3436
 [<c0106f37>] dump_stack+0x17/0x20
 [<c011a087>] smp_processor_id+0x87/0xa0
 [<c0134d54>] cpu_down+0x134/0x240
 [<c02770d9>] store_online+0x39/0x70
 [<c0274487>] sysdev_store+0x37/0x50
 [<c019339e>] flush_write_buffer+0x2e/0x40
 [<c0193427>] sysfs_write_file+0x77/0x90
 [<c015aa20>] vfs_write+0xe0/0x120
 [<c015ab0d>] sys_write+0x3d/0x70
 [<c0106123>] syscall_call+0x7/0xb

using smp_processor_id() in preemptible code: ksoftirqd/1/5233
 [<c0106f37>] dump_stack+0x17/0x20
 [<c011a087>] smp_processor_id+0x87/0xa0
 [<c0124128>] ksoftirqd+0x68/0x140
 [<c0133326>] kthread+0x96/0xe0
 [<c0104275>] kernel_thread_helper+0x5/0x10

Under load (unpacking a kernel source tree while continuously
onlining/offlining a cpu) I got a panic within a couple of minutes:

Unable to handle kernel NULL pointer dereference at virtual address 00000004
 printing eip:
c0145703
*pde = 00000000
Oops: 0000 [#1]
PREEMPT SMP
Modules linked in:
CPU:    1
EIP:    0060:[<c0145703>]    Not tainted VLI
EFLAGS: 00010082   (2.6.9-rc4-mm1i386cpuhp)
EIP is at kmem_cache_free+0x33/0x70
eax: cba2cd98   ebx: 00000000   ecx: 00000000   edx: cba2cd98
esi: c1561ac0   edi: cba2cd98   ebp: c058be8c   esp: c058be7c
ds: 007b   es: 007b   ss: 0068
Process swapper (pid: 0, threadinfo=c058b000 task=dff88540)
Stack: 00000082 c1559920 00001000 cba2cd9c c058bea4 c013e491 00000000 cf4abee0
       00001000 c015f050 c058beb0 c015f92c cf4abee0 c058beb8 c015fbac c058bec4
       c015f07d cf4abee0 c058beec c0160819 c0271a48 c0127c11 00000001 00000002
Call Trace:
 [<c0106f0a>] show_stack+0x7a/0x90
 [<c0107096>] show_registers+0x156/0x1d0
 [<c01072b4>] die+0xf4/0x180
 [<c0116bd7>] do_page_fault+0x457/0x66d
 [<c0106b8d>] error_code+0x2d/0x38
 [<c013e491>] mempool_free+0x81/0x90
 [<c015f92c>] bio_destructor+0x3c/0x60
 [<c015fbac>] bio_put+0x2c/0x40
 [<c015f07d>] end_bio_bh_io_sync+0x2d/0x60
 [<c0160819>] bio_endio+0x59/0x90
 [<c027c578>] __end_that_request_first+0x188/0x250
 [<c029db14>] __ide_end_request+0x54/0x150
 [<c029dc68>] ide_end_request+0x58/0xc0
 [<c02a3fe1>] task_end_request+0x31/0x70
 [<c02a418b>] task_out_intr+0x7b/0x100
 [<c029f6cd>] ide_intr+0xbd/0x160
 [<c0139d44>] handle_IRQ_event+0x34/0x70
 [<c0139e6c>] __do_IRQ+0xec/0x170
 [<c0108570>] do_IRQ+0x60/0xa0
 =======================
 [<c0106a90>] common_interrupt+0x18/0x20
 [<00000000>] 0x0
 [<dff84fbc>] 0xdff84fbc
Code: f8 89 c6 89 5d f4 89 7d fc 9c 8f 45 f0 fa 89 d7 e8 13 49 fd ff 8b 1c 86 e8 6b ea ff ff 8b 4d 04 89 fa 89 f0 e8 2f f3 ff ff 89 c7 <8b> 43 04 39 03 73 20 f0 ff 86 c4 00 00 00 8b 03 89 7c 83 10 ff
 <0>Kernel panic - not syncing: Fatal exception in interrupt

I fixed up the warning in cpu_down with the following patch and now am
running with that + 2.6.9-rc4-mm1 + your patch while doing continuous
online/offline and make -j8.  It's been running for about 45 minutes and
I haven't seen the panic yet, although I'm at a loss to explain why the
change would fix it.  Will let it run overnight and report back...

Nathan

--

Fix (harmless?) smp_processor_id() usage in preemptible section of
cpu_down.

Signed-off-by: Nathan Lynch <nathanl@austin.ibm.com>
---

 2.6.9-rc4-mm1-nathanl/kernel/cpu.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletion(-)

diff -puN kernel/cpu.c~cpu_down-fix-smp_processor_id-warning kernel/cpu.c
--- 2.6.9-rc4-mm1/kernel/cpu.c~cpu_down-fix-smp_processor_id-warning	2004-10-11 23:28:47.000000000 -0500
+++ 2.6.9-rc4-mm1-nathanl/kernel/cpu.c	2004-10-11 23:34:36.000000000 -0500
@@ -129,7 +129,8 @@ int cpu_down(unsigned int cpu)
 	__cpu_die(cpu);
 
 	/* Move it here so it can run. */
-	kthread_bind(p, smp_processor_id());
+	kthread_bind(p, get_cpu());
+	put_cpu();
 
 	/* CPU is completely dead: tell everyone.  Too late to complain. */
 	if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu)
_



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i386 CPU hotplug updated for -mm
  2004-10-12  5:59         ` Nathan Lynch
@ 2004-10-12  6:04           ` Ingo Molnar
  2004-10-12 14:16             ` Nathan Lynch
  2004-10-12  6:23           ` Dave Hansen
  2004-10-12 14:47           ` Zwane Mwaikambo
  2 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2004-10-12  6:04 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: Zwane Mwaikambo, Andrew Morton, Linux Kernel, Rusty Russell


* Nathan Lynch <nathanl@austin.ibm.com> wrote:

> I fixed up the warning in cpu_down with the following patch and now am
> running with that + 2.6.9-rc4-mm1 + your patch while doing continuous
> online/offline and make -j8.  It's been running for about 45 minutes
> and I haven't seen the panic yet, although I'm at a loss to explain
> why the change would fix it.  Will let it run overnight and report
> back...

>  	/* Move it here so it can run. */
> -	kthread_bind(p, smp_processor_id());
> +	kthread_bind(p, get_cpu());
> +	put_cpu();

>  	/* CPU is completely dead: tell everyone.  Too late to complain. */
>  	if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu)

hm, is there any assurance that smp_processor_id() == cpu?

but I agree, this fix shouldn't impact the bio-destruct slab crash you
were seeing.

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i386 CPU hotplug updated for -mm
  2004-10-12  5:59         ` Nathan Lynch
  2004-10-12  6:04           ` Ingo Molnar
@ 2004-10-12  6:23           ` Dave Hansen
  2004-10-12 14:47           ` Zwane Mwaikambo
  2 siblings, 0 replies; 15+ messages in thread
From: Dave Hansen @ 2004-10-12  6:23 UTC (permalink / raw)
  To: Nathan Lynch
  Cc: Zwane Mwaikambo, Andrew Morton, Ingo Molnar, Linux Kernel, Rusty Russell

On Mon, 2004-10-11 at 22:59, Nathan Lynch wrote:
> On Mon, 2004-10-11 at 03:19, Zwane Mwaikambo wrote:
> > Hi Andrew,
> > 	Find attached the i386 cpu hotplug patch updated for Ingo's latest 
> > round of goodies. In order to avoid dumping cpu hotplug code into 
> > kernel/irq/* I dropped the cpu_online check in do_IRQ() by modifying 
> > fixup_irqs(). The difference is that on cpu offline, fixup_irqs() is 
> > called before we clear the cpu from cpu_online_map, followed by a long 
> > delay to ensure that we never have any queued external interrupts on 
> > the APICs. Due to my usual test victims being in boxes a continent away 
> > this hasn't been tested, but I'll cover bug reports (nudge, Nathan! ;)
> 
> Had to apply the patch to 2.6.9-rc4-mm1 (2.6.9-rc3-mm3 doesn't detect my
> scsi controller).  Tested on a dual Pentium 3.  Simple offline/online
> tests seem ok, except I see these warnings when taking a cpu down:
> 
> using smp_processor_id() in preemptible code: bash/3436
>  [<c0106f37>] dump_stack+0x17/0x20
>  [<c011a087>] smp_processor_id+0x87/0xa0
>  [<c0134d54>] cpu_down+0x134/0x240
>  [<c02770d9>] store_online+0x39/0x70
>  [<c0274487>] sysdev_store+0x37/0x50
>  [<c019339e>] flush_write_buffer+0x2e/0x40
>  [<c0193427>] sysfs_write_file+0x77/0x90
>  [<c015aa20>] vfs_write+0xe0/0x120
>  [<c015ab0d>] sys_write+0x3d/0x70
>  [<c0106123>] syscall_call+0x7/0xb

I think this warning is likely bogus:

        /* Move it here so it can run. */
        kthread_bind(p, smp_processor_id());

That just makes sure that the thread can get cleaned up somewhere on
some active cpu.  The only problem would be if the cpu that the thread
was bound to also went down.  But, I think that's prevented by the big
cpu hotplug semaphore.  (the "down"ing CPU can't be "down"ed in the
process of "down"ing :)

> diff -puN kernel/cpu.c~cpu_down-fix-smp_processor_id-warning kernel/cpu.c
> --- 2.6.9-rc4-mm1/kernel/cpu.c~cpu_down-fix-smp_processor_id-warning	2004-10-11 23:28:47.000000000 -0500
> +++ 2.6.9-rc4-mm1-nathanl/kernel/cpu.c	2004-10-11 23:34:36.000000000 -0500
> @@ -129,7 +129,8 @@ int cpu_down(unsigned int cpu)
>  	__cpu_die(cpu);
>  
>  	/* Move it here so it can run. */
> -	kthread_bind(p, smp_processor_id());
> +	kthread_bind(p, get_cpu());
> +	put_cpu();

This doesn't do much more than suppress the warning.  If there was some
kind of race there before, then it's still there but is now hidden.
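
For reference, a rough sketch of what the patched line boils down to
(the expansions are paraphrased from 2.6-era <linux/smp.h>, so treat
this as illustration rather than the exact source):

	#define get_cpu()	({ preempt_disable(); smp_processor_id(); })
	#define put_cpu()	preempt_enable()

	/* so the patched sequence is effectively: */
	preempt_disable();
	kthread_bind(p, smp_processor_id());	/* debug check satisfied */
	preempt_enable();

Disabling preemption only pins us for the duration of the bind; it says
nothing about the chosen CPU staying online afterwards, so the real
guarantee still has to come from the hotplug semaphore.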

-- Dave


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i386 CPU hotplug updated for -mm
  2004-10-12  6:04           ` Ingo Molnar
@ 2004-10-12 14:16             ` Nathan Lynch
  2004-10-12 14:38               ` Zwane Mwaikambo
  0 siblings, 1 reply; 15+ messages in thread
From: Nathan Lynch @ 2004-10-12 14:16 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Zwane Mwaikambo, Andrew Morton, Linux Kernel, Rusty Russell

On Tue, 2004-10-12 at 01:04, Ingo Molnar wrote:
> * Nathan Lynch <nathanl@austin.ibm.com> wrote:
> 
> > I fixed up the warning in cpu_down with the following patch and now am
> > running with that + 2.6.9-rc4-mm1 + your patch while doing continuous
> > online/offline and make -j8.  It's been running for about 45 minutes
> > and I haven't seen the panic yet, although I'm at a loss to explain
> > why the change would fix it.  Will let it run overnight and report
> > back...
> 
> >  	/* Move it here so it can run. */
> > -	kthread_bind(p, smp_processor_id());
> > +	kthread_bind(p, get_cpu());
> > +	put_cpu();
> 
> >  	/* CPU is completely dead: tell everyone.  Too late to complain. */
> >  	if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu)
> 
> hm, is there any assurance that smp_processor_id() == cpu?

Actually, cpu != smp_processor_id().  cpu is the processor we have just
taken down at that point; we want the kthread to run on some other cpu.
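
To illustrate (a simplified paraphrase of the tail of cpu_down(), not
the exact -mm source):

	__cpu_die(cpu);			/* "cpu" is offline by now */

	/*
	 * We are executing on some other, still-online processor,
	 * so smp_processor_id() != cpu here; the bind just moves
	 * the kthread off the dead CPU so it can be cleaned up.
	 */
	kthread_bind(p, get_cpu());
	put_cpu();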

Nathan



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i386 CPU hotplug updated for -mm
  2004-10-12 14:16             ` Nathan Lynch
@ 2004-10-12 14:38               ` Zwane Mwaikambo
  0 siblings, 0 replies; 15+ messages in thread
From: Zwane Mwaikambo @ 2004-10-12 14:38 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: Ingo Molnar, Andrew Morton, Linux Kernel, Rusty Russell

On Tue, 12 Oct 2004, Nathan Lynch wrote:

> On Tue, 2004-10-12 at 01:04, Ingo Molnar wrote:
> > * Nathan Lynch <nathanl@austin.ibm.com> wrote:
> > 
> > > I fixed up the warning in cpu_down with the following patch and now am
> > > running with that + 2.6.9-rc4-mm1 + your patch while doing continuous
> > > online/offline and make -j8.  It's been running for about 45 minutes
> > > and I haven't seen the panic yet, although I'm at a loss to explain
> > > why the change would fix it.  Will let it run overnight and report
> > > back...
> > 
> > >  	/* Move it here so it can run. */
> > > -	kthread_bind(p, smp_processor_id());
> > > +	kthread_bind(p, get_cpu());
> > > +	put_cpu();
> > 
> > >  	/* CPU is completely dead: tell everyone.  Too late to complain. */
> > >  	if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu)
> > 
> > hm, is there any assurance that smp_processor_id() == cpu?
> 
> Actually, cpu != smp_processor_id().  cpu is the processor we have just
> taken down at that point; we want the kthread to run on some other cpu.

Nathan, thanks for doing that testing; the warning does indeed look to 
be a false positive. Looking at the code again, i realised that there 
might also be cases where someone does the following:

	preempt_disable();
	cpu = smp_processor_id();
	...
	preempt_enable();
	...
	mod_local_cpu_variable[cpu];	/* may no longer be on "cpu" here */

So cached values of smp_processor_id() might also be a problem, 
although i haven't found any such cases; maybe that's just being 
overly paranoid =)
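
For completeness, a sketch of the variant that stays safe under
preemption (mod_local_cpu_variable is pseudocode, as above):

	cpu = get_cpu();	/* preempt_disable() + smp_processor_id() */
	...
	mod_local_cpu_variable[cpu];	/* still guaranteed to be on "cpu" */
	put_cpu();			/* only now can we be migrated */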

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i386 CPU hotplug updated for -mm
  2004-10-12  5:59         ` Nathan Lynch
  2004-10-12  6:04           ` Ingo Molnar
  2004-10-12  6:23           ` Dave Hansen
@ 2004-10-12 14:47           ` Zwane Mwaikambo
  2 siblings, 0 replies; 15+ messages in thread
From: Zwane Mwaikambo @ 2004-10-12 14:47 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: Andrew Morton, Ingo Molnar, Linux Kernel, Rusty Russell

On Tue, 12 Oct 2004, Nathan Lynch wrote:

> Unable to handle kernel NULL pointer dereference at virtual address 00000004
>  printing eip:
> c0145703
> *pde = 00000000
> Oops: 0000 [#1]
> PREEMPT SMP
> Modules linked in:
> CPU:    1

Ok, cpu1 took an interrupt and exploded; an interrupt must have snuck 
in whilst the cpu was offline. Perhaps put a printk in show_regs() to 
say whether it was offline at the time.
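
Something along these lines would do it (untested, purely to
illustrate the suggestion):

	/* e.g. in i386 show_regs(): */
	printk("CPU %d online: %d\n", smp_processor_id(),
	       cpu_online(smp_processor_id()));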

>  [<c029f6cd>] ide_intr+0xbd/0x160
>  [<c0139d44>] handle_IRQ_event+0x34/0x70
>  [<c0139e6c>] __do_IRQ+0xec/0x170
>  [<c0108570>] do_IRQ+0x60/0xa0

Thanks,
	Zwane

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i386 CPU hotplug updated for -mm
  2004-10-11  8:19       ` [PATCH] i386 CPU hotplug updated for -mm Zwane Mwaikambo
  2004-10-12  5:59         ` Nathan Lynch
@ 2004-10-18 15:10         ` Pavel Machek
  2004-10-21 14:47           ` Zwane Mwaikambo
  1 sibling, 1 reply; 15+ messages in thread
From: Pavel Machek @ 2004-10-18 15:10 UTC (permalink / raw)
  To: Zwane Mwaikambo
  Cc: Andrew Morton, Ingo Molnar, Nathan Lynch, Linux Kernel, Rusty Russell

Hi!


> +#ifdef CONFIG_HOTPLUG_CPU
> +#include <asm/nmi.h>
> +/* We don't actually take CPU down, just spin without interrupts. */
> +static inline void play_dead(void)
> +{
> +	/* Ack it */
> +	__get_cpu_var(cpu_state) = CPU_DEAD;
> +
> +	/* We shouldn't have to disable interrupts while dead, but
> +	 * some interrupts just don't seem to go away, and this makes
> +	 * it "work" for testing purposes. */
> +	/* Death loop */
> +	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
> +		cpu_relax();
> +
> +	local_irq_disable();
> +	__flush_tlb_all();
> +	cpu_set(smp_processor_id(), cpu_online_map);
> +	enable_APIC_timer();
> +	local_irq_enable();
> +}
> +#else

Having real implementation of this one would be very welcome for
suspend-to-{RAM,disk} on smp machines....

Are there really no i386 machines whose hardware supports hotplug?

							Pavel
-- 
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i386 CPU hotplug updated for -mm
  2004-10-18 15:10         ` Pavel Machek
@ 2004-10-21 14:47           ` Zwane Mwaikambo
  0 siblings, 0 replies; 15+ messages in thread
From: Zwane Mwaikambo @ 2004-10-21 14:47 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Andrew Morton, Ingo Molnar, Nathan Lynch, Linux Kernel, Rusty Russell

On Mon, 18 Oct 2004, Pavel Machek wrote:

> Having real implementation of this one would be very welcome for
> suspend-to-{RAM,disk} on smp machines....

I actually started working on that already, but progress got hindered 
by non-technical issues; i hope to get back to it very soon. This is 
indeed handy for P4 HT-type laptops.
 
> Are there really no i386 machines whose hardware supports hotplug?

There are i386 systems which do support hardware hotplug, mostly the 
addition of processors. I haven't personally worked with any, but I 
have been in contact with people who are interested in the code for 
that specific reason.

Regards,
	Zwane


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2004-10-21 15:17 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-10-01 20:45 [patch 0/5, 2.6.9-rc3] generic irq subsystem, description Ingo Molnar
2004-10-01 20:46 ` [patch 1/5, 2.6.9-rc3] generic irq subsystem, core Ingo Molnar
2004-10-01 20:47   ` [patch 2/5, 2.6.9-rc3] generic irq subsystem, x86 port Ingo Molnar
2004-10-01 20:48     ` [patch 3/5, 2.6.9-rc3] generic irq subsystem, x64 port Ingo Molnar
2004-10-01 20:48       ` [patch 4/5, 2.6.9-rc3] generic irq subsystem, ppc port Ingo Molnar
2004-10-01 20:49       ` [patch 5/5, 2.6.9-rc3] generic irq subsystem, ppc64 port Ingo Molnar
     [not found]   ` <20041001143332.7e3a5aba.akpm@osdl.org>
     [not found]     ` <Pine.LNX.4.61.0410091550300.2870@musoma.fsmlabs.com>
2004-10-11  8:19       ` [PATCH] i386 CPU hotplug updated for -mm Zwane Mwaikambo
2004-10-12  5:59         ` Nathan Lynch
2004-10-12  6:04           ` Ingo Molnar
2004-10-12 14:16             ` Nathan Lynch
2004-10-12 14:38               ` Zwane Mwaikambo
2004-10-12  6:23           ` Dave Hansen
2004-10-12 14:47           ` Zwane Mwaikambo
2004-10-18 15:10         ` Pavel Machek
2004-10-21 14:47           ` Zwane Mwaikambo
