All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch] PPC/PPC64 port of voluntary preempt patch
@ 2004-08-23 22:18 Scott Wood
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
  2004-08-24 19:51 ` [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
  0 siblings, 2 replies; 93+ messages in thread
From: Scott Wood @ 2004-08-23 22:18 UTC (permalink / raw)
  To: mingo; +Cc: manas.saksena, linux-kernel

I have attached a port of the voluntary preempt patch to PPC and
PPC64.  The patch is against P7, but it applies against P8 as well.

I've tested it on a dual G5 Mac, both in uniprocessor and SMP.

Some notes on changes to the generic part of the patch/existing
generic code:

	I changed the generic code so that request_irq() can be called
	prior to the scheduler being ready to run.  Previously, if this
	happened, it'd try to spawn a thread anyway, and oops.
	
	I changed the no-op definitions of voluntary_resched() and such
	to be empty inline functions, rather than #defined to 0.  When 0
	is used, newer GCCs (I'm using 3.4.1) issue a warning about
	statements with no effect.  Due to this, I removed the redundant
	definition of voluntary_resched() from sched.h (it's also in
	kernel.h, which is always included by sched.h).  Does it need to
	be in kernel.h?
	
	The WARN_ON(system_state == SYSTEM_BOOTING) was flooding me
	with warnings; this stopped when I moved the setting of
	system_state before the init thread was started (it seems
	rather odd that one would not be able to schedule when creating
	a thread...).
	
	The latency tracker at one point used cpu_khz/1000, and at another
	used cpu_khz/1024.  Is there a reason why cycles_to_usecs isn't
	used in both places?
	
	It's not exactly related to PPC, but I changed 
	if (latency < preempt_max_latency) to use <= instead, as I was
	getting the same latency printed out over and over.
	
I haven't (yet) fixed any of the specific latencies I've found on the
Mac; this patch just supplies the generic functionality.

Signed-off-by: Scott Wood <scott.wood@timesys.com>

diff -urN vpP7/arch/ppc/Kconfig vpP7-ppc/arch/ppc/Kconfig
--- vpP7/arch/ppc/Kconfig	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/Kconfig	2004-08-23 13:57:14.000000000 -0400
@@ -808,6 +808,19 @@
 	  Say Y here if you are building a kernel for a desktop, embedded
 	  or real-time system.  Say N if you are unsure.
 
+config PREEMPT_VOLUNTARY
+	bool "Voluntary Kernel Preemption"
+	default y
+	help
+	  This option reduces the latency of the kernel by adding more
+	  "explicit preemption points" to the kernel code. These new
+	  preemption points have been selected to minimize the maximum
+	  latency of rescheduling, providing faster application reactions.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
+
 config HIGHMEM
 	bool "High memory support"
 
diff -urN vpP7/arch/ppc/kernel/entry.S vpP7-ppc/arch/ppc/kernel/entry.S
--- vpP7/arch/ppc/kernel/entry.S	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/kernel/entry.S	2004-08-23 13:57:14.000000000 -0400
@@ -610,6 +610,11 @@
 
 /* N.B. the only way to get here is from the beq following ret_from_except. */
 resume_kernel:
+	lis	r9, kernel_preemption@ha
+	lwz	r9, kernel_preemption@l(r9)
+	cmpwi	r9, 0
+	bne	restore
+
 	/* check current_thread_info->preempt_count */
 	rlwinm	r9,r1,0,0,18
 	lwz	r0,TI_PREEMPT(r9)
diff -urN vpP7/arch/ppc/kernel/irq.c vpP7-ppc/arch/ppc/kernel/irq.c
--- vpP7/arch/ppc/kernel/irq.c	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/kernel/irq.c	2004-08-23 13:57:14.000000000 -0400
@@ -64,8 +64,6 @@
 void enable_irq(unsigned int irq_nr);
 void disable_irq(unsigned int irq_nr);
 
-static void register_irq_proc (unsigned int irq);
-
 #define MAXCOUNT 10000000
 
 irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
@@ -100,6 +98,7 @@
 	unsigned int i;
 	if ( mem_init_done )
 		return kmalloc(size,pri);
+		
 	for ( i = 0; i < IRQ_KMALLOC_ENTRIES ; i++ )
 		if ( ! ( cache_bitmask & (1<<i) ) )
 		{
@@ -121,107 +120,6 @@
 	kfree(ptr);
 }
 
-int
-setup_irq(unsigned int irq, struct irqaction * new)
-{
-	int shared = 0;
-	unsigned long flags;
-	struct irqaction *old, **p;
-	irq_desc_t *desc = irq_desc + irq;
-
-	/*
-	 * Some drivers like serial.c use request_irq() heavily,
-	 * so we have to be careful not to interfere with a
-	 * running system.
-	 */
-	if (new->flags & SA_SAMPLE_RANDOM) {
-		/*
-		 * This function might sleep, we want to call it first,
-		 * outside of the atomic block.
-		 * Yes, this might clear the entropy pool if the wrong
-		 * driver is attempted to be loaded, without actually
-		 * installing a new handler, but is this really a problem,
-		 * only the sysadmin is able to do this.
-		 */
-		rand_initialize_irq(irq);
-	}
-
-	/*
-	 * The following block of code has to be executed atomically
-	 */
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	if ((old = *p) != NULL) {
-		/* Can't share interrupts unless both agree to */
-		if (!(old->flags & new->flags & SA_SHIRQ)) {
-			spin_unlock_irqrestore(&desc->lock,flags);
-			return -EBUSY;
-		}
-
-		/* add new interrupt at end of irq queue */
-		do {
-			p = &old->next;
-			old = *p;
-		} while (old);
-		shared = 1;
-	}
-
-	*p = new;
-
-	if (!shared) {
-		desc->depth = 0;
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
-		if (desc->handler) {
-			if (desc->handler->startup)
-				desc->handler->startup(irq);
-			else if (desc->handler->enable)
-				desc->handler->enable(irq);
-		}
-	}
-	spin_unlock_irqrestore(&desc->lock,flags);
-
-	register_irq_proc(irq);
-	return 0;
-}
-
-void free_irq(unsigned int irq, void* dev_id)
-{
-	irq_desc_t *desc;
-	struct irqaction **p;
-	unsigned long flags;
-
-	desc = irq_desc + irq;
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	for (;;) {
-		struct irqaction * action = *p;
-		if (action) {
-			struct irqaction **pp = p;
-			p = &action->next;
-			if (action->dev_id != dev_id)
-				continue;
-
-			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
-			if (!desc->action) {
-				desc->status |= IRQ_DISABLED;
-				mask_irq(irq);
-			}
-			spin_unlock_irqrestore(&desc->lock,flags);
-
-			synchronize_irq(irq);
-			irq_kfree(action);
-			return;
-		}
-		printk("Trying to free free IRQ%d\n",irq);
-		spin_unlock_irqrestore(&desc->lock,flags);
-		break;
-	}
-	return;
-}
-
-EXPORT_SYMBOL(free_irq);
-
 int request_irq(unsigned int irq,
 	irqreturn_t (*handler)(int, void *, struct pt_regs *),
 	unsigned long irqflags, const char * devname, void *dev_id)
@@ -262,95 +160,6 @@
 
 EXPORT_SYMBOL(request_irq);
 
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock.
- */
-
-/**
- *	disable_irq_nosync - disable an irq without waiting
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. Unlike disable_irq(), this function does not ensure existing
- *	instances of the IRQ handler have completed before returning.
- *
- *	This function may be called from IRQ context.
- */
-
-void disable_irq_nosync(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		if (!(desc->status & IRQ_PER_CPU))
-			desc->status |= IRQ_DISABLED;
-		mask_irq(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-/**
- *	disable_irq - disable an irq and wait for completion
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. That is for two disables you need two enables. This
- *	function waits for any pending IRQ handlers for this interrupt
- *	to complete before returning. If you use this function while
- *	holding a resource the IRQ handler may need you will deadlock.
- *
- *	This function may be called - with care - from IRQ context.
- */
-
-void disable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	disable_irq_nosync(irq);
-	if (desc->action)
-		synchronize_irq(irq);
-}
-
-/**
- *	enable_irq - enable interrupt handling on an irq
- *	@irq: Interrupt to enable
- *
- *	Re-enables the processing of interrupts on this IRQ line
- *	providing no disable_irq calls are now in effect.
- *
- *	This function may be called from IRQ context.
- */
-
-void enable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-		desc->status = status;
-		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
-			desc->status = status | IRQ_REPLAY;
-			hw_resend_irq(desc->handler,irq);
-		}
-		unmask_irq(irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-		break;
-	case 0:
-		printk("enable_irq(%u) unbalanced\n", irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
 int show_interrupts(struct seq_file *p, void *v)
 {
 	int i = *(loff_t *) v, j;
@@ -410,24 +219,6 @@
 	return 0;
 }
 
-static inline void
-handle_irq_event(int irq, struct pt_regs *regs, struct irqaction *action)
-{
-	int status = 0;
-
-	if (!(action->flags & SA_INTERRUPT))
-		local_irq_enable();
-
-	do {
-		status |= action->flags;
-		action->handler(irq, action->dev_id, regs);
-		action = action->next;
-	} while (action);
-	if (status & SA_SAMPLE_RANDOM)
-		add_interrupt_randomness(irq);
-	local_irq_disable();
-}
-
 /*
  * Eventually, this should take an array of interrupts and an array size
  * so it can dispatch multiple interrupts.
@@ -482,6 +273,8 @@
 	if (unlikely(!action))
 		goto out;
 
+	if (generic_redirect_hardirq(desc))
+		goto out_no_end;
 
 	/*
 	 * Edge triggered interrupts need to remember
@@ -494,10 +287,14 @@
 	 * SMP environment.
 	 */
 	for (;;) {
+		irqreturn_t action_ret;
+	
 		spin_unlock(&desc->lock);
-		handle_irq_event(irq, regs, action);
+		action_ret = generic_handle_IRQ_event(irq, regs, action);
 		spin_lock(&desc->lock);
-
+		
+		if (!noirqdebug)
+			generic_note_interrupt(irq, desc, action_ret);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;
@@ -514,13 +311,15 @@
 		else if (irq_desc[irq].handler->enable)
 			irq_desc[irq].handler->enable(irq);
 	}
+
+out_no_end:
 	spin_unlock(&desc->lock);
 }
 
 void do_IRQ(struct pt_regs *regs)
 {
 	int irq, first = 1;
-        irq_enter();
+	irq_enter();
 
 	/*
 	 * Every platform is required to implement ppc_md.get_irq.
@@ -537,7 +336,7 @@
 	if (irq != -2 && first)
 		/* That's not SMP safe ... but who cares ? */
 		ppc_spurious_interrupts++;
-        irq_exit();
+	irq_exit();
 }
 
 unsigned long probe_irq_on (void)
@@ -559,148 +358,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
-	while (irq_desc[irq].status & IRQ_INPROGRESS)
-		barrier();
-}
-#endif /* CONFIG_SMP */
-
-static struct proc_dir_entry *root_irq_dir;
-static struct proc_dir_entry *irq_dir[NR_IRQS];
-static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
-
-#ifdef CONFIG_IRQ_ALL_CPUS
-#define DEFAULT_CPU_AFFINITY CPU_MASK_ALL
-#else
-#define DEFAULT_CPU_AFFINITY cpumask_of_cpu(0)
-#endif
-
-cpumask_t irq_affinity [NR_IRQS];
-
-static int irq_affinity_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int irq_affinity_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	int irq = (int) data, full_count = count, err;
-	cpumask_t new_value, tmp;
-
-	if (!irq_desc[irq].handler->set_affinity)
-		return -EIO;
-
-	err = cpumask_parse(buffer, count, new_value);
-
-	/*
-	 * Do not allow disabling IRQs completely - it's a too easy
-	 * way to make the system unusable accidentally :-) At least
-	 * one online CPU still has to be targeted.
-	 *
-	 * We assume a 1-1 logical<->physical cpu mapping here.  If
-	 * we assume that the cpu indices in /proc/irq/../smp_affinity
-	 * are actually logical cpu #'s then we have no problem.
-	 *  -- Cort <cort@fsmlabs.com>
-	 */
-	cpus_and(tmp, new_value, cpu_online_map);
-	if (cpus_empty(tmp))
-		return -EINVAL;
-
-	irq_affinity[irq] = new_value;
-	irq_desc[irq].handler->set_affinity(irq, new_value);
-
-	return full_count;
-}
-
-static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	int err;
-	int full_count = count;
-	cpumask_t *mask = (cpumask_t *)data;
-	cpumask_t new_value;
-
-	err = cpumask_parse(buffer, count, new_value);
-	if (err)
-		return err;
-
-	*mask = new_value;
-	return full_count;
-}
-
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
-	struct proc_dir_entry *entry;
-	char name [MAX_NAMELEN];
-
-	if (!root_irq_dir || (irq_desc[irq].handler == NULL) || irq_dir[irq])
-		return;
-
-	memset(name, 0, MAX_NAMELEN);
-	sprintf(name, "%d", irq);
-
-	/* create /proc/irq/1234 */
-	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-
-	/* create /proc/irq/1234/smp_affinity */
-	entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
-	entry->nlink = 1;
-	entry->data = (void *)irq;
-	entry->read_proc = irq_affinity_read_proc;
-	entry->write_proc = irq_affinity_write_proc;
-
-	smp_affinity_entry[irq] = entry;
-}
-
-unsigned long prof_cpu_mask = -1;
-
-void init_irq_proc (void)
-{
-	struct proc_dir_entry *entry;
-	int i;
-
-	/* create /proc/irq */
-	root_irq_dir = proc_mkdir("irq", NULL);
-
-	/* create /proc/irq/prof_cpu_mask */
-	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
-
-	entry->nlink = 1;
-	entry->data = (void *)&prof_cpu_mask;
-	entry->read_proc = prof_cpu_mask_read_proc;
-	entry->write_proc = prof_cpu_mask_write_proc;
-
-	/*
-	 * Create entries for all existing IRQs.
-	 */
-	for (i = 0; i < NR_IRQS; i++) {
-		if (irq_desc[i].handler == NULL)
-			continue;
-		register_irq_proc(i);
-	}
-}
-
 irqreturn_t no_action(int irq, void *dev, struct pt_regs *regs)
 {
 	return IRQ_NONE;
@@ -708,10 +365,7 @@
 
 void __init init_IRQ(void)
 {
-	int i;
-
-	for (i = 0; i < NR_IRQS; ++i)
-		irq_affinity[i] = DEFAULT_CPU_AFFINITY;
-
 	ppc_md.init_IRQ();
 }
+
+struct hw_interrupt_type no_irq_type;
diff -urN vpP7/arch/ppc/kernel/misc.S vpP7-ppc/arch/ppc/kernel/misc.S
--- vpP7/arch/ppc/kernel/misc.S	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/kernel/misc.S	2004-08-23 16:07:29.000000000 -0400
@@ -1165,6 +1165,60 @@
 _GLOBAL(__main)
 	blr
 
+#ifdef CONFIG_LATENCY_TRACE
+
+_GLOBAL(_mcount)
+	stwu	r1, -48(r1)
+
+	stw	r3, 8(r1)
+	stw	r4, 12(r1)
+	stw	r5, 16(r1)
+	stw	r6, 20(r1)
+	stw	r7, 24(r1)
+	stw	r8, 28(r1)
+	stw	r9, 32(r1)
+	stw	r10, 36(r1)
+
+	mflr	r3
+	stw	r3, 40(r1)
+
+	mfcr	r0
+	stw	r0, 44(r1)
+
+	lwz	r4, 52(r1)
+
+	// Don't call do_mcount if we haven't relocated to 0xc0000000 yet.
+	// This assumes that the ordinary load address is below
+	// 0x80000000.
+
+	andis.	r0, r3, 0x8000
+	beq-	mcount_out
+	bl	do_mcount
+mcount_out:
+
+	lwz	r3, 8(r1)
+	lwz	r4, 12(r1)
+	lwz	r5, 16(r1)
+	lwz	r6, 20(r1)
+	lwz	r7, 24(r1)
+	lwz	r8, 28(r1)
+	lwz	r9, 32(r1)
+	lwz	r10, 36(r1)
+
+	lwz	r0, 40(r1)
+	mtctr	r0
+
+	lwz	r0, 44(r1)
+	mtcr	r0
+
+	lwz	r0, 52(r1)
+	mtlr	r0
+
+	addi	r1, r1, 48
+	bctr
+
+#endif
+
 #define SYSCALL(name) \
 _GLOBAL(name) \
 	li	r0,__NR_##name; \
diff -urN vpP7/arch/ppc/kernel/ppc_ksyms.c vpP7-ppc/arch/ppc/kernel/ppc_ksyms.c
--- vpP7/arch/ppc/kernel/ppc_ksyms.c	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/kernel/ppc_ksyms.c	2004-08-23 13:57:14.000000000 -0400
@@ -84,9 +84,6 @@
 EXPORT_SYMBOL(sys_sigreturn);
 EXPORT_SYMBOL(ppc_n_lost_interrupts);
 EXPORT_SYMBOL(ppc_lost_interrupts);
-EXPORT_SYMBOL(enable_irq);
-EXPORT_SYMBOL(disable_irq);
-EXPORT_SYMBOL(disable_irq_nosync);
 EXPORT_SYMBOL(probe_irq_mask);
 
 EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
@@ -205,7 +202,6 @@
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(smp_call_function);
 EXPORT_SYMBOL(smp_hw_index);
-EXPORT_SYMBOL(synchronize_irq);
 #endif
 
 EXPORT_SYMBOL(ppc_md);
diff -urN vpP7/arch/ppc/platforms/pmac_pic.c vpP7-ppc/arch/ppc/platforms/pmac_pic.c
--- vpP7/arch/ppc/platforms/pmac_pic.c	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/platforms/pmac_pic.c	2004-08-23 13:57:14.000000000 -0400
@@ -440,8 +440,9 @@
 				OpenPIC_InitSenses = senses;
 				OpenPIC_NumInitSenses = 128;
 				openpic2_init(PMAC_OPENPIC2_OFFSET);
-				if (request_irq(irqctrler2->intrs[0].line, k2u3_action, 0,
-						"U3->K2 Cascade", NULL))
+				if (request_irq(irqctrler2->intrs[0].line, k2u3_action,
+				                SA_NODELAY | SA_INTERRUPT,
+				                "U3->K2 Cascade", NULL))
 					printk("Unable to get OpenPIC IRQ for cascade\n");
 			}
 #endif /* CONFIG_POWER4 */
@@ -455,7 +456,7 @@
 				if (pswitch && pswitch->n_intrs) {
 					nmi_irq = pswitch->intrs[0].line;
 					openpic_init_nmi_irq(nmi_irq);
-					request_irq(nmi_irq, xmon_irq, 0,
+					request_irq(nmi_irq, xmon_irq, SA_NODELAY,
 						    "NMI - XMON", NULL);
 				}
 			}
@@ -553,7 +554,7 @@
 			(int)irq_cascade);
 		for ( i = max_real_irqs ; i < max_irqs ; i++ )
 			irq_desc[i].handler = &gatwick_pic;
-		request_irq( irq_cascade, gatwick_action, SA_INTERRUPT,
+		request_irq( irq_cascade, gatwick_action, SA_INTERRUPT | SA_NODELAY,
 			     "cascade", NULL );
 	}
 	printk("System has %d possible interrupts\n", max_irqs);
@@ -562,7 +563,7 @@
 			max_real_irqs);
 
 #ifdef CONFIG_XMON
-	request_irq(20, xmon_irq, 0, "NMI - XMON", NULL);
+	request_irq(20, xmon_irq, SA_NODELAY, "NMI - XMON", NULL);
 #endif	/* CONFIG_XMON */
 }
 
diff -urN vpP7/arch/ppc/platforms/sbc82xx.c vpP7-ppc/arch/ppc/platforms/sbc82xx.c
--- vpP7/arch/ppc/platforms/sbc82xx.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc/platforms/sbc82xx.c	2004-08-23 13:57:14.000000000 -0400
@@ -212,7 +212,7 @@
 	sbc82xx_i8259_map[1] = sbc82xx_i8259_mask; /* Set interrupt mask */
 
 	/* Request cascade IRQ */
-	if (request_irq(SIU_INT_IRQ6, sbc82xx_i8259_demux, SA_INTERRUPT,
+	if (request_irq(SIU_INT_IRQ6, sbc82xx_i8259_demux, SA_INTERRUPT | SA_NODELAY,
 			"i8259 demux", 0)) {
 		printk("Installation of i8259 IRQ demultiplexer failed.\n");
 	}
diff -urN vpP7/arch/ppc/syslib/i8259.c vpP7-ppc/arch/ppc/syslib/i8259.c
--- vpP7/arch/ppc/syslib/i8259.c	2004-06-16 01:19:22.000000000 -0400
+++ vpP7-ppc/arch/ppc/syslib/i8259.c	2004-08-23 13:57:14.000000000 -0400
@@ -185,7 +185,7 @@
 	spin_unlock_irqrestore(&i8259_lock, flags);
 
 	/* reserve our resources */
-	request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT,
+	request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT | SA_NODELAY,
 				"82c59 secondary cascade", NULL );
 	request_resource(&ioport_resource, &pic1_iores);
 	request_resource(&ioport_resource, &pic2_iores);
diff -urN vpP7/arch/ppc/syslib/m8xx_setup.c vpP7-ppc/arch/ppc/syslib/m8xx_setup.c
--- vpP7/arch/ppc/syslib/m8xx_setup.c	2004-06-16 01:19:22.000000000 -0400
+++ vpP7-ppc/arch/ppc/syslib/m8xx_setup.c	2004-08-23 13:57:14.000000000 -0400
@@ -281,7 +281,8 @@
                 irq_desc[i].handler = &i8259_pic;
         i8259_pic.irq_offset = NR_SIU_INTS;
         i8259_init();
-        request_8xxirq(ISA_BRIDGE_INT, mbx_i8259_action, 0, "8259 cascade", NULL);
+        request_8xxirq(ISA_BRIDGE_INT, mbx_i8259_action,
+                       SA_INTERRUPT | SA_NODELAY, "8259 cascade", NULL);
         enable_irq(ISA_BRIDGE_INT);
 #endif
 }
diff -urN vpP7/arch/ppc/syslib/open_pic.c vpP7-ppc/arch/ppc/syslib/open_pic.c
--- vpP7/arch/ppc/syslib/open_pic.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc/syslib/open_pic.c	2004-08-23 13:57:14.000000000 -0400
@@ -580,16 +580,16 @@
 
 	/* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
 	request_irq(OPENPIC_VEC_IPI+open_pic_irq_offset,
-		    openpic_ipi_action, SA_INTERRUPT,
+		    openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI0 (call function)", NULL);
 	request_irq(OPENPIC_VEC_IPI+open_pic_irq_offset+1,
-		    openpic_ipi_action, SA_INTERRUPT,
+		    openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI1 (reschedule)", NULL);
 	request_irq(OPENPIC_VEC_IPI+open_pic_irq_offset+2,
-		    openpic_ipi_action, SA_INTERRUPT,
+		    openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI2 (invalidate tlb)", NULL);
 	request_irq(OPENPIC_VEC_IPI+open_pic_irq_offset+3,
-		    openpic_ipi_action, SA_INTERRUPT,
+		    openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI3 (xmon break)", NULL);
 
 	for ( i = 0; i < OPENPIC_NUM_IPI ; i++ )
@@ -687,7 +687,7 @@
 {
 	openpic_cascade_irq = irq;
 	openpic_cascade_fn = cascade_fn;
-	if (request_irq(irq, no_action, SA_INTERRUPT, name, NULL))
+	if (request_irq(irq, no_action, SA_INTERRUPT | SA_NODELAY, name, NULL))
 		printk("Unable to get OpenPIC IRQ %d for cascade\n",
 				irq - open_pic_irq_offset);
 }
@@ -798,6 +798,10 @@
 }
 #endif /* notused */
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+#define __SLOW_VERSION__
+#endif
+
 /* No spinlocks, should not be necessary with the OpenPIC
  * (1 register = 1 interrupt and we have the desc lock).
  */
diff -urN vpP7/arch/ppc64/Kconfig vpP7-ppc/arch/ppc64/Kconfig
--- vpP7/arch/ppc64/Kconfig	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/Kconfig	2004-08-23 14:39:44.000000000 -0400
@@ -206,6 +206,18 @@
 	  Say Y here if you are building a kernel for a desktop, embedded
 	  or real-time system.  Say N if you are unsure.
 
+config PREEMPT_VOLUNTARY
+	bool "Voluntary Kernel Preemption"
+	default y
+	help
+	  This option reduces the latency of the kernel by adding more
+	  "explicit preemption points" to the kernel code. These new
+	  preemption points have been selected to minimize the maximum
+	  latency of rescheduling, providing faster application reactions.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
 config MSCHUNKS
 	bool
 	depends on PPC_ISERIES
diff -urN vpP7/arch/ppc64/kernel/entry.S vpP7-ppc/arch/ppc64/kernel/entry.S
--- vpP7/arch/ppc64/kernel/entry.S	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/entry.S	2004-08-23 13:57:14.000000000 -0400
@@ -548,6 +548,12 @@
 #ifdef CONFIG_PREEMPT
 	andi.	r0,r3,MSR_PR	/* Returning to user mode? */
 	bne	user_work
+
+	LOADBASE(r8, kernel_preemption)
+	lwz	r8, kernel_preemption@l(r8)
+	cmpwi	r8, 0
+	bne	restore
+
 	/* Check that preempt_count() == 0 and interrupts are enabled */
 	lwz	r8,TI_PREEMPT(r9)
 	cmpwi	cr1,r8,0
diff -urN vpP7/arch/ppc64/kernel/i8259.c vpP7-ppc/arch/ppc64/kernel/i8259.c
--- vpP7/arch/ppc64/kernel/i8259.c	2004-06-16 01:20:26.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/i8259.c	2004-08-23 13:57:14.000000000 -0400
@@ -160,7 +160,7 @@
         outb(cached_A1, 0xA1);
         outb(cached_21, 0x21);
 	spin_unlock_irqrestore(&i8259_lock, flags);
-        request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT,
+        request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT | SA_NODELAY,
                      "82c59 secondary cascade", NULL );
         
 }
diff -urN vpP7/arch/ppc64/kernel/irq.c vpP7-ppc/arch/ppc64/kernel/irq.c
--- vpP7/arch/ppc64/kernel/irq.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/irq.c	2004-08-23 13:57:14.000000000 -0400
@@ -59,8 +59,6 @@
 extern void iSeries_smp_message_recv( struct pt_regs * );
 #endif
 
-static void register_irq_proc (unsigned int irq);
-
 irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
 	[0 ... NR_IRQS-1] = {
 		.lock = SPIN_LOCK_UNLOCKED
@@ -71,78 +69,6 @@
 int ppc_spurious_interrupts;
 unsigned long lpevent_count;
 
-int
-setup_irq(unsigned int irq, struct irqaction * new)
-{
-	int shared = 0;
-	unsigned long flags;
-	struct irqaction *old, **p;
-	irq_desc_t *desc = get_irq_desc(irq);
-
-	/*
-	 * Some drivers like serial.c use request_irq() heavily,
-	 * so we have to be careful not to interfere with a
-	 * running system.
-	 */
-	if (new->flags & SA_SAMPLE_RANDOM) {
-		/*
-		 * This function might sleep, we want to call it first,
-		 * outside of the atomic block.
-		 * Yes, this might clear the entropy pool if the wrong
-		 * driver is attempted to be loaded, without actually
-		 * installing a new handler, but is this really a problem,
-		 * only the sysadmin is able to do this.
-		 */
-		rand_initialize_irq(irq);
-	}
-
-	/*
-	 * The following block of code has to be executed atomically
-	 */
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	if ((old = *p) != NULL) {
-		/* Can't share interrupts unless both agree to */
-		if (!(old->flags & new->flags & SA_SHIRQ)) {
-			spin_unlock_irqrestore(&desc->lock,flags);
-			return -EBUSY;
-		}
-
-		/* add new interrupt at end of irq queue */
-		do {
-			p = &old->next;
-			old = *p;
-		} while (old);
-		shared = 1;
-	}
-
-	*p = new;
-
-	if (!shared) {
-		desc->depth = 0;
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
-		if (desc->handler && desc->handler->startup)
-			desc->handler->startup(irq);
-		unmask_irq(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock,flags);
-
-	register_irq_proc(irq);
-	return 0;
-}
-
-#ifdef CONFIG_SMP
-
-inline void synchronize_irq(unsigned int irq)
-{
-	while (get_irq_desc(irq)->status & IRQ_INPROGRESS)
-		cpu_relax();
-}
-
-EXPORT_SYMBOL(synchronize_irq);
-
-#endif /* CONFIG_SMP */
-
 int request_irq(unsigned int irq,
 	irqreturn_t (*handler)(int, void *, struct pt_regs *),
 	unsigned long irqflags, const char * devname, void *dev_id)
@@ -152,8 +78,10 @@
 
 	if (irq >= NR_IRQS)
 		return -EINVAL;
-	if (!handler)
-		return -EINVAL;
+	if (!handler) {
+		free_irq(irq, dev_id);
+		return 0;
+	}
 
 	action = (struct irqaction *)
 		kmalloc(sizeof(struct irqaction), GFP_KERNEL);
@@ -178,140 +106,6 @@
 
 EXPORT_SYMBOL(request_irq);
 
-void free_irq(unsigned int irq, void *dev_id)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	struct irqaction **p;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	for (;;) {
-		struct irqaction * action = *p;
-		if (action) {
-			struct irqaction **pp = p;
-			p = &action->next;
-			if (action->dev_id != dev_id)
-				continue;
-
-			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
-			if (!desc->action) {
-				desc->status |= IRQ_DISABLED;
-				mask_irq(irq);
-			}
-			spin_unlock_irqrestore(&desc->lock,flags);
-
-			/* Wait to make sure it's not being used on another CPU */
-			synchronize_irq(irq);
-			kfree(action);
-			return;
-		}
-		printk("Trying to free free IRQ%d\n",irq);
-		spin_unlock_irqrestore(&desc->lock,flags);
-		break;
-	}
-	return;
-}
-
-EXPORT_SYMBOL(free_irq);
-
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock. 
- */
- 
-/**
- *	disable_irq_nosync - disable an irq without waiting
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. Unlike disable_irq(), this function does not ensure existing
- *	instances of the IRQ handler have completed before returning.
- *
- *	This function may be called from IRQ context.
- */
- 
-inline void disable_irq_nosync(unsigned int irq)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		if (!(desc->status & IRQ_PER_CPU))
-			desc->status |= IRQ_DISABLED;
-		mask_irq(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-EXPORT_SYMBOL(disable_irq_nosync);
-
-/**
- *	disable_irq - disable an irq and wait for completion
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. That is for two disables you need two enables. This
- *	function waits for any pending IRQ handlers for this interrupt
- *	to complete before returning. If you use this function while
- *	holding a resource the IRQ handler may need you will deadlock.
- *
- *	This function may be called - with care - from IRQ context.
- */
- 
-void disable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	disable_irq_nosync(irq);
-	if (desc->action)
-		synchronize_irq(irq);
-}
-
-EXPORT_SYMBOL(disable_irq);
-
-/**
- *	enable_irq - enable interrupt handling on an irq
- *	@irq: Interrupt to enable
- *
- *	Re-enables the processing of interrupts on this IRQ line
- *	providing no disable_irq calls are now in effect.
- *
- *	This function may be called from IRQ context.
- */
- 
-void enable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-		desc->status = status;
-		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
-			desc->status = status | IRQ_REPLAY;
-			hw_resend_irq(desc->handler,irq);
-		}
-		unmask_irq(irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-		break;
-	case 0:
-		printk("enable_irq(%u) unbalanced from %p\n", irq,
-		       __builtin_return_address(0));
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-EXPORT_SYMBOL(enable_irq);
-
 int show_interrupts(struct seq_file *p, void *v)
 {
 	int i = *(loff_t *) v, j;
@@ -359,106 +153,6 @@
 	return 0;
 }
 
-int handle_irq_event(int irq, struct pt_regs *regs, struct irqaction *action)
-{
-	int status = 0;
-	int retval = 0;
-
-	if (!(action->flags & SA_INTERRUPT))
-		local_irq_enable();
-
-	do {
-		status |= action->flags;
-		retval |= action->handler(irq, action->dev_id, regs);
-		action = action->next;
-	} while (action);
-	if (status & SA_SAMPLE_RANDOM)
-		add_interrupt_randomness(irq);
-	local_irq_disable();
-	return retval;
-}
-
-static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	struct irqaction *action;
-
-	if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
-		printk(KERN_ERR "irq event %d: bogus return value %x\n",
-				irq, action_ret);
-	} else {
-		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
-	}
-	dump_stack();
-	printk(KERN_ERR "handlers:\n");
-	action = desc->action;
-	do {
-		printk(KERN_ERR "[<%p>]", action->handler);
-		print_symbol(" (%s)",
-			(unsigned long)action->handler);
-		printk("\n");
-		action = action->next;
-	} while (action);
-}
-
-static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	static int count = 100;
-
-	if (count) {
-		count--;
-		__report_bad_irq(irq, desc, action_ret);
-	}
-}
-
-static int noirqdebug;
-
-static int __init noirqdebug_setup(char *str)
-{
-	noirqdebug = 1;
-	printk("IRQ lockup detection disabled\n");
-	return 1;
-}
-
-__setup("noirqdebug", noirqdebug_setup);
-
-/*
- * If 99,900 of the previous 100,000 interrupts have not been handled then
- * assume that the IRQ is stuck in some manner.  Drop a diagnostic and try to
- * turn the IRQ off.
- *
- * (The other 100-of-100,000 interrupts may have been a correctly-functioning
- *  device sharing an IRQ with the failing one)
- *
- * Called under desc->lock
- */
-static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	if (action_ret != IRQ_HANDLED) {
-		desc->irqs_unhandled++;
-		if (action_ret != IRQ_NONE)
-			report_bad_irq(irq, desc, action_ret);
-	}
-
-	desc->irq_count++;
-	if (desc->irq_count < 100000)
-		return;
-
-	desc->irq_count = 0;
-	if (desc->irqs_unhandled > 99900) {
-		/*
-		 * The interrupt is stuck
-		 */
-		__report_bad_irq(irq, desc, action_ret);
-		/*
-		 * Now kill the IRQ
-		 */
-		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
-		desc->status |= IRQ_DISABLED;
-		desc->handler->disable(irq);
-	}
-	desc->irqs_unhandled = 0;
-}
-
 /*
  * Eventually, this should take an array of interrupts and an array size
  * so it can dispatch multiple interrupts.
@@ -479,7 +173,7 @@
 	if (desc->status & IRQ_PER_CPU) {
 		/* no locking required for CPU-local interrupts: */
 		ack_irq(irq);
-		action_ret = handle_irq_event(irq, regs, desc->action);
+		action_ret = generic_handle_IRQ_event(irq, regs, desc->action);
 		desc->handler->end(irq);
 		return;
 	}
@@ -527,6 +221,9 @@
 	if (unlikely(!action))
 		goto out;
 
+	if (generic_redirect_hardirq(desc))
+		goto out_no_end;
+
 	/*
 	 * Edge triggered interrupts need to remember
 	 * pending events.
@@ -553,11 +250,11 @@
 				set_bits(irqtp->flags, &curtp->flags);
 		} else
 #endif
-			action_ret = handle_irq_event(irq, regs, action);
+			action_ret = generic_handle_IRQ_event(irq, regs, action);
 
 		spin_lock(&desc->lock);
 		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
+			generic_note_interrupt(irq, desc, action_ret);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;
@@ -574,6 +271,8 @@
 		else if (desc->handler->enable)
 			desc->handler->enable(irq);
 	}
+
+out_no_end:
 	spin_unlock(&desc->lock);
 }
 
@@ -687,174 +386,6 @@
 	irq_ctx_init();
 }
 
-static struct proc_dir_entry * root_irq_dir;
-static struct proc_dir_entry * irq_dir [NR_IRQS];
-static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
-
-/* Protected by get_irq_desc(irq)->lock. */
-#ifdef CONFIG_IRQ_ALL_CPUS
-cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
-#else  /* CONFIG_IRQ_ALL_CPUS */
-cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_NONE };
-#endif /* CONFIG_IRQ_ALL_CPUS */
-
-static int irq_affinity_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int irq_affinity_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	unsigned int irq = (long)data;
-	irq_desc_t *desc = get_irq_desc(irq);
-	int ret;
-	cpumask_t new_value, tmp;
-
-	if (!desc->handler->set_affinity)
-		return -EIO;
-
-	ret = cpumask_parse(buffer, count, new_value);
-	if (ret != 0)
-		return ret;
-
-	/*
-	 * We check for CPU_MASK_ALL in xics to send irqs to all cpus.
-	 * In some cases CPU_MASK_ALL is smaller than the cpumask (eg
-	 * NR_CPUS == 32 and cpumask is a long), so we mask it here to
-	 * be consistent.
-	 */
-	cpus_and(new_value, new_value, CPU_MASK_ALL);
-
-	/*
-	 * Grab lock here so cpu_online_map can't change, and also
-	 * protect irq_affinity[].
-	 */
-	spin_lock(&desc->lock);
-
-	/*
-	 * Do not allow disabling IRQs completely - it's a too easy
-	 * way to make the system unusable accidentally :-) At least
-	 * one online CPU still has to be targeted.
-	 */
-	cpus_and(tmp, new_value, cpu_online_map);
-	if (cpus_empty(tmp)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	irq_affinity[irq] = new_value;
-	desc->handler->set_affinity(irq, new_value);
-	ret = count;
-
-out:
-	spin_unlock(&desc->lock);
-	return ret;
-}
-
-static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	cpumask_t *mask = (cpumask_t *)data;
-	unsigned long full_count = count, err;
-	cpumask_t new_value;
-
-	err = cpumask_parse(buffer, count, new_value);
-	if (err)
-		return err;
-
-	*mask = new_value;
-
-#ifdef CONFIG_PPC_ISERIES
-	{
-		unsigned i;
-		for (i=0; i<NR_CPUS; ++i) {
-			if ( paca[i].prof_buffer && cpu_isset(i, new_value) )
-				paca[i].prof_enabled = 1;
-			else
-				paca[i].prof_enabled = 0;
-		}
-	}
-#endif
-
-	return full_count;
-}
-
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
-	struct proc_dir_entry *entry;
-	char name [MAX_NAMELEN];
-
-	if (!root_irq_dir || (irq_desc[irq].handler == NULL) || irq_dir[irq])
-		return;
-
-	memset(name, 0, MAX_NAMELEN);
-	sprintf(name, "%d", irq);
-
-	/* create /proc/irq/1234 */
-	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-
-	/* create /proc/irq/1234/smp_affinity */
-	entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
-	if (entry) {
-		entry->nlink = 1;
-		entry->data = (void *)(long)irq;
-		entry->read_proc = irq_affinity_read_proc;
-		entry->write_proc = irq_affinity_write_proc;
-	}
-
-	smp_affinity_entry[irq] = entry;
-}
-
-unsigned long prof_cpu_mask = -1;
-
-void init_irq_proc (void)
-{
-	struct proc_dir_entry *entry;
-	int i;
-
-	/* create /proc/irq */
-	root_irq_dir = proc_mkdir("irq", NULL);
-
-	/* create /proc/irq/prof_cpu_mask */
-	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
-
-	if (!entry)
-		return;
-
-	entry->nlink = 1;
-	entry->data = (void *)&prof_cpu_mask;
-	entry->read_proc = prof_cpu_mask_read_proc;
-	entry->write_proc = prof_cpu_mask_write_proc;
-
-	/*
-	 * Create entries for all existing IRQs.
-	 */
-	for_each_irq(i) {
-		if (get_irq_desc(i)->handler == NULL)
-			continue;
-		register_irq_proc(i);
-	}
-}
-
 irqreturn_t no_action(int irq, void *dev, struct pt_regs *regs)
 {
 	return IRQ_NONE;
@@ -1014,3 +545,4 @@
 
 #endif /* CONFIG_IRQSTACKS */
 
+struct hw_interrupt_type no_irq_type;
diff -urN vpP7/arch/ppc64/kernel/misc.S vpP7-ppc/arch/ppc64/kernel/misc.S
--- vpP7/arch/ppc64/kernel/misc.S	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/misc.S	2004-08-23 17:27:11.000000000 -0400
@@ -120,7 +120,7 @@
 	std	r0,16(r1)
 	stdu	r1,THREAD_SIZE-112(r6)
 	mr	r1,r6
-	bl	.handle_irq_event
+	bl	.generic_handle_IRQ_event
 	ld	r1,0(r1)
 	ld	r0,16(r1)
 	mtlr	r0
@@ -600,6 +600,35 @@
 	ld	r30,-16(r1)
 	blr
 
+#ifdef CONFIG_LATENCY_TRACE
+
+_GLOBAL(_mcount)
+	ld	r5, 0(r1)
+	mflr	r3
+	stdu	r1, -112(r1)
+	ld	r4, 16(r5)
+	std	r3, 128(r1)
+
+	// Don't call do_mcount if we haven't relocated to
+	// 0xc000000000000000 yet.  This assumes that the ordinary
+	// load address is below 0x8000000000000000.
+
+	lis	r6, 0x8000
+	rldicr	r6, r6, 32, 31
+	and.	r0, r3, r6
+	
+	beq-	mcount_out
+	bl	.do_mcount
+mcount_out:
+
+	ld	r0, 128(r1)
+	mtlr	r0
+
+	addi	r1, r1, 112
+	blr
+
+#endif
+
 #ifdef CONFIG_PPC_ISERIES	/* hack hack hack */
 #define ppc_rtas	sys_ni_syscall
 #endif
diff -urN vpP7/arch/ppc64/kernel/open_pic.c vpP7-ppc/arch/ppc64/kernel/open_pic.c
--- vpP7/arch/ppc64/kernel/open_pic.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/open_pic.c	2004-08-23 16:44:16.000000000 -0400
@@ -78,6 +78,12 @@
 
 OpenPIC_SourcePtr ISU[OPENPIC_MAX_ISU];
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+static void openpic_ack_irq(unsigned int irq);
+#else
+#define openpic_ack_irq NULL
+#endif
+
 static void openpic_end_irq(unsigned int irq_nr);
 static void openpic_set_affinity(unsigned int irq_nr, cpumask_t cpumask);
 
@@ -87,7 +93,7 @@
 	NULL,
 	openpic_enable_irq,
 	openpic_disable_irq,
-	NULL,
+	openpic_ack_irq,
 	openpic_end_irq,
 	openpic_set_affinity
 };
@@ -440,7 +446,7 @@
 
 	if (naca->interrupt_controller == IC_OPEN_PIC) {
 		/* Initialize the cascade */
-		if (request_irq(NUM_ISA_INTERRUPTS, no_action, SA_INTERRUPT,
+		if (request_irq(NUM_ISA_INTERRUPTS, no_action, SA_INTERRUPT | SA_NODELAY,
 				"82c59 cascade", NULL))
 			printk(KERN_ERR "Unable to get OpenPIC IRQ 0 for cascade\n");
 		i8259_init();
@@ -641,13 +647,13 @@
 		return;
 
 	/* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
-	request_irq(openpic_vec_ipi, openpic_ipi_action, SA_INTERRUPT,
+	request_irq(openpic_vec_ipi, openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI0 (call function)", NULL);
-	request_irq(openpic_vec_ipi+1, openpic_ipi_action, SA_INTERRUPT,
+	request_irq(openpic_vec_ipi+1, openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		   "IPI1 (reschedule)", NULL);
-	request_irq(openpic_vec_ipi+2, openpic_ipi_action, SA_INTERRUPT,
+	request_irq(openpic_vec_ipi+2, openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		   "IPI2 (unused)", NULL);
-	request_irq(openpic_vec_ipi+3, openpic_ipi_action, SA_INTERRUPT,
+	request_irq(openpic_vec_ipi+3, openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		   "IPI3 (debugger break)", NULL);
 
 	for ( i = 0; i < OPENPIC_NUM_IPI ; i++ )
@@ -834,11 +840,28 @@
 }
 #endif
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+
+static void openpic_ack_irq(unsigned int irq_nr)
+{
+	openpic_disable_irq(irq_nr);
+	openpic_eoi();
+}
+
+static void openpic_end_irq(unsigned int irq_nr)
+{
+	openpic_enable_irq(irq_nr);
+}
+
+#else
+
 static void openpic_end_irq(unsigned int irq_nr)
 {
 	openpic_eoi();
 }
 
+#endif
+
 static void openpic_set_affinity(unsigned int irq_nr, cpumask_t cpumask)
 {
 	cpumask_t tmp;
diff -urN vpP7/arch/ppc64/kernel/open_pic_u3.c vpP7-ppc/arch/ppc64/kernel/open_pic_u3.c
--- vpP7/arch/ppc64/kernel/open_pic_u3.c	2004-06-16 01:18:37.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/open_pic_u3.c	2004-08-23 16:43:03.000000000 -0400
@@ -251,11 +251,30 @@
 				 (sense ? OPENPIC_SENSE_LEVEL : 0));
 }
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+
+static void openpic2_ack_irq(unsigned int irq_nr)
+{
+	openpic2_disable_irq(irq_nr);
+	openpic2_eoi();
+}
+
+static void openpic2_end_irq(unsigned int irq_nr)
+{
+	openpic2_enable_irq(irq_nr);
+}
+
+#else
+
+#define openpic2_ack_irq NULL
+
 static void openpic2_end_irq(unsigned int irq_nr)
 {
 	openpic2_eoi();
 }
 
+#endif
+
 int openpic2_get_irq(struct pt_regs *regs)
 {
 	int irq = openpic2_irq();
@@ -271,7 +290,7 @@
 	NULL,
 	openpic2_enable_irq,
 	openpic2_disable_irq,
-	NULL,
+	openpic2_ack_irq,
 	openpic2_end_irq,
 };
 
diff -urN vpP7/arch/ppc64/kernel/pmac_setup.c vpP7-ppc/arch/ppc64/kernel/pmac_setup.c
--- vpP7/arch/ppc64/kernel/pmac_setup.c	2004-06-16 01:18:58.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/pmac_setup.c	2004-08-23 13:57:14.000000000 -0400
@@ -409,8 +409,8 @@
  */
 static int __init pmac_irq_cascade_init(void)
 {
-	if (request_irq(pmac_cascade_irq, pmac_u3_do_cascade, 0,
-			"U3->K2 Cascade", NULL))
+	if (request_irq(pmac_cascade_irq, pmac_u3_do_cascade,
+	                SA_NODELAY | SA_INTERRUPT, "U3->K2 Cascade", NULL))
 		printk(KERN_ERR "Unable to get OpenPIC IRQ for cascade\n");
 	return 0;
 }
diff -urN vpP7/arch/ppc64/kernel/xics.c vpP7-ppc/arch/ppc64/kernel/xics.c
--- vpP7/arch/ppc64/kernel/xics.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/xics.c	2004-08-23 13:57:14.000000000 -0400
@@ -572,7 +572,7 @@
 	if (naca->interrupt_controller == IC_PPC_XIC &&
 	    xics_irq_8259_cascade != -1) {
 		if (request_irq(irq_offset_up(xics_irq_8259_cascade),
-				no_action, 0, "8259 cascade", NULL))
+				no_action, SA_NODELAY, "8259 cascade", NULL))
 			printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 "
 					"cascade\n");
 		i8259_init();
@@ -587,7 +587,7 @@
 	virt_irq_to_real_map[XICS_IPI] = XICS_IPI;
 
 	/* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
-	request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT,
+	request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI", NULL);
 	get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU;
 }
diff -urN vpP7/include/asm-i386/hw_irq.h vpP7-ppc/include/asm-i386/hw_irq.h
--- vpP7/include/asm-i386/hw_irq.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/asm-i386/hw_irq.h	2004-08-23 13:57:14.000000000 -0400
@@ -54,7 +54,6 @@
 void init_8259A(int aeoi);
 void FASTCALL(send_IPI_self(int vector));
 void init_VISWS_APIC_irqs(void);
-extern void init_hardirqs(void);
 void setup_IO_APIC(void);
 void disable_IO_APIC(void);
 void print_IO_APIC(void);
diff -urN vpP7/include/asm-ppc/hardirq.h vpP7-ppc/include/asm-ppc/hardirq.h
--- vpP7/include/asm-ppc/hardirq.h	2004-06-16 01:18:37.000000000 -0400
+++ vpP7-ppc/include/asm-ppc/hardirq.h	2004-08-23 13:57:14.000000000 -0400
@@ -5,7 +5,7 @@
 #include <linux/config.h>
 #include <linux/cache.h>
 #include <linux/smp_lock.h>
-#include <asm/irq.h>
+#include <linux/irq.h>
 
 /* The __last_jiffy_stamp field is needed to ensure that no decrementer
  * interrupt is lost on SMP machines. Since on most CPUs it is in the same
@@ -71,15 +71,11 @@
  * Are we doing bottom half or hardware interrupt processing?
  * Are we in a softirq context? Interrupt context?
  */
-#define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
+#define in_irq()		(hardirq_count() || (current->flags & PF_HARDIRQ))
+#define in_softirq()		(softirq_count() || (current->flags & PF_SOFTIRQ))
 #define in_interrupt()		(irq_count())
 
-
-#define hardirq_trylock()	(!in_interrupt())
-#define hardirq_endlock()	do { } while (0)
-
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
+#define irq_enter()		(add_preempt_count(HARDIRQ_OFFSET))
 
 #ifdef CONFIG_PREEMPT
 # define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
@@ -94,17 +90,41 @@
 
 #define irq_exit()							\
 do {									\
-	preempt_count() -= IRQ_EXIT_OFFSET;				\
+	sub_preempt_count(IRQ_EXIT_OFFSET);				\
 	if (!in_interrupt() && softirq_pending(smp_processor_id()))	\
 		do_softirq();						\
 	preempt_enable_no_resched();					\
 } while (0)
 
-#ifndef CONFIG_SMP
-# define synchronize_irq(irq)	barrier()
-#else
-  extern void synchronize_irq(unsigned int irq);
-#endif /* CONFIG_SMP */
+static inline void synchronize_irq(unsigned int irq)
+{
+	generic_synchronize_irq(irq);
+}
+
+static inline void free_irq(unsigned int irq, void *dev_id)
+{
+	generic_free_irq(irq, dev_id);
+}
+
+static inline void disable_irq_nosync(unsigned int irq)
+{
+	generic_disable_irq_nosync(irq);
+}
+
+static inline void disable_irq(unsigned int irq)
+{
+	generic_disable_irq(irq);
+}
+
+static inline void enable_irq(unsigned int irq)
+{
+	generic_enable_irq(irq);
+}
+
+static inline int setup_irq(unsigned int irq, struct irqaction *action)
+{
+	return generic_setup_irq(irq, action);
+}
 
 #endif /* __ASM_HARDIRQ_H */
 #endif /* __KERNEL__ */
diff -urN vpP7/include/asm-ppc/irq.h vpP7-ppc/include/asm-ppc/irq.h
--- vpP7/include/asm-ppc/irq.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc/irq.h	2004-08-23 13:57:14.000000000 -0400
@@ -6,10 +6,6 @@
 #include <asm/machdep.h>		/* ppc_md */
 #include <asm/atomic.h>
 
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-
 /*
  * These constants are used for passing information about interrupt
  * signal polarity and level/edge sensing to the low-level PIC chip
@@ -324,7 +320,6 @@
 
 struct irqaction;
 struct pt_regs;
-int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
 
 #endif /* _ASM_IRQ_H */
 #endif /* __KERNEL__ */
diff -urN vpP7/include/asm-ppc/signal.h vpP7-ppc/include/asm-ppc/signal.h
--- vpP7/include/asm-ppc/signal.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc/signal.h	2004-08-23 13:57:14.000000000 -0400
@@ -111,6 +111,7 @@
 #define SA_PROBE		SA_ONESHOT
 #define SA_SAMPLE_RANDOM	SA_RESTART
 #define SA_SHIRQ		0x04000000
+#define SA_NODELAY              0x02000000
 #endif /* __KERNEL__ */
 
 #define SIG_BLOCK          0	/* for blocking signals */
diff -urN vpP7/include/asm-ppc64/hardirq.h vpP7-ppc/include/asm-ppc64/hardirq.h
--- vpP7/include/asm-ppc64/hardirq.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc64/hardirq.h	2004-08-23 13:57:14.000000000 -0400
@@ -12,6 +12,7 @@
 #include <linux/config.h>
 #include <linux/cache.h>
 #include <linux/preempt.h>
+#include <linux/irq.h>
 
 typedef struct {
 	unsigned int __softirq_pending;
@@ -70,15 +71,11 @@
  * Are we doing bottom half or hardware interrupt processing?
  * Are we in a softirq context? Interrupt context?
  */
-#define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
+#define in_irq()		(hardirq_count() || (current->flags & PF_HARDIRQ))
+#define in_softirq()		(softirq_count() || (current->flags & PF_SOFTIRQ))
 #define in_interrupt()		(irq_count())
 
-
-#define hardirq_trylock()	(!in_interrupt())
-#define hardirq_endlock()	do { } while (0)
-
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
+#define irq_enter()		(add_preempt_count(HARDIRQ_OFFSET))
 
 #ifdef CONFIG_PREEMPT
 # define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
@@ -89,20 +86,44 @@
 # define preemptible()	0
 # define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
 #endif
+
 #define irq_exit()							\
 do {									\
-		preempt_count() -= IRQ_EXIT_OFFSET;			\
-		if (!in_interrupt() && softirq_pending(smp_processor_id())) \
-			do_softirq();					\
-		preempt_enable_no_resched();				\
+	sub_preempt_count(IRQ_EXIT_OFFSET);				\
+	if (!in_interrupt() && softirq_pending(smp_processor_id()))	\
+		do_softirq();						\
+	preempt_enable_no_resched();					\
 } while (0)
 
-#ifndef CONFIG_SMP
-# define synchronize_irq(irq)	barrier()
-#else
-  extern void synchronize_irq(unsigned int irq);
-#endif /* CONFIG_SMP */
+static inline void synchronize_irq(unsigned int irq)
+{
+	generic_synchronize_irq(irq);
+}
+
+static inline void free_irq(unsigned int irq, void *dev_id)
+{
+	generic_free_irq(irq, dev_id);
+}
+
+static inline void disable_irq_nosync(unsigned int irq)
+{
+	generic_disable_irq_nosync(irq);
+}
+
+static inline void disable_irq(unsigned int irq)
+{
+	generic_disable_irq(irq);
+}
+
+static inline void enable_irq(unsigned int irq)
+{
+	generic_enable_irq(irq);
+}
+
+static inline int setup_irq(unsigned int irq, struct irqaction *action)
+{
+	return generic_setup_irq(irq, action);
+}
 
-#endif /* __KERNEL__ */
-	
 #endif /* __ASM_HARDIRQ_H */
+#endif /* __KERNEL__ */
diff -urN vpP7/include/asm-ppc64/irq.h vpP7-ppc/include/asm-ppc64/irq.h
--- vpP7/include/asm-ppc64/irq.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc64/irq.h	2004-08-23 15:42:48.000000000 -0400
@@ -17,10 +17,6 @@
  */
 #define NR_IRQS		512
 
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-
 /* this number is used when no interrupt has been assigned */
 #define NO_IRQ			(-1)
 
@@ -80,7 +76,6 @@
 
 struct irqaction;
 struct pt_regs;
-int handle_irq_event(int, struct pt_regs *, struct irqaction *);
 
 #ifdef CONFIG_IRQSTACKS
 /*
diff -urN vpP7/include/asm-ppc64/signal.h vpP7-ppc/include/asm-ppc64/signal.h
--- vpP7/include/asm-ppc64/signal.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc64/signal.h	2004-08-23 13:57:14.000000000 -0400
@@ -108,6 +108,7 @@
 #define SA_PROBE		SA_ONESHOT
 #define SA_SAMPLE_RANDOM	SA_RESTART
 #define SA_SHIRQ		0x04000000
+#define SA_NODELAY              0x02000000
 #endif
 
 #define SIG_BLOCK          0	/* for blocking signals */
diff -urN vpP7/include/linux/interrupt.h vpP7-ppc/include/linux/interrupt.h
--- vpP7/include/linux/interrupt.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/linux/interrupt.h	2004-08-23 13:57:14.000000000 -0400
@@ -95,7 +95,6 @@
 	void	*data;
 };
 
-extern void do_hardirq(irq_desc_t *desc);
 asmlinkage void do_softirq(void);
 extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
 extern void softirq_init(void);
diff -urN vpP7/include/linux/irq.h vpP7-ppc/include/linux/irq.h
--- vpP7/include/linux/irq.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/linux/irq.h	2004-08-23 13:57:14.000000000 -0400
@@ -83,6 +83,8 @@
 extern void generic_disable_irq(unsigned int irq);
 extern void generic_enable_irq(unsigned int irq);
 extern void generic_note_interrupt(int irq, irq_desc_t *desc, int action_ret);
+extern void do_hardirq(irq_desc_t *desc);
+extern void init_hardirqs(void);
 
 extern hw_irq_controller no_irq_type;  /* needed in every arch ? */
 
diff -urN vpP7/include/linux/kernel.h vpP7-ppc/include/linux/kernel.h
--- vpP7/include/linux/kernel.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/linux/kernel.h	2004-08-23 13:57:14.000000000 -0400
@@ -48,7 +48,10 @@
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 extern int voluntary_resched(void);
 #else
-# define voluntary_resched() 0
+static inline int voluntary_resched(void)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
diff -urN vpP7/include/linux/sched.h vpP7-ppc/include/linux/sched.h
--- vpP7/include/linux/sched.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/linux/sched.h	2004-08-23 13:57:14.000000000 -0400
@@ -1114,8 +1114,6 @@
  * submitted upstream will of course use need_resched()/cond_resched().
  */
 
-extern int voluntary_resched(void);
-
 static inline int voluntary_need_resched(void)
 {
 	if (voluntary_preemption >= 1)
@@ -1136,9 +1134,15 @@
 }
 
 #else
-# define voluntary_resched() 0
-# define voluntary_resched_lock(lock) 0
-# define voluntary_need_resched() 0
+static inline int voluntary_resched_lock(spinlock_t *lock)
+{
+	return 0;
+}
+
+static inline int voluntary_need_resched(void)
+{
+	return 0;
+}
 #endif
 
 /* Reevaluate whether the task has signals pending delivery.
diff -urN vpP7/init/main.c vpP7-ppc/init/main.c
--- vpP7/init/main.c	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/init/main.c	2004-08-23 13:57:14.000000000 -0400
@@ -397,9 +397,9 @@
 
 static void noinline rest_init(void)
 {
+	system_state = SYSTEM_BOOTING_SCHEDULER_OK;
 	kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND);
 	numa_default_policy();
-	system_state = SYSTEM_BOOTING_SCHEDULER_OK;
 	unlock_kernel();
  	cpu_idle();
 } 
@@ -669,6 +669,8 @@
 	smp_init();
 	sched_init_smp();
 
+	init_hardirqs();
+
 	/*
 	 * Do this before initcalls, because some drivers want to access
 	 * firmware files.
diff -urN vpP7/kernel/hardirq.c vpP7-ppc/kernel/hardirq.c
--- vpP7/kernel/hardirq.c	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/kernel/hardirq.c	2004-08-23 13:57:14.000000000 -0400
@@ -11,6 +11,7 @@
 #include <linux/mm.h>
 #include <linux/kallsyms.h>
 #include <linux/proc_fs.h>
+#include <linux/irq.h>
 #include <asm/uaccess.h>
 
 extern struct irq_desc irq_desc[NR_IRQS];
@@ -31,9 +32,8 @@
 	if (voluntary_preemption < 3 || (desc->status & IRQ_NODELAY))
 		return 0;
 
-	BUG_ON(!desc->thread);
 	BUG_ON(!irqs_disabled());
-	if (desc->thread->state != TASK_RUNNING)
+	if (desc->thread && desc->thread->state != TASK_RUNNING)
 		wake_up_process(desc->thread);
 
 	return 1;
@@ -369,7 +369,10 @@
 	if (!shared) {
 		desc->depth = 0;
 		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
-		desc->handler->startup(irq);
+		if (desc->handler->startup)
+			desc->handler->startup(irq);
+		else
+			desc->handler->enable(irq);
 	}
 	spin_unlock_irqrestore(&desc->lock,flags);
 
@@ -420,7 +423,10 @@
 			*pp = action->next;
 			if (!desc->action) {
 				desc->status |= IRQ_DISABLED;
-				desc->handler->shutdown(irq);
+				if (desc->handler->shutdown)
+					desc->handler->shutdown(irq);
+				else
+					desc->handler->disable(irq);
 			}
 			recalculate_desc_flags(desc);
 			spin_unlock_irqrestore(&desc->lock,flags);
@@ -480,9 +486,11 @@
 	return 0;
 }
 
+static int ok_to_create_irq_threads;
+
 static int start_irq_thread(int irq, struct irq_desc *desc)
 {
-	if (desc->thread)
+	if (desc->thread || !ok_to_create_irq_threads)
 		return 0;
 
 	printk("requesting new irq thread for IRQ%d...\n", irq);
@@ -492,9 +500,31 @@
 		return -ENOMEM;
 	}
 
+	// An interrupt may have come in before the thread pointer was
+	// stored in desc->thread; make sure the thread gets woken up in
+	// such a case.
+	
+	smp_mb();
+	
+	if (desc->status & IRQ_INPROGRESS)
+		wake_up_process(desc->thread);
+	
 	return 0;
 }
 
+void init_hardirqs(void)
+{	
+	int i;
+	ok_to_create_irq_threads = 1;
+
+	for (i = 0; i < NR_IRQS; i++) {
+		irq_desc_t *desc = irq_desc + i;
+		
+		if (desc->action && !(desc->status & IRQ_NODELAY))
+			start_irq_thread(i, desc);
+	}
+}
+
 #ifdef CONFIG_SMP
 
 static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
diff -urN vpP7/kernel/latency.c vpP7-ppc/kernel/latency.c
--- vpP7/kernel/latency.c	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/kernel/latency.c	2004-08-23 14:06:38.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
+#include <asm/time.h>
 
 unsigned long preempt_thresh;
 unsigned long preempt_max_latency;
@@ -100,6 +101,8 @@
 	___trace(eip, parent_eip);
 }
 
+#ifdef CONFIG_X86
+
 void notrace mcount(void)
 {
 	MCOUNT_HEAD
@@ -112,6 +115,22 @@
 
 EXPORT_SYMBOL(mcount);
 
+#else
+
+#ifdef CONFIG_PPC
+void _mcount(void);
+EXPORT_SYMBOL(_mcount);
+#else
+#error What is mcount called?
+#endif
+
+void notrace do_mcount(void *func, void *called_from)
+{
+	___trace((unsigned long)func, (unsigned long)called_from);
+}
+
+#endif
+
 static void notrace print_name(struct seq_file *m, unsigned long eip)
 {
 	char namebuf[KSYM_NAME_LEN+1];
@@ -142,7 +161,13 @@
 
 static unsigned long notrace cycles_to_usecs(cycles_t delta)
 {
+#ifdef CONFIG_X86
 	do_div(delta, cpu_khz/1000);
+#elif defined(CONFIG_PPC)
+	delta = mulhwu(tb_to_us, delta);
+#else
+	#error Implement cycles_to_usecs.
+#endif
 
 	return (unsigned long) delta;
 }
@@ -248,18 +273,15 @@
 #endif
 	unsigned long parent_eip = (unsigned long)__builtin_return_address(1);
 	unsigned long latency;
-	cycles_t delta;
 
 	atomic_inc(&tr->disabled);
-	delta = get_cycles() - tr->preempt_timestamp;
-	do_div(delta, cpu_khz/1024);
-	latency = (unsigned long) delta;
+	latency = cycles_to_usecs(get_cycles() - tr->preempt_timestamp);
 
 	if (preempt_thresh) {
 		if (latency < preempt_thresh)
 			goto out;
 	} else {
-		if (latency < preempt_max_latency)
+		if (latency <= preempt_max_latency)
 			goto out;
 	}
 
--- vpP7/kernel/sysctl.c	2004-08-23 17:39:58.000000000 -0400
+++ vpP7-ppc/kernel/sysctl.c	2004-08-23 17:56:41.000000000 -0400
@@ -285,7 +285,7 @@
 		.data		= &preempt_max_latency,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_doulongvec_minmax,
 	},
 #ifdef CONFIG_LATENCY_TRACE
 	{

^ permalink raw reply	[flat|nested] 93+ messages in thread

* [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-23 22:18 [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
@ 2004-08-24  6:14 ` Ingo Molnar
  2004-08-24 17:43   ` K.R. Foley
                     ` (8 more replies)
  2004-08-24 19:51 ` [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
  1 sibling, 9 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-08-24  6:14 UTC (permalink / raw)
  To: Scott Wood; +Cc: manas.saksena, linux-kernel


* Scott Wood <scott@timesys.com> wrote:

> I have attached a port of the voluntary preempt patch to PPC and
> PPC64.  The patch is against P7, but it applies against P8 as well.

thanks Scott, i've applied your patch to my tree - all the changes and
improvements look good (except for a small compilation problem on x86,
asm/time.h doesn't exist there - asm/rtc.h does). The resulting code
booted fine on an SMP and on a UP x86 system. I've uploaded -P9:

  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9

(there are no other changes in -P9.)

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
@ 2004-08-24 17:43   ` K.R. Foley
  2004-08-24 20:32     ` Lee Revell
  2004-08-24 19:20   ` K.R. Foley
                     ` (7 subsequent siblings)
  8 siblings, 1 reply; 93+ messages in thread
From: K.R. Foley @ 2004-08-24 17:43 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Scott Wood, manas.saksena, linux-kernel

Ingo Molnar wrote:
> * Scott Wood <scott@timesys.com> wrote:
> 
> 
>>I have attached a port of the voluntary preempt patch to PPC and
>>PPC64.  The patch is against P7, but it applies against P8 as well.
> 
> 
> thanks Scott, i've applied your patch to my tree - all the changes and
> improvements look good (except for a small compilation problem on x86,
> asm/time.h doesn't exist there - asm/rtc.h does). The resulting code
> booted fine on an SMP and on a UP x86 system. I've uploaded -P9:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> 
> (there are no other changes in -P9.)
> 
> 	Ingo
> -

~254 usec latency seen in kswapd:

http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace2.txt




^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
  2004-08-24 17:43   ` K.R. Foley
@ 2004-08-24 19:20   ` K.R. Foley
  2004-08-24 22:47   ` Lee Revell
                     ` (6 subsequent siblings)
  8 siblings, 0 replies; 93+ messages in thread
From: K.R. Foley @ 2004-08-24 19:20 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Scott Wood, manas.saksena, linux-kernel

Ingo Molnar wrote:
<snip>
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> 
> (there are no other changes in -P9.)
> 
> 	Ingo

And a couple more interesting traces here:

http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace4.txt

and here:

http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace5.txt

kr

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] PPC/PPC64 port of voluntary preempt patch
  2004-08-23 22:18 [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
@ 2004-08-24 19:51 ` Scott Wood
  2004-08-26  3:17   ` Lee Revell
  2004-08-28 12:36   ` Ingo Molnar
  1 sibling, 2 replies; 93+ messages in thread
From: Scott Wood @ 2004-08-24 19:51 UTC (permalink / raw)
  To: mingo; +Cc: manas.saksena, linux-kernel

On Mon, Aug 23, 2004 at 06:18:16PM -0400, Scott Wood wrote:
> I have attached a port of the voluntary preempt patch to PPC and
> PPC64.  The patch is against P7, but it applies against P8 as well.
> 
> I've tested it on a dual G5 Mac, both in uniprocessor and SMP.
> 
> Some notes on changes to the generic part of the patch/existing
> generic code:

Another thing that I forgot to mention is that I have some doubts as
to the current generic_synchronize_irq() implementation.  Given that
IRQs are now preemptible, a higher priority RT thread calling
synchronize_irq can't just spin waiting for the IRQ to complete, as
it never will (and it wouldn't be a great idea for non-RT tasks
either).  I see that a do_hardirq() call was added, presumably to
hurry completion of the interrupt, but is that really safe?  It looks
like that could end up re-entering handlers, and you'd still have a
partially executed handler after synchronize_irq() finishes (causing
not only an extra end() call, but possibly code being executed after
it's been unloaded, and other synchronization violations).

If I'm missing something, please let me know, but I don't see a good
way to implement it without blocking for the IRQ thread's completion
(such as with the per-IRQ waitqueues in M5).

-Scott

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-24 17:43   ` K.R. Foley
@ 2004-08-24 20:32     ` Lee Revell
  2004-08-24 20:53       ` Scott Wood
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-24 20:32 UTC (permalink / raw)
  To: K.R. Foley; +Cc: Ingo Molnar, Scott Wood, manas.saksena, linux-kernel

On Tue, 2004-08-24 at 13:43, K.R. Foley wrote:
> Ingo Molnar wrote:
> > * Scott Wood <scott@timesys.com> wrote:
> > 
> > 
> >>I have attached a port of the voluntary preempt patch to PPC and
> >>PPC64.  The patch is against P7, but it applies against P8 as well.
> > 
> > 
> > thanks Scott, i've applied your patch to my tree - all the changes and
> > improvements look good (except for a small compilation problem on x86,
> > asm/time.h doesnt exist there - asm/rtc.h does). The resulting code
> > booted fine on an SMP and on a UP x86 system. I've uploaded -P9:
> > 
> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> > 
> > (there are no other changes in -P9.)
> > 
> > 	Ingo
> > -
> 
> ~254 usec latency seen in kswapd:
> 
> http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace2.txt
> 

I am able to generate unbounded latencies in kswapd by running `make
-j12' for any C++ program that uses KDE/Qt.  The build will allocate all
available RAM, then all available swap, then the machine grinds to a
halt.

I am not sure this is solvable though.  If you fire off a bunch of
processes that try to allocate way more memory than is physically
available then you will have worse problems than latency.

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-24 20:32     ` Lee Revell
@ 2004-08-24 20:53       ` Scott Wood
  0 siblings, 0 replies; 93+ messages in thread
From: Scott Wood @ 2004-08-24 20:53 UTC (permalink / raw)
  To: Lee Revell
  Cc: K.R. Foley, Ingo Molnar, Scott Wood, manas.saksena, linux-kernel

On Tue, Aug 24, 2004 at 04:32:39PM -0400, Lee Revell wrote:
> I am not sure this is solvable though.  If you fire off a bunch of
> processes that try to allocate way more memory than is physically
> available then you will have worse problems than latency.

I don't see why it would be unsolvable if you limit the expectation
of reasonable latency to processes that have mlockall()ed and
allocated all the memory they need in advance (and don't have to wait
on processes that haven't).  Obviously, the latency for actually
allocating memory isn't going to be too good in such a case (though
strict no-overcommit could decrease the latency of failure to
allocate).

-Scott

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
  2004-08-24 17:43   ` K.R. Foley
  2004-08-24 19:20   ` K.R. Foley
@ 2004-08-24 22:47   ` Lee Revell
  2004-08-25  2:00   ` Lee Revell
                     ` (5 subsequent siblings)
  8 siblings, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-24 22:47 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Scott Wood, manas.saksena, linux-kernel

On Tue, 2004-08-24 at 02:14, Ingo Molnar wrote:
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> 

modprobe'ing causes this latency:

http://krustophenia.net/testresults.php?dataset=2.6.8.1-P9#/var/www/2.6.8.1-P9/trace1.txt

This one is caused by flood pinging the broadcast address (ping -s 65507
-f $BROADCAST_ADDRESS):

http://krustophenia.net/testresults.php?dataset=2.6.8.1-P9#/var/www/2.6.8.1-P9/trace2.txt

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
                     ` (2 preceding siblings ...)
  2004-08-24 22:47   ` Lee Revell
@ 2004-08-25  2:00   ` Lee Revell
  2004-08-25  3:17   ` K.R. Foley
                     ` (4 subsequent siblings)
  8 siblings, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-25  2:00 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Scott Wood, manas.saksena, linux-kernel

On Tue, 2004-08-24 at 02:14, Ingo Molnar wrote:

>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> 

Here is an 815 usec latency triggered by mounting a 120GB ext3
partition:

http://krustophenia.net/testresults.php?dataset=2.6.8.1-P9#/var/www/2.6.8.1-P9/trace3.txt

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
                     ` (3 preceding siblings ...)
  2004-08-25  2:00   ` Lee Revell
@ 2004-08-25  3:17   ` K.R. Foley
  2004-08-25  3:22     ` Lee Revell
  2004-08-25  3:26   ` K.R. Foley
                     ` (3 subsequent siblings)
  8 siblings, 1 reply; 93+ messages in thread
From: K.R. Foley @ 2004-08-25  3:17 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Scott Wood, manas.saksena, linux-kernel

Ingo Molnar wrote:
> * Scott Wood <scott@timesys.com> wrote:
> 
> 
>>I have attached a port of the voluntary preempt patch to PPC and
>>PPC64.  The patch is against P7, but it applies against P8 as well.
> 
> 
> thanks Scott, i've applied your patch to my tree - all the changes and
> improvements look good (except for a small compilation problem on x86,
> asm/time.h doesnt exist there - asm/rtc.h does). The resulting code
> booted fine on an SMP and on a UP x86 system. I've uploaded -P9:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> 
> (there are no other changes in -P9.)
> 
> 	Ingo

latency trace of ~148 usec in scsi_request? I don't know if this is real 
or not. Note the 79 usec here:

00000001 0.107ms (+0.079ms): sd_init_command (scsi_prep_fn)

Entire trace is here:

http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace7.txt


Is this possible? This is not the first time I have seen this. There is 
another one here:

http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace5.txt

kr

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-25  3:17   ` K.R. Foley
@ 2004-08-25  3:22     ` Lee Revell
  2004-08-25 14:34       ` K.R. Foley
  2004-08-25 16:00       ` K.R. Foley
  0 siblings, 2 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-25  3:22 UTC (permalink / raw)
  To: K.R. Foley; +Cc: Ingo Molnar, Scott Wood, manas.saksena, linux-kernel

On Tue, 2004-08-24 at 23:17, K.R. Foley wrote:
> Ingo Molnar wrote:
> > 
> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> > 

> latency trace of ~148 usec in scsi_request? I don't know if this is real 
> or not. Note the 79 usec here:
> 
> 00000001 0.107ms (+0.079ms): sd_init_command (scsi_prep_fn)
> 
> Entire trace is here:
> 
> http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace7.txt
> 
> 
> Is this possible? This is not the first time I have seen this. There is 
> another one here:
> 
> http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace5.txt
> 

This looks like a real latency.  What is
/sys/block/sdX/queue/max_sectors_kb set to?  Does lowering it help?

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
                     ` (4 preceding siblings ...)
  2004-08-25  3:17   ` K.R. Foley
@ 2004-08-25  3:26   ` K.R. Foley
  2004-08-25  9:58   ` [patch] voluntary-preempt-2.6.8.1-P9 : oprofile latency at 3.3ms P.O. Gaillard
                     ` (2 subsequent siblings)
  8 siblings, 0 replies; 93+ messages in thread
From: K.R. Foley @ 2004-08-25  3:26 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Scott Wood, manas.saksena, linux-kernel, rlrevell

Any idea what is going on here?

dmesg gives me this:


(kswapd0/32): new 305 us maximum-latency critical section.
  => started at: <get_swap_page+0x28/0x280>
  => ended at:   <get_swap_page+0x96/0x280>
  [<c0135dbf>] check_preempt_timing+0x19f/0x240
  [<c0151bf6>] get_swap_page+0x96/0x280
  [<c0151bf6>] get_swap_page+0x96/0x280
  [<c0135f88>] sub_preempt_count+0x48/0x60
  [<c0135f88>] sub_preempt_count+0x48/0x60
  [<c0151bf6>] get_swap_page+0x96/0x280
  [<c015155a>] add_to_swap+0x2a/0xd0
  [<c0142a73>] shrink_list+0x4e3/0x530
  [<c01413a6>] lru_add_drain+0x36/0x70
  [<c0135f88>] sub_preempt_count+0x48/0x60
  [<c0142bc2>] shrink_cache+0x102/0x370
  [<c0135f88>] sub_preempt_count+0x48/0x60
  [<c0142c26>] shrink_cache+0x166/0x370
  [<c014345e>] shrink_zone+0xae/0xe0
  [<c0143871>] balance_pgdat+0x1e1/0x250
  [<c01439a5>] kswapd+0xc5/0xe0
  [<c0117cc0>] autoremove_wake_function+0x0/0x60
  [<c0106336>] ret_from_fork+0x6/0x14
  [<c0117cc0>] autoremove_wake_function+0x0/0x60
  [<c01438e0>] kswapd+0x0/0xe0
  [<c0104505>] kernel_thread_helper+0x5/0x10


The trace actually looks like this:

preemption latency trace v1.0.2
-------------------------------
  latency: 305 us, entries: 2 (2)
     -----------------
     | task: kswapd0/32, uid:0 nice:0 policy:0 rt_prio:0
     -----------------
  => started at: get_swap_page+0x28/0x280
  => ended at:   get_swap_page+0x96/0x280
=======>
00000001 0.000ms (+0.000ms): get_swap_page (add_to_swap)
00000001 0.306ms (+0.306ms): sub_preempt_count (get_swap_page)


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9 : oprofile latency at 3.3ms
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
                     ` (5 preceding siblings ...)
  2004-08-25  3:26   ` K.R. Foley
@ 2004-08-25  9:58   ` P.O. Gaillard
  2004-08-26 21:39   ` [patch] voluntary-preempt-2.6.8.1-P9 Lee Revell
  2004-08-28 12:03   ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q0 Ingo Molnar
  8 siblings, 0 replies; 93+ messages in thread
From: P.O. Gaillard @ 2004-08-25  9:58 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kernel

Hello,

I have not seen anybody report this trace, so I hope it is not redundant.
I got this when starting oprofile, but if I stop oprofile and start it again, I 
do not get another trace.
	
	sincerely,

	P.O. Gaillard


Aug 24 13:17:31 centaurus kernel: (mount/2642): new 3381 us maximum-latency 
critical section.
Aug 24 13:17:31 centaurus kernel:  => started at: <voluntary_resched+0x35/0x70>
Aug 24 13:17:31 centaurus kernel:  => ended at:   <voluntary_resched+0x35/0x70>
Aug 24 13:17:31 centaurus kernel:  [<c015a0e4>] check_preempt_timing+0x1a4/0x240
Aug 24 13:17:31 centaurus kernel:  [<c03aca95>] voluntary_resched+0x35/0x70
Aug 24 13:17:31 centaurus kernel:  [<c03aca95>] voluntary_resched+0x35/0x70
Aug 24 13:17:31 centaurus kernel:  [<c015a1b6>] touch_preempt_timing+0x36/0x40
Aug 24 13:17:31 centaurus kernel:  [<c015a1b6>] touch_preempt_timing+0x36/0x40
Aug 24 13:17:31 centaurus kernel:  [<c03aca95>] voluntary_resched+0x35/0x70
Aug 24 13:17:31 centaurus kernel:  [<c01d5e4c>] vfs_quota_sync+0x3c/0x610
Aug 24 13:17:31 centaurus kernel:  [<c01dc5aa>] sync_dquots+0x3a/0x70
Aug 24 13:17:31 centaurus kernel:  [<c018da42>] fsync_super+0x32/0xd0
Aug 24 13:17:31 centaurus kernel:  [<c01967b3>] do_remount_sb+0x33/0x140
Aug 24 13:17:31 centaurus kernel:  [<c019771c>] get_sb_single+0x8c/0xc0
Aug 24 13:17:31 centaurus kernel:  [<c01977fa>] do_kern_mount+0xaa/0x180
Aug 24 13:17:31 centaurus kernel:  [<f8a62e70>] oprofilefs_fill_super+0x0/0xa0 
[oprofile]
Aug 24 13:17:31 centaurus kernel:  [<c01be70a>] do_new_mount+0x7a/0xc0
Aug 24 13:17:31 centaurus kernel:  [<c01bf5a9>] do_mount+0x169/0x1b0
Aug 24 13:17:31 centaurus kernel:  [<c01bf394>] copy_mount_options+0x14/0xc0
Aug 24 13:17:31 centaurus kernel:  [<c01bfbce>] sys_mount+0x12e/0x2b0
Aug 24 13:17:31 centaurus kernel:  [<c0107bfd>] sysenter_past_esp+0x52/0x71
Aug 24 13:17:54 centaurus sshd(pam_unix)[2726]: session opened for user tmr by 
(uid=500)

preemption latency trace v1.0.2
-------------------------------
  latency: 400 us, entries: 139 (139)
     -----------------
     | task: mount/2642, uid:0 nice:0 policy:0 rt_prio:0
     -----------------
  => started at: voluntary_resched+0x35/0x70
  => ended at:   voluntary_resched+0x35/0x70
=======>
00000001 0.000ms (+0.000ms): touch_preempt_timing (voluntary_resched)
00000001 0.000ms (+0.000ms): d_rehash (__oprofilefs_create_file)
00000001 0.000ms (+0.000ms): do_remount_sb (get_sb_single)
00000001 0.000ms (+0.000ms): bdev_read_only (do_remount_sb)
00000001 0.001ms (+0.000ms): shrink_dcache_sb (do_remount_sb)
00010002 0.276ms (+0.274ms): do_IRQ (shrink_dcache_sb)
00010003 0.276ms (+0.000ms): ack_edge_ioapic_irq (do_IRQ)
00010003 0.276ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010002 0.277ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010002 0.277ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010003 0.278ms (+0.000ms): mark_offset_pmtmr (timer_interrupt)
00010003 0.282ms (+0.004ms): do_timer (timer_interrupt)
00010003 0.282ms (+0.000ms): update_process_times (do_timer)
00010003 0.283ms (+0.000ms): update_one_process (update_process_times)
00010003 0.283ms (+0.000ms): run_local_timers (update_process_times)
00010003 0.283ms (+0.000ms): raise_softirq (update_process_times)
00010003 0.283ms (+0.000ms): scheduler_tick (update_process_times)
00010003 0.284ms (+0.000ms): sched_clock (scheduler_tick)
00010003 0.285ms (+0.001ms): update_wall_time (do_timer)
00010003 0.285ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010003 0.286ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010003 0.286ms (+0.000ms): end_edge_ioapic_irq (do_IRQ)
00000003 0.287ms (+0.000ms): do_softirq (do_IRQ)
00000003 0.287ms (+0.000ms): __do_softirq (do_softirq)
00000003 0.287ms (+0.000ms): wake_up_process (do_softirq)
00000003 0.287ms (+0.000ms): try_to_wake_up (wake_up_process)
00000003 0.287ms (+0.000ms): task_rq_lock (try_to_wake_up)
00000004 0.288ms (+0.000ms): activate_task (try_to_wake_up)
00000004 0.288ms (+0.000ms): sched_clock (activate_task)
00000004 0.288ms (+0.000ms): recalc_task_prio (activate_task)
00000004 0.288ms (+0.000ms): effective_prio (recalc_task_prio)
00000004 0.288ms (+0.000ms): enqueue_task (activate_task)
00000003 0.289ms (+0.000ms): preempt_schedule (try_to_wake_up)
00000002 0.660ms (+0.370ms): smp_apic_timer_interrupt (shrink_dcache_sb)
00010002 0.660ms (+0.000ms): profile_hook (smp_apic_timer_interrupt)
00010003 0.660ms (+0.000ms): notifier_call_chain (profile_hook)
00010002 0.661ms (+0.000ms): preempt_schedule (smp_apic_timer_interrupt)
00000003 0.661ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000003 0.661ms (+0.000ms): __do_softirq (do_softirq)
00010002 1.275ms (+0.613ms): do_IRQ (shrink_dcache_sb)
00010003 1.275ms (+0.000ms): ack_edge_ioapic_irq (do_IRQ)
00010003 1.275ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010002 1.276ms (+0.000ms): preempt_schedule (do_IRQ)
00010002 1.276ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010002 1.276ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010003 1.276ms (+0.000ms): mark_offset_pmtmr (timer_interrupt)
00010003 1.279ms (+0.002ms): preempt_schedule (mark_offset_pmtmr)
00010003 1.281ms (+0.001ms): preempt_schedule (timer_interrupt)
00010003 1.281ms (+0.000ms): do_timer (timer_interrupt)
00010003 1.281ms (+0.000ms): update_process_times (do_timer)
00010003 1.281ms (+0.000ms): update_one_process (update_process_times)
00010003 1.281ms (+0.000ms): run_local_timers (update_process_times)
00010003 1.282ms (+0.000ms): raise_softirq (update_process_times)
00010003 1.282ms (+0.000ms): scheduler_tick (update_process_times)
00010003 1.282ms (+0.000ms): sched_clock (scheduler_tick)
00010003 1.283ms (+0.000ms): preempt_schedule (scheduler_tick)
00010003 1.283ms (+0.000ms): update_wall_time (do_timer)
00010003 1.283ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010002 1.283ms (+0.000ms): preempt_schedule (timer_interrupt)
00010003 1.284ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010003 1.284ms (+0.000ms): end_edge_ioapic_irq (do_IRQ)
00010002 1.284ms (+0.000ms): preempt_schedule (do_IRQ)
00000003 1.284ms (+0.000ms): do_softirq (do_IRQ)
00000003 1.284ms (+0.000ms): __do_softirq (do_softirq)
00000002 1.659ms (+0.375ms): smp_apic_timer_interrupt (shrink_dcache_sb)
00010002 1.659ms (+0.000ms): profile_hook (smp_apic_timer_interrupt)
00010003 1.659ms (+0.000ms): notifier_call_chain (profile_hook)
00010002 1.660ms (+0.000ms): preempt_schedule (smp_apic_timer_interrupt)
00000003 1.660ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000003 1.660ms (+0.000ms): __do_softirq (do_softirq)
00010002 2.275ms (+0.614ms): do_IRQ (shrink_dcache_sb)
00010003 2.275ms (+0.000ms): ack_edge_ioapic_irq (do_IRQ)
00010003 2.275ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010002 2.275ms (+0.000ms): preempt_schedule (do_IRQ)
00010002 2.275ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010002 2.275ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010003 2.276ms (+0.000ms): mark_offset_pmtmr (timer_interrupt)
00010003 2.278ms (+0.002ms): preempt_schedule (mark_offset_pmtmr)
00010003 2.280ms (+0.001ms): preempt_schedule (timer_interrupt)
00010003 2.280ms (+0.000ms): do_timer (timer_interrupt)
00010003 2.280ms (+0.000ms): update_process_times (do_timer)
00010003 2.280ms (+0.000ms): update_one_process (update_process_times)
00010003 2.281ms (+0.000ms): run_local_timers (update_process_times)
00010003 2.281ms (+0.000ms): raise_softirq (update_process_times)
00010003 2.281ms (+0.000ms): scheduler_tick (update_process_times)
00010003 2.281ms (+0.000ms): sched_clock (scheduler_tick)
00010003 2.282ms (+0.000ms): preempt_schedule (scheduler_tick)
00010003 2.282ms (+0.000ms): update_wall_time (do_timer)
00010003 2.282ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010002 2.282ms (+0.000ms): preempt_schedule (timer_interrupt)
00010003 2.282ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010003 2.283ms (+0.000ms): end_edge_ioapic_irq (do_IRQ)
00010002 2.283ms (+0.000ms): preempt_schedule (do_IRQ)
00000003 2.283ms (+0.000ms): do_softirq (do_IRQ)
00000003 2.283ms (+0.000ms): __do_softirq (do_softirq)
00000002 2.659ms (+0.376ms): smp_apic_timer_interrupt (shrink_dcache_sb)
00010002 2.659ms (+0.000ms): profile_hook (smp_apic_timer_interrupt)
00010003 2.659ms (+0.000ms): notifier_call_chain (profile_hook)
00010002 2.660ms (+0.000ms): preempt_schedule (smp_apic_timer_interrupt)
00000003 2.660ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000003 2.660ms (+0.000ms): __do_softirq (do_softirq)
00010002 3.275ms (+0.615ms): do_IRQ (shrink_dcache_sb)
00010003 3.275ms (+0.000ms): ack_edge_ioapic_irq (do_IRQ)
00010003 3.275ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010002 3.276ms (+0.000ms): preempt_schedule (do_IRQ)
00010002 3.276ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010002 3.276ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010003 3.276ms (+0.000ms): mark_offset_pmtmr (timer_interrupt)
00010003 3.279ms (+0.002ms): preempt_schedule (mark_offset_pmtmr)
00010003 3.281ms (+0.001ms): preempt_schedule (timer_interrupt)
00010003 3.281ms (+0.000ms): do_timer (timer_interrupt)
00010003 3.281ms (+0.000ms): update_process_times (do_timer)
00010003 3.281ms (+0.000ms): update_one_process (update_process_times)
00010003 3.281ms (+0.000ms): run_local_timers (update_process_times)
00010003 3.281ms (+0.000ms): raise_softirq (update_process_times)
00010003 3.281ms (+0.000ms): scheduler_tick (update_process_times)
00010003 3.281ms (+0.000ms): sched_clock (scheduler_tick)
00010003 3.282ms (+0.000ms): preempt_schedule (scheduler_tick)
00010003 3.282ms (+0.000ms): update_wall_time (do_timer)
00010003 3.282ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010002 3.282ms (+0.000ms): preempt_schedule (timer_interrupt)
00010003 3.282ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010003 3.283ms (+0.000ms): end_edge_ioapic_irq (do_IRQ)
00010002 3.283ms (+0.000ms): preempt_schedule (do_IRQ)
00000003 3.283ms (+0.000ms): do_softirq (do_IRQ)
00000003 3.283ms (+0.000ms): __do_softirq (do_softirq)
00000001 3.378ms (+0.095ms): preempt_schedule (do_remount_sb)
00000001 3.379ms (+0.000ms): fsync_super (do_remount_sb)
00000001 3.379ms (+0.000ms): sync_inodes_sb (fsync_super)
00000001 3.379ms (+0.000ms): __read_page_state (sync_inodes_sb)
00000001 3.379ms (+0.000ms): __read_page_state (sync_inodes_sb)
00000002 3.380ms (+0.000ms): sync_sb_inodes (sync_inodes_sb)
00000001 3.380ms (+0.000ms): preempt_schedule (sync_inodes_sb)
00000001 3.380ms (+0.000ms): sync_dquots (fsync_super)
00000001 3.381ms (+0.000ms): vfs_quota_sync (sync_dquots)
00000001 3.381ms (+0.000ms): __might_sleep (vfs_quota_sync)
00000001 3.381ms (+0.000ms): voluntary_resched (vfs_quota_sync)
00000001 3.381ms (+0.000ms): __might_sleep (voluntary_resched)
00000001 3.382ms (+0.000ms): touch_preempt_timing (voluntary_resched)


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-25  3:22     ` Lee Revell
@ 2004-08-25 14:34       ` K.R. Foley
  2004-08-25 16:00       ` K.R. Foley
  1 sibling, 0 replies; 93+ messages in thread
From: K.R. Foley @ 2004-08-25 14:34 UTC (permalink / raw)
  To: Lee Revell; +Cc: Ingo Molnar, Scott Wood, manas.saksena, linux-kernel

Lee Revell wrote:
> On Tue, 2004-08-24 at 23:17, K.R. Foley wrote:
> 
>>Ingo Molnar wrote:
>>
>>>  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
>>>
> 
> 
>>latency trace of ~148 usec in scsi_request? I don't know if this is real 
>>or not. Note the 79 usec here:
>>
>>00000001 0.107ms (+0.079ms): sd_init_command (scsi_prep_fn)
>>
>>Entire trace is here:
>>
>>http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace7.txt
>>
>>
>>Is this possible? This is not the first time I have seen this. There is 
>>another one here:
>>
>>http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace5.txt
>>
> 
> 
> This looks like a real latency.  What is
> /sys/block/sdX/queue/max_sectors_kb set to?  Does lowering it help?
> 
> Lee
> 
> 
/sys/block/sda/queue/max_sectors_kb was set to 512, trying it at 256. 
Hard to say whether it is helping or not. Looking at dmesg I do see some 
traces for scsi_request in the range of 39 - 72 usec. However, anything 
higher (up to 115 usec) could be masked by one of the netif_skb 
latencies that I am still seeing. Not only that, but I have only caught 
a handful of these scsi traces in all of my testing.

kr

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-25  3:22     ` Lee Revell
  2004-08-25 14:34       ` K.R. Foley
@ 2004-08-25 16:00       ` K.R. Foley
  1 sibling, 0 replies; 93+ messages in thread
From: K.R. Foley @ 2004-08-25 16:00 UTC (permalink / raw)
  To: Lee Revell; +Cc: Ingo Molnar, Scott Wood, manas.saksena, linux-kernel

Lee Revell wrote:
> On Tue, 2004-08-24 at 23:17, K.R. Foley wrote:
> 
>>Ingo Molnar wrote:
>>
>>>  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
>>>
> 
> 
>>latency trace of ~148 usec in scsi_request? I don't know if this is real 
>>or not. Note the 79 usec here:
>>
>>00000001 0.107ms (+0.079ms): sd_init_command (scsi_prep_fn)
>>
>>Entire trace is here:
>>
>>http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace7.txt
>>
>>
>>Is this possible? This is not the first time I have seen this. There is 
>>another one here:
>>
>>http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace5.txt
>>
> 
> 
> This looks like a real latency.  What is
> /sys/block/sdX/queue/max_sectors_kb set to?  Does lowering it help?
> 
> Lee
> 
> 
Well, I had no sooner sent the last message than another one of these 
popped up. This one is 123 usec:

http://www.cybsft.com/testresults/2.6.8.1-P9/latency_trace12.txt

kr

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] PPC/PPC64 port of voluntary preempt patch
  2004-08-24 19:51 ` [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
@ 2004-08-26  3:17   ` Lee Revell
  2004-08-26 16:38     ` Scott Wood
  2004-08-27  1:18     ` Fernando Pablo Lopez-Lezcano
  2004-08-28 12:36   ` Ingo Molnar
  1 sibling, 2 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-26  3:17 UTC (permalink / raw)
  To: Scott Wood; +Cc: Ingo Molnar, manas.saksena, linux-kernel, nando

On Tue, 2004-08-24 at 15:51, Scott Wood wrote:
> On Mon, Aug 23, 2004 at 06:18:16PM -0400, Scott Wood wrote:
> > I have attached a port of the voluntary preempt patch to PPC and
> > PPC64.  The patch is against P7, but it applies against P8 as well.
> > 
> > I've tested it on a dual G5 Mac, both in uniprocessor and SMP.
> > 
> > Some notes on changes to the generic part of the patch/existing
> > generic code:
> 
> Another thing that I forgot to mention is that I have some doubts as
> to the current generic_synchronize_irq() implementation.  Given that
> IRQs are now preemptible, a higher priority RT thread calling
> synchronize_irq can't just spin waiting for the IRQ to complete, as
> it never will (and it wouldn't be a great idea for non-RT tasks
> either).  I see that a do_hardirq() call was added, presumably to
> hurry completion of the interrupt, but is that really safe?  It looks
> like that could end up re-entering handlers, and you'd still have a
> partially executed handler after synchronize_irq() finishes (causing
> not only an extra end() call, but possibly code being executed after
> it's been unloaded, and other synchronization violations).
> 
> If I'm missing something, please let me know, but I don't see a good
> way to implement it without blocking for the IRQ thread's completion
> (such as with the per-IRQ waitqueues in M5).

I think Scott may be on to something.  There are several reports that P9
does not work on SMP machines at all - it either doesn't boot, locks up
the first time there is heavy IRQ activity (starting KDE), or locks up
as soon as the first RT process is run.  This is exactly the behavior
that would be expected if Scott is correct.  See this thread:

http://ccrma-mail.stanford.edu/pipermail/planetccrma/2004-August/005899.html

Does anyone have P9 working on SMP?  Fernando, can you see if M5 works
on SMP?  If this works it would seem that the preemptible IRQs are the
problem.

Lee



^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] PPC/PPC64 port of voluntary preempt patch
  2004-08-26  3:17   ` Lee Revell
@ 2004-08-26 16:38     ` Scott Wood
  2004-08-27  1:18     ` Fernando Pablo Lopez-Lezcano
  1 sibling, 0 replies; 93+ messages in thread
From: Scott Wood @ 2004-08-26 16:38 UTC (permalink / raw)
  To: Lee Revell; +Cc: Scott Wood, Ingo Molnar, manas.saksena, linux-kernel, nando

On Wed, Aug 25, 2004 at 11:17:32PM -0400, Lee Revell wrote:
> I think Scott may be on to something.  There are several reports that P9
> does not work on SMP machines at all - it either doesn't boot, locks up
> the first time there is heavy IRQ activity (starting KDE), or locks up
> as soon as the first RT process is run.  This is exactly the behavior
> that would be expected if Scott is correct.  See this thread:
> 
> http://ccrma-mail.stanford.edu/pipermail/planetccrma/2004-August/005899.html
> 
> Does anyone have P9 working on SMP?  Fernando, can you see if M5 works
> on SMP?  If this works it would seem that the preemptible IRQs are the
> problem.

It worked for me on an SMP G5, but it would depend on which drivers
are used; some use synchronize_irq() more than others.

IIRC, though, M5 didn't have the IO-APIC fixes, so that's not likely
to work well on SMP either.

-Scott

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
                     ` (6 preceding siblings ...)
  2004-08-25  9:58   ` [patch] voluntary-preempt-2.6.8.1-P9 : oprofile latency at 3.3ms P.O. Gaillard
@ 2004-08-26 21:39   ` Lee Revell
  2004-08-27 16:54     ` Lee Revell
  2004-08-28 12:14     ` Ingo Molnar
  2004-08-28 12:03   ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q0 Ingo Molnar
  8 siblings, 2 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-26 21:39 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Scott Wood, manas.saksena, linux-kernel

On Tue, 2004-08-24 at 02:14, Ingo Molnar wrote:

>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> 

Hmm, it seems that those strange ~1ms latencies are back.  This was
triggered by mounting an NTFS volume:

http://krustophenia.net/testresults.php?dataset=2.6.8.1-P9#/var/www/2.6.8.1-P9/trace5.txt

Lee




^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] PPC/PPC64 port of voluntary preempt patch
  2004-08-26  3:17   ` Lee Revell
  2004-08-26 16:38     ` Scott Wood
@ 2004-08-27  1:18     ` Fernando Pablo Lopez-Lezcano
  1 sibling, 0 replies; 93+ messages in thread
From: Fernando Pablo Lopez-Lezcano @ 2004-08-27  1:18 UTC (permalink / raw)
  To: Lee Revell
  Cc: Scott Wood, Ingo Molnar, manas.saksena, linux-kernel,
	Fernando Pablo Lopez-Lezcano

On Wed, 2004-08-25 at 20:17, Lee Revell wrote:
> On Tue, 2004-08-24 at 15:51, Scott Wood wrote:
> > On Mon, Aug 23, 2004 at 06:18:16PM -0400, Scott Wood wrote:
> > > I have attached a port of the voluntary preempt patch to PPC and
> > > PPC64.  The patch is against P7, but it applies against P8 as well.
> > > I've tested it on a dual G5 Mac, both in uniprocessor and SMP.
> > > Some notes on changes to the generic part of the patch/existing
> > > generic code:
> > 
> > Another thing that I forgot to mention is that I have some doubts as
> > to the current generic_synchronize_irq() implementation.  Given that
> > IRQs are now preemptible, a higher priority RT thread calling
> > synchronize_irq can't just spin waiting for the IRQ to complete, as
> > it never will (and it wouldn't be a great idea for non-RT tasks
> > either).  I see that a do_hardirq() call was added, presumably to
> > hurry completion of the interrupt, but is that really safe?  It looks
> > like that could end up re-entering handlers, and you'd still have a
> > partially executed handler after synchronize_irq() finishes (causing
> > not only an extra end() call, but possibly code being executed after
> > it's been unloaded, and other synchronization violations).
> > 
> > If I'm missing something, please let me know, but I don't see a good
> > way to implement it without blocking for the IRQ thread's completion
> > (such as with the per-IRQ waitqueues in M5).
> 
> I think Scott may be on to something.  There are several reports that P9
> does not work on SMP machines at all - it either doesn't boot, locks up
> the first time there is heavy IRQ activity (starting KDE), or locks up
> as soon as the first RT process is run.  This is exactly the behavior
> that would be expected if Scott is correct.  See this thread:
> 
> http://ccrma-mail.stanford.edu/pipermail/planetccrma/2004-August/005899.html
> 
> Does anyone have P9 working on SMP?  Fernando, can you see if M5 works
> on SMP?  If this works it would seem that the preemptible IRQs are the
> problem.

Sorry, I could not get SMP 2.6.8.1 + voluntary M5 to boot on my dual
Athlon test system. Again problems with interrupts but worse than P9,
this time acpi=off or pci=noacpi did not help (I can boot single user,
but the machine hangs in the network startup - or if I disable that,
later on X startup). I saw two messages, one "irq 9: nobody cared!" and
then "Disabling IRQ # 9" (that's the one for the network card). On a
different boot:
  Badness in free_irq at  .... irq.c
free_irq
load_balance_new_idle
floppy_release_irq_and_dma
set_dor
motor_off_callback
...

So I could not get to the point where I could test jack and SCHED_FIFO
processes. 

-- Fernando



^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-26 21:39   ` [patch] voluntary-preempt-2.6.8.1-P9 Lee Revell
@ 2004-08-27 16:54     ` Lee Revell
  2004-08-28  7:37       ` Ingo Molnar
  2004-08-28 12:14     ` Ingo Molnar
  1 sibling, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-27 16:54 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Scott Wood, manas.saksena, linux-kernel

On Thu, 2004-08-26 at 17:39, Lee Revell wrote:
> On Tue, 2004-08-24 at 02:14, Ingo Molnar wrote:
> 
> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> > 
> 
> Hmm, it seems that those strange ~1ms latencies are back.  This was
> triggered by mounting an NTFS volume:
> 
> http://krustophenia.net/testresults.php?dataset=2.6.8.1-P9#/var/www/2.6.8.1-P9/trace5.txt
> 

I think vger was messed up yesterday, as this did not get through the
first time I sent it, and I did not get any messages from the list for
4-5 hours last night.

I am seeing large latencies (600-2000 usec) in
dcache_readdir.  This started when the machine became a Samba server and
the dcache presumably got large.  Traces are at the above url (8 and 9 I
believe).  I think this patch fixes it.

--- fs/libfs.c~	2004-08-14 06:54:47.000000000 -0400
+++ fs/libfs.c	2004-08-27 00:44:17.000000000 -0400
@@ -140,6 +140,7 @@
 			}
 			for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
 				struct dentry *next;
+				voluntary_resched_lock(&dcache_lock);
 				next = list_entry(p, struct dentry, d_child);
 				if (d_unhashed(next) || !next->d_inode)
 					continue;


Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-27 16:54     ` Lee Revell
@ 2004-08-28  7:37       ` Ingo Molnar
  2004-08-28 15:10         ` Lee Revell
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-28  7:37 UTC (permalink / raw)
  To: Lee Revell; +Cc: Scott Wood, manas.saksena, linux-kernel


* Lee Revell <rlrevell@joe-job.com> wrote:

> I am seeing large latencies (600-2000 usec) latencies in
> dcache_readdir.  This started when the machine became a Samba server
> and the dcache presumably got large.  Traces are at the above url (8
> and 9 I believe).  I think this patch fixes it.
> 
> --- fs/libfs.c~	2004-08-14 06:54:47.000000000 -0400
> +++ fs/libfs.c	2004-08-27 00:44:17.000000000 -0400
> @@ -140,6 +140,7 @@
>  			}
>  			for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
>  				struct dentry *next;
> +				voluntary_resched_lock(&dcache_lock);
>  				next = list_entry(p, struct dentry, d_child);
>  				if (d_unhashed(next) || !next->d_inode)
>  					continue;

In this loop we are iterating over the child-directories of this
directory. In the next line (not shown in this patch) we drop the
dcache_lock - so the issue is the 'continue' - where we skip already
deleted entries. Are you positive this fixes the latencies you are
seeing? The 'deleted entries' situation ought to be relatively rare.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* [patch] voluntary-preempt-2.6.9-rc1-bk4-Q0
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
                     ` (7 preceding siblings ...)
  2004-08-26 21:39   ` [patch] voluntary-preempt-2.6.8.1-P9 Lee Revell
@ 2004-08-28 12:03   ` Ingo Molnar
  2004-08-28 16:18     ` Felipe Alfaro Solana
  8 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-28 12:03 UTC (permalink / raw)
  To: linux-kernel; +Cc: Lee Revell, Mark_H_Johnson


i've uploaded the -Q0 patch:

  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q0

note that since -bk4 doesnt exist yet, i've uploaded a patch that brings
2.6.8.1 up to BK-curr:

  http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2

apply this patch to 2.6.8.1 before applying the -Q0 patch.

Changes:

there are a number of fundamental changes in the -Q0, of both structural
and functional nature.

Structural changes:

Linus' current BK tree (what will be 2.6.9-rc1-bk4) has just merged most
of the might_sleep() improvements we did for -mm and a bunch of other
changes that were part of the voluntary-preempt patchset. So i've
started a pre-merge cleanup of the voluntary-preempt patchset, to be
able to merge as much of the remaining stuff upstream as possible. This
doesnt (necessarily) mean voluntary-preempt itself will be merged, it
means that the independent latency improvements move out of the
voluntary-preemption umbrella and will go upstream.

About the cleanup:

Firstly, the user controls have changed. There are now 4 independent
flags in /proc/sys/kernel/: kernel_preemption, voluntary_preemption,
softirq_preemption and hardirq_preemption - each default to a value of 1
(enabled). NOTE: levels 2 and 3 for voluntary_preemption are not valid
anymore; each of the flags can be 0 or 1. The flags control what their
name says, for best latencies one should keep all of them enabled.

Similarly, there are 4 independent options for the .config:
CONFIG_PREEMPT, CONFIG_PREEMPT_VOLUNTARY, CONFIG_PREEMPT_SOFTIRQS and
CONFIG_PREEMPT_HARDIRQS. (In theory all of these options should compile
independently, but i've only tested all-enabled so far.)

Internally, the voluntary_ prefixed conditional reschedule variants were
replaced by the existing cond_resched(), cond_resched_lock(),
need_resched()/etc. infrastructure.

Functional changes:

i took another look at SMP latencies, the last larger chunk of code that
produced millisec-category latencies. CONFIG_PREEMPT tries to solve some
of the SMP issues but there were still lots of problems remaining: main
problem area is spinlocks nested at multiple levels. If a piece of code
(e.g. the MM or ext3's journalling code) does the following:

	spin_lock(&spinlock_1);
	...
	spin_lock(&spinlock_2);
	...

then even with CONFIG_PREEMPT enabled, current kernels may spin on
spinlock_2 indefinitely. A number of critical sections break their long
paths by using cond_resched_lock(), but this does not break the path on
SMP, because need_resched() is not set in the above case.

(The -mm kernel introduced a couple of patches that try to drop
spinlocks unconditionally at a high frequency: but besides being a
kludge it's also a performance problem, we keep
dropping/waiting/retaking locks quite frequently. That solution also
doesnt solve the problem of cond_resched_lock() not working on SMP.)

to solve the problem i've introduced a new spinlock field,
lock->break_lock, which signals towards the holding CPU that a
spinlock-break is requested by another CPU. This field is only set if a
CPU is spinning in __preempt_spin_lock [at any locking depth], so the
default overhead is zero. I've extended cond_resched_lock() to check for
this flag - in this case we can also save a reschedule. I've added the
lock_need_resched(lock) and need_lockbreak(lock) methods to check for
the need to break out of a critical section.

preliminary results on a dual x86 box show a dramatic reduction in
latencies on SMP - where there used to be 5-10 msec latencies there are
close-to-UP latencies now. But it needs more testing.

the -Q0 patch also adds a number of lock-breaks that are part of the -mm
tree: e.g. the PTY lock-break.

please re-send any patches that i havent merged yet, and re-report
latencies that still occur with -Q0.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-26 21:39   ` [patch] voluntary-preempt-2.6.8.1-P9 Lee Revell
  2004-08-27 16:54     ` Lee Revell
@ 2004-08-28 12:14     ` Ingo Molnar
  2004-08-30  9:27       ` voluntary-preempt-2.6.8.1-P9 : big latency when logging on console P.O. Gaillard
  2004-08-30  9:48       ` [patch] voluntary-preempt-2.6.8.1-P9 : a few submillisecond latencies P.O. Gaillard
  1 sibling, 2 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-08-28 12:14 UTC (permalink / raw)
  To: Lee Revell; +Cc: Scott Wood, manas.saksena, linux-kernel


* Lee Revell <rlrevell@joe-job.com> wrote:

> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.8.1-P9
> 
> Hmm, it seems that those strange ~1ms latencies are back.  This was
> triggered by mounting an NTFS volume:
> 
> http://krustophenia.net/testresults.php?dataset=2.6.8.1-P9#/var/www/2.6.8.1-P9/trace5.txt

this is genuine NTFS overhead.

the 1 msec events you get because the timer fires once every 1 msec, but
this is not irregular. There is a single NTFS function running, which is
profiled nicely by the timer IRQ:

 00010001 0.372ms (+0.372ms): do_IRQ (load_and_init_upcase)
 ...
 00010001 1.374ms (+0.981ms): do_IRQ (load_and_init_upcase)
 ...
 00010001 2.376ms (+0.978ms): do_IRQ (load_and_init_upcase)
 ...
 00000001 2.615ms (+0.216ms): vfree (load_and_init_upcase)

to be able to debug such latencies is one reason why i changed the
do_IRQ() trace-entry to show the interrupted function's name. (it
wouldnt normally, mcount() doesnt reach across IRQ frames.)

load_and_init_upcase() is called by ntfs_fill_super() which is called by
the mount code, which runs under lock_kernel(). It seems NTFS does not
rely on the BKL - could you try the patch below, does it solve the
latency?

	Ingo

--- linux/fs/ntfs/super.c.orig
+++ linux/fs/ntfs/super.c
@@ -2288,6 +2288,8 @@ static int ntfs_fill_super(struct super_
 	vol->fmask = 0177;
 	vol->dmask = 0077;
 
+	unlock_kernel();
+
 	/* Important to get the mount options dealt with now. */
 	if (!parse_options(vol, (char*)opt))
 		goto err_out_now;
@@ -2424,6 +2426,7 @@ static int ntfs_fill_super(struct super_
 		}
 		up(&ntfs_lock);
 		sb->s_export_op = &ntfs_export_ops;
+		lock_kernel();
 		return 0;
 	}
 	ntfs_error(sb, "Failed to allocate root directory.");
@@ -2527,6 +2530,7 @@ iput_tmp_ino_err_out_now:
 	}
 	/* Errors at this stage are irrelevant. */
 err_out_now:
+	lock_kernel();
 	sb->s_fs_info = NULL;
 	kfree(vol);
 	ntfs_debug("Failed, returning -EINVAL.");

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] PPC/PPC64 port of voluntary preempt patch
  2004-08-24 19:51 ` [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
  2004-08-26  3:17   ` Lee Revell
@ 2004-08-28 12:36   ` Ingo Molnar
  2004-08-28 13:01     ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q1 Ingo Molnar
  1 sibling, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-28 12:36 UTC (permalink / raw)
  To: Scott Wood; +Cc: manas.saksena, linux-kernel, Lee Revell


* Scott Wood <scott@timesys.com> wrote:

> Another thing that I forgot to mention is that I have some doubts as
> to the current generic_synchronize_irq() implementation.  Given that
> IRQs are now preemptible, a higher priority RT thread calling
> synchronize_irq can't just spin waiting for the IRQ to complete, as it
> never will (and it wouldn't be a great idea for non-RT tasks either). 
> I see that a do_hardirq() call was added, presumably to hurry
> completion of the interrupt, but is that really safe?  It looks like
> that could end up re-entering handlers, and you'd still have a
> partially executed handler after synchronize_irq() finishes (causing
> not only an extra end() call, but possibly code being executed after
> it's been unloaded, and other synchronization violations).
> 
> If I'm missing something, please let me know, but I don't see a good
> way to implement it without blocking for the IRQ thread's completion
> (such as with the per-IRQ waitqueues in M5).

agreed, this is a hole in generic_synchronize_irq(). I've added
handler-completion waitqueues to my current tree, it will show up in
-Q1.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* [patch] voluntary-preempt-2.6.9-rc1-bk4-Q1
  2004-08-28 12:36   ` Ingo Molnar
@ 2004-08-28 13:01     ` Ingo Molnar
  2004-08-30  1:06       ` Fernando Pablo Lopez-Lezcano
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-28 13:01 UTC (permalink / raw)
  To: Scott Wood
  Cc: manas.saksena, linux-kernel, Lee Revell, Mark_H_Johnson,
	Fernando Pablo Lopez-Lezcano


* Ingo Molnar <mingo@elte.hu> wrote:

> * Scott Wood <scott@timesys.com> wrote:
> 
> > If I'm missing something, please let me know, but I don't see a good
> > way to implement it without blocking for the IRQ thread's completion
> > (such as with the per-IRQ waitqueues in M5).
> 
> agreed, this is a hole in generic_synchronize_irq(). I've added
> handler-completion waitqueues to my current tree, it will show up in
> -Q1.

i've uploaded -Q1:

  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q1

as with -Q0, the following patch has to be applied to 2.6.8.1 first:

  http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2

those who still have DRI problems under -Q1 - please unapply the
drm_os_linux.h change - does that fix the lockups?

Changes since -Q0:

- the synchronize_irq() fix - this might help SMP problems.

- adds unlock_kernel() to the NTFS and ext3 mount path, to fix the
  latencies reported by Lee Revell.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9
  2004-08-28  7:37       ` Ingo Molnar
@ 2004-08-28 15:10         ` Lee Revell
  0 siblings, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-28 15:10 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Scott Wood, manas.saksena, linux-kernel

On Sat, 2004-08-28 at 03:37, Ingo Molnar wrote:
> * Lee Revell <rlrevell@joe-job.com> wrote:
> 
> > I am seeing large latencies (600-2000 usec) latencies in
> > dcache_readdir.  This started when the machine became a Samba server
> > and the dcache presumably got large.  Traces are at the above url (8
> > and 9 I believe).  I think this patch fixes it.
> > 
> > --- fs/libfs.c~	2004-08-14 06:54:47.000000000 -0400
> > +++ fs/libfs.c	2004-08-27 00:44:17.000000000 -0400
> > @@ -140,6 +140,7 @@
> >  			}
> >  			for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
> >  				struct dentry *next;
> > +				voluntary_resched_lock(&dcache_lock);
> >  				next = list_entry(p, struct dentry, d_child);
> >  				if (d_unhashed(next) || !next->d_inode)
> >  					continue;
> 
> In this loop we are iterating over the child-directories of this
> directory. In the next line (not shown in this patch) we drop the
> dcache_lock - so the issue is the 'continue' - where we skip already
> deleted entries. Are you positive this fixes the latencies you are
> seeing? The 'deleted entries' situation ought to be relatively rare.

No, I am not sure this fixes the problem.  This is a pretty rare one, I
only saw it twice.  I have not seen it since making the above change,
but this doesn't mean anything.

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q0
  2004-08-28 12:03   ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q0 Ingo Molnar
@ 2004-08-28 16:18     ` Felipe Alfaro Solana
  2004-08-28 16:50       ` K.R. Foley
  0 siblings, 1 reply; 93+ messages in thread
From: Felipe Alfaro Solana @ 2004-08-28 16:18 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Lee Revell, Mark_H_Johnson

On Saturday 28 August 2004 14:03, Ingo Molnar wrote:

> Similarly, there are 4 independent options for the .config:
> CONFIG_PREEMPT, CONFIG_PREEMPT_VOLUNTARY, CONFIG_PREEMPT_SOFTIRQS and
> CONFIG_PREEMPT_HARDIRQS. (In theory all of these options should compile
> independently, but i've only tested all-enabled so far.)

I must be missing something, but after applying diff-bk-040828-2.6.8.1.bz2 and 
voluntary-preempt-2.6.9-rc1-bk4-Q1 on top of 2.6.8.1, I'm unable to find 
any of CONFIG_PREEMPT_VOLUNTARY, CONFIG_PREEMPT_SOFTIRQS, or 
CONFIG_PREEMPT_HARDIRQS.

Any ideas are welcome.

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q0
  2004-08-28 16:18     ` Felipe Alfaro Solana
@ 2004-08-28 16:50       ` K.R. Foley
  2004-08-28 17:52         ` Lee Revell
  0 siblings, 1 reply; 93+ messages in thread
From: K.R. Foley @ 2004-08-28 16:50 UTC (permalink / raw)
  To: Felipe Alfaro Solana
  Cc: Ingo Molnar, linux-kernel, Lee Revell, Mark_H_Johnson

Felipe Alfaro Solana wrote:
> On Saturday 28 August 2004 14:03, Ingo Molnar wrote:
> 
> 
>>Similarly, there are 4 independent options for the .config:
>>CONFIG_PREEMPT, CONFIG_PREEMPT_VOLUNTARY, CONFIG_PREEMPT_SOFTIRQS and
>>CONFIG_PREEMPT_HARDIRQS. (In theory all of these options should compile
>>independently, but i've only tested all-enabled so far.)
> 
> 
> I must be missing something, but after applying diff-bk-040828-2.6.8.1.bz2 and 
> voluntary-preempt-2.6.9-rc1-bk4-Q1 on top of 2.6.8.1, I'm unable to find 
> neither CONFIG_PREEMPT_VOLUNTARY, CONFIG_PREEMPT_SOFTIRQS, nor 
> CONFIG_PREEMPT_HARDIRQS.
> 
> Any ideas are welcome.

Looks like all of these config options are missing from Q1 also. I was 
just looking myself.

kr

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q0
  2004-08-28 16:50       ` K.R. Foley
@ 2004-08-28 17:52         ` Lee Revell
  2004-08-28 19:44           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2 Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-28 17:52 UTC (permalink / raw)
  To: K.R. Foley
  Cc: Felipe Alfaro Solana, Ingo Molnar, linux-kernel, Mark_H_Johnson

On Sat, 2004-08-28 at 12:50, K.R. Foley wrote:
> Felipe Alfaro Solana wrote:
> > On Saturday 28 August 2004 14:03, Ingo Molnar wrote:
> > 
> > 
> >>Similarly, there are 4 independent options for the .config:
> >>CONFIG_PREEMPT, CONFIG_PREEMPT_VOLUNTARY, CONFIG_PREEMPT_SOFTIRQS and
> >>CONFIG_PREEMPT_HARDIRQS. (In theory all of these options should compile
> >>independently, but i've only tested all-enabled so far.)
> > 
> > 
> > I must be missing something, but after applying diff-bk-040828-2.6.8.1.bz2 and 
> > voluntary-preempt-2.6.9-rc1-bk4-Q1 on top of 2.6.8.1, I'm unable to find 
> > neither CONFIG_PREEMPT_VOLUNTARY, CONFIG_PREEMPT_SOFTIRQS, nor 
> > CONFIG_PREEMPT_HARDIRQS.
> > 
> > Any ideas are welcome.
> 
> Looks like all of these config options are missing from Q1 also. I was 
> just looking myself.
> 

Same results here, none of those config options seem to exist.  I also
get this warning a lot:

include/linux/rwsem.h: In function `down_read':
include/linux/rwsem.h:43: warning: implicit declaration of function `cond_resched'

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2
  2004-08-28 17:52         ` Lee Revell
@ 2004-08-28 19:44           ` Ingo Molnar
  2004-08-28 20:01             ` Lee Revell
  2004-08-28 20:10             ` Daniel Schmitt
  0 siblings, 2 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-08-28 19:44 UTC (permalink / raw)
  To: Lee Revell; +Cc: K.R. Foley, Felipe Alfaro Solana, linux-kernel, Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> > > I must be missing something, but after applying diff-bk-040828-2.6.8.1.bz2 and 
> > > voluntary-preempt-2.6.9-rc1-bk4-Q1 on top of 2.6.8.1, I'm unable to find 
> > > neither CONFIG_PREEMPT_VOLUNTARY, CONFIG_PREEMPT_SOFTIRQS, nor 
> > > CONFIG_PREEMPT_HARDIRQS.
> > > 
> > > Any ideas are welcome.
> > 
> > Looks like all of these config options are missing from Q1 also. I was 
> > just looking myself.
> > 
> 
> Same results here, none of those config options seem to exist.  I also
> get this warning a lot:
> 
> include/linux/rwsem.h: In function `down_read':
> include/linux/rwsem.h:43: warning: implicit declaration of function `cond_resched'

there's a Kconfig chunk missing from the Q0/Q1 patches, i've uploaded Q2
that fixes this:

  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q2

on top of:

  http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2
  2004-08-28 19:44           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2 Ingo Molnar
@ 2004-08-28 20:01             ` Lee Revell
  2004-08-28 20:04               ` Ingo Molnar
  2004-08-28 20:10             ` Daniel Schmitt
  1 sibling, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-28 20:01 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K.R. Foley, Felipe Alfaro Solana, linux-kernel, Mark_H_Johnson

On Sat, 2004-08-28 at 15:44, Ingo Molnar wrote:

> there's a Kconfig chunk missing from the Q0/Q1 patches, i've uploaded Q2

Still not quite right:

  HOSTLD  scripts/mod/modpost
  CC      arch/i386/kernel/asm-offsets.s
In file included from arch/i386/kernel/asm-offsets.c:7:
include/linux/sched.h: In function `lock_need_resched':
include/linux/sched.h:983: error: structure has no member named `break_lock'
make[1]: *** [arch/i386/kernel/asm-offsets.s] Error 1
make: *** [arch/i386/kernel/asm-offsets.s] Error 2

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2
  2004-08-28 20:01             ` Lee Revell
@ 2004-08-28 20:04               ` Ingo Molnar
  2004-08-28 20:08                 ` Lee Revell
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-28 20:04 UTC (permalink / raw)
  To: Lee Revell; +Cc: K.R. Foley, Felipe Alfaro Solana, linux-kernel, Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> On Sat, 2004-08-28 at 15:44, Ingo Molnar wrote:
> 
> > there's a Kconfig chunk missing from the Q0/Q1 patches, i've uploaded Q2
> 
> Still not quite right:
> 
>   HOSTLD  scripts/mod/modpost
>   CC      arch/i386/kernel/asm-offsets.s
> In file included from arch/i386/kernel/asm-offsets.c:7:
> include/linux/sched.h: In function `lock_need_resched':
> include/linux/sched.h:983: error: structure has no member named `break_lock'
> make[1]: *** [arch/i386/kernel/asm-offsets.s] Error 1
> make: *** [arch/i386/kernel/asm-offsets.s] Error 2

you probably have CONFIG_PREEMPT_VOLUNTARY disabled in the .config?

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2
  2004-08-28 20:04               ` Ingo Molnar
@ 2004-08-28 20:08                 ` Lee Revell
  0 siblings, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-28 20:08 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K.R. Foley, Felipe Alfaro Solana, linux-kernel, Mark_H_Johnson

On Sat, 2004-08-28 at 16:04, Ingo Molnar wrote:
> * Lee Revell <rlrevell@joe-job.com> wrote:
> 
> > On Sat, 2004-08-28 at 15:44, Ingo Molnar wrote:
> > 
> > > there's a Kconfig chunk missing from the Q0/Q1 patches, i've uploaded Q2
> > 
> > Still not quite right:
> > 
> >   HOSTLD  scripts/mod/modpost
> >   CC      arch/i386/kernel/asm-offsets.s
> > In file included from arch/i386/kernel/asm-offsets.c:7:
> > include/linux/sched.h: In function `lock_need_resched':
> > include/linux/sched.h:983: error: structure has no member named `break_lock'
> > make[1]: *** [arch/i386/kernel/asm-offsets.s] Error 1
> > make: *** [arch/i386/kernel/asm-offsets.s] Error 2
> 
> you probably have CONFIG_PREEMPT_VOLUNTARY disabled in the .config?
> 

Nope:

# CONFIG_SMP is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_PREEMPT_SOFTIRQS=y
CONFIG_PREEMPT_HARDIRQS=y
# CONFIG_X86_UP_APIC is not set

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2
  2004-08-28 19:44           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2 Ingo Molnar
  2004-08-28 20:01             ` Lee Revell
@ 2004-08-28 20:10             ` Daniel Schmitt
  2004-08-28 20:31               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3 Ingo Molnar
  1 sibling, 1 reply; 93+ messages in thread
From: Daniel Schmitt @ 2004-08-28 20:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Lee Revell, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Saturday 28 August 2004 21:44, Ingo Molnar wrote:
>
> there's a Kconfig chunk missing from the Q0/Q1 patches, i've uploaded Q2
> that fixes this:
>
This breaks here unless CONFIG_SMP is defined, with the following error:

  CC      arch/i386/kernel/asm-offsets.s
In file included from arch/i386/kernel/asm-offsets.c:7:
include/linux/sched.h: In function `lock_need_resched':
include/linux/sched.h:983: error: structure has no member named `break_lock'

Probably missing a check for CONFIG_SMP around the need_lockbreak defines in 
sched.h, and maybe also in cond_resched_lock().

Daniel.


^ permalink raw reply	[flat|nested] 93+ messages in thread

* [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3
  2004-08-28 20:10             ` Daniel Schmitt
@ 2004-08-28 20:31               ` Ingo Molnar
  2004-08-28 21:10                 ` Lee Revell
  2004-08-29  7:40                 ` Matt Heler
  0 siblings, 2 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-08-28 20:31 UTC (permalink / raw)
  To: Daniel Schmitt
  Cc: Lee Revell, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Daniel Schmitt <pnambic@unu.nu> wrote:

> > there's a Kconfig chunk missing from the Q0/Q1 patches, i've uploaded Q2
> > that fixes this:
> >
> This breaks here unless CONFIG_SMP is defined, with the following error:
> 
>   CC      arch/i386/kernel/asm-offsets.s
> In file included from arch/i386/kernel/asm-offsets.c:7:
> include/linux/sched.h: In function `lock_need_resched':
> include/linux/sched.h:983: error: structure has no member named `break_lock'
> 
> Probably missing a check for CONFIG_SMP around the need_lockbreak
> defines in sched.h, and maybe also in cond_resched_lock().

doh - right indeed. -Q3 has this fixed, it is at:

  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q3

on top of the usual:

  http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2

        Ingo


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3
  2004-08-28 20:31               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3 Ingo Molnar
@ 2004-08-28 21:10                 ` Lee Revell
  2004-08-28 21:13                   ` Ingo Molnar
  2004-08-30 12:52                   ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3 Ingo Molnar
  2004-08-29  7:40                 ` Matt Heler
  1 sibling, 2 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-28 21:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Sat, 2004-08-28 at 16:31, Ingo Molnar wrote:
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q3
> 

I get this error: 

WARNING: /lib/modules/2.6.9-rc1-Q3/kernel/fs/ntfs/ntfs.ko needs unknown symbol unlock_kernel
WARNING: /lib/modules/2.6.9-rc1-Q3/kernel/fs/ntfs/ntfs.ko needs unknown symbol lock_kernel

I believe this is the correct fix:

--- fs/ntfs/super.c~	2004-08-28 16:31:33.000000000 -0400
+++ fs/ntfs/super.c	2004-08-28 17:08:11.000000000 -0400
@@ -29,6 +29,7 @@
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
 #include <linux/moduleparam.h>
+#include <linux/smp_lock.h>
 
 #include "ntfs.h"
 #include "sysctl.h"

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3
  2004-08-28 21:10                 ` Lee Revell
@ 2004-08-28 21:13                   ` Ingo Molnar
  2004-08-28 21:16                     ` Lee Revell
  2004-08-30 12:52                   ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3 Ingo Molnar
  1 sibling, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-28 21:13 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> On Sat, 2004-08-28 at 16:31, Ingo Molnar wrote:
> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q3
> > 
> 
> I get this error: 
> 
> WARNING: /lib/modules/2.6.9-rc1-Q3/kernel/fs/ntfs/ntfs.ko needs unknown symbol unlock_kernel
> WARNING: /lib/modules/2.6.9-rc1-Q3/kernel/fs/ntfs/ntfs.ko needs unknown symbol lock_kernel
> 
> I believe this is the correct fix:
> 
> --- fs/ntfs/super.c~	2004-08-28 16:31:33.000000000 -0400
> +++ fs/ntfs/super.c	2004-08-28 17:08:11.000000000 -0400
> @@ -29,6 +29,7 @@
>  #include <linux/buffer_head.h>
>  #include <linux/vfs.h>
>  #include <linux/moduleparam.h>
> +#include <linux/smp_lock.h>
>  
>  #include "ntfs.h"
>  #include "sysctl.h"

ok, will add this to -Q4.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3
  2004-08-28 21:13                   ` Ingo Molnar
@ 2004-08-28 21:16                     ` Lee Revell
  2004-08-28 23:51                       ` Lee Revell
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-28 21:16 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Sat, 2004-08-28 at 17:13, Ingo Molnar wrote:
> ok, will add this to -Q4.
> 

Hrm, Q3 broke my PS/2 keyboard.

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3
  2004-08-28 21:16                     ` Lee Revell
@ 2004-08-28 23:51                       ` Lee Revell
  2004-08-29  2:35                         ` Lee Revell
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-28 23:51 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Sat, 2004-08-28 at 17:16, Lee Revell wrote:
> On Sat, 2004-08-28 at 17:13, Ingo Molnar wrote:
> > ok, will add this to -Q4.
> > 
> 
> Hrm, Q3 broke my PS/2 keyboard.
> 

The problem goes away when I disable CONFIG_PREEMPT_HARDIRQS.  In both
cases CONFIG_PREEMPT_SOFTIRQS was enabled.

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3
  2004-08-28 23:51                       ` Lee Revell
@ 2004-08-29  2:35                         ` Lee Revell
  2004-08-29  5:43                           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4 Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-29  2:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Sat, 2004-08-28 at 19:51, Lee Revell wrote:
> On Sat, 2004-08-28 at 17:16, Lee Revell wrote:
> > On Sat, 2004-08-28 at 17:13, Ingo Molnar wrote:
> > > ok, will add this to -Q4.
> > > 
> > 
> > Hrm, Q3 broke my PS/2 keyboard.
> > 

Some more info:

This bug is 100% reproducible.  During boot, as soon as the i8042 driver
is loaded:

serio: i8042 AUX port at 0x60,0x64 irq 12
serio: i8042 KBD port at 0x60,0x64 irq 1
input: AT Translated Set 2 keyboard on isa0060/serio0

the keyboard freezes, with 'Num Lock' stuck on.

The problem only occurs when CONFIG_PREEMPT_HARDIRQS=y.  Works fine
otherwise.

/proc/interrupts:

           CPU0       
  0:     509819          XT-PIC  timer
  1:       1649          XT-PIC  i8042
  2:          0          XT-PIC  cascade
  8:          4          XT-PIC  rtc
 10:          0          XT-PIC  uhci_hcd, EMU10K1
 11:      24394          XT-PIC  uhci_hcd, eth0
 12:          0          XT-PIC  uhci_hcd
 14:          1          XT-PIC  ide0
 15:      12864          XT-PIC  ide1
NMI:          0 
ERR:          0

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4
  2004-08-29  2:35                         ` Lee Revell
@ 2004-08-29  5:43                           ` Ingo Molnar
  2004-08-29  6:57                             ` Lee Revell
  2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
  0 siblings, 2 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-08-29  5:43 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> Some more info:
> 
> This bug is 100% reproducible.  During boot, as soon as the i8042 driver
> is loaded:
> 
> serio: i8042 AUX port at 0x60,0x64 irq 12
> serio: i8042 KBD port at 0x60,0x64 irq 1
> input: AT Translated Set 2 keyboard on isa0060/serio0
> 
> the keyboard freezes, with 'Num Lock' stuck on.
> 
> The problem only occurs when CONFIG_PREEMPT_HARDIRQS=y.  Works fine
> otherwise.

i suspect it's the generic_synchronize_irq() change. Does -Q4 boot?:

  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q4

-Q4 reverts this change. (this doesnt solve the problems Scott noticed
though.)

another solution would be to boot Q3 with preempt_hardirqs=0 and then
turn on threading for all IRQs but the keyboard.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4
  2004-08-29  5:43                           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4 Ingo Molnar
@ 2004-08-29  6:57                             ` Lee Revell
  2004-08-29 18:01                               ` Ingo Molnar
  2004-08-29 19:06                               ` Ingo Molnar
  2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
  1 sibling, 2 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-29  6:57 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Sun, 2004-08-29 at 01:43, Ingo Molnar wrote:
> * Lee Revell <rlrevell@joe-job.com> wrote:
> 
> > Some more info:
> > 
> > This bug is 100% reproducible.  During boot, as soon as the i8042 driver
> > is loaded:
> > 
> > serio: i8042 AUX port at 0x60,0x64 irq 12
> > serio: i8042 KBD port at 0x60,0x64 irq 1
> > input: AT Translated Set 2 keyboard on isa0060/serio0
> > 
> > the keyboard freezes, with 'Num Lock' stuck on.
> > 
> > The problem only occurs when CONFIG_PREEMPT_HARDIRQS=y.  Works fine
> > otherwise.
> 
> i suspect it's the generic_synchronize_irq() change. Does -Q4 boot?:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q4
> 
> -Q4 reverts this change. (this doesnt solve the problems Scott noticed
> though.)
> 
> another solution would be to boot Q3 with preempt_hardirqs=0 and then
> turn on threading for all IRQs but the keyboard.
> 

Nope, neither of these fixes the problem.

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3
  2004-08-28 20:31               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3 Ingo Molnar
  2004-08-28 21:10                 ` Lee Revell
@ 2004-08-29  7:40                 ` Matt Heler
  1 sibling, 0 replies; 93+ messages in thread
From: Matt Heler @ 2004-08-29  7:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, Lee Revell, K.R. Foley, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson

I had the ambitious idea of porting this over to the mm tree.

So here's a link for Q3 .. 

http://mobius.lpbproductions.com/kernel/voluntary-preempt-2.6.9-rc1-mm1-Q3

I test-compiled this patch fine. I haven't had time yet to boot into it, but
I'm fairly positive it will work.

Matt H.

On Saturday 28 August 2004 1:31 pm, Ingo Molnar wrote:
> * Daniel Schmitt <pnambic@unu.nu> wrote:
> > > there's a Kconfig chunk missing from the Q0/Q1 patches, i've uploaded
> > > Q2 that fixes this:
> >
> > This breaks here unless CONFIG_SMP is defined, with the following error:
> >
> >   CC      arch/i386/kernel/asm-offsets.s
> > In file included from arch/i386/kernel/asm-offsets.c:7:
> > include/linux/sched.h: In function `lock_need_resched':
> > include/linux/sched.h:983: error: structure has no member named
> > `break_lock'
> >
> > Probably missing a check for CONFIG_SMP around the need_lockbreak
> > defines in sched.h, and maybe also in cond_resched_lock().
>
> doh - right indeed. -Q3 has this fixed, it is at:
>
>  
> http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-
>Q3
>
> ontop of the usual:
>
>   http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2
>
>         Ingo
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4
  2004-08-29  6:57                             ` Lee Revell
@ 2004-08-29 18:01                               ` Ingo Molnar
  2004-08-29 19:06                               ` Ingo Molnar
  1 sibling, 0 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-08-29 18:01 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> > > The problem only occurs when CONFIG_PREEMPT_HARDIRQS=y.  Works fine
> > > otherwise.
> > 
> > i suspect it's the generic_synchronize_irq() change. Does -Q4 boot?:
> > 
> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q4
> > 
> > -Q4 reverts this change. (this doesnt solve the problems Scott noticed
> > though.)
> > 
> > another solution would be to boot Q3 with preempt_hardirqs=0 and then
> > turn on threading for all IRQs but the keyboard.
> > 
> 
> Nope, neither of these fixes the problem.

the correct boot parameter is hardirq-preempt=0.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4
  2004-08-29  6:57                             ` Lee Revell
  2004-08-29 18:01                               ` Ingo Molnar
@ 2004-08-29 19:06                               ` Ingo Molnar
  2004-08-30  0:47                                 ` K.R. Foley
  1 sibling, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-29 19:06 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> > -Q4 reverts this change. (this doesnt solve the problems Scott noticed
> > though.)
> > 
> > another solution would be to boot Q3 with preempt_hardirqs=0 and then
> > turn on threading for all IRQs but the keyboard.
> > 
> 
> Nope, neither of these fixes the problem.

i can reproduce a PS2 keyboard problem on a testsystem. It's not clear
yet what the issue is, something in the atkbd.c code changed between
2.6.8.1 and 2.6.9-rc1-bk4 that broke IRQ redirection - even using the P9
hardirq.c code doesnt fix the problem. Investigating it.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4
  2004-08-29 19:06                               ` Ingo Molnar
@ 2004-08-30  0:47                                 ` K.R. Foley
  2004-08-30  3:42                                   ` K.R. Foley
  2004-08-30 13:06                                   ` Alan Cox
  0 siblings, 2 replies; 93+ messages in thread
From: K.R. Foley @ 2004-08-30  0:47 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Lee Revell, Daniel Schmitt, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

Ingo Molnar wrote:
> * Lee Revell <rlrevell@joe-job.com> wrote:
> 
> 
>>>-Q4 reverts this change. (this doesnt solve the problems Scott noticed
>>>though.)
>>>
>>>another solution would be to boot Q3 with preempt_hardirqs=0 and then
>>>turn on threading for all IRQs but the keyboard.
>>>
>>
>>Nope, neither of these fixes the problem.
> 
> 
> i can reproduce a PS2 keyboard problem on a testsystem. It's not clear
> yet what the issue is, something in the atkbd.c code changed between
> 2.6.8.1 and 2.6.9-rc1-bk4 that broke IRQ redirection - even using the P9
> hardirq.c code doesnt fix the problem. Investigating it.
> 
> 	Ingo
> 

Something of interest on this, maybe:

Here is the (pertinent) log of the system booting:

Aug 29 09:32:50 daffy kernel: requesting new irq thread for IRQ1...
Aug 29 09:32:50 daffy kernel: atkbd.c: Spurious ACK on isa0060/serio1. 
Some program, like XFree86, might be trying access hardware directly.
Aug 29 09:32:50 daffy kernel: md: md driver 0.90.0 MAX_MD_DEVS=256, 
MD_SB_DISKS=27
Aug 29 09:32:50 daffy kernel: IRQ#1 thread started up.

And some further entries:

Aug 29 16:48:50 daffy kernel: atkbd.c: Spurious NAK on isa0060/serio1. 
Some program, like XFree86, might be trying access hardware directly.
Aug 29 16:48:50 daffy kernel: atkbd.c: Unknown key pressed (raw set 2, 
code 0x0 on isa0060/serio1).
Aug 29 16:48:50 daffy kernel: atkbd.c: Use 'setkeycodes 00 <keycode>' to 
make it known.
Aug 29 16:48:50 daffy kernel: atkbd.c: Unknown key pressed (raw set 2, 
code 0x18 on isa0060/serio1).
Aug 29 16:48:50 daffy kernel: atkbd.c: Use 'setkeycodes 18 <keycode>' to 
make it known.

I get the "Unknown key pressed" and "Use 'setkeycodes" messages whenever 
I press a key on the keyboard. I don't see very many of the "Spurious 
NAK" messages though.

kr

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q1
  2004-08-28 13:01     ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q1 Ingo Molnar
@ 2004-08-30  1:06       ` Fernando Pablo Lopez-Lezcano
  0 siblings, 0 replies; 93+ messages in thread
From: Fernando Pablo Lopez-Lezcano @ 2004-08-30  1:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Scott Wood, manas.saksena, linux-kernel, Lee Revell,
	Mark_H_Johnson, nando

On Sat, 2004-08-28 at 06:01, Ingo Molnar wrote:
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > * Scott Wood <scott@timesys.com> wrote:
> > 
> > > If I'm missing something, please let me know, but I don't see a good
> > > way to implement it without blocking for the IRQ thread's completion
> > > (such as with the per-IRQ waitqueues in M5).
> > 
> > agreed, this is a hole in generic_synchronize_irq(). I've added
> > handler-completion waitqueues to my current tree, it will show up in
> > -Q1.
> 
> i've uploaded -Q1:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q1
> 
> as with -Q0, the following patch has to be applied to 2.6.8.1 first:
> 
>   http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2
> 
> those who still have DRI problems under -Q1 - please unapply the
> drm_os_linux.h change, does the fix the lockups?

I managed to do a few quick tests yesterday of voluntary Q3:

SMP kernel on UP machine (Athlon64): hangs during boot, goes a little
further than before but hangs anyway:

requesting new irq thread for IRQ169
ata1: dev 0 ATA, max UDMA/133 ...
IRQ#169 thread started up
ata1: dev 0 configured for UDMA/133 ...
scsi0: sata_promise
ata2: dev 0 ATA ...
  --- hangs ---

SMP kernel on SMP machine (dual Athlon):
softirq-preempt=0 hardirq-preempt=0 acpi=on: hangs
  afaik hang happens when something needs interrupts, first culprit in
  my machine is eth0, if I disable it I can go further ahead and boot
  but alsa has timeouts, presumably because it is not getting
  interrupts.
softirq-preempt=0 hardirq-preempt=0 acpi=off: boots normally, 
  jack, glxgears work fine (but high latency spikes)
softirq-preempt=1 hardirq-preempt=1 acpi=off: boots normally,
  jack works fine, glxgears hangs machine after a while
Sorry I was not able to check all permutations, I was late and stopped
rebooting the machine :-) I think I did one more test with soft=1,
hard=1, acpi=off and did not manage to hang the machine. 

-- Fernando



^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4
  2004-08-30  0:47                                 ` K.R. Foley
@ 2004-08-30  3:42                                   ` K.R. Foley
  2004-08-30 13:06                                   ` Alan Cox
  1 sibling, 0 replies; 93+ messages in thread
From: K.R. Foley @ 2004-08-30  3:42 UTC (permalink / raw)
  To: K.R. Foley
  Cc: Ingo Molnar, Lee Revell, Daniel Schmitt, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson

K.R. Foley wrote:
> Ingo Molnar wrote:
> 
>> * Lee Revell <rlrevell@joe-job.com> wrote:
>>
>>
>>>> -Q4 reverts this change. (this doesnt solve the problems Scott noticed
>>>> though.)
>>>>
>>>> another solution would be to boot Q3 with preempt_hardirqs=0 and then
>>>> turn on threading for all IRQs but the keyboard.
>>>>
>>>
>>> Nope, neither of these fixes the problem.
>>
>>
>>
>> i can reproduce a PS2 keyboard problem on a testsystem. It's not clear
>> yet what the issue is, something in the atkbd.c code changed between
>> 2.6.8.1 and 2.6.9-rc1-bk4 that broke IRQ redirection - even using the P9
>> hardirq.c code doesnt fix the problem. Investigating it.
>>
>>     Ingo
>>

Actually after doing a diff on atkbd, there don't seem to be any 
differences between 2.6.9-rc1 and 2.6.8.1. :( Also after looking back 
through my logs some things worth noting here:

> 
> Something of interest on this, maybe:
> 
> Here is the (pertinent) log of the system booting:
> 
> Aug 29 09:32:50 daffy kernel: requesting new irq thread for IRQ1...
> Aug 29 09:32:50 daffy kernel: atkbd.c: Spurious ACK on isa0060/serio1. 
> Some program, like XFree86, might be trying access hardware directly.

These have been here for a while.

> Aug 29 09:32:50 daffy kernel: md: md driver 0.90.0 MAX_MD_DEVS=256, 
> MD_SB_DISKS=27
> Aug 29 09:32:50 daffy kernel: IRQ#1 thread started up.
> 
> And some further entries:
> 
> Aug 29 16:48:50 daffy kernel: atkbd.c: Spurious NAK on isa0060/serio1. 
> Some program, like XFree86, might be trying access hardware directly.
> Aug 29 16:48:50 daffy kernel: atkbd.c: Unknown key pressed (raw set 2, 
> code 0x0 on isa0060/serio1).
> Aug 29 16:48:50 daffy kernel: atkbd.c: Use 'setkeycodes 00 <keycode>' to 
> make it known.
> Aug 29 16:48:50 daffy kernel: atkbd.c: Unknown key pressed (raw set 2, 
> code 0x18 on isa0060/serio1).
> Aug 29 16:48:50 daffy kernel: atkbd.c: Use 'setkeycodes 18 <keycode>' to 
> make it known.

Looking at the messages above, it appears to me that this is being 
detected as an AT keyboard (or default) instead of a PS/2. Could this be 
a problem?

> 
> I get the "Unknown key pressed" and "Use 'setkeycodes" messages whenever 
> I press a key on the keyboard. I don't see very many of the "Spurious 
> NAK" messages though.
> 
> kr


^ permalink raw reply	[flat|nested] 93+ messages in thread

* [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-29  5:43                           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4 Ingo Molnar
  2004-08-29  6:57                             ` Lee Revell
@ 2004-08-30  9:06                             ` Ingo Molnar
  2004-08-30 14:25                               ` Thomas Charbonnel
                                                 ` (6 more replies)
  1 sibling, 7 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-08-30  9:06 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


i've uploaded -Q5 to:

  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5

ontop of:

  http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2

-Q5 should fix the PS2 problems and the early boot problems, and it
might even fix the USB, ACPI and APIC problems some people were
reporting.

There were a number of bugs that led to the PS2 problems:

 - a change to __cond_resched() in the -Q series caused the starvation
   of the IRQ1 and IRQ12 threads during init - causing a silent timeout
   and misdetection in the ps2 driver(s).

 - even with the starvation bug fixed, we must set system_state to
   SCHEDULER_OK only once the init thread has started - otherwise the
   idle thread might hang during bootup.

 - the redirected IRQ handling now matches that of non-redirected IRQs
   better, the outer loop in generic_handle_IRQ has been flattened.

i also re-added the synchronize_irq() fix, it was not causing the PS2
problems.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* voluntary-preempt-2.6.8.1-P9 : big latency when logging on console
  2004-08-28 12:14     ` Ingo Molnar
@ 2004-08-30  9:27       ` P.O. Gaillard
  2004-08-30  9:41         ` Ingo Molnar
  2004-08-30  9:48       ` [patch] voluntary-preempt-2.6.8.1-P9 : a few submillisecond latencies P.O. Gaillard
  1 sibling, 1 reply; 93+ messages in thread
From: P.O. Gaillard @ 2004-08-30  9:27 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar

Hello,

I have a 1.6ms latency every time I log in with P9.

Here are the traces. The PC is a P4@3GHz with half a gigabyte of memory and a 
SuperMicro i875p motherboard. The graphics board is integrated which might be 
relevant to this issue since I see vt_ioctl in the backtrace.

I can reproduce the problem every time I try to.

    thank you for your help,

	Pierre-Olivier Gaillard



Aug 30 11:17:40 canopus kernel: (setfont/2332): new 1647 us maximum-latency 
critical section.
Aug 30 11:17:40 canopus kernel:  => started at: <voluntary_resched+0x35/0x70>
Aug 30 11:17:40 canopus kernel:  => ended at:   <voluntary_resched+0x35/0x70>
Aug 30 11:17:40 canopus kernel:  [<c015a0e4>] check_preempt_timing+0x1a4/0x240
Aug 30 11:17:40 canopus kernel:  [<c03aca95>] voluntary_resched+0x35/0x70
Aug 30 11:17:40 canopus kernel:  [<c03aca95>] voluntary_resched+0x35/0x70
Aug 30 11:17:40 canopus kernel:  [<c015a1b6>] touch_preempt_timing+0x36/0x40
Aug 30 11:17:40 canopus kernel:  [<c015a1b6>] touch_preempt_timing+0x36/0x40
Aug 30 11:17:40 canopus kernel:  [<c03aca95>] voluntary_resched+0x35/0x70
Aug 30 11:17:40 canopus kernel:  [<c0229b61>] copy_to_user+0x31/0x90
Aug 30 11:17:40 canopus kernel:  [<c026f143>] vt_ioctl+0x1433/0x1b80
Aug 30 11:17:40 canopus kernel:  [<c01a1027>] .text.lock.pipe+0xf/0xc8
Aug 30 11:17:40 canopus kernel:  [<c0174714>] handle_mm_fault+0x154/0x360
Aug 30 11:17:40 canopus kernel:  [<c015a119>] check_preempt_timing+0x1d9/0x240
Aug 30 11:17:40 canopus kernel:  [<c015a2b8>] sub_preempt_count+0x48/0x60
Aug 30 11:17:40 canopus kernel:  [<c018cd43>] fget+0xe3/0x180
Aug 30 11:17:40 canopus kernel:  [<c026dd10>] vt_ioctl+0x0/0x1b80
Aug 30 11:17:40 canopus kernel:  [<c02668ad>] tty_ioctl+0x57d/0x670
Aug 30 11:17:40 canopus kernel:  [<c01a8cdd>] sys_ioctl+0x23d/0x400
Aug 30 11:17:40 canopus kernel:  [<c018ba10>] sys_read+0x50/0x80
Aug 30 11:17:40 canopus kernel:  [<c0107bfd>] sysenter_past_esp+0x52/0x71

preemption latency trace v1.0.2
-------------------------------
  latency: 1647 us, entries: 82 (82)
     -----------------
     | task: setfont/2332, uid:0 nice:0 policy:0 rt_prio:0
     -----------------
  => started at: voluntary_resched+0x35/0x70
  => ended at:   voluntary_resched+0x35/0x70
=======>
00000001 0.000ms (+0.000ms): touch_preempt_timing (voluntary_resched)
00000001 0.000ms (+0.000ms): vgacon_font_set (con_font_set)
00000001 0.000ms (+0.000ms): vgacon_do_font_op (vgacon_font_set)
00010001 0.084ms (+0.083ms): do_IRQ (vgacon_do_font_op)
00010002 0.084ms (+0.000ms): ack_edge_ioapic_irq (do_IRQ)
00010002 0.084ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010001 0.084ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010001 0.084ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010002 0.085ms (+0.000ms): mark_offset_pmtmr (timer_interrupt)
00010002 0.091ms (+0.006ms): do_timer (timer_interrupt)
00010002 0.091ms (+0.000ms): update_process_times (do_timer)
00010002 0.091ms (+0.000ms): update_one_process (update_process_times)
00010002 0.091ms (+0.000ms): run_local_timers (update_process_times)
00010002 0.091ms (+0.000ms): raise_softirq (update_process_times)
00010002 0.092ms (+0.000ms): scheduler_tick (update_process_times)
00010002 0.092ms (+0.000ms): sched_clock (scheduler_tick)
00010002 0.092ms (+0.000ms): update_wall_time (do_timer)
00010002 0.092ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010002 0.093ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010002 0.093ms (+0.000ms): end_edge_ioapic_irq (do_IRQ)
00000002 0.093ms (+0.000ms): do_softirq (do_IRQ)
00000002 0.093ms (+0.000ms): __do_softirq (do_softirq)
00000002 0.093ms (+0.000ms): wake_up_process (do_softirq)
00000002 0.094ms (+0.000ms): try_to_wake_up (wake_up_process)
00000002 0.094ms (+0.000ms): task_rq_lock (try_to_wake_up)
00000003 0.094ms (+0.000ms): activate_task (try_to_wake_up)
00000003 0.094ms (+0.000ms): sched_clock (activate_task)
00000003 0.094ms (+0.000ms): recalc_task_prio (activate_task)
00000003 0.094ms (+0.000ms): effective_prio (recalc_task_prio)
00000003 0.094ms (+0.000ms): enqueue_task (activate_task)
00000002 0.095ms (+0.000ms): preempt_schedule (try_to_wake_up)
00000001 0.513ms (+0.418ms): smp_apic_timer_interrupt (vgacon_do_font_op)
00010001 0.513ms (+0.000ms): profile_hook (smp_apic_timer_interrupt)
00010002 0.513ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 0.513ms (+0.000ms): preempt_schedule (smp_apic_timer_interrupt)
00000002 0.513ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000002 0.514ms (+0.000ms): __do_softirq (do_softirq)
00010001 1.083ms (+0.569ms): do_IRQ (vgacon_do_font_op)
00010002 1.083ms (+0.000ms): ack_edge_ioapic_irq (do_IRQ)
00010002 1.084ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010001 1.084ms (+0.000ms): preempt_schedule (do_IRQ)
00010001 1.084ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010001 1.084ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010002 1.084ms (+0.000ms): mark_offset_pmtmr (timer_interrupt)
00010002 1.089ms (+0.004ms): preempt_schedule (mark_offset_pmtmr)
00010002 1.091ms (+0.001ms): preempt_schedule (timer_interrupt)
00010002 1.091ms (+0.000ms): do_timer (timer_interrupt)
00010002 1.091ms (+0.000ms): update_process_times (do_timer)
00010002 1.091ms (+0.000ms): update_one_process (update_process_times)
00010002 1.091ms (+0.000ms): run_local_timers (update_process_times)
00010002 1.091ms (+0.000ms): raise_softirq (update_process_times)
00010002 1.091ms (+0.000ms): scheduler_tick (update_process_times)
00010002 1.091ms (+0.000ms): sched_clock (scheduler_tick)
00010003 1.092ms (+0.000ms): dequeue_task (scheduler_tick)
00010003 1.092ms (+0.000ms): effective_prio (scheduler_tick)
00010003 1.092ms (+0.000ms): task_timeslice (scheduler_tick)
00010003 1.092ms (+0.000ms): enqueue_task (scheduler_tick)
00010002 1.092ms (+0.000ms): preempt_schedule (scheduler_tick)
00010002 1.093ms (+0.000ms): update_wall_time (do_timer)
00010002 1.093ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010001 1.093ms (+0.000ms): preempt_schedule (timer_interrupt)
00010002 1.093ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010002 1.093ms (+0.000ms): end_edge_ioapic_irq (do_IRQ)
00010001 1.093ms (+0.000ms): preempt_schedule (do_IRQ)
00000002 1.094ms (+0.000ms): do_softirq (do_IRQ)
00000002 1.094ms (+0.000ms): __do_softirq (do_softirq)
00000001 1.513ms (+0.418ms): smp_apic_timer_interrupt (vgacon_do_font_op)
00010001 1.513ms (+0.000ms): profile_hook (smp_apic_timer_interrupt)
00010002 1.513ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 1.513ms (+0.000ms): preempt_schedule (smp_apic_timer_interrupt)
00000002 1.513ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000002 1.513ms (+0.000ms): __do_softirq (do_softirq)
00000001 1.645ms (+0.131ms): preempt_schedule (vgacon_do_font_op)
00000001 1.645ms (+0.000ms): vgacon_adjust_height (vgacon_font_set)
00000001 1.646ms (+0.000ms): release_console_sem (con_font_set)
00000001 1.646ms (+0.000ms): preempt_schedule (release_console_sem)
00000001 1.646ms (+0.000ms): kfree (con_font_set)
00000001 1.646ms (+0.000ms): copy_to_user (vt_ioctl)
00000001 1.646ms (+0.000ms): __might_sleep (copy_to_user)
00000001 1.646ms (+0.000ms): voluntary_resched (copy_to_user)
00000001 1.647ms (+0.000ms): __might_sleep (voluntary_resched)
00000001 1.647ms (+0.000ms): touch_preempt_timing (voluntary_resched)


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: voluntary-preempt-2.6.8.1-P9 : big latency when logging on console
  2004-08-30  9:27       ` voluntary-preempt-2.6.8.1-P9 : big latency when logging on console P.O. Gaillard
@ 2004-08-30  9:41         ` Ingo Molnar
  2004-08-30 12:25           ` P.O. Gaillard
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-30  9:41 UTC (permalink / raw)
  To: P.O. Gaillard; +Cc: linux-kernel, Andrew Morton


* P.O. Gaillard <pierre-olivier.gaillard@fr.thalesgroup.com> wrote:

> Hello,
> 
> I have a 1.6ms latency every time I log in with P9.

could you try the patch below, ontop of P9? (or ontop of the latest, -Q5
patch)

The problem with font loading is that vt_ioctl runs with the BKL held
(as all ioctls) which disables preemption, but in this case it seems
pretty safe to drop the lock - the vga console has its own spinlock.

	Ingo

--- linux/drivers/video/console/vgacon.c.orig	
+++ linux/drivers/video/console/vgacon.c	
@@ -763,6 +763,7 @@ static int vgacon_do_font_op(struct vgas
 		charmap += 4 * cmapsz;
 #endif
 
+	unlock_kernel();
 	spin_lock_irq(&vga_lock);
 	/* First, the Sequencer */
 	vga_wseq(state->vgabase, VGA_SEQ_RESET, 0x1);
@@ -848,6 +849,7 @@ static int vgacon_do_font_op(struct vgas
 		vga_wattr(state->vgabase, VGA_AR_ENABLE_DISPLAY, 0);	
 	}
 	spin_unlock_irq(&vga_lock);
+	lock_kernel();
 	return 0;
 }
 

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.8.1-P9 : a few submillisecond latencies
  2004-08-28 12:14     ` Ingo Molnar
  2004-08-30  9:27       ` voluntary-preempt-2.6.8.1-P9 : big latency when logging on console P.O. Gaillard
@ 2004-08-30  9:48       ` P.O. Gaillard
  1 sibling, 0 replies; 93+ messages in thread
From: P.O. Gaillard @ 2004-08-30  9:48 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar

Hello,

here are a few submillisecond latencies.

First, the lspci of the machine (which is different from the one I just posted 
about).
http://po.gaillard.free.fr/latency/dell.lspci-v.txt

First, the 3c59x Ethernet card seems to cause 450us latencies :
http://po.gaillard.free.fr/latency/latency_bt.3c59x.txt
http://po.gaillard.free.fr/latency/latency_trace.3c59x.txt

Then the e1000 driver seems to cause 200us latencies :
http://po.gaillard.free.fr/latency/latency_bt.e1000_stats.txt
http://po.gaillard.free.fr/latency/latency_trace.e1000_stats.txt

Since the other machine, which also has an Intel e1000 controller, shows half the
latency, here is its lspci entry for the controller (note that the latency is half
as large):
02:0a.0 Ethernet controller: Intel Corp. 82541EI Gigabit Ethernet Controller 
(Copper)
         Subsystem: Intel Corp.: Unknown device 1213
         Flags: bus master, 66Mhz, medium devsel, latency 32, IRQ 22
         Memory at f2000000 (32-bit, non-prefetchable)
         I/O ports at b400 [size=64]
         Capabilities: [dc] Power Management version 2
         Capabilities: [e4] PCI-X non-bridge device.

  and thank you to Ingo for the speedy response to the console issue. I applied 
the patch and the kernel is compiling.

	P.O. Gaillard


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: voluntary-preempt-2.6.8.1-P9 : big latency when logging on console
  2004-08-30  9:41         ` Ingo Molnar
@ 2004-08-30 12:25           ` P.O. Gaillard
  0 siblings, 0 replies; 93+ messages in thread
From: P.O. Gaillard @ 2004-08-30 12:25 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton

Ingo Molnar wrote:
> * P.O. Gaillard <pierre-olivier.gaillard@fr.thalesgroup.com> wrote:
> 
> 
>>Hello,
>>
>>I have a 1.6ms latency every time I log in with P9.
> 
> 
> could you try the patch below, ontop of P9? (or ontop of the latest, -Q5
> patch)
> 
> The problem with font loading is that vt_ioctl runs with the BKL held
> (as all ioctls) which disables preemption, but in this case it seems
> pretty safe to drop the lock - the vga console has its own spinlock.
> 
Thank you very much. I had to add a "#include <linux/smp_lock.h>" at the start
of vgacon.c to get it to compile, and then:
1) I can login on the console without getting any latency above 100 microseconds.
2) Nothing seems to be broken by your change.

Note: I tested on 2.6.8.1 + P9.

	thanks !

	Pierre-Olivier



^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3
  2004-08-28 21:10                 ` Lee Revell
  2004-08-28 21:13                   ` Ingo Molnar
@ 2004-08-30 12:52                   ` Ingo Molnar
  1 sibling, 0 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-08-30 12:52 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> On Sat, 2004-08-28 at 16:31, Ingo Molnar wrote:
> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q3
> > 
> 
> I get this error: 
> 
> WARNING: /lib/modules/2.6.9-rc1-Q3/kernel/fs/ntfs/ntfs.ko needs unknown symbol unlock_kernel
> WARNING: /lib/modules/2.6.9-rc1-Q3/kernel/fs/ntfs/ntfs.ko needs unknown symbol lock_kernel
> 
> I believe this is the correct fix:
> 
> --- fs/ntfs/super.c~	2004-08-28 16:31:33.000000000 -0400
> +++ fs/ntfs/super.c	2004-08-28 17:08:11.000000000 -0400
> @@ -29,6 +29,7 @@
>  #include <linux/buffer_head.h>
>  #include <linux/vfs.h>
>  #include <linux/moduleparam.h>
> +#include <linux/smp_lock.h>

indeed. Applied your fix to my tree and it will show up in -Q6.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4
  2004-08-30  0:47                                 ` K.R. Foley
  2004-08-30  3:42                                   ` K.R. Foley
@ 2004-08-30 13:06                                   ` Alan Cox
  2004-08-30 17:37                                     ` Ingo Molnar
  2004-08-31 16:39                                     ` K.R. Foley
  1 sibling, 2 replies; 93+ messages in thread
From: Alan Cox @ 2004-08-30 13:06 UTC (permalink / raw)
  To: K.R. Foley
  Cc: Ingo Molnar, Lee Revell, Daniel Schmitt, Felipe Alfaro Solana,
	Linux Kernel Mailing List, Mark_H_Johnson

On Llu, 2004-08-30 at 01:47, K.R. Foley wrote:
> Aug 29 09:32:50 daffy kernel: requesting new irq thread for IRQ1...
> Aug 29 09:32:50 daffy kernel: atkbd.c: Spurious ACK on isa0060/serio1. 
> Some program, like XFree86, might be trying access hardware directly.

This is a known bug in the ps/2 driver layer. The printk can be
triggered by multiple quite valid situations. I've suggested it be
removed several times. Also XFree86 is a trademark so it should be
XFree86(tm) ;)

The later ones are odd. It might be interesting to try turning off USB
legacy support in the BIOS, that may be causing real problems in your
case.

Alan


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
@ 2004-08-30 14:25                               ` Thomas Charbonnel
  2004-08-30 18:00                                 ` Ingo Molnar
  2004-08-31  6:40                               ` Lee Revell
                                                 ` (5 subsequent siblings)
  6 siblings, 1 reply; 93+ messages in thread
From: Thomas Charbonnel @ 2004-08-30 14:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Lee Revell, Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson

Ingo Molnar wrote :
> i've uploaded -Q5 to:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5
> 

Here are the problematic spots for me with Q5:

rtl8139_poll (this one was also present with previous versions of the
patch) :
http://www.undata.org/~thomas/q5_rtl8139.trace

use_module (modprobe) :
preemption latency trace v1.0.2
-------------------------------
 latency: 154 us, entries: 4 (4)
    -----------------
    | task: modprobe/8172, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: resolve_symbol+0x21/0xa0
 => ended at:   resolve_symbol+0x57/0xa0
=======>
00000001 0.000ms (+0.000ms): resolve_symbol (simplify_symbols)
00000001 0.000ms (+0.000ms): __find_symbol (resolve_symbol)
00000001 0.154ms (+0.154ms): use_module (resolve_symbol)
00000001 0.154ms (+0.000ms): sub_preempt_count (resolve_symbol)

and a weird one with do_timer (called from do_IRQ) taking more than 1ms
to complete :
http://www.undata.org/~thomas/do_irq.trace

Thomas



^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4
  2004-08-30 13:06                                   ` Alan Cox
@ 2004-08-30 17:37                                     ` Ingo Molnar
  2004-08-31 16:39                                     ` K.R. Foley
  1 sibling, 0 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-08-30 17:37 UTC (permalink / raw)
  To: Alan Cox
  Cc: K.R. Foley, Lee Revell, Daniel Schmitt, Felipe Alfaro Solana,
	Linux Kernel Mailing List, Mark_H_Johnson


* Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:

> On Llu, 2004-08-30 at 01:47, K.R. Foley wrote:
> > Aug 29 09:32:50 daffy kernel: requesting new irq thread for IRQ1...
> > Aug 29 09:32:50 daffy kernel: atkbd.c: Spurious ACK on isa0060/serio1. 
> > Some program, like XFree86, might be trying access hardware directly.
> 
> This is a known bug in the ps/2 driver layer. The printk can be
> triggered by multiple quite valid situations. I've suggested it be
> removed several times. Also XFree86 is a trademark so it should be
> XFree86(tm) ;)

since the message was right during detection it was an indication of deeper
trouble - and indeed it was caused by the IRQ1 thread being starved by
init and thus the handler not running at all - the 'spurious ACK' was a
weird (and probably buggy) way of the PS2 layer telling that the
expected IRQ never arrived ...

in any case, this was a bug in the hardirq redirection code.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-30 14:25                               ` Thomas Charbonnel
@ 2004-08-30 18:00                                 ` Ingo Molnar
  2004-08-31 19:23                                   ` Thomas Charbonnel
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-30 18:00 UTC (permalink / raw)
  To: Thomas Charbonnel
  Cc: Lee Revell, Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson

[-- Attachment #1: Type: text/plain, Size: 1704 bytes --]


* Thomas Charbonnel <thomas@undata.org> wrote:

> Ingo Molnar wrote :
> > i've uploaded -Q5 to:
> > 
> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5
> > 
> 
> Here are the problematic spots for me with Q5:
> 
> rtl8139_poll (this one was also present with previous versions of the
> patch) :
> http://www.undata.org/~thomas/q5_rtl8139.trace

ok, rx processing latency again. You've set netdev_max_backlog to a low
value, right? I think we can break this particular loop independently of
netdev_max_backlog, could you try the attached patch ontop of -Q5, does
it help?

> =======>
> 00000001 0.000ms (+0.000ms): resolve_symbol (simplify_symbols)
> 00000001 0.000ms (+0.000ms): __find_symbol (resolve_symbol)
> 00000001 0.154ms (+0.154ms): use_module (resolve_symbol)
> 00000001 0.154ms (+0.000ms): sub_preempt_count (resolve_symbol)

seems resolve_symbol() is quite expensive ... no idea how to fix this
one right away, it seems to be pure algorithmic overhead.

> and a weird one with do_timer (called from do_IRQ) taking more than 1ms
> to complete :
> http://www.undata.org/~thomas/do_irq.trace

hm, indeed this is a weird one. 1 msec is too close to the timer 
frequency to be accidental. According to the trace:

 00010000 0.002ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
 00010001 0.002ms (+0.000ms): mark_offset_tsc (timer_interrupt)
 00010001 1.028ms (+1.025ms): do_timer (timer_interrupt)
 00010001 1.028ms (+0.000ms): update_process_times (do_timer)

the latency happened between the beginning of mark_offset_tsc() and the
calling of do_timer() - i.e. the delay happened somewhere within
mark_offset_tsc() itself. Is this an SMP system?

	Ingo

[-- Attachment #2: 2 --]
[-- Type: text/plain, Size: 738 bytes --]

--- linux/net/core/dev.c.orig2	
+++ linux/net/core/dev.c	
@@ -1903,7 +1903,7 @@ static void net_rx_action(struct softirq
 {
 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 	unsigned long start_time = jiffies;
-	int budget = netdev_max_backlog;
+	int budget = netdev_max_backlog, loops;
 
 	
 	local_irq_disable();
@@ -1926,7 +1926,10 @@ static void net_rx_action(struct softirq
 		dev = list_entry(queue->poll_list.next,
 				 struct net_device, poll_list);
 
-		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
+		loops = 1;
+		if (dev->quota <= 0 || dev->poll(dev, &loops)) {
+			if (loops < 1)
+				budget--;
 			local_irq_disable();
 			list_del(&dev->poll_list);
 			list_add_tail(&dev->poll_list, &queue->poll_list);

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
  2004-08-30 14:25                               ` Thomas Charbonnel
@ 2004-08-31  6:40                               ` Lee Revell
  2004-08-31  6:53                                 ` Ingo Molnar
  2004-08-31  7:06                                 ` Ingo Molnar
  2004-08-31 17:40                               ` Peter Zijlstra
                                                 ` (4 subsequent siblings)
  6 siblings, 2 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-31  6:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Mon, 2004-08-30 at 05:06, Ingo Molnar wrote:
> i've uploaded -Q5 to:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5

This fixes the PS/2 issue.  Entropy rekeying is still a big problem:

http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-bk4-Q5#/var/www/2.6.9-rc1-bk4-Q5/trace3.txt

Otherwise, this looks pretty good.  Here is a new one, I got this
starting X:

http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-bk4-Q5#/var/www/2.6.9-rc1-bk4-Q5/trace2.txt

Lee    


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31  6:40                               ` Lee Revell
@ 2004-08-31  6:53                                 ` Ingo Molnar
  2004-08-31 23:03                                   ` Lee Revell
  2004-08-31  7:06                                 ` Ingo Molnar
  1 sibling, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-31  6:53 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson, tytso


* Lee Revell <rlrevell@joe-job.com> wrote:

> On Mon, 2004-08-30 at 05:06, Ingo Molnar wrote:
> > i've uploaded -Q5 to:
> > 
> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5
> 
> This fixes the PS/2 issue.  Entropy rekeying is still a big problem:
> 
> http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-bk4-Q5#/var/www/2.6.9-rc1-bk4-Q5/trace3.txt

ok. It seems the random driver is _mostly_ in shape latency-wise, except
the IP rekeying visible in the above trace. To solve this problem, could
you try the patch below, ontop of -Q5? It moves the random seed
generation outside of the spinlock - AFAICS the spinlock is only needed
to protect the IP sequence counter itself.

	Ingo

--- linux/drivers/char/random.c.orig
+++ linux/drivers/char/random.c
@@ -2226,17 +2226,18 @@ static unsigned int ip_cnt;
 
 static void rekey_seq_generator(void *private_)
 {
-	struct keydata *keyptr;
+	struct keydata *keyptr, tmp;
 	struct timeval 	tv;
 
 	do_gettimeofday(&tv);
+	get_random_bytes(tmp.secret, sizeof(tmp.secret));
 
 	spin_lock_bh(&ip_lock);
 	keyptr = &ip_keydata[ip_cnt&1];
 
 	keyptr = &ip_keydata[1^(ip_cnt&1)];
 	keyptr->rekey_time = tv.tv_sec;
-	get_random_bytes(keyptr->secret, sizeof(keyptr->secret));
+	memcpy(keyptr->secret, tmp.secret, sizeof(keyptr->secret));
 	keyptr->count = (ip_cnt&COUNT_MASK)<<HASH_BITS;
 	mb();
 	ip_cnt++;

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31  6:40                               ` Lee Revell
  2004-08-31  6:53                                 ` Ingo Molnar
@ 2004-08-31  7:06                                 ` Ingo Molnar
  2004-08-31 19:21                                   ` Lee Revell
  1 sibling, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-31  7:06 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> Otherwise, this looks pretty good.  Here is a new one, I got this
> starting X:
> 
> http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-bk4-Q5#/var/www/2.6.9-rc1-bk4-Q5/trace2.txt

ok, MTRR setting overhead. It is not quite clear to me which precise
code took so much time, could you stick a couple of 'mcount();' lines
into arch/i386/kernel/cpu/mtrr/generic.c's prepare_set() and
generic_set_mtrr() functions? In particular the wbinvd() [cache
invalidation] instructions within prepare_set() look like a possible
source of latency.

(explicit calls to mcount() can be used to break up latency paths
manually - they wont affect the latency itself, they make the resulting
trace more finegrained.)

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4
  2004-08-30 13:06                                   ` Alan Cox
  2004-08-30 17:37                                     ` Ingo Molnar
@ 2004-08-31 16:39                                     ` K.R. Foley
  1 sibling, 0 replies; 93+ messages in thread
From: K.R. Foley @ 2004-08-31 16:39 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Lee Revell, Daniel Schmitt, Felipe Alfaro Solana,
	Linux Kernel Mailing List, Mark_H_Johnson

Alan Cox wrote:
> On Llu, 2004-08-30 at 01:47, K.R. Foley wrote:
> 
>>Aug 29 09:32:50 daffy kernel: requesting new irq thread for IRQ1...
>>Aug 29 09:32:50 daffy kernel: atkbd.c: Spurious ACK on isa0060/serio1. 
>>Some program, like XFree86, might be trying access hardware directly.
> 
> 
> This is a known bug in the ps/2 driver layer. The printk can be
> triggered by multiple quite valid situations. I've suggested it be
> removed several times. Also XFree86 is a trademark so it should be
> XFree86(tm) ;)
> 

Thanks for pointing this out. It would appear that I get these same
messages when things are working properly, with the exception of getting
"serio0" when it works vs. "serio1" when it doesn't.

> The later ones are odd. It might be interesting to try turning off USB
> legacy support in the BIOS, that may be causing real problems in your
> case.
> 
I didn't try this because Ingo's latest patch seems to have resolved it.

> Alan
> 
> 
Thanks,

kr

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
  2004-08-30 14:25                               ` Thomas Charbonnel
  2004-08-31  6:40                               ` Lee Revell
@ 2004-08-31 17:40                               ` Peter Zijlstra
  2004-09-01  1:43                               ` Lee Revell
                                                 ` (3 subsequent siblings)
  6 siblings, 0 replies; 93+ messages in thread
From: Peter Zijlstra @ 2004-08-31 17:40 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

On Mon, 2004-08-30 at 11:06 +0200, Ingo Molnar wrote:
> i've uploaded -Q5 to:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5
> 
> ontop of:
> 
>   http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2
> 
> -Q5 should fix the PS2 problems and the early boot problems, and it
> might even fix the USB, ACPI and APIC problems some people were
> reporting.
> 
> There were a number of bugs that led to the PS2 problems:
> 
>  - a change to __cond_resched() in the -Q series caused the starvation
>    of the IRQ1 and IRQ12 threads during init - causing a silent timeout
>    and misdetection in the ps2 driver(s).
> 
>  - even with the starvation bug fixed, we must set system_state to
>    SCHEDULER_OK only once the init thread has started - otherwise the
>    idle thread might hang during bootup.
> 
>  - the redirected IRQ handling now matches that of non-redirected IRQs
>    better, the outer loop in generic_handle_IRQ has been flattened.
> 
> i also re-added the synchronize_irq() fix, it was not causing the PS2
> problems.
> 
> 	Ingo

Hi Ingo,

this one is great, it's been rock solid for over 24h now. No more SMP
problems for me. Thanks for all the hard work.

Kind regards.

-- 
Peter Zijlstra <a.p.zijlstra@chello.nl>


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31  7:06                                 ` Ingo Molnar
@ 2004-08-31 19:21                                   ` Lee Revell
  2004-08-31 19:37                                     ` Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-31 19:21 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Tue, 2004-08-31 at 03:06, Ingo Molnar wrote:
> * Lee Revell <rlrevell@joe-job.com> wrote:
> 
> > Otherwise, this looks pretty good.  Here is a new one, I got this
> > starting X:
> > 
> > http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-bk4-Q5#/var/www/2.6.9-rc1-bk4-Q5/trace2.txt
> 
> ok, MTRR setting overhead. It is not quite clear to me which precise
> code took so much time, could you stick a couple of 'mcount();' lines
> into arch/i386/kernel/cpu/mtrr/generic.c's prepare_set() and
> generic_set_mtrr() functions? In particular the wbinvd() [cache
> invalidation] instructions within prepare_set() look like a possible
> source of latency.
> 
> (explicit calls to mcount() can be used to break up latency paths
> manually - they wont affect the latency itself, they make the resulting
> trace more finegrained.)

OK, here is the trace after adding a bunch of mcount()s:

preemption latency trace v1.0.2
-------------------------------
 latency: 713 us, entries: 31 (31)
    -----------------
    | task: X/1398, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: cond_resched+0xd/0x40
 => ended at:   sys_ioctl+0xdf/0x290
=======>
00000001 0.000ms (+0.000ms): touch_preempt_timing (cond_resched)
00000001 0.000ms (+0.000ms): generic_get_mtrr (mtrr_add_page)
00000001 0.001ms (+0.000ms): generic_get_mtrr (mtrr_add_page)
00000001 0.002ms (+0.000ms): generic_get_mtrr (mtrr_add_page)
00000001 0.002ms (+0.000ms): generic_get_mtrr (mtrr_add_page)
00000001 0.003ms (+0.000ms): generic_get_mtrr (mtrr_add_page)
00000001 0.004ms (+0.000ms): generic_get_mtrr (mtrr_add_page)
00000001 0.004ms (+0.000ms): generic_get_mtrr (mtrr_add_page)
00000001 0.005ms (+0.000ms): generic_get_mtrr (mtrr_add_page)
00000001 0.005ms (+0.000ms): generic_get_free_region (mtrr_add_page)
00000001 0.006ms (+0.000ms): generic_get_mtrr (generic_get_free_region)
00000001 0.006ms (+0.000ms): generic_get_mtrr (generic_get_free_region)
00000001 0.007ms (+0.000ms): generic_get_mtrr (generic_get_free_region)
00000001 0.008ms (+0.000ms): generic_get_mtrr (generic_get_free_region)
00000001 0.008ms (+0.000ms): set_mtrr (mtrr_add_page)
00000001 0.009ms (+0.000ms): generic_set_mtrr (set_mtrr)
00000001 0.009ms (+0.000ms): generic_set_mtrr (set_mtrr)
00000001 0.009ms (+0.000ms): prepare_set (generic_set_mtrr)
00000002 0.010ms (+0.000ms): prepare_set (generic_set_mtrr)
00000002 0.010ms (+0.000ms): prepare_set (generic_set_mtrr)
00000002 0.375ms (+0.364ms): prepare_set (generic_set_mtrr)
00000002 0.375ms (+0.000ms): prepare_set (generic_set_mtrr)
00000002 0.526ms (+0.150ms): prepare_set (generic_set_mtrr)
00000002 0.534ms (+0.008ms): generic_set_mtrr (set_mtrr)
00000002 0.541ms (+0.007ms): generic_set_mtrr (set_mtrr)
00000002 0.548ms (+0.006ms): generic_set_mtrr (set_mtrr)
00000002 0.552ms (+0.004ms): post_set (generic_set_mtrr)
00000001 0.708ms (+0.155ms): set_mtrr (mtrr_add_page)
00000001 0.713ms (+0.005ms): sub_preempt_count (sys_ioctl)
00000001 0.714ms (+0.000ms): _mmx_memcpy (check_preempt_timing)
00000001 0.715ms (+0.000ms): kernel_fpu_begin (_mmx_memcpy)

And here is a patch showing where I added the mcount()s, with some extra
context for clarity:

--- linux-2.6.8.1-Q3-preemptible-hardirqs/arch/i386/kernel/cpu/mtrr/generic.c	2004-08-14 06:55:33.000000000 -0400
+++ linux-2.6.9-rc1-bk4-Q5/arch/i386/kernel/cpu/mtrr/generic.c	2004-08-31 15:05:36.000000000 -0400
@@ -234,28 +234,33 @@
 static spinlock_t set_atomicity_lock = SPIN_LOCK_UNLOCKED;
 
 static void prepare_set(void)
 {
 	unsigned long cr0;
 
 	/*  Note that this is not ideal, since the cache is only flushed/disabled
 	   for this CPU while the MTRRs are changed, but changing this requires
 	   more invasive changes to the way the kernel boots  */
 	spin_lock(&set_atomicity_lock);
 
 	/*  Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
+	mcount();
 	cr0 = read_cr0() | 0x40000000;	/* set CD flag */
+	mcount();
 	wbinvd();
+	mcount();
 	write_cr0(cr0);
+	mcount();
 	wbinvd();
+	mcount(); 
 
 	/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
 	if ( cpu_has_pge ) {
 		cr4 = read_cr4();
 		write_cr4(cr4 & (unsigned char) ~(1 << 7));
 	}
 
 	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
 	__flush_tlb();
 
 	/*  Save MTRR state */
 	rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
@@ -305,38 +310,41 @@
 static void generic_set_mtrr(unsigned int reg, unsigned long base,
 			     unsigned long size, mtrr_type type)
 /*  [SUMMARY] Set variable MTRR register on the local CPU.
     <reg> The register to set.
     <base> The base address of the region.
     <size> The size of the region. If this is 0 the region is disabled.
     <type> The type of the region.
     <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
     be done externally.
     [RETURNS] Nothing.
 */
 {
+	mcount();
 	prepare_set();
-
+	mcount();
 	if (size == 0) {
 		/* The invalid bit is kept in the mask, so we simply clear the
 		   relevant mask register to disable a range. */
 		wrmsr(MTRRphysMask_MSR(reg), 0, 0);
 	} else {
 		wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type,
 		      (base & size_and_mask) >> (32 - PAGE_SHIFT));
+		mcount();
 		wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800,
 		      (-size & size_and_mask) >> (32 - PAGE_SHIFT));
 	}
-
+	mcount();
 	post_set();
+	mcount();
 }
 
 int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
 {
 	unsigned long lbase, last;
 
 	/*  For Intel PPro stepping <= 7, must be 4 MiB aligned 
 	    and not touch 0x70000000->0x7003FFFF */
 	if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
 	    boot_cpu_data.x86_model == 1 &&
 	    boot_cpu_data.x86_mask <= 7) {
 		if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {


Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-30 18:00                                 ` Ingo Molnar
@ 2004-08-31 19:23                                   ` Thomas Charbonnel
  2004-08-31 19:30                                     ` Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Thomas Charbonnel @ 2004-08-31 19:23 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Lee Revell, Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson

Ingo Molnar wrote :

(...)
> > and a weird one with do_timer (called from do_IRQ) taking more than 1ms
> > to complete :
> > http://www.undata.org/~thomas/do_irq.trace
> 
> hm, indeed this is a weird one. 1 msec is too close to the timer 
> frequency to be accidental. According to the trace:
> 
>  00010000 0.002ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
>  00010001 0.002ms (+0.000ms): mark_offset_tsc (timer_interrupt)
>  00010001 1.028ms (+1.025ms): do_timer (timer_interrupt)
>  00010001 1.028ms (+0.000ms): update_process_times (do_timer)
> 
> the latency happened between the beginning of mark_offset_tsc() and the
> calling of do_timer() - i.e. the delay happened somewhere within
> mark_offset_tsc() itself. Is this an SMP system?
> 
> 	Ingo

It isn't an SMP system, but here are some other traces that can prove
useful :
preemption latency trace v1.0.2
-------------------------------
 latency: 567 us, entries: 35 (35)
    -----------------
    | task: swapper/0, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: do_IRQ+0x19/0x190
 => ended at:   do_IRQ+0x13d/0x190
=======>
00010000 0.000ms (+0.000ms): do_IRQ (common_interrupt)
00010000 0.000ms (+0.000ms): do_IRQ (default_idle)
00010001 0.000ms (+0.000ms): mask_and_ack_8259A (do_IRQ)
00010001 0.002ms (+0.002ms): generic_redirect_hardirq (do_IRQ)
00010000 0.002ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010000 0.002ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010001 0.003ms (+0.000ms): mark_offset_tsc (timer_interrupt)
00010001 0.562ms (+0.559ms): do_timer (timer_interrupt)
00010001 0.562ms (+0.000ms): update_process_times (do_timer)
00010001 0.562ms (+0.000ms): update_one_process (update_process_times)
00010001 0.562ms (+0.000ms): run_local_timers (update_process_times)
00010001 0.562ms (+0.000ms): raise_softirq (update_process_times)
00010001 0.562ms (+0.000ms): scheduler_tick (update_process_times)
00010001 0.562ms (+0.000ms): sched_clock (scheduler_tick)
00010001 0.563ms (+0.000ms): update_wall_time (do_timer)
00010001 0.563ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010001 0.563ms (+0.000ms): profile_tick (timer_interrupt)
00010001 0.563ms (+0.000ms): profile_hook (profile_tick)
00010002 0.563ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 0.564ms (+0.000ms): profile_hit (timer_interrupt)
00010001 0.564ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010001 0.564ms (+0.000ms): end_8259A_irq (do_IRQ)
00010001 0.564ms (+0.000ms): enable_8259A_irq (do_IRQ)
00000001 0.565ms (+0.000ms): do_softirq (do_IRQ)
00000001 0.565ms (+0.000ms): __do_softirq (do_softirq)
00000001 0.565ms (+0.000ms): wake_up_process (do_softirq)
00000001 0.565ms (+0.000ms): try_to_wake_up (wake_up_process)
00000001 0.566ms (+0.000ms): task_rq_lock (try_to_wake_up)
00000002 0.566ms (+0.000ms): activate_task (try_to_wake_up)
00000002 0.566ms (+0.000ms): sched_clock (activate_task)
00000002 0.566ms (+0.000ms): recalc_task_prio (activate_task)
00000002 0.566ms (+0.000ms): effective_prio (recalc_task_prio)
00000002 0.567ms (+0.000ms): enqueue_task (activate_task)
00000001 0.567ms (+0.000ms): preempt_schedule (try_to_wake_up)
00000001 0.567ms (+0.000ms): sub_preempt_count (do_IRQ)

preemption latency trace v1.0.2
-------------------------------
 latency: 624 us, entries: 35 (35)
    -----------------
    | task: swapper/0, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: do_IRQ+0x19/0x190
 => ended at:   do_IRQ+0x13d/0x190
=======>
00010000 0.000ms (+0.000ms): do_IRQ (common_interrupt)
00010000 0.000ms (+0.000ms): do_IRQ (default_idle)
00010001 0.000ms (+0.000ms): mask_and_ack_8259A (do_IRQ)
00010001 0.613ms (+0.612ms): generic_redirect_hardirq (do_IRQ)
00010000 0.613ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010000 0.613ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010001 0.613ms (+0.000ms): mark_offset_tsc (timer_interrupt)
00010001 0.619ms (+0.005ms): do_timer (timer_interrupt)
00010001 0.619ms (+0.000ms): update_process_times (do_timer)
00010001 0.619ms (+0.000ms): update_one_process (update_process_times)
00010001 0.619ms (+0.000ms): run_local_timers (update_process_times)
00010001 0.619ms (+0.000ms): raise_softirq (update_process_times)
00010001 0.619ms (+0.000ms): scheduler_tick (update_process_times)
00010001 0.619ms (+0.000ms): sched_clock (scheduler_tick)
00010001 0.620ms (+0.000ms): update_wall_time (do_timer)
00010001 0.620ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010001 0.620ms (+0.000ms): profile_tick (timer_interrupt)
00010001 0.620ms (+0.000ms): profile_hook (profile_tick)
00010002 0.620ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 0.621ms (+0.000ms): profile_hit (timer_interrupt)
00010001 0.621ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010001 0.621ms (+0.000ms): end_8259A_irq (do_IRQ)
00010001 0.621ms (+0.000ms): enable_8259A_irq (do_IRQ)
00000001 0.622ms (+0.000ms): do_softirq (do_IRQ)
00000001 0.622ms (+0.000ms): __do_softirq (do_softirq)
00000001 0.622ms (+0.000ms): wake_up_process (do_softirq)
00000001 0.622ms (+0.000ms): try_to_wake_up (wake_up_process)
00000001 0.623ms (+0.000ms): task_rq_lock (try_to_wake_up)
00000002 0.623ms (+0.000ms): activate_task (try_to_wake_up)
00000002 0.623ms (+0.000ms): sched_clock (activate_task)
00000002 0.623ms (+0.000ms): recalc_task_prio (activate_task)
00000002 0.623ms (+0.000ms): effective_prio (recalc_task_prio)
00000002 0.623ms (+0.000ms): enqueue_task (activate_task)
00000001 0.624ms (+0.000ms): preempt_schedule (try_to_wake_up)
00000001 0.624ms (+0.000ms): sub_preempt_count (do_IRQ)

As you can see ~1ms was probably an accident, and the latency does not
always come from do_timer. The constant is do_IRQ interrupting the idle
thread.

Thomas



^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 19:23                                   ` Thomas Charbonnel
@ 2004-08-31 19:30                                     ` Ingo Molnar
  2004-08-31 19:45                                       ` Thomas Charbonnel
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-31 19:30 UTC (permalink / raw)
  To: Thomas Charbonnel
  Cc: Lee Revell, Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson


* Thomas Charbonnel <thomas@undata.org> wrote:

> As you can see ~1ms was probably an accident, and the latency does not
> always come from do_timer. The constant is do_IRQ interrupting the
> idle thread.

(do you have any sort of powersaving mode (ACPI/APM) enabled? If yes,
could you try to tune it down as much as possible - disable any
powersaving option in the BIOS and in the .config - kill apmd, etc.)

but i dont think it's powersaving - why would such an overhead show up
in those functions. The only common thing seems to be that both
mark_offset_tsc() and mask_and_ack_8259A() do port IO, which is slow -
but still it shouldnt take ~0.5 msecs!

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 19:21                                   ` Lee Revell
@ 2004-08-31 19:37                                     ` Ingo Molnar
  2004-08-31 19:47                                       ` Lee Revell
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-31 19:37 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> 00000001 0.009ms (+0.000ms): generic_set_mtrr (set_mtrr)
> 00000001 0.009ms (+0.000ms): prepare_set (generic_set_mtrr)

this is the call to prepare_set() [implicit mcount()].

> 00000002 0.010ms (+0.000ms): prepare_set (generic_set_mtrr)

explicit mcount() #1,

> 00000002 0.010ms (+0.000ms): prepare_set (generic_set_mtrr)

#2,

> 00000002 0.375ms (+0.364ms): prepare_set (generic_set_mtrr)

#3. So the latency is this codepath:

+       mcount();
        wbinvd();
+       mcount();

bingo ...

to continue:

> 00000002 0.375ms (+0.000ms): prepare_set (generic_set_mtrr)

mcount #4

> 00000002 0.526ms (+0.150ms): prepare_set (generic_set_mtrr)

#5. This means the following code had the latency:

        write_cr0(cr0);
+       mcount();
        wbinvd();
+       mcount();

the other wbinvd(). Since we didnt execute all that much it didnt take
as much time as the first wbinvd() [the cache was just write-flushed, so
less flushing had to be done second time around].

plus:

 00000002 0.548ms (+0.006ms): generic_set_mtrr (set_mtrr)
 00000002 0.552ms (+0.004ms): post_set (generic_set_mtrr)
 00000001 0.708ms (+0.155ms): set_mtrr (mtrr_add_page)
 00000001 0.713ms (+0.005ms): sub_preempt_count (sys_ioctl)

proves that it's post_set() that took 155 usecs here, which too does a 
wbinvd().

so it's the invalidation of the cache that takes so long.

i believe that the invalidations are excessive. It is quite likely that
no invalidation has to be done at all. Does your box still start up X
fine if you comment out all those wbinvd() calls?

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 19:30                                     ` Ingo Molnar
@ 2004-08-31 19:45                                       ` Thomas Charbonnel
  0 siblings, 0 replies; 93+ messages in thread
From: Thomas Charbonnel @ 2004-08-31 19:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Lee Revell, Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson

Ingo Molnar wrote :
> * Thomas Charbonnel <thomas@undata.org> wrote:
> 
> > As you can see ~1ms was probably an accident, and the latency does not
> > always come from do_timer. The constant is do_IRQ interrupting the
> > idle thread.
> 
> (do you have any sort of powersaving mode (ACPI/APM) enabled? If yes,
> could you try to tune it down as much as possible - disable any
> powersaving option in the BIOS and in the .config - kill apmd, etc.)
> 
> but i dont think it's powersaving - why would such an overhead show up
> in those functions. The only common thing seems to be that both
> mark_offset_tsc() and mask_and_ack_8259A() do port IO, which is slow -
> but still it shouldnt take ~0.5 msecs!
> 
> 	Ingo

Indeed, I just checked and my xrun every ~8 seconds problem is back. I
have acpi compiled in but acpi=off, but it doesn't seem to be honoured
(it was with 2.6.8.1, IIRC).

Thomas





^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 19:37                                     ` Ingo Molnar
@ 2004-08-31 19:47                                       ` Lee Revell
  2004-08-31 19:51                                         ` Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-31 19:47 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Tue, 2004-08-31 at 15:37, Ingo Molnar wrote:
> i believe that the invalidations are excessive. It is quite likely that
> no invalidation has to be done at all. Does your box still start up X
> fine if you comment out all those wbinvd() calls?
> 

Commented out all calls to wbinvd(), seems to work fine.  I even tried
repeatedly killing the X server before it could finish starting, no
problems at all.

I guess the worst that could happen here would be display corruption,
which would get fixed on the next refresh?

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 19:47                                       ` Lee Revell
@ 2004-08-31 19:51                                         ` Ingo Molnar
  2004-08-31 20:09                                           ` Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-31 19:51 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> Commented out all calls to wbinvd(), seems to work fine.  I even tried
> repeatedly killing the X server before it could finish starting, no
> problems at all.
> 
> I guess the worst that could happen here would be display corruption,
> which would get fixed on the next refresh?

it's more complex than that - MTRR's are caching attributes that the CPU
listens to. Mis-setting them can cause anything from memory corruption
to hard lockups. The question is, does any of the Intel (or AMD) docs
say that the CPU cache has to be write-back flushed when setting MTRRs,
or were those calls only done out of paranoia?

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 19:51                                         ` Ingo Molnar
@ 2004-08-31 20:09                                           ` Ingo Molnar
  2004-08-31 20:10                                             ` Lee Revell
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-31 20:09 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Ingo Molnar <mingo@elte.hu> wrote:

> it's more complex than that - MTRR's are caching attributes that the
> CPU listens to. Mis-setting them can cause anything from memory
> corruption to hard lockups. The question is, does any of the Intel (or
> AMD) docs say that the CPU cache has to be write-back flushed when
> setting MTRRs, or were those calls only done out of paranoia?

the Intel docs suggest a cache-flush when changing MTRR's, so i guess
we've got to live with this. _Perhaps_ we could move the cache-disabling
and the wbinvd() out of the spinlocked section, but this would make it
preemptable, possibly causing other tasks to run with the CPU cache
disabled! I'd say that is worse than a single 0.5 msec latency during
MTRR setting.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 20:09                                           ` Ingo Molnar
@ 2004-08-31 20:10                                             ` Lee Revell
  2004-08-31 20:14                                               ` Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-31 20:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Tue, 2004-08-31 at 16:09, Ingo Molnar wrote:
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > it's more complex than that - MTRR's are caching attributes that the
> > CPU listens to. Mis-setting them can cause anything from memory
> > corruption to hard lockups. The question is, does any of the Intel (or
> > AMD) docs say that the CPU cache has to be write-back flushed when
> > setting MTRRs, or were those calls only done out of paranoia?
> 
> the Intel docs suggest a cache-flush when changing MTRR's, so i guess
> we've got to live with this. _Perhaps_ we could move the cache-disabling
> and the wbinvd() out of the spinlocked section, but this would make it
> preemptable, possibly causing other tasks to run with the CPU cache
> disabled! I'd say that is worse than a single 0.5 msec latency during
> MTRR setting.
> 

File under boot-time stuff, I guess.  This could be bad if X crashes,
but I can't remember the last time this happened to me, and I use xorg
CVS.

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 20:10                                             ` Lee Revell
@ 2004-08-31 20:14                                               ` Ingo Molnar
  2004-08-31 20:20                                                 ` Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-31 20:14 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> File under boot-time stuff, I guess.  This could be bad if X crashes,
> but I can't remember the last time this happened to me, and I use xorg
> CVS.

but the first wbinvd() within prepare_set() seems completely unnecessary
- we can flush the cache after disabling the cache just fine.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 20:14                                               ` Ingo Molnar
@ 2004-08-31 20:20                                                 ` Ingo Molnar
  2004-08-31 20:34                                                   ` Lee Revell
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-31 20:20 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Lee Revell <rlrevell@joe-job.com> wrote:
> 
> > File under boot-time stuff, I guess.  This could be bad if X crashes,
> > but I can't remember the last time this happened to me, and I use xorg
> > CVS.
> 
> but the first wbinvd() within prepare_set() seems completely unnecessary
> - we can flush the cache after disabling the cache just fine.

the third wbinvd() in post_set() seems unnecessary too - what kind of
cache do we expect to flush, we've disabled caching in the CPU ... But
the Intel pseudocode does it too - this is a thinko i think.

another thing is that interrupts are not disabled (although the Intel
docs suggest so). It is best to disable interrupts because any handler
executing in this window will perform extremely slowly (because caches
are disabled), and might even interfere with MTRR setting. Best disable
IRQs.

so ... could you try the patch below - does it work and how does the
latency look like now? (ontop of an unmodified generic.c)

	Ingo

--- linux/arch/i386/kernel/cpu/mtrr/generic.c.orig	
+++ linux/arch/i386/kernel/cpu/mtrr/generic.c	
@@ -240,11 +240,14 @@ static void prepare_set(void)
 	/*  Note that this is not ideal, since the cache is only flushed/disabled
 	   for this CPU while the MTRRs are changed, but changing this requires
 	   more invasive changes to the way the kernel boots  */
-	spin_lock(&set_atomicity_lock);
+	/*
+	 * Since we are disabling the cache dont allow any interrupts - they
+	 * would run extremely slow and would only increase the pain:
+	 */
+	spin_lock_irq(&set_atomicity_lock);
 
 	/*  Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
 	cr0 = read_cr0() | 0x40000000;	/* set CD flag */
-	wbinvd();
 	write_cr0(cr0);
 	wbinvd();
 
@@ -266,8 +269,7 @@ static void prepare_set(void)
 
 static void post_set(void)
 {
-	/*  Flush caches and TLBs  */
-	wbinvd();
+	/*  Flush TLBs (no need to flush caches - they are disabled)  */
 	__flush_tlb();
 
 	/* Intel (P6) standard MTRRs */
@@ -279,7 +281,7 @@ static void post_set(void)
 	/*  Restore value of CR4  */
 	if ( cpu_has_pge )
 		write_cr4(cr4);
-	spin_unlock(&set_atomicity_lock);
+	spin_unlock_irq(&set_atomicity_lock);
 }
 
 static void generic_set_all(void)

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 20:20                                                 ` Ingo Molnar
@ 2004-08-31 20:34                                                   ` Lee Revell
  2004-08-31 20:39                                                     ` Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-31 20:34 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Tue, 2004-08-31 at 16:20, Ingo Molnar wrote:
> so ... could you try the patch below - does it work and how does the
> latency look like now? (ontop of an unmodified generic.c)
> 

Now it looks like this:

preemption latency trace v1.0.2
-------------------------------
 latency: 574 us, entries: 19 (19)
    -----------------
    | task: X/1391, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: cond_resched+0xd/0x40
 => ended at:   sys_ioctl+0xdf/0x290
=======>
00000001 0.000ms (+0.000ms): touch_preempt_timing (cond_resched)
00000001 0.000ms (+0.000ms): do_blank_screen (vt_ioctl)
00000001 0.000ms (+0.000ms): is_console_locked (do_blank_screen)
00000001 0.001ms (+0.000ms): hide_cursor (do_blank_screen)
00000001 0.002ms (+0.000ms): vgacon_cursor (hide_cursor)
00000001 0.004ms (+0.001ms): hide_softcursor (do_blank_screen)
00000001 0.004ms (+0.000ms): is_console_locked (do_blank_screen)
00000001 0.004ms (+0.000ms): vgacon_save_screen (do_blank_screen)
00000001 0.005ms (+0.000ms): _mmx_memcpy (vgacon_save_screen)
00000001 0.006ms (+0.000ms): kernel_fpu_begin (_mmx_memcpy)
00000001 0.481ms (+0.475ms): vgacon_blank (do_blank_screen)
00000001 0.481ms (+0.000ms): vgacon_set_origin (vgacon_blank)
00000001 0.573ms (+0.091ms): set_origin (vt_ioctl)
00000001 0.573ms (+0.000ms): is_console_locked (set_origin)
00000001 0.573ms (+0.000ms): vgacon_set_origin (set_origin)
00000001 0.574ms (+0.000ms): release_console_sem (vt_ioctl)
00000001 0.575ms (+0.000ms): sub_preempt_count (sys_ioctl)
00000001 0.575ms (+0.000ms): _mmx_memcpy (check_preempt_timing)
00000001 0.575ms (+0.000ms): kernel_fpu_begin (_mmx_memcpy)

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 20:34                                                   ` Lee Revell
@ 2004-08-31 20:39                                                     ` Ingo Molnar
  2004-08-31 20:41                                                       ` Lee Revell
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-08-31 20:39 UTC (permalink / raw)
  To: Lee Revell
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson


* Lee Revell <rlrevell@joe-job.com> wrote:

> On Tue, 2004-08-31 at 16:20, Ingo Molnar wrote:
> > so ... could you try the patch below - does it work and how does the
> > latency look like now? (ontop of an unmodified generic.c)
> > 
> 
> Now it looks like this:
> 
> preemption latency trace v1.0.2
> -------------------------------
>  latency: 574 us, entries: 19 (19)
>     -----------------
>     | task: X/1391, uid:0 nice:0 policy:0 rt_prio:0
>     -----------------
>  => started at: cond_resched+0xd/0x40
>  => ended at:   sys_ioctl+0xdf/0x290
> =======>
> 00000001 0.000ms (+0.000ms): touch_preempt_timing (cond_resched)
> 00000001 0.000ms (+0.000ms): do_blank_screen (vt_ioctl)
> 00000001 0.000ms (+0.000ms): is_console_locked (do_blank_screen)
> 00000001 0.001ms (+0.000ms): hide_cursor (do_blank_screen)
> 00000001 0.002ms (+0.000ms): vgacon_cursor (hide_cursor)
> 00000001 0.004ms (+0.001ms): hide_softcursor (do_blank_screen)
> 00000001 0.004ms (+0.000ms): is_console_locked (do_blank_screen)
> 00000001 0.004ms (+0.000ms): vgacon_save_screen (do_blank_screen)
> 00000001 0.005ms (+0.000ms): _mmx_memcpy (vgacon_save_screen)
> 00000001 0.006ms (+0.000ms): kernel_fpu_begin (_mmx_memcpy)
> 00000001 0.481ms (+0.475ms): vgacon_blank (do_blank_screen)
> 00000001 0.481ms (+0.000ms): vgacon_set_origin (vgacon_blank)
> 00000001 0.573ms (+0.091ms): set_origin (vt_ioctl)
> 00000001 0.573ms (+0.000ms): is_console_locked (set_origin)
> 00000001 0.573ms (+0.000ms): vgacon_set_origin (set_origin)
> 00000001 0.574ms (+0.000ms): release_console_sem (vt_ioctl)
> 00000001 0.575ms (+0.000ms): sub_preempt_count (sys_ioctl)
> 00000001 0.575ms (+0.000ms): _mmx_memcpy (check_preempt_timing)
> 00000001 0.575ms (+0.000ms): kernel_fpu_begin (_mmx_memcpy)

hm, this doesnt seem to be an mtrr latency - this is a text-console 
blanking operation apparently running with the BKL enabled.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 20:39                                                     ` Ingo Molnar
@ 2004-08-31 20:41                                                       ` Lee Revell
  0 siblings, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-08-31 20:41 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Tue, 2004-08-31 at 16:39, Ingo Molnar wrote:

> hm, this doesnt seem to be an mtrr latency - this is a text-console 
> blanking operation apparently running with the BKL enabled.
> 

Yes, this seemed strange to me too, but I reproduced this several times,
even across reboots.  This one must have been happening right before the
MTRR initialization, and was getting masked by it.

Lee 


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31  6:53                                 ` Ingo Molnar
@ 2004-08-31 23:03                                   ` Lee Revell
  2004-09-01 15:52                                     ` Martin Josefsson
  0 siblings, 1 reply; 93+ messages in thread
From: Lee Revell @ 2004-08-31 23:03 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson, tytso

On Tue, 2004-08-31 at 02:53, Ingo Molnar wrote:
> > > 
> > >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5
>
> ok. It seems the random driver is _mostly_ in shape latency-wise, except
> the IP rekeying visible in the above trace. To solve this problem, could
> you try the patch below, ontop of -Q5? It moves the random seed
> generation outside of the spinlock - AFAICS the spinlock is only needed
> to protect the IP sequence counter itself.

This solves the problem with the random driver.  The worst latencies I
am seeing are in netif_receive_skb().  With netdev_max_backlog set to 8,
the worst is about 160 usecs:

http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-Q5#/var/www/2.6.9-rc1-Q5/trace2.txt
http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-Q5#/var/www/2.6.9-rc1-Q5/trace3.txt

Setting netdev_max_backlog to 1 has no effect:

http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-Q5#/var/www/2.6.9-rc1-Q5/trace4.txt

I would expect this one to scale with CPU speed, so this is pretty good
considering my relatively underpowered system.  I would imagine on a
fast UP system you would not see any latencies worse than 100 usecs.

Lee





^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
                                                 ` (2 preceding siblings ...)
  2004-08-31 17:40                               ` Peter Zijlstra
@ 2004-09-01  1:43                               ` Lee Revell
  2004-09-01  2:30                               ` Lee Revell
                                                 ` (2 subsequent siblings)
  6 siblings, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-09-01  1:43 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Mon, 2004-08-30 at 05:06, Ingo Molnar wrote:
> i've uploaded -Q5 to:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5

This is with netdev_max_backlog = 1:

preemption latency trace v1.0.2
-------------------------------
 latency: 386 us, entries: 328 (328)
    -----------------
    | task: ksoftirqd/0/2, uid:0 nice:-10 policy:0 rt_prio:0
    -----------------
 => started at: tcp_delack_timer+0x1c/0x1d0
 => ended at:   tcp_delack_timer+0x127/0x1d0
=======>
00000001 0.000ms (+0.000ms): tcp_delack_timer (run_timer_softirq)
00000001 0.000ms (+0.000ms): __sk_stream_mem_reclaim (tcp_delack_timer)
00000001 0.004ms (+0.003ms): tcp_v4_do_rcv (tcp_delack_timer)
00000001 0.005ms (+0.001ms): tcp_rcv_established (tcp_v4_do_rcv)
00000001 0.007ms (+0.001ms): __tcp_checksum_complete_user (tcp_rcv_established)
00000001 0.008ms (+0.000ms): skb_checksum (__tcp_checksum_complete_user)
00000001 0.018ms (+0.010ms): tcp_ack (tcp_rcv_established)
00000001 0.019ms (+0.001ms): tcp_ack_update_window (tcp_ack)
00000001 0.021ms (+0.001ms): tcp_urg (tcp_rcv_established)
00000001 0.021ms (+0.000ms): tcp_data_queue (tcp_rcv_established)
00000001 0.023ms (+0.001ms): sk_stream_mem_schedule (tcp_data_queue)

...

00000103 0.377ms (+0.000ms): skb_release_data (kfree_skbmem)
00000103 0.378ms (+0.000ms): kfree (kfree_skbmem)
00000103 0.379ms (+0.000ms): kmem_cache_free (kfree_skbmem)
00000103 0.383ms (+0.004ms): qdisc_restart (dev_queue_xmit)
00000103 0.384ms (+0.000ms): pfifo_fast_dequeue (qdisc_restart)
00000102 0.384ms (+0.000ms): local_bh_enable (dev_queue_xmit)
00000001 0.386ms (+0.001ms): sub_preempt_count (tcp_delack_timer)
00000001 0.387ms (+0.000ms): _mmx_memcpy (check_preempt_timing)
00000001 0.387ms (+0.000ms): kernel_fpu_begin (_mmx_memcpy)

Also, I do not get the packet loss/timeout problems that another poster
reported when setting this to 1.  The network works normally, it just
does not affect the latency at all.

Full trace:

http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-Q5#/var/www/2.6.9-rc1-Q5/trace5.txt

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
                                                 ` (3 preceding siblings ...)
  2004-09-01  1:43                               ` Lee Revell
@ 2004-09-01  2:30                               ` Lee Revell
  2004-09-01  7:27                               ` Lee Revell
  2004-09-01  8:29                               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q6 Ingo Molnar
  6 siblings, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-09-01  2:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Mon, 2004-08-30 at 05:06, Ingo Molnar wrote:
> i've uploaded -Q5 to:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5

Hmm, 385 usec latency triggered by 'lsof /smb' (this is a samba export):

preemption latency trace v1.0.2
-------------------------------
 latency: 385 us, entries: 626 (626)
    -----------------
    | task: lsof/17876, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: unix_seq_start+0x10/0x50
 => ended at:   unix_seq_stop+0xf/0x30
=======>
00000001 0.000ms (+0.000ms): unix_seq_start (seq_read)
00000001 0.000ms (+0.000ms): unix_seq_idx (unix_seq_start)
00000001 0.027ms (+0.027ms): unix_seq_show (seq_read)
00000002 0.028ms (+0.000ms): sock_i_ino (unix_seq_show)
00000002 0.029ms (+0.001ms): seq_printf (unix_seq_show)
00000002 0.030ms (+0.000ms): vsnprintf (seq_printf)
00000002 0.032ms (+0.002ms): number (vsnprintf)
00000002 0.036ms (+0.004ms): skip_atoi (vsnprintf)
00000002 0.037ms (+0.000ms): number (vsnprintf)
00000002 0.038ms (+0.001ms): skip_atoi (vsnprintf)
00000002 0.039ms (+0.000ms): number (vsnprintf)
00000002 0.039ms (+0.000ms): skip_atoi (vsnprintf)
00000002 0.040ms (+0.000ms): number (vsnprintf)
00000002 0.041ms (+0.000ms): skip_atoi (vsnprintf)
00000002 0.041ms (+0.000ms): number (vsnprintf)
00000002 0.042ms (+0.000ms): skip_atoi (vsnprintf)
00000002 0.043ms (+0.000ms): number (vsnprintf)
00000002 0.043ms (+0.000ms): skip_atoi (vsnprintf)
00000002 0.044ms (+0.000ms): number (vsnprintf)
00000002 0.046ms (+0.001ms): seq_putc (unix_seq_show)
00000002 0.047ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.047ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.047ms (+0.000ms): seq_putc (unix_seq_show)

...

00000002 0.374ms (+0.000ms): number (vsnprintf)
00000002 0.375ms (+0.000ms): skip_atoi (vsnprintf)
00000002 0.376ms (+0.000ms): number (vsnprintf)
00000002 0.377ms (+0.001ms): seq_putc (unix_seq_show)
00000002 0.377ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.378ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.378ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.378ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.379ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.379ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.380ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.380ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.380ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.381ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.381ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.381ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.382ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.382ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.382ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.383ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.383ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.383ms (+0.000ms): seq_putc (unix_seq_show)
00000002 0.384ms (+0.000ms): seq_putc (unix_seq_show)
00000001 0.384ms (+0.000ms): preempt_schedule (unix_seq_show)
00000001 0.384ms (+0.000ms): seq_putc (unix_seq_show)
00000001 0.385ms (+0.000ms): unix_seq_stop (seq_read)
00000001 0.386ms (+0.000ms): sub_preempt_count (unix_seq_stop)
00000001 0.386ms (+0.000ms): _mmx_memcpy (check_preempt_timing)
00000001 0.387ms (+0.000ms): kernel_fpu_begin (_mmx_memcpy)

Full trace:

http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-Q5#/var/www/2.6.9-rc1-Q5/trace9.txt

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
                                                 ` (4 preceding siblings ...)
  2004-09-01  2:30                               ` Lee Revell
@ 2004-09-01  7:27                               ` Lee Revell
  2004-09-01  8:29                               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q6 Ingo Molnar
  6 siblings, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-09-01  7:27 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana, linux-kernel,
	Mark_H_Johnson

On Mon, 2004-08-30 at 05:06, Ingo Molnar wrote:

>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q5

Also, the rt_garbage_collect latency is still present:

http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-Q5#/var/www/2.6.9-rc1-Q5/trace11.txt

Lee



^ permalink raw reply	[flat|nested] 93+ messages in thread

* [patch] voluntary-preempt-2.6.9-rc1-bk4-Q6
  2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
                                                 ` (5 preceding siblings ...)
  2004-09-01  7:27                               ` Lee Revell
@ 2004-09-01  8:29                               ` Ingo Molnar
  2004-09-01 13:51                                 ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7 Ingo Molnar
  6 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-09-01  8:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: K.R. Foley, Mark_H_Johnson, Lee Revell


i've released the -Q6 patch:

  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q6

ontop of:

  http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2

this patch includes two changes that should shorten the networking
latencies reported. There's a new 'RX granularity' sysctl now:

    /proc/sys/net/core/netdev_backlog_granularity

It defaults to the most finegrained value, 1.

netdev_max_backlog has been moved back to the upstream value of 300.

Also, the backlog processing is now sensitive to preemption requests and
will break out early in that case.

(This should not result in TCP connection quality issues (all processing
is restarted after such a breakout), but nevertheless i'd suggest
everyone to keep an eye on lost packets and seemingly hung TCP
connections.)

other changes since -Q5:

 - mtrr simplifications and IRQ-disabling. (reported & tested by Lee
   Revell) Still under discussion though.

 - fix /dev/random driver latency (reported & tested by Lee Revell)

 - move vgacon_do_font_op out of the BKL (reported by P.O. Gaillard)

 - increase percpu space for tracing (by Mark H Johnson)

 - added user-triggerable generic kernel tracing enabled via
   tracing_enabled=2 and turned on via gettimeofday(0,1) and turned off
   via gettimeofday(0,0).

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7
  2004-09-01  8:29                               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q6 Ingo Molnar
@ 2004-09-01 13:51                                 ` Ingo Molnar
  2004-09-01 17:09                                   ` Thomas Charbonnel
       [not found]                                   ` <41367E5D.3040605@cybsft.com>
  0 siblings, 2 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-09-01 13:51 UTC (permalink / raw)
  To: linux-kernel; +Cc: K.R. Foley, Mark_H_Johnson, Lee Revell


i've released the -Q7 patch:

  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q7

ontop of:

  http://redhat.com/~mingo/voluntary-preempt/diff-bk-040828-2.6.8.1.bz2

the main changes in this patch are more SMP latency fixes. The stock
kernel, even with CONFIG_PREEMPT enabled, didnt have any spin-nicely
preemption logic for the following, commonly used SMP locking
primitives: read_lock(), spin_lock_irqsave(), spin_lock_irq(),
spin_lock_bh(), read_lock_irqsave(), read_lock_irq(), read_lock_bh(),
write_lock_irqsave(), write_lock_irq(), write_lock_bh(). Only
spin_lock() and write_lock() [the two simplest cases] were covered.

In addition to the preemption latency problems, the _irq() variants in
the above list didnt do any IRQ-enabling while spinning - possibly
resulting in excessive irqs-off sections of code!

-Q7 fixes all of these latency problems: we now re-enable interrupts
while spinning in all possible cases, and a spinning op stays
preemptible if this is a beginning of a new critical section.

there's also an SMP related tracing improvement in -Q7: the NMI tracing
code now traces the other CPUs too - this way if an NMI hits a
particularly long section, we'll have a chance to see what the other CPU
was doing. These show up as double do_nmi() trace entries on a 2-CPU x86
box. The first one is the current CPU, subsequent entries are the other
CPUs in the system.

(-Q7 is not that interesting to uniprocessor kernel users, but it would
still be useful to test it, just to see nothing broke (on the
compilation side), lots of spinlock code had to be changed.)

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-08-31 23:03                                   ` Lee Revell
@ 2004-09-01 15:52                                     ` Martin Josefsson
  2004-09-01 21:15                                       ` Lee Revell
  2004-09-01 21:30                                       ` Lee Revell
  0 siblings, 2 replies; 93+ messages in thread
From: Martin Josefsson @ 2004-09-01 15:52 UTC (permalink / raw)
  To: Lee Revell
  Cc: Ingo Molnar, Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson, tytso

[-- Attachment #1: Type: text/plain, Size: 338 bytes --]

On Wed, 2004-09-01 at 01:03, Lee Revell wrote:

Hi Lee

> This solves the problem with the random driver.  The worst latencies I
> am seeing are in netif_receive_skb().  With netdev_max_backlog set to 8,
> the worst is about 160 usecs:

I'm a bit curious... have you tried these tests with ip_conntrack
enabled?

-- 
/Martin

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7
  2004-09-01 13:51                                 ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7 Ingo Molnar
@ 2004-09-01 17:09                                   ` Thomas Charbonnel
  2004-09-01 19:03                                     ` K.R. Foley
  2004-09-01 20:11                                     ` Peter Zijlstra
       [not found]                                   ` <41367E5D.3040605@cybsft.com>
  1 sibling, 2 replies; 93+ messages in thread
From: Thomas Charbonnel @ 2004-09-01 17:09 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, K.R. Foley, Mark_H_Johnson, Lee Revell

Ingo Molnar wrote :
> i've released the -Q7 patch:
> 
>   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q7

With Q7 I still get rx latency issues (> 130 us non-preemptible section
from rtl8139_poll). Moreover network connections were extremely slow
(almost hung) until I set /proc/sys/net/core/netdev_backlog_granularity
to 2.

Thomas



^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7
  2004-09-01 17:09                                   ` Thomas Charbonnel
@ 2004-09-01 19:03                                     ` K.R. Foley
  2004-09-01 20:11                                     ` Peter Zijlstra
  1 sibling, 0 replies; 93+ messages in thread
From: K.R. Foley @ 2004-09-01 19:03 UTC (permalink / raw)
  To: Thomas Charbonnel; +Cc: Ingo Molnar, linux-kernel, Mark_H_Johnson, Lee Revell

Thomas Charbonnel wrote:
> Ingo Molnar wrote :
> 
>>i've released the -Q7 patch:
>>
>>  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q7
> 
> 
> With Q7 I still get rx latency issues (> 130 us non-preemptible section
> from rtl8139_poll). Moreover network connections were extremely slow
> (almost hung) until I set /proc/sys/net/core/netdev_backlog_granularity
> to 2.
> 
> Thomas
> 
> 
> 
I too am still getting these latencies, although not as often (maybe?). 
I on the other hand am having no problems with slow connections. 
However, this is with very little load on the system. Here is one such 
trace:

http://www.cybsft.com/testresults/2.6.9-rc1-bk4-Q7/latencytrace4.txt

I do have a couple of new traces that seem to be related to transmitting 
data, I think. They are here:

http://www.cybsft.com/testresults/2.6.9-rc1-bk4-Q7/latencytrace2.txt

http://www.cybsft.com/testresults/2.6.9-rc1-bk4-Q7/latencytrace3.txt

kr

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7
  2004-09-01 17:09                                   ` Thomas Charbonnel
  2004-09-01 19:03                                     ` K.R. Foley
@ 2004-09-01 20:11                                     ` Peter Zijlstra
  2004-09-01 20:16                                       ` Lee Revell
  2004-09-01 20:53                                       ` K.R. Foley
  1 sibling, 2 replies; 93+ messages in thread
From: Peter Zijlstra @ 2004-09-01 20:11 UTC (permalink / raw)
  To: Thomas Charbonnel
  Cc: Ingo Molnar, LKML, K.R. Foley, Mark_H_Johnson, Lee Revell

On Wed, 2004-09-01 at 19:09 +0200, Thomas Charbonnel wrote:
> Ingo Molnar wrote :
> > i've released the -Q7 patch:
> > 
> >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q7
> 
> With Q7 I still get rx latency issues (> 130 us non-preemptible section
> from rtl8139_poll). Moreover network connections were extremely slow
> (almost hung) until I set /proc/sys/net/core/netdev_backlog_granularity
> to 2.
> 
> Thomas
> 

Me too!
I too have a rtl8139 network card.

kr, what kind of nic do you have since this does not occur on your
machine?

-- 
Peter Zijlstra <a.p.zijlstra@chello.nl>


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7
  2004-09-01 20:11                                     ` Peter Zijlstra
@ 2004-09-01 20:16                                       ` Lee Revell
  2004-09-01 20:53                                       ` K.R. Foley
  1 sibling, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-09-01 20:16 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Charbonnel, Ingo Molnar, LKML, K.R. Foley, Mark_H_Johnson

On Wed, 2004-09-01 at 16:11, Peter Zijlstra wrote:
> On Wed, 2004-09-01 at 19:09 +0200, Thomas Charbonnel wrote:
> > Ingo Molnar wrote :
> > > i've released the -Q7 patch:
> > > 
> > >   http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q7
> > 
> > With Q7 I still get rx latency issues (> 130 us non-preemptible section
> > from rtl8139_poll). Moreover network connections were extremely slow
> > (almost hung) until I set /proc/sys/net/core/netdev_backlog_granularity
> > to 2.
> > 
> > Thomas
> > 
> 
> Me too!
> I too have a rtl8139 network card.
> 
> kr, what kind of nic do you have since this does not occur on your
> machine?

Hmm, I am not a network driver expert, and this is just a guess, but if
they work anything like sound cards, I would say that that hardware
will only generate an interrupt when there are 2 packets in its queue.

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7
  2004-09-01 20:11                                     ` Peter Zijlstra
  2004-09-01 20:16                                       ` Lee Revell
@ 2004-09-01 20:53                                       ` K.R. Foley
  1 sibling, 0 replies; 93+ messages in thread
From: K.R. Foley @ 2004-09-01 20:53 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Charbonnel, Ingo Molnar, LKML, Mark_H_Johnson, Lee Revell

Peter Zijlstra wrote:
> On Wed, 2004-09-01 at 19:09 +0200, Thomas Charbonnel wrote:
> 
>>Ingo Molnar wrote :
>>
>>>i've released the -Q7 patch:
>>>
>>>  http://redhat.com/~mingo/voluntary-preempt/voluntary-preempt-2.6.9-rc1-bk4-Q7
>>
>>With Q7 I still get rx latency issues (> 130 us non-preemptible section
>>from rtl8139_poll). Moreover network connections were extremely slow
>>(almost hung) until I set /proc/sys/net/core/netdev_backlog_granularity
>>to 2.
>>
>>Thomas
>>
> 
> 
> Me too!
> I too have a rtl8139 network card.
> 
> kr, what kind of nic do you have since this does not occur on your
> machine?
> 

Ethernet Pro 100.

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-09-01 15:52                                     ` Martin Josefsson
@ 2004-09-01 21:15                                       ` Lee Revell
  2004-09-01 21:30                                       ` Lee Revell
  1 sibling, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-09-01 21:15 UTC (permalink / raw)
  To: Martin Josefsson
  Cc: Ingo Molnar, Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson, tytso

On Wed, 2004-09-01 at 11:52, Martin Josefsson wrote:
> On Wed, 2004-09-01 at 01:03, Lee Revell wrote:
> 
> Hi Lee
> 
> > This solves the problem with the random driver.  The worst latencies I
> > am seeing are in netif_receive_skb().  With netdev_max_backlog set to 8,
> > the worst is about 160 usecs:
> 
> I'm a bit curious... have you tried these tests with ip_conntrack
> enabled?

No, this is disabled in my config.  I will try enabling it.

What would the expected result be?

Lee


^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
  2004-09-01 15:52                                     ` Martin Josefsson
  2004-09-01 21:15                                       ` Lee Revell
@ 2004-09-01 21:30                                       ` Lee Revell
  1 sibling, 0 replies; 93+ messages in thread
From: Lee Revell @ 2004-09-01 21:30 UTC (permalink / raw)
  To: Martin Josefsson
  Cc: Ingo Molnar, Daniel Schmitt, K.R. Foley, Felipe Alfaro Solana,
	linux-kernel, Mark_H_Johnson, tytso

On Wed, 2004-09-01 at 11:52, Martin Josefsson wrote:
> On Wed, 2004-09-01 at 01:03, Lee Revell wrote:
> 
> Hi Lee
> 
> > This solves the problem with the random driver.  The worst latencies I
> > am seeing are in netif_receive_skb().  With netdev_max_backlog set to 8,
> > the worst is about 160 usecs:
> 
> I'm a bit curious... have you tried these tests with ip_conntrack
> enabled?

OK, loaded the ip_conntrack module.  'cat /proc/net/ip_conntrack'
produced a 2906 usec latency!

preemption latency trace v1.0.2
-------------------------------
 latency: 2906 us, entries: 4000 (7910)
    -----------------
    | task: cat/2091, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: cond_resched+0xd/0x40
 => ended at:   local_bh_enable+0x12/0xa0
=======>
00000101 0.000ms (+0.000ms): touch_preempt_timing (cond_resched)
00000101 0.001ms (+0.001ms): ct_seq_show (seq_read)
00000101 0.002ms (+0.001ms): ct_seq_next (seq_read)
00000101 0.002ms (+0.000ms): ct_seq_show (seq_read)
00000101 0.002ms (+0.000ms): ct_seq_next (seq_read)
00000101 0.003ms (+0.000ms): ct_seq_show (seq_read)
00000101 0.003ms (+0.000ms): ct_seq_next (seq_read)
00000101 0.004ms (+0.000ms): ct_seq_show (seq_read)
00000101 0.004ms (+0.000ms): ct_seq_next (seq_read)
00000101 0.004ms (+0.000ms): ct_seq_show (seq_read)

[ this repeats hundreds of times ]

Full trace:

http://krustophenia.net/testresults.php?dataset=2.6.9-rc1-Q6#/var/www/2.6.9-rc1-Q6/trace1.txt

netif_receive_skb still produces ~150 usec latencies with ip_conntrack,
but the code path is different: 

preemption latency trace v1.0.2
-------------------------------
 latency: 145 us, entries: 145 (145)
    -----------------
    | task: ksoftirqd/0/2, uid:0 nice:-10 policy:0 rt_prio:0
    -----------------
 => started at: netif_receive_skb+0x6a/0x1d0
 => ended at:   netif_receive_skb+0x153/0x1d0
=======>
00000001 0.000ms (+0.000ms): netif_receive_skb (process_backlog)
00000001 0.001ms (+0.001ms): packet_rcv_spkt (netif_receive_skb)
00000001 0.002ms (+0.000ms): skb_clone (packet_rcv_spkt)
00000001 0.003ms (+0.000ms): kmem_cache_alloc (skb_clone)
00000001 0.004ms (+0.001ms): memcpy (skb_clone)
00000001 0.006ms (+0.002ms): strlcpy (packet_rcv_spkt)
00000002 0.008ms (+0.001ms): sk_run_filter (packet_rcv_spkt)
00000001 0.011ms (+0.002ms): __kfree_skb (packet_rcv_spkt)
00000001 0.012ms (+0.000ms): kfree_skbmem (__kfree_skb)
00000001 0.012ms (+0.000ms): skb_release_data (kfree_skbmem)
00000001 0.012ms (+0.000ms): kmem_cache_free (kfree_skbmem)
00000001 0.013ms (+0.000ms): ip_rcv (netif_receive_skb)
00000001 0.015ms (+0.001ms): nf_hook_slow (ip_rcv)
00000002 0.016ms (+0.000ms): nf_iterate (nf_hook_slow)
00000002 0.017ms (+0.001ms): ip_conntrack_defrag (nf_iterate)
00000002 0.018ms (+0.000ms): ip_conntrack_in (nf_iterate)
00000002 0.018ms (+0.000ms): ip_ct_find_proto (ip_conntrack_in)
00000103 0.019ms (+0.000ms): __ip_ct_find_proto (ip_ct_find_proto)
00000102 0.019ms (+0.000ms): local_bh_enable (ip_ct_find_proto)
00000002 0.021ms (+0.001ms): tcp_error (ip_conntrack_in)
00000002 0.022ms (+0.001ms): skb_checksum (tcp_error)
00000002 0.031ms (+0.008ms): ip_ct_get_tuple (ip_conntrack_in)
00000002 0.031ms (+0.000ms): tcp_pkt_to_tuple (ip_ct_get_tuple)
00000002 0.032ms (+0.000ms): ip_conntrack_find_get (ip_conntrack_in)
00000103 0.033ms (+0.000ms): __ip_conntrack_find (ip_conntrack_find_get)
00000103 0.033ms (+0.000ms): hash_conntrack (__ip_conntrack_find)
00000102 0.035ms (+0.002ms): local_bh_enable (ip_conntrack_find_get)
00000002 0.036ms (+0.000ms): tcp_packet (ip_conntrack_in)
00000103 0.037ms (+0.000ms): get_conntrack_index (tcp_packet)
00000103 0.038ms (+0.001ms): tcp_in_window (tcp_packet)
00000103 0.039ms (+0.000ms): tcp_sack (tcp_in_window)
00000102 0.041ms (+0.002ms): local_bh_enable (tcp_packet)
00000002 0.042ms (+0.000ms): ip_ct_refresh_acct (tcp_packet)
00000103 0.043ms (+0.000ms): del_timer (ip_ct_refresh_acct)
00000103 0.044ms (+0.000ms): __mod_timer (ip_ct_refresh_acct)
00000105 0.045ms (+0.001ms): internal_add_timer (__mod_timer)
00000102 0.046ms (+0.001ms): local_bh_enable (tcp_packet)
00000002 0.047ms (+0.001ms): ip_rcv_finish (nf_hook_slow)
00000002 0.048ms (+0.000ms): ip_route_input (ip_rcv_finish)
00000002 0.048ms (+0.000ms): rt_hash_code (ip_route_input)
00000002 0.051ms (+0.003ms): ip_local_deliver (ip_rcv_finish)
00000002 0.052ms (+0.000ms): nf_hook_slow (ip_local_deliver)
00000003 0.052ms (+0.000ms): nf_iterate (nf_hook_slow)
00000003 0.053ms (+0.000ms): ip_confirm (nf_iterate)
00000003 0.054ms (+0.000ms): ip_local_deliver_finish (nf_hook_slow)
00000004 0.055ms (+0.001ms): tcp_v4_rcv (ip_local_deliver_finish)
00000004 0.056ms (+0.000ms): tcp_v4_checksum_init (tcp_v4_rcv)
00000005 0.060ms (+0.004ms): tcp_v4_do_rcv (tcp_v4_rcv)
00000005 0.061ms (+0.000ms): tcp_rcv_established (tcp_v4_do_rcv)
00000005 0.062ms (+0.001ms): __tcp_checksum_complete_user (tcp_rcv_established)
00000005 0.063ms (+0.000ms): skb_checksum (__tcp_checksum_complete_user)
00000005 0.065ms (+0.001ms): tcp_rcv_rtt_update (tcp_rcv_established)
00000005 0.066ms (+0.001ms): tcp_event_data_recv (tcp_rcv_established)
00000005 0.069ms (+0.002ms): __tcp_ack_snd_check (tcp_rcv_established)
00000005 0.070ms (+0.000ms): __tcp_select_window (__tcp_ack_snd_check)
00000005 0.070ms (+0.000ms): tcp_send_ack (tcp_rcv_established)
00000005 0.071ms (+0.000ms): alloc_skb (tcp_send_ack)
00000005 0.071ms (+0.000ms): kmem_cache_alloc (alloc_skb)
00000005 0.072ms (+0.000ms): __kmalloc (alloc_skb)
00000005 0.074ms (+0.002ms): tcp_transmit_skb (tcp_send_ack)
00000005 0.076ms (+0.001ms): __tcp_select_window (tcp_transmit_skb)
00000005 0.078ms (+0.001ms): tcp_v4_send_check (tcp_transmit_skb)
00000005 0.079ms (+0.001ms): ip_queue_xmit (tcp_transmit_skb)
00000005 0.082ms (+0.003ms): nf_hook_slow (ip_queue_xmit)
00000006 0.083ms (+0.000ms): nf_iterate (nf_hook_slow)
00000006 0.084ms (+0.000ms): ip_conntrack_defrag (nf_iterate)
00000006 0.084ms (+0.000ms): ip_conntrack_local (nf_iterate)
00000006 0.085ms (+0.000ms): ip_conntrack_in (nf_iterate)
00000006 0.085ms (+0.000ms): ip_ct_find_proto (ip_conntrack_in)
00000107 0.086ms (+0.000ms): __ip_ct_find_proto (ip_ct_find_proto)
00000106 0.086ms (+0.000ms): local_bh_enable (ip_ct_find_proto)
00000006 0.087ms (+0.000ms): tcp_error (ip_conntrack_in)
00000006 0.088ms (+0.000ms): ip_ct_get_tuple (ip_conntrack_in)
00000006 0.088ms (+0.000ms): tcp_pkt_to_tuple (ip_ct_get_tuple)
00000006 0.089ms (+0.000ms): ip_conntrack_find_get (ip_conntrack_in)
00000107 0.089ms (+0.000ms): __ip_conntrack_find (ip_conntrack_find_get)
00000107 0.089ms (+0.000ms): hash_conntrack (__ip_conntrack_find)
00000106 0.090ms (+0.000ms): local_bh_enable (ip_conntrack_find_get)
00000006 0.091ms (+0.000ms): tcp_packet (ip_conntrack_in)
00000107 0.091ms (+0.000ms): get_conntrack_index (tcp_packet)
00000107 0.092ms (+0.000ms): tcp_in_window (tcp_packet)
00000107 0.093ms (+0.000ms): tcp_sack (tcp_in_window)
00000106 0.094ms (+0.001ms): local_bh_enable (tcp_packet)
00000006 0.094ms (+0.000ms): ip_ct_refresh_acct (tcp_packet)
00000107 0.095ms (+0.000ms): del_timer (ip_ct_refresh_acct)
00000107 0.095ms (+0.000ms): __mod_timer (ip_ct_refresh_acct)
00000109 0.096ms (+0.000ms): internal_add_timer (__mod_timer)
00000106 0.097ms (+0.000ms): local_bh_enable (tcp_packet)
00000006 0.098ms (+0.000ms): dst_output (nf_hook_slow)
00000006 0.098ms (+0.000ms): ip_output (dst_output)
00000006 0.099ms (+0.000ms): ip_finish_output (dst_output)
00000006 0.099ms (+0.000ms): nf_hook_slow (ip_finish_output)
00000007 0.100ms (+0.000ms): nf_iterate (nf_hook_slow)
00000007 0.100ms (+0.000ms): ip_refrag (nf_iterate)
00000007 0.101ms (+0.000ms): ip_confirm (ip_refrag)
00000007 0.101ms (+0.000ms): ip_finish_output2 (nf_hook_slow)
00000107 0.102ms (+0.001ms): local_bh_enable (ip_finish_output2)
00000007 0.103ms (+0.000ms): neigh_resolve_output (ip_finish_output2)
00000108 0.104ms (+0.001ms): eth_header (neigh_resolve_output)
00000107 0.106ms (+0.001ms): local_bh_enable (neigh_resolve_output)
00000007 0.107ms (+0.001ms): dev_queue_xmit (neigh_resolve_output)
00000109 0.108ms (+0.001ms): pfifo_fast_enqueue (dev_queue_xmit)
00000109 0.109ms (+0.000ms): qdisc_restart (dev_queue_xmit)
00000109 0.110ms (+0.000ms): pfifo_fast_dequeue (qdisc_restart)
00000109 0.111ms (+0.001ms): dev_queue_xmit_nit (qdisc_restart)
0000010a 0.112ms (+0.000ms): skb_clone (dev_queue_xmit_nit)
0000010a 0.112ms (+0.000ms): kmem_cache_alloc (skb_clone)
0000010a 0.113ms (+0.000ms): memcpy (skb_clone)
0000010a 0.114ms (+0.001ms): packet_rcv_spkt (dev_queue_xmit_nit)
0000010a 0.114ms (+0.000ms): strlcpy (packet_rcv_spkt)
0000010b 0.115ms (+0.000ms): sk_run_filter (packet_rcv_spkt)
0000010a 0.116ms (+0.000ms): __kfree_skb (packet_rcv_spkt)
0000010a 0.116ms (+0.000ms): kfree_skbmem (__kfree_skb)
0000010a 0.117ms (+0.000ms): skb_release_data (kfree_skbmem)
0000010a 0.117ms (+0.000ms): kmem_cache_free (kfree_skbmem)
00000109 0.118ms (+0.000ms): rhine_start_tx (qdisc_restart)
00000109 0.122ms (+0.004ms): qdisc_restart (dev_queue_xmit)
00000109 0.122ms (+0.000ms): pfifo_fast_dequeue (qdisc_restart)
00000108 0.123ms (+0.000ms): local_bh_enable (dev_queue_xmit)
00000005 0.124ms (+0.001ms): sock_def_readable (tcp_rcv_established)
00000006 0.125ms (+0.000ms): __wake_up (sock_def_readable)
00000007 0.126ms (+0.000ms): __wake_up_common (__wake_up)
00000007 0.127ms (+0.000ms): default_wake_function (__wake_up_common)
00000007 0.127ms (+0.000ms): try_to_wake_up (__wake_up_common)
00000007 0.128ms (+0.000ms): task_rq_lock (try_to_wake_up)
00000008 0.129ms (+0.000ms): activate_task (try_to_wake_up)
00000008 0.129ms (+0.000ms): sched_clock (activate_task)
00000008 0.130ms (+0.000ms): recalc_task_prio (activate_task)
00000008 0.131ms (+0.000ms): effective_prio (recalc_task_prio)
00000008 0.131ms (+0.000ms): enqueue_task (activate_task)
00010007 0.133ms (+0.001ms): do_IRQ (__wake_up)
00010008 0.134ms (+0.000ms): mask_and_ack_8259A (do_IRQ)
00010008 0.138ms (+0.004ms): generic_redirect_hardirq (do_IRQ)
00010008 0.138ms (+0.000ms): wake_up_process (generic_redirect_hardirq)
00010008 0.139ms (+0.000ms): try_to_wake_up (wake_up_process)
00010008 0.139ms (+0.000ms): task_rq_lock (try_to_wake_up)
00010009 0.140ms (+0.000ms): activate_task (try_to_wake_up)
00010009 0.140ms (+0.000ms): sched_clock (activate_task)
00010009 0.140ms (+0.000ms): recalc_task_prio (activate_task)
00010009 0.141ms (+0.000ms): effective_prio (recalc_task_prio)
00010009 0.141ms (+0.000ms): enqueue_task (activate_task)
00000001 0.145ms (+0.003ms): sub_preempt_count (netif_receive_skb)
00000001 0.146ms (+0.000ms): update_max_trace (check_preempt_timing)
00000001 0.146ms (+0.000ms): _mmx_memcpy (update_max_trace)
00000001 0.147ms (+0.000ms): kernel_fpu_begin (_mmx_memcpy)

Lee



^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7
       [not found]                                   ` <41367E5D.3040605@cybsft.com>
@ 2004-09-02  5:37                                     ` Ingo Molnar
  2004-09-02  5:40                                       ` Ingo Molnar
  0 siblings, 1 reply; 93+ messages in thread
From: Ingo Molnar @ 2004-09-02  5:37 UTC (permalink / raw)
  To: K.R. Foley; +Cc: linux-kernel, Mark_H_Johnson, Lee Revell


* K.R. Foley <kr@cybsft.com> wrote:

> This is an interesting one. ~3.9ms generated here by amlat in do_IRQ:

the overhead is not in do_IRQ():

> 00000001 0.000ms (+0.000ms): n_tty_receive_buf (pty_write)
> 00010001 3.992ms (+3.992ms): do_IRQ (n_tty_receive_buf)

the overhead is always relative to the previous entry - so the overhead
was in n_tty_receive_buf() [that is the function that was interrupted by
do_IRQ()]. But it's a bit weird - you should have gotten timer IRQs
every 1 msec. Does n_tty_receive_buf() run with irqs disabled perhaps?

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7
  2004-09-02  5:37                                     ` Ingo Molnar
@ 2004-09-02  5:40                                       ` Ingo Molnar
  0 siblings, 0 replies; 93+ messages in thread
From: Ingo Molnar @ 2004-09-02  5:40 UTC (permalink / raw)
  To: K.R. Foley; +Cc: linux-kernel, Mark_H_Johnson, Lee Revell


* Ingo Molnar <mingo@elte.hu> wrote:

> > 00000001 0.000ms (+0.000ms): n_tty_receive_buf (pty_write)
> > 00010001 3.992ms (+3.992ms): do_IRQ (n_tty_receive_buf)
> 
> the overhead is always relative to the previous entry [...]

i've changed the /proc/latency_trace output in my tree to print the
latency of this entry relative to the next entry, not the previous
entry. This should be more intuitive than using the previous entry.

	Ingo

^ permalink raw reply	[flat|nested] 93+ messages in thread

end of thread, other threads:[~2004-09-02  5:58 UTC | newest]

Thread overview: 93+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-08-23 22:18 [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
2004-08-24 17:43   ` K.R. Foley
2004-08-24 20:32     ` Lee Revell
2004-08-24 20:53       ` Scott Wood
2004-08-24 19:20   ` K.R. Foley
2004-08-24 22:47   ` Lee Revell
2004-08-25  2:00   ` Lee Revell
2004-08-25  3:17   ` K.R. Foley
2004-08-25  3:22     ` Lee Revell
2004-08-25 14:34       ` K.R. Foley
2004-08-25 16:00       ` K.R. Foley
2004-08-25  3:26   ` K.R. Foley
2004-08-25  9:58   ` [patch] voluntary-preempt-2.6.8.1-P9 : oprofile latency at 3.3ms P.O. Gaillard
2004-08-26 21:39   ` [patch] voluntary-preempt-2.6.8.1-P9 Lee Revell
2004-08-27 16:54     ` Lee Revell
2004-08-28  7:37       ` Ingo Molnar
2004-08-28 15:10         ` Lee Revell
2004-08-28 12:14     ` Ingo Molnar
2004-08-30  9:27       ` voluntary-preempt-2.6.8.1-P9 : big latency when logging on console P.O. Gaillard
2004-08-30  9:41         ` Ingo Molnar
2004-08-30 12:25           ` P.O. Gaillard
2004-08-30  9:48       ` [patch] voluntary-preempt-2.6.8.1-P9 : a few submillisecond latencies P.O. Gaillard
2004-08-28 12:03   ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q0 Ingo Molnar
2004-08-28 16:18     ` Felipe Alfaro Solana
2004-08-28 16:50       ` K.R. Foley
2004-08-28 17:52         ` Lee Revell
2004-08-28 19:44           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2 Ingo Molnar
2004-08-28 20:01             ` Lee Revell
2004-08-28 20:04               ` Ingo Molnar
2004-08-28 20:08                 ` Lee Revell
2004-08-28 20:10             ` Daniel Schmitt
2004-08-28 20:31               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3 Ingo Molnar
2004-08-28 21:10                 ` Lee Revell
2004-08-28 21:13                   ` Ingo Molnar
2004-08-28 21:16                     ` Lee Revell
2004-08-28 23:51                       ` Lee Revell
2004-08-29  2:35                         ` Lee Revell
2004-08-29  5:43                           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4 Ingo Molnar
2004-08-29  6:57                             ` Lee Revell
2004-08-29 18:01                               ` Ingo Molnar
2004-08-29 19:06                               ` Ingo Molnar
2004-08-30  0:47                                 ` K.R. Foley
2004-08-30  3:42                                   ` K.R. Foley
2004-08-30 13:06                                   ` Alan Cox
2004-08-30 17:37                                     ` Ingo Molnar
2004-08-31 16:39                                     ` K.R. Foley
2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
2004-08-30 14:25                               ` Thomas Charbonnel
2004-08-30 18:00                                 ` Ingo Molnar
2004-08-31 19:23                                   ` Thomas Charbonnel
2004-08-31 19:30                                     ` Ingo Molnar
2004-08-31 19:45                                       ` Thomas Charbonnel
2004-08-31  6:40                               ` Lee Revell
2004-08-31  6:53                                 ` Ingo Molnar
2004-08-31 23:03                                   ` Lee Revell
2004-09-01 15:52                                     ` Martin Josefsson
2004-09-01 21:15                                       ` Lee Revell
2004-09-01 21:30                                       ` Lee Revell
2004-08-31  7:06                                 ` Ingo Molnar
2004-08-31 19:21                                   ` Lee Revell
2004-08-31 19:37                                     ` Ingo Molnar
2004-08-31 19:47                                       ` Lee Revell
2004-08-31 19:51                                         ` Ingo Molnar
2004-08-31 20:09                                           ` Ingo Molnar
2004-08-31 20:10                                             ` Lee Revell
2004-08-31 20:14                                               ` Ingo Molnar
2004-08-31 20:20                                                 ` Ingo Molnar
2004-08-31 20:34                                                   ` Lee Revell
2004-08-31 20:39                                                     ` Ingo Molnar
2004-08-31 20:41                                                       ` Lee Revell
2004-08-31 17:40                               ` Peter Zijlstra
2004-09-01  1:43                               ` Lee Revell
2004-09-01  2:30                               ` Lee Revell
2004-09-01  7:27                               ` Lee Revell
2004-09-01  8:29                               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q6 Ingo Molnar
2004-09-01 13:51                                 ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7 Ingo Molnar
2004-09-01 17:09                                   ` Thomas Charbonnel
2004-09-01 19:03                                     ` K.R. Foley
2004-09-01 20:11                                     ` Peter Zijlstra
2004-09-01 20:16                                       ` Lee Revell
2004-09-01 20:53                                       ` K.R. Foley
     [not found]                                   ` <41367E5D.3040605@cybsft.com>
2004-09-02  5:37                                     ` Ingo Molnar
2004-09-02  5:40                                       ` Ingo Molnar
2004-08-30 12:52                   ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3 Ingo Molnar
2004-08-29  7:40                 ` Matt Heler
2004-08-24 19:51 ` [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
2004-08-26  3:17   ` Lee Revell
2004-08-26 16:38     ` Scott Wood
2004-08-27  1:18     ` Fernando Pablo Lopez-Lezcano
2004-08-28 12:36   ` Ingo Molnar
2004-08-28 13:01     ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q1 Ingo Molnar
2004-08-30  1:06       ` Fernando Pablo Lopez-Lezcano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.