linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* 2.6.17-rt1 : x86_64 oops
@ 2006-06-27 20:01 Dipankar Sarma
  2006-06-28 18:21 ` [PATCH] 2.6.17-rt1 : fix " Dipankar Sarma
  0 siblings, 1 reply; 16+ messages in thread
From: Dipankar Sarma @ 2006-06-27 20:01 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel

I used the patch below to work around the already known
compilation problem and a bunch of warnings in slab.c. On my
4-way x86_64 box, I get a few oopses and then the machine panics.

Starting udevd
Creating devices
BUG: scheduling while atomic: udevd/0x00000001/1875
BUG: scheduling while atomic: swapper/0x00000001/0

Call Trace:
       <ffffffff804fcd76>{__schedule+158}
       <ffffffff804ffcdf>{_raw_spin_unlock_irqrestore+48}
       <ffffffff8024998b>{task_blocks_on_rt_mutex+518}
       <ffffffff80502986>{kprobe_flush_task+21}
       <ffffffff80502986>{kprobe_flush_task+21}
       <ffffffff804fdf0d>{schedule+236}
       <ffffffff804fedb3>{rt_lock_slowlock+327}
       <ffffffff80502986>{kprobe_flush_task+21}
       <ffffffff804fdc57>{thread_return+177}
       <ffffffff80208968>{mwait_idle+0}
       <ffffffff80208958>{cpu_idle+232}
       <ffffffff80217108>{start_secondary+1102}
---------------------------
| preempt count: 00000001 ]
| 1-level deep critical section nesting:
----------------------------------------
.. [<ffffffff804fcd8e>] .... __schedule+0xb6/0xece
.....[<00000000>] ..   ( <= 0x0)

BUG: scheduling from the idle thread!

Call Trace:
       <ffffffff804fcde5>{__schedule+269}
       <ffffffff804ffcdf>{_raw_spin_unlock_irqrestore+48}
       <ffffffff8024998b>{task_blocks_on_rt_mutex+518}
       <ffffffff80502986>{kprobe_flush_task+21}
       <ffffffff80502986>{kprobe_flush_task+21}
       <ffffffff804fdf0d>{schedule+236}
       <ffffffff804fedb3>{rt_lock_slowlock+327}
       <ffffffff80502986>{kprobe_flush_task+21}
       <ffffffff804fdc57>{thread_return+177}
       <ffffffff80208968>{mwait_idle+0}
       <ffffffff80208958>{cpu_idle+232}
       <ffffffff80217108>{start_secondary+1102}
---------------------------
| preempt count: 00000002 ]
| 2-level deep critical section nesting:
----------------------------------------
Unable to handle kernel NULL pointer dereference at 0000000000000008 RIP:
<ffffffff8022739c>{dequeue_task+13}
PGD 21dfe2067 PUD 21d036067 PMD 0
Oops: 0002 [1] PREEMPT SMP
CPU 3
Modules linked in:
Pid: 0, comm: swapper Not tainted 2.6.17-rt1 #2
RIP: 0010:[<ffffffff8022739c>] <ffffffff8022739c>{dequeue_task+13}
RSP: 0000:ffff81022083bc30  EFLAGS: 00010086
RAX: ffff810084af6000 RBX: ffff810220834580 RCX: ffff8102208345b8
RDX: ffffffff807b1718 RSI: 0000000000000000 RDI: ffff810220834580
RBP: ffff81022083bc40 R08: 00000000ffffffff R09: ffff81022083b8a0
R10: 0000000000000001 R11: ffffffff8024aec4 R12: 0000000000000000
R13: ffffffff80502986 R14: ffff8100052a5600 R15: 0000000000000020
FS:  0000000000000000(0000) GS:ffff81022080cc40(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000008 CR3: 000000021dbad000 CR4: 00000000000006e0
Process swapper (pid: 0, threadinfo ffff81022083a000, task ffff810220834580)
Stack: ffff81022083bd30 ffff810220834580 ffff81022083bc60 ffffffff80227433
       ffff81022083bd30 0000000010000040 ffff81022083bd30 ffffffff804fcf09
       ffffffff806c1036 ffffffff806c1020
Call Trace:
       <ffffffff80227433>{deactivate_task+25}
       <ffffffff804fcf09>{__schedule+561}
       <ffffffff804ffcdf>{_raw_spin_unlock_irqrestore+48}
       <ffffffff8024998b>{task_blocks_on_rt_mutex+518}
       <ffffffff80502986>{kprobe_flush_task+21}
       <ffffffff80502986>{kprobe_flush_task+21}
       <ffffffff804fdf0d>{schedule+236}
       <ffffffff804fedb3>{rt_lock_slowlock+327}
       <ffffffff80502986>{kprobe_flush_task+21}
       <ffffffff804fdc57>{thread_return+177}
       <ffffffff80208968>{mwait_idle+0}
       <ffffffff80208958>{cpu_idle+232}
       <ffffffff80217108>{start_secondary+1102}
---------------------------
| preempt count: 00000004 ]
| 4-level deep critical section nesting:
----------------------------------------


I am digging to see what is causing this, just a heads up.
The compilation and warning fix patch is below.

Thanks
Dipankar



Fix a compilation error in numa slab code. Also fixes warnings
due to slab_irq_disable().

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
---



diff -puN mm/slab.c~fix-slab-numa mm/slab.c
--- linux-2.6.17-rt1-rcu/mm/slab.c~fix-slab-numa	2006-06-27 15:26:38.000000000 +0530
+++ linux-2.6.17-rt1-rcu-dipankar/mm/slab.c	2006-06-27 18:45:32.000000000 +0530
@@ -149,7 +149,8 @@
  	do { spin_unlock_irqrestore(lock, flags); } while (0)
 #else
 DEFINE_PER_CPU_LOCKED(int, slab_irq_locks) = { 0, };
-# define slab_irq_disable(cpu)		get_cpu_var_locked(slab_irq_locks, &(cpu))
+# define slab_irq_disable(cpu)	({get_cpu_var_locked(slab_irq_locks, &(cpu));})
+
 # define slab_irq_enable(cpu)		put_cpu_var_locked(slab_irq_locks, cpu)
 # define slab_irq_save(flags, cpu) \
 	do { slab_irq_disable(cpu); (void) (flags); } while (0)
@@ -3243,14 +3244,15 @@ __cache_free(struct kmem_cache *cachep, 
 				if (unlikely(alien->avail == alien->limit)) {
 					STATS_INC_ACOVERFLOW(cachep);
 					__drain_alien_cache(cachep,
-							    alien, nodeid);
+							    alien, nodeid,
+							    this_cpu);
 				}
 				alien->entry[alien->avail++] = objp;
 				spin_unlock(&alien->lock);
 			} else {
 				spin_lock(&(cachep->nodelists[nodeid])->
 					  list_lock);
-				free_block(cachep, &objp, 1, nodeid);
+				free_block(cachep, &objp, 1, nodeid, this_cpu);
 				spin_unlock(&(cachep->nodelists[nodeid])->
 					    list_lock);
 			}
diff -puN include/linux/spinlock_api_smp.h~fix-slab-numa include/linux/spinlock_api_smp.h
--- linux-2.6.17-rt1-rcu/include/linux/spinlock_api_smp.h~fix-slab-numa	2006-06-27 16:37:39.000000000 +0530
+++ linux-2.6.17-rt1-rcu-dipankar/include/linux/spinlock_api_smp.h	2006-06-27 16:38:46.000000000 +0530
@@ -37,6 +37,7 @@ unsigned long __lockfunc _raw_write_lock
 int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock);
 int __lockfunc _raw_read_trylock(raw_rwlock_t *lock);
 int __lockfunc _raw_write_trylock(raw_rwlock_t *lock);
+int __lockfunc _raw_spin_trylock_irq(raw_spinlock_t *lock);
 int __lockfunc _raw_spin_trylock_irqsave(raw_spinlock_t *lock,
 					 unsigned long *flags);
 int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock);

_

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-06-27 20:01 2.6.17-rt1 : x86_64 oops Dipankar Sarma
@ 2006-06-28 18:21 ` Dipankar Sarma
  2006-06-28 19:32   ` Ingo Molnar
  0 siblings, 1 reply; 16+ messages in thread
From: Dipankar Sarma @ 2006-06-28 18:21 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Ananth N Mavinakayanahalli, Prasanna Panchamukhi

On Wed, Jun 28, 2006 at 01:31:05AM +0530, Dipankar Sarma wrote:
> I used the attached patch below to work around the already known
> compilation problem and a bunch of warnings in slab.c. In my
> 4-way x86_64 box, I get a few oops and then the machine panics.
> 
> Starting udevd
> Creating devices
> BUG: scheduling while atomic: udevd/0x00000001/1875
> BUG: scheduling while atomic: swapper/0x00000001/0
> 
> Call Trace:
>        <ffffffff804fcd76>{__schedule+158}
>        <ffffffff804ffcdf>{_raw_spin_unlock_irqrestore+48}
>        <ffffffff8024998b>{task_blocks_on_rt_mutex+518}
>        <ffffffff80502986>{kprobe_flush_task+21}

Turns out that kprobe_flush_task() is called from finish_task_switch()
with preemption disabled and it uses a converted spin lock. The following
patch fixed the problem for me and I was able to boot my x86_64 box.

Thanks
Dipankar


kprobe_flush_task() is called from finish_task_switch() with
preemption disabled. This requires kretprobe_lock to be a
raw spinlock. Without this, I get a lot of scheduling-while-atomic
oopses and then an eventual panic on my x86_64 box.
Tested by booting my x86_64 box.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
---



diff -puN kernel/kprobes.c~fix-kprobe-atomic-sched kernel/kprobes.c
--- linux-2.6.17-rt1-rcu/kernel/kprobes.c~fix-kprobe-atomic-sched	2006-06-28 23:23:06.000000000 +0530
+++ linux-2.6.17-rt1-rcu-dipankar/kernel/kprobes.c	2006-06-28 23:24:43.000000000 +0530
@@ -49,7 +49,11 @@ static struct hlist_head kprobe_table[KP
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 
 DEFINE_MUTEX(kprobe_mutex);		/* Protects kprobe_table */
-DEFINE_SPINLOCK(kretprobe_lock);	/* Protects kretprobe_inst_table */
+/*
+ * It is acquired from finish_task_switch() with preemption disabled.
+ * Needs to be raw.
+ */
+DEFINE_RAW_SPINLOCK(kretprobe_lock);	/* Protects kretprobe_inst_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
diff -puN include/linux/kprobes.h~fix-kprobe-atomic-sched include/linux/kprobes.h
--- linux-2.6.17-rt1-rcu/include/linux/kprobes.h~fix-kprobe-atomic-sched	2006-06-28 23:30:55.000000000 +0530
+++ linux-2.6.17-rt1-rcu-dipankar/include/linux/kprobes.h	2006-06-28 23:32:20.000000000 +0530
@@ -152,7 +152,7 @@ struct kretprobe_instance {
 	struct task_struct *task;
 };
 
-extern spinlock_t kretprobe_lock;
+extern raw_spinlock_t kretprobe_lock;
 extern struct mutex kprobe_mutex;
 extern int arch_prepare_kprobe(struct kprobe *p);
 extern void arch_arm_kprobe(struct kprobe *p);

_

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-06-28 18:21 ` [PATCH] 2.6.17-rt1 : fix " Dipankar Sarma
@ 2006-06-28 19:32   ` Ingo Molnar
  2006-06-28 20:02     ` Dipankar Sarma
  0 siblings, 1 reply; 16+ messages in thread
From: Ingo Molnar @ 2006-06-28 19:32 UTC (permalink / raw)
  To: Dipankar Sarma
  Cc: linux-kernel, Ananth N Mavinakayanahalli, Prasanna Panchamukhi


* Dipankar Sarma <dipankar@in.ibm.com> wrote:

> Turns out that kprobe_flush_task() is called from finish_task_switch() 
> with preemption disabled and it uses a converted spin lock. The 
> following patch fixed the problem for me and I was able to boot my 
> x86_64 box.

ah, subtle problem and nice fix! We are using an RCU trick to delay task 
freeing in finish_task_switch(), but kprobe_flush_task() isn't done in 
put_task_struct(). [neither would it be right to flush kprobes there.] 
I've released -rt4 with this included.

	Ingo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-06-28 19:32   ` Ingo Molnar
@ 2006-06-28 20:02     ` Dipankar Sarma
  2006-06-29 14:24       ` Ingo Molnar
  0 siblings, 1 reply; 16+ messages in thread
From: Dipankar Sarma @ 2006-06-28 20:02 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Ananth N Mavinakayanahalli, Prasanna Panchamukhi

On Wed, Jun 28, 2006 at 09:32:56PM +0200, Ingo Molnar wrote:
> 
> * Dipankar Sarma <dipankar@in.ibm.com> wrote:
> 
> > Turns out that kprobe_flush_task() is called from finish_task_switch() 
> > with preemption disabled and it uses a converted spin lock. The 
> > following patch fixed the problem for me and I was able to boot my 
> > x86_64 box.
> 
> ah, subtle problem and nice fix! We are using an RCU trick to delay task 
> freeing in finish_task_switch(), but kprobe_flush_task() isnt done in 

Yes, otherwise it would have been hell to do __put_task_struct()
with preemption disabled.

> put_task_struct(). [neither would it be right to flush kprobes there.] 
> I've released -rt4 with this included.

OK, I need to catch up, but I see a lot of oopses while running rcutorture
on my box (rt1). I am investigating this atm.

Thanks
Dipankar

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-06-28 20:02     ` Dipankar Sarma
@ 2006-06-29 14:24       ` Ingo Molnar
  2006-06-29 16:32         ` Paul E. McKenney
  0 siblings, 1 reply; 16+ messages in thread
From: Ingo Molnar @ 2006-06-29 14:24 UTC (permalink / raw)
  To: Dipankar Sarma
  Cc: linux-kernel, Ananth N Mavinakayanahalli, Prasanna Panchamukhi


* Dipankar Sarma <dipankar@in.ibm.com> wrote:

> OK, I need to catch up, but I see a lot of oops while running 
> rcutorture in my box (rt1). I am investigating this atm.

fyi, 2.6.17-mm4 throws tons of these:

 BUG: scheduling while atomic: rcu_torture_rea/0x00010000/1471
  [<c0106123>] show_trace+0xd/0x10
  [<c010613d>] dump_stack+0x17/0x1a
  [<c123b4e2>] schedule+0x61/0xc61
  [<c015f380>] rcu_torture_reader+0x12e/0x17e
  [<c014101f>] kthread+0xc4/0xf0
  [<c0102005>] kernel_thread_helper+0x5/0xb

	Ingo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-06-29 14:24       ` Ingo Molnar
@ 2006-06-29 16:32         ` Paul E. McKenney
  2006-06-29 19:41           ` Paul E. McKenney
  0 siblings, 1 reply; 16+ messages in thread
From: Paul E. McKenney @ 2006-06-29 16:32 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Dipankar Sarma, linux-kernel, Ananth N Mavinakayanahalli,
	Prasanna Panchamukhi

On Thu, Jun 29, 2006 at 04:24:42PM +0200, Ingo Molnar wrote:
> 
> * Dipankar Sarma <dipankar@in.ibm.com> wrote:
> 
> > OK, I need to catch up, but I see a lot of oops while running 
> > rcutorture in my box (rt1). I am investigating this atm.
> 
> fyi, 2.6.17-mm4 throws tons of these:
> 
>  BUG: scheduling while atomic: rcu_torture_rea/0x00010000/1471
>   [<c0106123>] show_trace+0xd/0x10
>   [<c010613d>] dump_stack+0x17/0x1a
>   [<c123b4e2>] schedule+0x61/0xc61
>   [<c015f380>] rcu_torture_reader+0x12e/0x17e
>   [<c014101f>] kthread+0xc4/0xf0
>   [<c0102005>] kernel_thread_helper+0x5/0xb

Probably the fault of my new ops-ization of rcutorture.c.  :-/
(For whatever it is worth, Dipankar would still be using the older
non-ops-ized version of rcutorture.c.)

Did you get these oopses with the default setting of torture_type, or did you
specify torture_type=rcu_bh or torture_type=srcu to the modprobe command?

This was on i386, x86_64, or on something else?

Ah!  This would have been a CONFIG_PREEMPT build, right?

						Thanx, Paul

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-06-29 16:32         ` Paul E. McKenney
@ 2006-06-29 19:41           ` Paul E. McKenney
  2006-06-29 20:11             ` Ingo Molnar
  0 siblings, 1 reply; 16+ messages in thread
From: Paul E. McKenney @ 2006-06-29 19:41 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Dipankar Sarma, linux-kernel, Ananth N Mavinakayanahalli,
	Prasanna Panchamukhi

On Thu, Jun 29, 2006 at 09:32:36AM -0700, Paul E. McKenney wrote:
> On Thu, Jun 29, 2006 at 04:24:42PM +0200, Ingo Molnar wrote:
> > 
> > * Dipankar Sarma <dipankar@in.ibm.com> wrote:
> > 
> > > OK, I need to catch up, but I see a lot of oops while running 
> > > rcutorture in my box (rt1). I am investigating this atm.
> > 
> > fyi, 2.6.17-mm4 throws tons of these:
> > 
> >  BUG: scheduling while atomic: rcu_torture_rea/0x00010000/1471
> >   [<c0106123>] show_trace+0xd/0x10
> >   [<c010613d>] dump_stack+0x17/0x1a
> >   [<c123b4e2>] schedule+0x61/0xc61
> >   [<c015f380>] rcu_torture_reader+0x12e/0x17e
> >   [<c014101f>] kthread+0xc4/0xf0
> >   [<c0102005>] kernel_thread_helper+0x5/0xb
> 
> Probably the fault of my new ops-ization of rcutorture.c.  :-/
> (For whatever it is worth, Dipankar would still be using the older
> non-ops-ized version of rcutorture.c.)
> 
> Did you get these oopses with default setting of torture_type, or did you
> specify torture_type=rcu_bh or torture_type=srcu to the modprobe command?
> 
> This was on i386, x86_64, or on something else?
> 
> Ah!  This would have been a CONFIG_PREEMPT build, right?

OK, I ran this with both torture types (rcu and rcu_bh) on i386 with
CONFIG_PREEMPT=y on 2.6.17-mm4 and didn't see any "scheduling while
atomic" oopses -- or any other oopses, for that matter.

Here is the .config file I used.  What am I missing here?

						Thanx, Paul

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.17-mm4-autokern1
# Thu Jun 29 17:44:25 2006
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_X86=y
CONFIG_MMU=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"

#
# Code maturity level options
#
CONFIG_EXPERIMENTAL=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32

#
# General setup
#
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_SWAP=y
CONFIG_SWAP_PREFETCH=y
CONFIG_SYSVIPC=y
# CONFIG_IPC_NS is not set
# CONFIG_POSIX_MQUEUE is not set
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_TASKSTATS is not set
CONFIG_SYSCTL=y
# CONFIG_UTS_NS is not set
# CONFIG_AUDIT is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
# CONFIG_CPUSETS is not set
# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_KLIBC_ERRLIST=y
CONFIG_KLIBC_ZLIB=y
CONFIG_UID16=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
CONFIG_RT_MUTEXES=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
CONFIG_SLAB=y
CONFIG_VM_EVENT_COUNTERS=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
# CONFIG_SLOB is not set

#
# Loadable module support
#
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y

#
# Block layer
#
CONFIG_LBD=y
# CONFIG_BLK_DEV_IO_TRACE is not set
# CONFIG_LSF is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
CONFIG_DEFAULT_AS=y
# CONFIG_DEFAULT_DEADLINE is not set
# CONFIG_DEFAULT_CFQ is not set
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="anticipatory"

#
# Processor type and features
#
CONFIG_SMP=y
# CONFIG_X86_PC is not set
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
CONFIG_X86_NUMAQ=y
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
# CONFIG_X86_ES7000 is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
CONFIG_MPENTIUMIII=y
# CONFIG_MPENTIUMM is not set
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_X86_GENERIC is not set
CONFIG_X86_CMPXCHG=y
CONFIG_X86_XADD=y
CONFIG_X86_L1_CACHE_SHIFT=5
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_CMPXCHG64=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
# CONFIG_HPET_TIMER is not set
CONFIG_NR_CPUS=32
# CONFIG_SCHED_SMT is not set
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_BKL=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
# CONFIG_X86_MCE is not set
CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
# CONFIG_X86_REBOOTFIXUPS is not set
# CONFIG_MICROCODE is not set
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y

#
# Firmware Drivers
#
# CONFIG_EDD is not set
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
# CONFIG_NOHIGHMEM is not set
# CONFIG_HIGHMEM4G is not set
CONFIG_HIGHMEM64G=y
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
CONFIG_X86_PAE=y
CONFIG_NUMA=y
CONFIG_NODES_SHIFT=4
CONFIG_HAVE_ARCH_BOOTMEM_NODE=y
CONFIG_ARCH_HAVE_MEMORY_PRESENT=y
CONFIG_NEED_NODE_MEMMAP_SIZE=y
CONFIG_HAVE_ARCH_ALLOC_REMAP=y
CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_SELECT_MEMORY_MODEL=y
# CONFIG_FLATMEM_MANUAL is not set
CONFIG_DISCONTIGMEM_MANUAL=y
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_DISCONTIGMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_NEED_MULTIPLE_NODES=y
CONFIG_HAVE_MEMORY_PRESENT=y
CONFIG_SPARSEMEM_STATIC=y
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_MIGRATION=y
CONFIG_RESOURCES_64BIT=y
CONFIG_ADAPTIVE_READAHEAD=y
# CONFIG_READAHEAD_ALLOW_OVERHEADS is not set
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
# CONFIG_HIGHPTE is not set
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
CONFIG_IRQBALANCE=y
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
# CONFIG_VGA_NOPROBE is not set
# CONFIG_HZ_100 is not set
CONFIG_HZ_250=y
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
# CONFIG_KEXEC is not set
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
# CONFIG_HOTPLUG_CPU is not set
CONFIG_COMPAT_VDSO=y
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y

#
# Power management options (ACPI, APM)
#
# CONFIG_PM is not set

#
# ACPI (Advanced Configuration and Power Interface) Support
#
# CONFIG_ACPI is not set

#
# CPU Frequency scaling
#
# CONFIG_CPU_FREQ is not set

#
# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
#
CONFIG_PCI=y
# CONFIG_PCI_GOBIOS is not set
# CONFIG_PCI_GOMMCONFIG is not set
# CONFIG_PCI_GODIRECT is not set
CONFIG_PCI_GOANY=y
CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
# CONFIG_PCIEPORTBUS is not set
# CONFIG_PCI_MSI is not set
# CONFIG_PCI_DEBUG is not set
CONFIG_ISA_DMA_API=y
CONFIG_ISA=y
# CONFIG_EISA is not set
# CONFIG_MCA is not set
# CONFIG_SCx200 is not set

#
# PCCARD (PCMCIA/CardBus) support
#
# CONFIG_PCCARD is not set

#
# PCI Hotplug Support
#
# CONFIG_HOTPLUG_PCI is not set

#
# Executable file formats
#
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_AOUT=y
CONFIG_BINFMT_MISC=y

#
# Networking
#
CONFIG_NET=y

#
# Networking options
#
# CONFIG_NETDEBUG is not set
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
CONFIG_XFRM=y
# CONFIG_XFRM_USER is not set
# CONFIG_NET_KEY is not set
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
# CONFIG_IP_ADVANCED_ROUTER is not set
CONFIG_IP_FIB_HASH=y
# CONFIG_IP_PNP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
# CONFIG_IP_MROUTE is not set
# CONFIG_ARPD is not set
# CONFIG_SYN_COOKIES is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
CONFIG_INET_XFRM_MODE_TRANSPORT=y
CONFIG_INET_XFRM_MODE_TUNNEL=y
CONFIG_INET_DIAG=y
CONFIG_INET_TCP_DIAG=y
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_BIC=y
# CONFIG_IPV6 is not set
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
# CONFIG_NETWORK_SECMARK is not set
# CONFIG_NETFILTER is not set

#
# DCCP Configuration (EXPERIMENTAL)
#
# CONFIG_IP_DCCP is not set

#
# SCTP Configuration (EXPERIMENTAL)
#
# CONFIG_IP_SCTP is not set

#
# TIPC Configuration (EXPERIMENTAL)
#
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set

#
# QoS and/or fair queueing
#
# CONFIG_NET_SCHED is not set

#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_IEEE80211 is not set

#
# Device Drivers
#

#
# Generic Driver Options
#
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
# CONFIG_FW_LOADER is not set
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_SYS_HYPERVISOR is not set

#
# Connector - unified userspace <-> kernelspace linker
#
# CONFIG_CONNECTOR is not set

#
# Memory Technology Devices (MTD)
#
# CONFIG_MTD is not set

#
# Parallel port support
#
# CONFIG_PARPORT is not set

#
# Plug and Play support
#
# CONFIG_PNP is not set

#
# Block devices
#
# CONFIG_BLK_DEV_FD is not set
# CONFIG_BLK_DEV_XD is not set
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=y
# CONFIG_BLK_DEV_CRYPTOLOOP is not set
# CONFIG_BLK_DEV_NBD is not set
# CONFIG_BLK_DEV_SX8 is not set
# CONFIG_BLK_DEV_RAM is not set
# CONFIG_BLK_DEV_INITRD is not set
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set

#
# ATA/ATAPI/MFM/RLL support
#
# CONFIG_IDE is not set

#
# SCSI device support
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
# CONFIG_SCSI_TGT is not set
CONFIG_SCSI_PROC_FS=y

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
# CONFIG_CHR_DEV_SCH is not set

#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
#
CONFIG_SCSI_MULTI_LUN=y
# CONFIG_SCSI_CONSTANTS is not set
# CONFIG_SCSI_LOGGING is not set

#
# SCSI Transports
#
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_DOMAIN_ATTRS is not set

#
# SCSI low-level drivers
#
# CONFIG_ISCSI_TCP is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_7000FASST is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AHA152X is not set
# CONFIG_SCSI_AHA1542 is not set
# CONFIG_SCSI_AACRAID is not set
CONFIG_SCSI_AIC7XXX=y
CONFIG_AIC7XXX_CMDS_PER_DEVICE=253
CONFIG_AIC7XXX_RESET_DELAY_MS=2000
# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
CONFIG_AIC7XXX_DEBUG_MASK=0
# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_DPT_I2O is not set
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_IN2000 is not set
# CONFIG_SCSI_ARCMSR is not set
# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_SATA is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_DTC3280 is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_GENERIC_NCR5380 is not set
# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
CONFIG_SCSI_IPS=y
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_NCR53C406A is not set
# CONFIG_SCSI_STEX is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_PAS16 is not set
# CONFIG_SCSI_PSI240I is not set
# CONFIG_SCSI_QLOGIC_FAS is not set
CONFIG_SCSI_QLOGIC_1280=y
# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_SYM53C416 is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_T128 is not set
# CONFIG_SCSI_U14_34F is not set
# CONFIG_SCSI_ULTRASTOR is not set
# CONFIG_SCSI_NSP32 is not set
# CONFIG_SCSI_DEBUG is not set
# CONFIG_SCSI_SRP is not set

#
# Old CD-ROM drivers (not SCSI, not IDE)
#
# CONFIG_CD_NO_IDESCSI is not set

#
# Multi-device support (RAID and LVM)
#
# CONFIG_MD is not set

#
# Fusion MPT device support
#
# CONFIG_FUSION is not set
# CONFIG_FUSION_SPI is not set
# CONFIG_FUSION_FC is not set
# CONFIG_FUSION_SAS is not set

#
# IEEE 1394 (FireWire) support
#
# CONFIG_IEEE1394 is not set

#
# I2O device support
#
# CONFIG_I2O is not set

#
# Network device support
#
CONFIG_NETDEVICES=y
CONFIG_DUMMY=y
# CONFIG_BONDING is not set
# CONFIG_EQUALIZER is not set
# CONFIG_TUN is not set

#
# ARCnet devices
#
# CONFIG_ARCNET is not set

#
# PHY device support
#
# CONFIG_PHYLIB is not set

#
# Ethernet (10 or 100Mbit)
#
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
# CONFIG_NET_VENDOR_3COM is not set
# CONFIG_LANCE is not set
# CONFIG_NET_VENDOR_SMC is not set
# CONFIG_NET_VENDOR_RACAL is not set

#
# Tulip family network device support
#
CONFIG_NET_TULIP=y
# CONFIG_DE2104X is not set
CONFIG_TULIP=y
# CONFIG_TULIP_MWI is not set
# CONFIG_TULIP_MMIO is not set
# CONFIG_TULIP_NAPI is not set
# CONFIG_DE4X5 is not set
# CONFIG_WINBOND_840 is not set
# CONFIG_DM9102 is not set
# CONFIG_ULI526X is not set
# CONFIG_AT1700 is not set
# CONFIG_DEPCA is not set
# CONFIG_HP100 is not set
# CONFIG_NET_ISA is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
# CONFIG_AMD8111_ETH is not set
CONFIG_ADAPTEC_STARFIRE=y
# CONFIG_ADAPTEC_STARFIRE_NAPI is not set
# CONFIG_AC3200 is not set
# CONFIG_APRICOT is not set
# CONFIG_B44 is not set
# CONFIG_FORCEDETH is not set
# CONFIG_CS89x0 is not set
# CONFIG_DGRS is not set
CONFIG_EEPRO100=y
# CONFIG_E100 is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
# CONFIG_NE2K_PCI is not set
# CONFIG_8139CP is not set
# CONFIG_8139TOO is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set

#
# Ethernet (1000 Mbit)
#
CONFIG_ACENIC=y
# CONFIG_ACENIC_OMIT_TIGON_I is not set
# CONFIG_DL2K is not set
CONFIG_E1000=y
# CONFIG_E1000_NAPI is not set
# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
# CONFIG_SKY2 is not set
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
# CONFIG_TIGON3 is not set
# CONFIG_BNX2 is not set
# CONFIG_QLA3XXX is not set

#
# Ethernet (10000 Mbit)
#
# CONFIG_CHELSIO_T1 is not set
# CONFIG_IXGB is not set
# CONFIG_S2IO is not set
# CONFIG_MYRI10GE is not set

#
# Token Ring devices
#
# CONFIG_TR is not set

#
# Wireless LAN (non-hamradio)
#
# CONFIG_NET_RADIO is not set

#
# Wan interfaces
#
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
# CONFIG_SHAPER is not set
# CONFIG_NETCONSOLE is not set
# CONFIG_NETPOLL is not set
# CONFIG_NET_POLL_CONTROLLER is not set

#
# ISDN subsystem
#
# CONFIG_ISDN is not set

#
# Telephony Support
#
# CONFIG_PHONE is not set

#
# Input device support
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_EFFECTS is not set

#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
CONFIG_INPUT_JOYDEV=y
# CONFIG_INPUT_TSDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
# CONFIG_KEYBOARD_NEWTON is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_INPORT is not set
# CONFIG_MOUSE_LOGIBM is not set
# CONFIG_MOUSE_PC110PAD is not set
# CONFIG_MOUSE_VSXXXAA is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
# CONFIG_INPUT_MISC is not set

#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
# CONFIG_SERIO_SERPORT is not set
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
# CONFIG_GAMEPORT is not set

#
# Character devices
#
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
# CONFIG_VT_HW_CONSOLE_BINDING is not set
# CONFIG_SERIAL_NONSTANDARD is not set

#
# Serial drivers
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_NR_UARTS=4
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
# CONFIG_SERIAL_8250_EXTENDED is not set

#
# Non-8250 serial port support
#
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256

#
# IPMI
#
# CONFIG_IPMI_HANDLER is not set

#
# Watchdog Cards
#
# CONFIG_WATCHDOG is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_NVRAM is not set
# CONFIG_RTC is not set
# CONFIG_GEN_RTC is not set
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
# CONFIG_SONYPI is not set

#
# Ftape, the floppy tape device driver
#
# CONFIG_AGP is not set
# CONFIG_DRM is not set
# CONFIG_MWAVE is not set
# CONFIG_PC8736x_GPIO is not set
# CONFIG_NSC_GPIO is not set
# CONFIG_CS5535_GPIO is not set
# CONFIG_RAW_DRIVER is not set
# CONFIG_HANGCHECK_TIMER is not set

#
# TPM devices
#
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set

#
# I2C support
#
# CONFIG_I2C is not set

#
# SPI support
#
# CONFIG_SPI is not set
# CONFIG_SPI_MASTER is not set

#
# Dallas's 1-wire bus
#

#
# Hardware Monitoring support
#
CONFIG_HWMON=y
# CONFIG_HWMON_VID is not set
# CONFIG_SENSORS_ABITUGURU is not set
# CONFIG_SENSORS_F71805F is not set
# CONFIG_SENSORS_HDAPS is not set
# CONFIG_HWMON_DEBUG_CHIP is not set

#
# Misc devices
#
# CONFIG_IBM_ASM is not set

#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
CONFIG_VIDEO_V4L2=y

#
# Digital Video Broadcasting Devices
#
# CONFIG_DVB is not set

#
# Graphics support
#
CONFIG_FIRMWARE_EDID=y
# CONFIG_FB is not set

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
# CONFIG_VGACON_SOFT_SCROLLBACK is not set
# CONFIG_VIDEO_SELECT is not set
# CONFIG_MDA_CONSOLE is not set
CONFIG_DUMMY_CONSOLE=y

#
# Sound
#
# CONFIG_SOUND is not set

#
# USB support
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB_ARCH_HAS_EHCI=y
# CONFIG_USB is not set

#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
#

#
# USB Gadget Support
#
# CONFIG_USB_GADGET is not set

#
# MMC/SD Card support
#
# CONFIG_MMC is not set

#
# LED devices
#
# CONFIG_NEW_LEDS is not set

#
# LED drivers
#

#
# LED Triggers
#

#
# InfiniBand support
#
# CONFIG_INFINIBAND is not set

#
# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
#
# CONFIG_EDAC is not set

#
# Real Time Clock
#
# CONFIG_RTC_CLASS is not set

#
# DMA Engine support
#
# CONFIG_DMA_ENGINE is not set

#
# DMA Clients
#

#
# DMA Devices
#

#
# File systems
#
CONFIG_EXT2_FS=y
# CONFIG_EXT2_FS_XATTR is not set
# CONFIG_EXT2_FS_XIP is not set
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_XATTR=y
# CONFIG_EXT3_FS_POSIX_ACL is not set
# CONFIG_EXT3_FS_SECURITY is not set
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISER4_FS is not set
# CONFIG_REISERFS_FS is not set
CONFIG_JFS_FS=y
# CONFIG_JFS_POSIX_ACL is not set
# CONFIG_JFS_SECURITY is not set
# CONFIG_JFS_DEBUG is not set
# CONFIG_JFS_STATISTICS is not set
# CONFIG_FS_POSIX_ACL is not set
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
CONFIG_INOTIFY=y
CONFIG_INOTIFY_USER=y
# CONFIG_QUOTA is not set
CONFIG_DNOTIFY=y
# CONFIG_AUTOFS_FS is not set
# CONFIG_AUTOFS4_FS is not set
# CONFIG_FUSE_FS is not set

#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
# CONFIG_UDF_FS is not set

#
# DOS/FAT/NT Filesystems
#
# CONFIG_MSDOS_FS is not set
# CONFIG_VFAT_FS is not set
# CONFIG_NTFS_FS is not set

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
# CONFIG_HUGETLBFS is not set
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
# CONFIG_CONFIGFS_FS is not set

#
# Miscellaneous filesystems
#
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set

#
# Network File Systems
#
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
# CONFIG_NFS_V3_ACL is not set
# CONFIG_NFS_V4 is not set
# CONFIG_NFS_DIRECTIO is not set
CONFIG_NFSD=y
CONFIG_NFSD_V3=y
# CONFIG_NFSD_V3_ACL is not set
# CONFIG_NFSD_V4 is not set
# CONFIG_NFSD_TCP is not set
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_EXPORTFS=y
CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
# CONFIG_RPCSEC_GSS_KRB5 is not set
# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
# CONFIG_CIFS is not set
# CONFIG_CIFS_DEBUG2 is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
# CONFIG_9P_FS is not set

#
# Partition Types
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y

#
# Native Language Support
#
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="iso8859-1"
# CONFIG_NLS_CODEPAGE_437 is not set
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
# CONFIG_NLS_ASCII is not set
# CONFIG_NLS_ISO8859_1 is not set
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
# CONFIG_NLS_UTF8 is not set

#
# Distributed Lock Manager
#

#
# Instrumentation Support
#
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
# CONFIG_KPROBES is not set

#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
# CONFIG_PRINTK_TIME is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_UNUSED_SYMBOLS=y
# CONFIG_DEBUG_SHIRQ is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_LOG_BUF_SHIFT=17
CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHEDSTATS is not set
# CONFIG_DEBUG_SLAB is not set
CONFIG_DEBUG_PREEMPT=y
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_RT_MUTEX_TESTER is not set
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_DEBUG_MUTEXES is not set
# CONFIG_DEBUG_RWSEMS is not set
# CONFIG_DEBUG_LOCK_ALLOC is not set
# CONFIG_PROVE_LOCKING is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_HIGHMEM is not set
CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_DEBUG_INFO=y
# CONFIG_PAGE_OWNER is not set
# CONFIG_DEBUG_FS is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_FRAME_POINTER is not set
# CONFIG_UNWIND_INFO is not set
# CONFIG_PROFILE_LIKELY is not set
CONFIG_FORCED_INLINING=y
# CONFIG_DEBUG_SYNCHRO_TEST is not set
CONFIG_RCU_TORTURE_TEST=m
CONFIG_EARLY_PRINTK=y
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_RODATA is not set
# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
CONFIG_DOUBLEFAULT=y

#
# Security options
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY is not set

#
# Cryptographic options
#
CONFIG_CRYPTO=y
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_NULL is not set
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1=y
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
# CONFIG_CRYPTO_TGR192 is not set
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_TWOFISH_586 is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_AES is not set
# CONFIG_CRYPTO_AES_586 is not set
# CONFIG_CRYPTO_CAST5 is not set
# CONFIG_CRYPTO_CAST6 is not set
# CONFIG_CRYPTO_TEA is not set
# CONFIG_CRYPTO_ARC4 is not set
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_ANUBIS is not set
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_CRC32C is not set
# CONFIG_CRYPTO_TEST is not set

#
# Hardware crypto devices
#
# CONFIG_CRYPTO_DEV_PADLOCK is not set

#
# Library routines
#
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
CONFIG_CRC32=y
# CONFIG_LIBCRC32C is not set
CONFIG_PLIST=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_X86_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
CONFIG_KTIME_SCALAR=y

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-06-29 19:41           ` Paul E. McKenney
@ 2006-06-29 20:11             ` Ingo Molnar
  2006-06-29 21:35               ` Paul E. McKenney
  2006-07-03 16:57               ` Dipankar Sarma
  0 siblings, 2 replies; 16+ messages in thread
From: Ingo Molnar @ 2006-06-29 20:11 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Dipankar Sarma, linux-kernel, Ananth N Mavinakayanahalli,
	Prasanna Panchamukhi


* Paul E. McKenney <paulmck@us.ibm.com> wrote:

> > This was on i386, x86_64, or on something else?
> > 
> > Ah!  This would have been a CONFIG_PREEMPT build, right?
> 
> OK, I ran this with both torture types (rcu and rcu_bh) on i386 with 
> CONFIG_PREEMPT=y on 2.6.17-mm4 and didn't see any "scheduling while 
> atomic" oopses -- or any other oopses, for that matter.
> 
> Here is the .config file I used.  What am I missing here?

hm, i'm seeing some other types of crashes too - so rcutorture could 
just have been collateral damage. It was on i386, an allyesconfig 
bzImage kernel.

	Ingo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-06-29 20:11             ` Ingo Molnar
@ 2006-06-29 21:35               ` Paul E. McKenney
  2006-07-03 16:57               ` Dipankar Sarma
  1 sibling, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2006-06-29 21:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Dipankar Sarma, linux-kernel, Ananth N Mavinakayanahalli,
	Prasanna Panchamukhi

On Thu, Jun 29, 2006 at 10:11:45PM +0200, Ingo Molnar wrote:
> 
> * Paul E. McKenney <paulmck@us.ibm.com> wrote:
> 
> > > This was on i386, x86_64, or on something else?
> > > 
> > > Ah!  This would have been a CONFIG_PREEMPT build, right?
> > 
> > OK, I ran this with both torture types (rcu and rcu_bh) on i386 with 
> > CONFIG_PREEMPT=y on 2.6.17-mm4 and didn't see any "scheduling while 
> > atomic" oopses -- or any other oopses, for that matter.
> > 
> > Here is the .config file I used.  What am I missing here?
> 
> hm, i'm seeing some other types of crashes too - so rcutorture could 
> just have been collateral damage. It was on i386, an allyesconfig 
> bzImage kernel.

Thanks for the info -- I will consider rcutorture free from suspicion
until you tell me otherwise.  ;-)

						Thanx, Paul

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-06-29 20:11             ` Ingo Molnar
  2006-06-29 21:35               ` Paul E. McKenney
@ 2006-07-03 16:57               ` Dipankar Sarma
  2006-07-04  4:15                 ` Dipankar Sarma
  1 sibling, 1 reply; 16+ messages in thread
From: Dipankar Sarma @ 2006-07-03 16:57 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Paul E. McKenney, linux-kernel, Ananth N Mavinakayanahalli,
	Prasanna Panchamukhi

On Thu, Jun 29, 2006 at 10:11:45PM +0200, Ingo Molnar wrote:
> 
> * Paul E. McKenney <paulmck@us.ibm.com> wrote:
> > OK, I ran this with both torture types (rcu and rcu_bh) on i386 with 
> > CONFIG_PREEMPT=y on 2.6.17-mm4 and didn't see any "scheduling while 
> > atomic" oopses -- or any other oopses, for that matter.
> > 
> > Here is the .config file I used.  What am I missing here?
> 
> hm, i'm seeing some other types of crashes too - so rcutorture could 
> just have been collateral damage. It was on i386, an allyesconfig 
> bzImage kernel.

With 2.6.17-rt5 I see this -

llm17:~/rcutorture # ./rcutorture.sh
Starting pass 0
Unable to handle kernel paging request at ffffffff88006bd0 RIP:
<ffffffff802597d5>{rcu_process_callbacks+107}
rcutorture: --- End of test: SUCCESS: nreaders=8 stat_interval=1PGD 203027 PUD 205027 PMD 21eb18067 PTE 21829f163
Oops: 0000 [1] PREEMPT SMP
CPU 1
Modules linked in:
Pid: 19, comm: softirq-tasklet Not tainted 2.6.17-rt5 #1
RIP: 0010:[<ffffffff802597d5>] <ffffffff802597d5>{rcu_process_callbacks+107}
RSP: 0000:ffff810220a9deb8  EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffffffff80713570 RCX: 0000000000000003
RDX: 0000000000000001 RSI: ffff810220a9c010 RDI: 0000000000000003
RBP: ffffffff88006bd0 R08: ffff810220a9c000 R09: ffff810220a8fed8
R10: ffff810220a8fe08 R11: ffffffff804fdb7e R12: 0000000000000000
R13: ffff8100051a7310 R14: ffffffff80531258 R15: ffffffff807b1310
FS:  0000000000000000(0000) GS:ffff810220b0cd40(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: ffffffff88006bd0 CR3: 0000000000201000 CR4: 00000000000006e0
Process softirq-tasklet (pid: 19, threadinfo ffff810220a9c000, task ffff810220a9ad30)
Stack: ffff810220a41bc8 ffffffff80713570 00000000000f4240 ffffffff802357ef
       ffff8100051a7310 0000000000000020 ffff810220a41bc8 ffffffff80235f3b
       ffffffff00000001 ffffffff807b1310
Call Trace:
       <ffffffff802357ef>{__tasklet_action+181}
       <ffffffff80235f3b>{ksoftirqd+280}
       <ffffffff80235e23>{ksoftirqd+0}
       <ffffffff80243251>{kthread+212}
       <ffffffff80235e23>{ksoftirqd+0}
       <ffffffff8020a74e>{child_rip+8}
       <ffffffff80235e23>{ksoftirqd+0}
       <ffffffff8024317d>{kthread+0}
       <ffffffff8020a746>{child_rip+0}
---------------------------
| preempt count: 00000001 ]
| 1-level deep critical section nesting:
----------------------------------------

I have been able to reproduce a similar-looking oops with 2.6.16-rt29.
2.6.16-rt20 works fine. I will try to track it down to the exact
release as far as I can.

Thanks
Dipankar

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-07-03 16:57               ` Dipankar Sarma
@ 2006-07-04  4:15                 ` Dipankar Sarma
  2006-07-04  6:43                   ` Ingo Molnar
  0 siblings, 1 reply; 16+ messages in thread
From: Dipankar Sarma @ 2006-07-04  4:15 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul E. McKenney, linux-kernel

On Mon, Jul 03, 2006 at 10:27:50PM +0530, Dipankar Sarma wrote:
> On Thu, Jun 29, 2006 at 10:11:45PM +0200, Ingo Molnar wrote:
> > 
> > * Paul E. McKenney <paulmck@us.ibm.com> wrote:
> > > OK, I ran this with both torture types (rcu and rcu_bh) on i386 with 
> > > CONFIG_PREEMPT=y on 2.6.17-mm4 and didn't see any "scheduling while 
> > > atomic" oopses -- or any other oopses, for that matter.
> > > 
> > > Here is the .config file I used.  What am I missing here?
> > 
> > hm, i'm seeing some other types of crashes too - so rcutorture could 
> > just have been collateral damage. It was on i386, an allyesconfig 
> > bzImage kernel.
> 
> With 2.6.17-rt5 I see this -
> 
> Starting pass 0
> Unable to handle kernel paging request at ffffffff88006bd0 RIP:
> <ffffffff802597d5>{rcu_process_callbacks+107}
> rcutorture: --- End of test: SUCCESS: nreaders=8 stat_interval=1PGD 203027 PUD 205027 PMD 21eb18067 PTE 21829f163
> Oops: 0000 [1] PREEMPT SMP
> CPU 1
> 
> I have been able to reproduce a similar-looking oops with 2.6.16-rt29.
> 2.6.16-rt20 works fine. I will try to track it down to the exact
> release as far as I can.

OK, it looks as if rt20 is fine but rt21 is broken. So something
that got in rt21 is causing this oops.

Ingo, do you have a suspect ?

Thanks
Dipankar

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-07-04  4:15                 ` Dipankar Sarma
@ 2006-07-04  6:43                   ` Ingo Molnar
  2006-07-04  6:50                     ` Ingo Molnar
  0 siblings, 1 reply; 16+ messages in thread
From: Ingo Molnar @ 2006-07-04  6:43 UTC (permalink / raw)
  To: Dipankar Sarma; +Cc: Paul E. McKenney, linux-kernel, john stultz


* Dipankar Sarma <dipankar@in.ibm.com> wrote:

> > I have been able to reproduce a similar-looking oops with 2.6.16-rt29.
> > 2.6.16-rt20 works fine. I will try to track it down to the exact
> > release as far as I can.
> 
> OK, it looks as if rt20 is fine but rt21 is broken. So something that 
> got in rt21 is causing this oops.

thanks! That really narrows it down.

> Ingo, do you have a suspect ?

I suspect it's the patch below. That patch (from John) relaxes the 
affinities of IRQ threads: if there are /proc/irq/*/smp_affinity entries 
that have multiple bits set an IRQ thread is allowed to jump from one 
CPU to another while it is executing an IRQ-handler. It _should_ be fine 
but i'd not be surprised if that caused breakage ...

if this is the cause of the crash, would it be hard for you to try to 
figure out _which_ IRQ thread is so sensitive to affinity?

	Ingo

Index: linux/kernel/irq/manage.c
===================================================================
--- linux.orig/kernel/irq/manage.c
+++ linux/kernel/irq/manage.c
@@ -717,24 +717,21 @@ static int do_irqd(void * __desc)
 	if (param.sched_priority > 25)
 		curr_irq_prio = param.sched_priority - 1;
 
-//	param.sched_priority = 1;
 	sys_sched_setscheduler(current->pid, SCHED_FIFO, &param);
 
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		do_hardirq(desc);
 		cond_resched_all();
+		local_irq_disable();
 		__do_softirq();
-//		do_softirq_from_hardirq();
 		local_irq_enable();
 #ifdef CONFIG_SMP
 		/*
 		 * Did IRQ affinities change?
 		 */
-		if (!cpu_isset(smp_processor_id(), irq_affinity[irq])) {
-			mask = cpumask_of_cpu(any_online_cpu(irq_affinity[irq]));
-			set_cpus_allowed(current, mask);
-		}
+		if (!cpus_equal(current->cpus_allowed, irq_affinity[irq]));
+			set_cpus_allowed(current, irq_affinity[irq]);
 #endif
 		schedule();
 	}

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-07-04  6:43                   ` Ingo Molnar
@ 2006-07-04  6:50                     ` Ingo Molnar
  2006-07-05  9:11                       ` Dipankar Sarma
  2006-07-06 20:06                       ` Paul E. McKenney
  0 siblings, 2 replies; 16+ messages in thread
From: Ingo Molnar @ 2006-07-04  6:50 UTC (permalink / raw)
  To: Dipankar Sarma; +Cc: Paul E. McKenney, linux-kernel, john stultz


* Ingo Molnar <mingo@elte.hu> wrote:

> > Ingo, do you have a suspect ?
> 
> I suspect it's the patch below. That patch (from John) relaxes the 
> affinities of IRQ threads: if there are /proc/irq/*/smp_affinity 
> entries that have multiple bits set an IRQ thread is allowed to jump 
> from one CPU to another while it is executing a IRQ-handler. It 
> _should_ be fine but i'd not be surprised if that caused breakage ...

the patch below is against 2.6.17-rt5, does this solve the crashes?

	Ingo

Index: linux-rt.q/kernel/irq/manage.c
===================================================================
--- linux-rt.q.orig/kernel/irq/manage.c
+++ linux-rt.q/kernel/irq/manage.c
@@ -645,17 +645,24 @@ extern asmlinkage void __do_softirq(void
 
 static int curr_irq_prio = 49;
 
-static int do_irqd(void * __desc)
+static void follow_irq_affinity(struct irq_desc *desc)
 {
-	struct sched_param param = { 0, };
-	struct irq_desc *desc = __desc;
 #ifdef CONFIG_SMP
-	int irq = desc - irq_desc;
 	cpumask_t mask;
 
-	mask = cpumask_of_cpu(any_online_cpu(irq_desc[irq].affinity));
+	if (cpus_equal(current->cpus_allowed, desc->affinity))
+		return;
+	mask = cpumask_of_cpu(any_online_cpu(desc->affinity));
 	set_cpus_allowed(current, mask);
 #endif
+}
+
+static int do_irqd(void * __desc)
+{
+	struct sched_param param = { 0, };
+	struct irq_desc *desc = __desc;
+
+	follow_irq_affinity(desc);
 	current->flags |= PF_NOFREEZE | PF_HARDIRQ;
 
 	/*
@@ -674,13 +681,7 @@ static int do_irqd(void * __desc)
 		local_irq_disable();
 		__do_softirq();
 		local_irq_enable();
-#ifdef CONFIG_SMP
-		/*
-		 * Did IRQ affinities change?
-		 */
-		if (!cpus_equal(current->cpus_allowed, irq_desc[irq].affinity))
-			set_cpus_allowed(current, irq_desc[irq].affinity);
-#endif
+		follow_irq_affinity(desc);
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-07-04  6:50                     ` Ingo Molnar
@ 2006-07-05  9:11                       ` Dipankar Sarma
  2006-07-26  7:36                         ` Dipankar Sarma
  2006-07-06 20:06                       ` Paul E. McKenney
  1 sibling, 1 reply; 16+ messages in thread
From: Dipankar Sarma @ 2006-07-05  9:11 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul E. McKenney, linux-kernel, john stultz

On Tue, Jul 04, 2006 at 08:50:24AM +0200, Ingo Molnar wrote:
> 
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > > Ingo, do you have a suspect ?
> > 
> > I suspect it's the patch below. That patch (from John) relaxes the 
> > affinities of IRQ threads: if there are /proc/irq/*/smp_affinity 
> > entries that have multiple bits set an IRQ thread is allowed to jump 
> > from one CPU to another while it is executing a IRQ-handler. It 
> > _should_ be fine but i'd not be surprised if that caused breakage ...
> 
> the patch below is against 2.6.17-rt5, does this solve the crashes?
> 

I tried this patch but I still oops quickly after starting rcutorture.

There is some additional information - my -rt20 directory had
another patch which re-organized RCU code to cleanly have multiple
RCU implementations (rcuclassic and rcupreempt for now). That
kernel ran fine with rcutorture, but when I removed that
reorg-rcu-code patch to go to standard -rt20, I started seeing
the same oops. This is bizarre because the reorg-rcu-code
patch isn't supposed to change any logic. I am still investigating
this, but the patch is included below for your reference.

Thanks
Dipankar


This patch re-organizes the RCU code to maintain multiple implementations
of RCU. Users of RCU continue to include rcupdate.h and the
RCU interfaces remain the same.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
---



diff -puN /dev/null include/linux/rcuclassic.h
--- /dev/null	2006-03-26 18:34:52.000000000 +0530
+++ linux-2.6.16-rt20-rcu-dipankar/include/linux/rcuclassic.h	2006-07-04 11:33:34.000000000 +0530
@@ -0,0 +1,133 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (classic version)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2001
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>
+ * 
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#ifndef __LINUX_RCUCLASSIC_H
+#define __LINUX_RCUCLASSIC_H
+
+#ifdef __KERNEL__
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/seqlock.h>
+
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+	long	cur;		/* Current batch number.                      */
+	long	completed;	/* Number of the last completed batch         */
+	int	next_pending;	/* Is the next batch already waiting?         */
+
+	spinlock_t	lock	____cacheline_internodealigned_in_smp;
+	cpumask_t	cpumask; /* CPUs that need to switch in order    */
+	                         /* for current batch to proceed.        */
+} ____cacheline_internodealigned_in_smp;
+
+/* Is batch a before batch b ? */
+static inline int rcu_batch_before(long a, long b)
+{
+        return (a - b) < 0;
+}
+
+/* Is batch a after batch b ? */
+static inline int rcu_batch_after(long a, long b)
+{
+        return (a - b) > 0;
+}
+
+/*
+ * Per-CPU data for Read-Copy UPdate.
+ * nxtlist - new callbacks are added here
+ * curlist - current batch for which quiescent cycle started if any
+ */
+struct rcu_data {
+	/* 1) quiescent state handling : */
+	long		quiescbatch;     /* Batch # for grace period */
+	int		passed_quiesc;	 /* User-mode/idle loop etc. */
+	int		qs_pending;	 /* core waits for quiesc state */
+
+	/* 2) batch handling */
+	long  	       	batch;           /* Batch # for current RCU batch */
+	struct rcu_head *nxtlist;
+	struct rcu_head **nxttail;
+	long            qlen; 	 	 /* # of queued callbacks */
+	struct rcu_head *curlist;
+	struct rcu_head **curtail;
+	struct rcu_head *donelist;
+	struct rcu_head **donetail;
+	long		blimit;		 /* Upper limit on a processed batch */
+	int cpu;
+#ifdef CONFIG_SMP
+	long		last_rs_qlen;	 /* qlen during the last resched */
+#endif
+};
+
+DECLARE_PER_CPU(struct rcu_data, rcu_data);
+DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
+extern struct rcu_ctrlblk rcu_ctrlblk;
+extern struct rcu_ctrlblk rcu_bh_ctrlblk;
+
+/*
+ * Increment the quiescent state counter.
+ * The counter is a bit degenerated: We do not need to know
+ * how many quiescent states passed, just if there was at least
+ * one since the start of the grace period. Thus just a flag.
+ */
+static inline void rcu_qsctr_inc(int cpu)
+{
+	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+	rdp->passed_quiesc = 1;
+}
+static inline void rcu_bh_qsctr_inc(int cpu)
+{
+	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+	rdp->passed_quiesc = 1;
+}
+
+extern int rcu_pending(int cpu);
+
+#define __rcu_read_lock() preempt_disable()
+#define __rcu_read_unlock() preempt_enable()
+
+#define __rcu_read_lock_bh()	local_bh_disable()
+#define __rcu_read_unlock_bh()	local_bh_enable()
+
+#define __synchronize_sched()	synchronize_rcu()
+
+extern void __rcu_init(void);
+extern void rcu_check_callbacks(int cpu, int user);
+extern void rcu_restart_cpu(int cpu);
+extern long rcu_batches_completed(void);
+
+#endif /* __KERNEL__ */
+#endif /* __LINUX_RCUCLASSIC_H */
diff -puN include/linux/rcupdate.h~reorg-rcu-code include/linux/rcupdate.h
--- linux-2.6.16-rt20-rcu/include/linux/rcupdate.h~reorg-rcu-code	2006-07-04 11:33:33.000000000 +0530
+++ linux-2.6.16-rt20-rcu-dipankar/include/linux/rcupdate.h	2006-07-04 11:33:34.000000000 +0530
@@ -42,6 +42,12 @@
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
 
+#ifdef CONFIG_CLASSIC_RCU
+#include <linux/rcuclassic.h>
+#else
+#include <linux/rcupreempt.h>
+#endif
+
 /**
  * struct rcu_head - callback structure for use with RCU
  * @next: next update requests in a list
@@ -58,84 +64,6 @@ struct rcu_head {
        (ptr)->next = NULL; (ptr)->func = NULL; \
 } while (0)
 
-
-#ifndef CONFIG_PREEMPT_RCU
-
-/* Global control variables for rcupdate callback mechanism. */
-struct rcu_ctrlblk {
-	long	cur;		/* Current batch number.                      */
-	long	completed;	/* Number of the last completed batch         */
-	int	next_pending;	/* Is the next batch already waiting?         */
-
-	spinlock_t	lock	____cacheline_internodealigned_in_smp;
-	cpumask_t	cpumask; /* CPUs that need to switch in order    */
-	                         /* for current batch to proceed.        */
-} ____cacheline_internodealigned_in_smp;
-
-/* Is batch a before batch b ? */
-static inline int rcu_batch_before(long a, long b)
-{
-        return (a - b) < 0;
-}
-
-/* Is batch a after batch b ? */
-static inline int rcu_batch_after(long a, long b)
-{
-        return (a - b) > 0;
-}
-
-/*
- * Per-CPU data for Read-Copy UPdate.
- * nxtlist - new callbacks are added here
- * curlist - current batch for which quiescent cycle started if any
- */
-struct rcu_data {
-	/* 1) quiescent state handling : */
-	long		quiescbatch;     /* Batch # for grace period */
-	int		passed_quiesc;	 /* User-mode/idle loop etc. */
-	int		qs_pending;	 /* core waits for quiesc state */
-
-	/* 2) batch handling */
-	long  	       	batch;           /* Batch # for current RCU batch */
-	struct rcu_head *nxtlist;
-	struct rcu_head **nxttail;
-	long            qlen; 	 	 /* # of queued callbacks */
-	struct rcu_head *curlist;
-	struct rcu_head **curtail;
-	struct rcu_head *donelist;
-	struct rcu_head **donetail;
-	long		blimit;		 /* Upper limit on a processed batch */
-	int cpu;
-	struct rcu_head barrier;
-#ifdef CONFIG_SMP
-	long		last_rs_qlen;	 /* qlen during the last resched */
-#endif
-};
-
-DECLARE_PER_CPU(struct rcu_data, rcu_data);
-DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
-extern struct rcu_ctrlblk rcu_ctrlblk;
-extern struct rcu_ctrlblk rcu_bh_ctrlblk;
-
-/*
- * Increment the quiescent state counter.
- * The counter is a bit degenerated: We do not need to know
- * how many quiescent states passed, just if there was at least
- * one since the start of the grace period. Thus just a flag.
- */
-static inline void rcu_qsctr_inc(int cpu)
-{
-	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
-	rdp->passed_quiesc = 1;
-}
-static inline void rcu_bh_qsctr_inc(int cpu)
-{
-	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
-	rdp->passed_quiesc = 1;
-}
-
-extern int rcu_pending(int cpu);
-
 /**
  * rcu_read_lock - mark the beginning of an RCU read-side critical section.
  *
@@ -165,26 +93,14 @@ extern int rcu_pending(int cpu);
  *
  * It is illegal to block while in an RCU read-side critical section.
  */
-#define rcu_read_lock preempt_disable
+#define rcu_read_lock() __rcu_read_lock()
 
 /**
  * rcu_read_unlock - marks the end of an RCU read-side critical section.
  *
  * See rcu_read_lock() for more information.
  */
-#define rcu_read_unlock preempt_enable
-
-#else /* #ifndef CONFIG_PREEMPT_RCU */
-
-#define rcu_qsctr_inc(cpu)
-#define rcu_bh_qsctr_inc(cpu)
-#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
-
-extern void rcu_read_lock(void);
-extern void rcu_read_unlock(void);
-extern int rcu_pending(int cpu);
-
-#endif /* #else #ifndef CONFIG_PREEMPT_RCU */
+#define rcu_read_unlock() __rcu_read_unlock()
 
 /*
  * So where is rcu_write_lock()?  It does not exist, as there is no
@@ -207,22 +123,14 @@ extern int rcu_pending(int cpu);
  * can use just rcu_read_lock().
  *
  */
-#ifndef CONFIG_PREEMPT_RCU
-#define rcu_read_lock_bh()	local_bh_disable()
-#else /* #ifndef CONFIG_PREEMPT_RCU */
-#define rcu_read_lock_bh()	{ rcu_read_lock(); local_bh_disable(); }
-#endif /* #else #ifndef CONFIG_PREEMPT_RCU */
+#define rcu_read_lock_bh()	__rcu_read_lock_bh()
 
-/*
+/**
  * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section
  *
  * See rcu_read_lock_bh() for more information.
  */
-#ifndef CONFIG_PREEMPT_RCU
-#define rcu_read_unlock_bh()	local_bh_enable()
-#else /* #ifndef CONFIG_PREEMPT_RCU */
-#define rcu_read_unlock_bh()	{ local_bh_enable(); rcu_read_unlock(); }
-#endif /* #else #ifndef CONFIG_PREEMPT_RCU */
+#define rcu_read_unlock_bh()	__rcu_read_unlock_bh()
 
 /**
  * rcu_dereference - fetch an RCU-protected pointer in an
@@ -274,26 +182,51 @@ extern int rcu_pending(int cpu);
  * In "classic RCU", these two guarantees happen to be one and
  * the same, but can differ in realtime RCU implementations.
  */
-#ifndef CONFIG_PREEMPT_RCU
-#define synchronize_sched() synchronize_rcu()
-extern void rcu_barrier(void);
-#else /* #ifndef CONFIG_PREEMPT_RCU */
-extern void synchronize_sched(void);
-#define rcu_barrier() do {} while(0)
-#endif /* #else #ifndef CONFIG_PREEMPT_RCU */
+#define synchronize_sched()	__synchronize_sched()
 
-extern void rcu_init(void);
-extern void rcu_check_callbacks(int cpu, int user);
-extern void rcu_restart_cpu(int cpu);
-extern long rcu_batches_completed(void);
 
-/* Exported interfaces */
+/**
+ * call_rcu - Queue an RCU callback for invocation after a grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ *
+ * The update function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
 extern void FASTCALL(call_rcu(struct rcu_head *head, 
 				void (*func)(struct rcu_head *head)));
+
+
+/**
+ * call_rcu_bh - Queue an RCU callback for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ *
+ * The update function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_bh() assumes
+ * that the read-side critical sections end on completion of a softirq
+ * handler. This means that read-side critical sections in process
+ * context must not be interrupted by softirqs. This interface is to be
+ * used when most of the read-side critical sections are in softirq context.
+ * RCU read-side critical sections are delimited by rcu_read_lock() and
+ * rcu_read_unlock(), if in interrupt context or rcu_read_lock_bh()
+ * and rcu_read_unlock_bh(), if in process context. These may be nested.
+ */
 extern void FASTCALL(call_rcu_bh(struct rcu_head *head,
 				void (*func)(struct rcu_head *head)));
+
+/* Exported common interfaces */
+extern __deprecated_for_modules void synchronize_kernel(void);
 extern void synchronize_rcu(void);
+extern void rcu_barrier(void);
 void synchronize_idle(void);
 
+/* Internal to kernel */
+extern void rcu_init(void);
+
 #endif /* __KERNEL__ */
 #endif /* __LINUX_RCUPDATE_H */
diff -puN /dev/null include/linux/rcupreempt.h
--- /dev/null	2006-03-26 18:34:52.000000000 +0530
+++ linux-2.6.16-rt20-rcu-dipankar/include/linux/rcupreempt.h	2006-07-04 11:33:34.000000000 +0530
@@ -0,0 +1,66 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (RT implementation)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author:  Paul McKenney <paulmck@us.ibm.com>
+ * 
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#ifndef __LINUX_RCUPREEMPT_H
+#define __LINUX_RCUPREEMPT_H
+
+#ifdef __KERNEL__
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/seqlock.h>
+
+#define rcu_qsctr_inc(cpu)
+#define rcu_bh_qsctr_inc(cpu)
+#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
+
+extern void __rcu_read_lock(void);
+extern void __rcu_read_unlock(void);
+extern int rcu_pending(int cpu);
+
+#define __rcu_read_lock_bh()	{ rcu_read_lock(); local_bh_disable(); }
+#define __rcu_read_unlock_bh()	{ local_bh_enable(); rcu_read_unlock(); }
+
+#define __rcu_read_lock_nesting()	(current->rcu_read_lock_nesting)
+
+extern void __synchronize_sched(void);
+
+extern void __rcu_init(void);
+extern void rcu_check_callbacks(int cpu, int user);
+extern void rcu_restart_cpu(int cpu);
+extern long rcu_batches_completed(void);
+
+#endif /* __KERNEL__ */
+#endif /* __LINUX_RCUPREEMPT_H */
diff -puN kernel/Kconfig.preempt~reorg-rcu-code kernel/Kconfig.preempt
--- linux-2.6.16-rt20-rcu/kernel/Kconfig.preempt~reorg-rcu-code	2006-07-04 11:33:33.000000000 +0530
+++ linux-2.6.16-rt20-rcu-dipankar/kernel/Kconfig.preempt	2006-07-04 11:33:34.000000000 +0530
@@ -155,7 +155,6 @@ config CLASSIC_RCU
 
 config PREEMPT_RCU
 	bool "Preemptible RCU"
-	depends on PREEMPT
 	help
 	  This option reduces the latency of the kernel by making certain
 	  RCU sections preemptible. Normally RCU code is non-preemptible, if
diff -puN kernel/Makefile~reorg-rcu-code kernel/Makefile
--- linux-2.6.16-rt20-rcu/kernel/Makefile~reorg-rcu-code	2006-07-04 11:33:33.000000000 +0530
+++ linux-2.6.16-rt20-rcu-dipankar/kernel/Makefile	2006-07-04 11:33:34.000000000 +0530
@@ -48,8 +48,8 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softl
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
-obj-$(CONFIG_CLASSIC_RCU) += rcupdate.o
-obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
+obj-$(CONFIG_CLASSIC_RCU) += rcupdate.o rcuclassic.o
+obj-$(CONFIG_PREEMPT_RCU) += rcupdate.o rcupreempt.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff -puN /dev/null kernel/rcuclassic.c
--- /dev/null	2006-03-26 18:34:52.000000000 +0530
+++ linux-2.6.16-rt20-rcu-dipankar/kernel/rcuclassic.c	2006-07-04 11:33:34.000000000 +0530
@@ -0,0 +1,511 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion, classic implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2001
+ *
+ * Authors: Dipankar Sarma <dipankar@in.ibm.com>
+ *	    Manfred Spraul <manfred@colorfullife.com>
+ * 
+ * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ *
+ * Papers:  http://www.rdrop.com/users/paulmck/RCU
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		Documentation/RCU/ *.txt
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/cpu.h>
+#include <linux/random.h>
+#include <linux/delay.h>
+#include <linux/byteorder/swabb.h>
+
+
+/* Definition for rcupdate control block. */
+struct rcu_ctrlblk rcu_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
+};
+struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
+};
+
+DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
+DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
+
+/* Fake initialization required by compiler */
+static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
+static int blimit = 10;
+static int qhimark = 10000;
+static int qlowmark = 100;
+#ifdef CONFIG_SMP
+static int rsinterval = 1000;
+#endif
+
+#ifdef CONFIG_SMP
+static void force_quiescent_state(struct rcu_data *rdp,
+			struct rcu_ctrlblk *rcp)
+{
+	int cpu;
+	cpumask_t cpumask;
+	set_need_resched();
+	if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
+		rdp->last_rs_qlen = rdp->qlen;
+		/*
+		 * Don't send IPI to itself. With irqs disabled,
+		 * rdp->cpu is the current cpu.
+		 */
+		cpumask = rcp->cpumask;
+		cpu_clear(rdp->cpu, cpumask);
+		for_each_cpu_mask(cpu, cpumask)
+			smp_send_reschedule(cpu);
+	}
+}
+#else
+static inline void force_quiescent_state(struct rcu_data *rdp,
+			struct rcu_ctrlblk *rcp)
+{
+	set_need_resched();
+}
+#endif
+
+void fastcall call_rcu(struct rcu_head *head,
+				void (*func)(struct rcu_head *rcu))
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+
+	head->func = func;
+	head->next = NULL;
+	local_irq_save(flags);
+	rdp = &__get_cpu_var(rcu_data);
+	*rdp->nxttail = head;
+	rdp->nxttail = &head->next;
+	if (unlikely(++rdp->qlen > qhimark)) {
+		rdp->blimit = INT_MAX;
+		force_quiescent_state(rdp, &rcu_ctrlblk);
+	}
+	local_irq_restore(flags);
+}
+
+void fastcall call_rcu_bh(struct rcu_head *head,
+				void (*func)(struct rcu_head *rcu))
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+
+	head->func = func;
+	head->next = NULL;
+	local_irq_save(flags);
+	rdp = &__get_cpu_var(rcu_bh_data);
+	*rdp->nxttail = head;
+	rdp->nxttail = &head->next;
+
+	if (unlikely(++rdp->qlen > qhimark)) {
+		rdp->blimit = INT_MAX;
+		force_quiescent_state(rdp, &rcu_bh_ctrlblk);
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Return the number of RCU batches processed thus far.  Useful
+ * for debug and statistics.
+ */
+long rcu_batches_completed(void)
+{
+	return rcu_ctrlblk.completed;
+}
+
+/*
+ * Invoke the completed RCU callbacks. They are expected to be in
+ * a per-cpu list.
+ */
+static void rcu_do_batch(struct rcu_data *rdp)
+{
+	struct rcu_head *next, *list;
+	int count = 0;
+
+	list = rdp->donelist;
+	while (list) {
+		next = rdp->donelist = list->next;
+		list->func(list);
+		list = next;
+		rdp->qlen--;
+		if (++count >= rdp->blimit)
+			break;
+	}
+	if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
+		rdp->blimit = blimit;
+	if (!rdp->donelist)
+		rdp->donetail = &rdp->donelist;
+	else
+		tasklet_schedule(&per_cpu(rcu_tasklet, rdp->cpu));
+}
+
+/*
+ * Grace period handling:
+ * The grace period handling consists of two steps:
+ * - A new grace period is started.
+ *   This is done by rcu_start_batch. The start is not broadcast to
+ *   all cpus, they must pick this up by comparing rcp->cur with
+ *   rdp->quiescbatch. All cpus are recorded in the
+ *   rcu_ctrlblk.cpumask bitmap.
+ * - All cpus must go through a quiescent state.
+ *   Since the start of the grace period is not broadcast, at least two
+ *   calls to rcu_check_quiescent_state are required:
+ *   The first call just notices that a new grace period is running. The
+ *   following calls check if there was a quiescent state since the beginning
+ *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
+ *   the bitmap is empty, then the grace period is completed.
+ *   rcu_check_quiescent_state calls rcu_start_batch() to start the next grace
+ *   period (if necessary).
+ */
+/*
+ * Register a new batch of callbacks, and start it up if there is currently no
+ * active batch and the batch to be registered has not already occurred.
+ * Caller must hold rcu_ctrlblk.lock.
+ */
+static void rcu_start_batch(struct rcu_ctrlblk *rcp)
+{
+	if (rcp->next_pending &&
+			rcp->completed == rcp->cur) {
+		rcp->next_pending = 0;
+		/*
+		 * next_pending == 0 must be visible in
+		 * __rcu_process_callbacks() before it can see new value of cur.
+		 */
+		smp_wmb();
+		rcp->cur++;
+
+		/*
+		 * A barrier is needed so that nohz_cpu_mask is not read before
+		 * rcp->cur is incremented.  Otherwise tickless idle CPUs can be
+		 * included in rcp->cpumask, which will extend grace periods
+		 * unnecessarily.
+		 */
+		smp_mb();
+		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+
+	}
+}
+
+/*
+ * cpu went through a quiescent state since the beginning of the grace period.
+ * Clear it from the cpu mask and complete the grace period if it was the last
+ * cpu. Start another grace period if someone has further entries pending
+ */
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
+{
+	cpu_clear(cpu, rcp->cpumask);
+	if (cpus_empty(rcp->cpumask)) {
+		/* batch completed ! */
+		rcp->completed = rcp->cur;
+		rcu_start_batch(rcp);
+	}
+}
+
+/*
+ * Check if the cpu has gone through a quiescent state (say context
+ * switch). If so and if it already hasn't done so in this RCU
+ * quiescent cycle, then indicate that it has done so.
+ */
+static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
+					struct rcu_data *rdp)
+{
+	if (rdp->quiescbatch != rcp->cur) {
+		/* start new grace period: */
+		rdp->qs_pending = 1;
+		rdp->passed_quiesc = 0;
+		rdp->quiescbatch = rcp->cur;
+		return;
+	}
+
+	/* Grace period already completed for this cpu?
+	 * qs_pending is checked instead of the actual bitmap to avoid
+	 * cacheline thrashing.
+	 */
+	if (!rdp->qs_pending)
+		return;
+
+	/* 
+	 * Was there a quiescent state since the beginning of the grace
+	 * period? If no, then exit and wait for the next call.
+	 */
+	if (!rdp->passed_quiesc)
+		return;
+	rdp->qs_pending = 0;
+
+	spin_lock(&rcp->lock);
+	/*
+	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
+	 * during cpu startup. Ignore the quiescent state.
+	 */
+	if (likely(rdp->quiescbatch == rcp->cur))
+		cpu_quiet(rdp->cpu, rcp);
+
+	spin_unlock(&rcp->lock);
+}
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
+ * locking requirements, the list it's pulling from has to belong to a cpu
+ * which is dead and hence not processing interrupts.
+ */
+static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
+				struct rcu_head **tail)
+{
+	local_irq_disable();
+	*this_rdp->nxttail = list;
+	if (list)
+		this_rdp->nxttail = tail;
+	local_irq_enable();
+}
+
+static void __rcu_offline_cpu(struct rcu_data *this_rdp,
+				struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+	/* if the cpu going offline owns the grace period
+	 * we can block indefinitely waiting for it, so flush
+	 * it here
+	 */
+	spin_lock_bh(&rcp->lock);
+	if (rcp->cur != rcp->completed)
+		cpu_quiet(rdp->cpu, rcp);
+	spin_unlock_bh(&rcp->lock);
+	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
+	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
+	rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
+}
+
+static void rcu_offline_cpu(int cpu)
+{
+	struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
+	struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
+
+	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
+					&per_cpu(rcu_data, cpu));
+	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
+					&per_cpu(rcu_bh_data, cpu));
+	put_cpu_var(rcu_data);
+	put_cpu_var(rcu_bh_data);
+	tasklet_kill_immediate(&per_cpu(rcu_tasklet, cpu), cpu);
+}
+
+#else
+
+static void rcu_offline_cpu(int cpu)
+{
+}
+
+#endif
+
+/*
+ * This does the RCU processing work from tasklet context. 
+ */
+static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
+					struct rcu_data *rdp)
+{
+	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
+		*rdp->donetail = rdp->curlist;
+		rdp->donetail = rdp->curtail;
+		rdp->curlist = NULL;
+		rdp->curtail = &rdp->curlist;
+	}
+
+	local_irq_disable();
+	if (rdp->nxtlist && !rdp->curlist) {
+		rdp->curlist = rdp->nxtlist;
+		rdp->curtail = rdp->nxttail;
+		rdp->nxtlist = NULL;
+		rdp->nxttail = &rdp->nxtlist;
+		local_irq_enable();
+
+		/*
+		 * start the next batch of callbacks
+		 */
+
+		/* determine batch number */
+		rdp->batch = rcp->cur + 1;
+		/* see the comment and corresponding wmb() in
+		 * the rcu_start_batch()
+		 */
+		smp_rmb();
+
+		if (!rcp->next_pending) {
+			/* and start it/schedule start if it's a new batch */
+			spin_lock(&rcp->lock);
+			rcp->next_pending = 1;
+			rcu_start_batch(rcp);
+			spin_unlock(&rcp->lock);
+		}
+	} else {
+		local_irq_enable();
+	}
+	rcu_check_quiescent_state(rcp, rdp);
+	if (rdp->donelist)
+		rcu_do_batch(rdp);
+}
+
+static void rcu_process_callbacks(unsigned long unused)
+{
+	__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
+	__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+}
+
+static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+	/* This cpu has pending rcu entries and the grace period
+	 * for them has completed.
+	 */
+	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
+		return 1;
+
+	/* This cpu has no pending entries, but there are new entries */
+	if (!rdp->curlist && rdp->nxtlist)
+		return 1;
+
+	/* This cpu has finished callbacks to invoke */
+	if (rdp->donelist)
+		return 1;
+
+	/* The rcu core waits for a quiescent state from the cpu */
+	if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
+		return 1;
+
+	/* nothing to do */
+	return 0;
+}
+
+int rcu_pending(int cpu)
+{
+	return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
+		__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
+}
+
+void rcu_check_callbacks(int cpu, int user)
+{
+	if (user || 
+	    (idle_cpu(cpu) && !in_softirq() && 
+				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+		rcu_qsctr_inc(cpu);
+		rcu_bh_qsctr_inc(cpu);
+	} else if (!in_softirq())
+		rcu_bh_qsctr_inc(cpu);
+	tasklet_schedule(&per_cpu(rcu_tasklet, cpu));
+}
+
+static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
+						struct rcu_data *rdp)
+{
+	memset(rdp, 0, sizeof(*rdp));
+	rdp->curtail = &rdp->curlist;
+	rdp->nxttail = &rdp->nxtlist;
+	rdp->donetail = &rdp->donelist;
+	rdp->quiescbatch = rcp->completed;
+	rdp->qs_pending = 0;
+	rdp->cpu = cpu;
+	rdp->blimit = blimit;
+}
+
+static void __devinit rcu_online_cpu(int cpu)
+{
+	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+	struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
+
+	rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
+	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
+	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
+	INIT_RCU_HEAD(&per_cpu(rcu_barrier_head, cpu));
+}
+
+static int __devinit rcu_cpu_notify(struct notifier_block *self, 
+				unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+	switch (action) {
+	case CPU_UP_PREPARE:
+		rcu_online_cpu(cpu);
+		break;
+	case CPU_DEAD:
+		rcu_offline_cpu(cpu);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata rcu_nb = {
+	.notifier_call	= rcu_cpu_notify,
+};
+
+/*
+ * Initializes rcu mechanism.  Assumed to be called early.
+ * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
+ * Note that rcu_qsctr and friends are implicitly
+ * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
+ */
+void __init __rcu_init(void)
+{
+	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
+			(void *)(long)smp_processor_id());
+	/* Register notifier for non-boot CPUs */
+	register_cpu_notifier(&rcu_nb);
+}
+
+/*
+ * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
+ */
+void synchronize_kernel(void)
+{
+	synchronize_rcu();
+}
+
+module_param(blimit, int, 0);
+module_param(qhimark, int, 0);
+module_param(qlowmark, int, 0);
+#ifdef CONFIG_SMP
+module_param(rsinterval, int, 0);
+#endif
+EXPORT_SYMBOL(call_rcu);  /* WARNING: GPL-only in April 2006. */
+EXPORT_SYMBOL(call_rcu_bh);  /* WARNING: GPL-only in April 2006. */
+EXPORT_SYMBOL(synchronize_kernel);  /* WARNING: GPL-only in April 2006. */
diff -puN kernel/rcupdate.c~reorg-rcu-code kernel/rcupdate.c
--- linux-2.6.16-rt20-rcu/kernel/rcupdate.c~reorg-rcu-code	2006-07-04 11:33:33.000000000 +0530
+++ linux-2.6.16-rt20-rcu-dipankar/kernel/rcupdate.c	2006-07-04 11:33:34.000000000 +0530
@@ -49,140 +49,49 @@
 #include <linux/random.h>
 #include <linux/delay.h>
 #include <linux/byteorder/swabb.h>
-#include "rcucommon.h"
 
 
-/* Definition for rcupdate control block. */
-struct rcu_ctrlblk rcu_ctrlblk = {
-	.cur = -300,
-	.completed = -300,
-	.lock = SPIN_LOCK_UNLOCKED,
-	.cpumask = CPU_MASK_NONE,
+struct rcu_synchronize {
+	struct rcu_head head;
+	struct completion completion;
 };
-struct rcu_ctrlblk rcu_bh_ctrlblk = {
-	.cur = -300,
-	.completed = -300,
-	.lock = SPIN_LOCK_UNLOCKED,
-	.cpumask = CPU_MASK_NONE,
-};
-
-DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
-DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
-
-/* Fake initialization required by compiler */
-static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
-static int blimit = 10;
-static int qhimark = 10000;
-static int qlowmark = 100;
-#ifdef CONFIG_SMP
-static int rsinterval = 1000;
-#endif
 
+static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head);
 static atomic_t rcu_barrier_cpu_count;
 static struct semaphore rcu_barrier_sema;
 static struct completion rcu_barrier_completion;
 
-#ifdef CONFIG_SMP
-static void force_quiescent_state(struct rcu_data *rdp,
-			struct rcu_ctrlblk *rcp)
+/* Because of FASTCALL declaration of complete, we use this wrapper */
+static void wakeme_after_rcu(struct rcu_head  *head)
 {
-	int cpu;
-	cpumask_t cpumask;
-	set_need_resched();
-	if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
-		rdp->last_rs_qlen = rdp->qlen;
-		/*
-		 * Don't send IPI to itself. With irqs disabled,
-		 * rdp->cpu is the current cpu.
-		 */
-		cpumask = rcp->cpumask;
-		cpu_clear(rdp->cpu, cpumask);
-		for_each_cpu_mask(cpu, cpumask)
-			smp_send_reschedule(cpu);
-	}
-}
-#else
-static inline void force_quiescent_state(struct rcu_data *rdp,
-			struct rcu_ctrlblk *rcp)
-{
-	set_need_resched();
+	struct rcu_synchronize *rcu;
+
+	rcu = container_of(head, struct rcu_synchronize, head);
+	complete(&rcu->completion);
 }
-#endif
 
 /**
- * call_rcu - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual update function to be invoked after the grace period
+ * synchronize_rcu - wait until a grace period has elapsed.
  *
- * The update function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
  * read-side critical sections have completed.  RCU read-side critical
  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
  * and may be nested.
- */
-void fastcall call_rcu(struct rcu_head *head,
-				void (*func)(struct rcu_head *rcu))
-{
-	unsigned long flags;
-	struct rcu_data *rdp;
-
-	head->func = func;
-	head->next = NULL;
-	local_irq_save(flags);
-	rdp = &__get_cpu_var(rcu_data);
-	*rdp->nxttail = head;
-	rdp->nxttail = &head->next;
-	if (unlikely(++rdp->qlen > qhimark)) {
-		rdp->blimit = INT_MAX;
-		force_quiescent_state(rdp, &rcu_ctrlblk);
-	}
-	local_irq_restore(flags);
-}
-
-/**
- * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual update function to be invoked after the grace period
  *
- * The update function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_bh() assumes
- * that the read-side critical sections end on completion of a softirq
- * handler. This means that read-side critical sections in process
- * context must not be interrupted by softirqs. This interface is to be
- * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by rcu_read_lock() and
- * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
- * and rcu_read_unlock_bh(), if in process context. These may be nested.
+ * If your read-side code is not protected by rcu_read_lock(), do -not-
+ * use synchronize_rcu().
  */
-void fastcall call_rcu_bh(struct rcu_head *head,
-				void (*func)(struct rcu_head *rcu))
+void synchronize_rcu(void)
 {
-	unsigned long flags;
-	struct rcu_data *rdp;
+	struct rcu_synchronize rcu;
+ 
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished */
+	call_rcu(&rcu.head, wakeme_after_rcu);
 
-	head->func = func;
-	head->next = NULL;
-	local_irq_save(flags);
-	rdp = &__get_cpu_var(rcu_bh_data);
-	*rdp->nxttail = head;
-	rdp->nxttail = &head->next;
-
-	if (unlikely(++rdp->qlen > qhimark)) {
-		rdp->blimit = INT_MAX;
-		force_quiescent_state(rdp, &rcu_bh_ctrlblk);
-	}
-
-	local_irq_restore(flags);
-}
-
-/*
- * Return the number of RCU batches processed thus far.  Useful
- * for debug and statistics.
- */
-long rcu_batches_completed(void)
-{
-	return rcu_ctrlblk.completed;
+	/* Wait for it */
+	wait_for_completion(&rcu.completion);
 }
 
 static void rcu_barrier_callback(struct rcu_head *notused)
@@ -197,10 +106,8 @@ static void rcu_barrier_callback(struct 
 static void rcu_barrier_func(void *notused)
 {
 	int cpu = smp_processor_id();
-	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
-	struct rcu_head *head;
+	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
 
-	head = &rdp->barrier;
 	atomic_inc(&rcu_barrier_cpu_count);
 	call_rcu(head, rcu_barrier_callback);
 }
@@ -219,353 +126,12 @@ void rcu_barrier(void)
 	wait_for_completion(&rcu_barrier_completion);
 	up(&rcu_barrier_sema);
 }
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
-/*
- * Invoke the completed RCU callbacks. They are expected to be in
- * a per-cpu list.
- */
-static void rcu_do_batch(struct rcu_data *rdp)
-{
-	struct rcu_head *next, *list;
-	int count = 0;
-
-	list = rdp->donelist;
-	while (list) {
-		next = rdp->donelist = list->next;
-		list->func(list);
-		list = next;
-		rdp->qlen--;
-		if (++count >= rdp->blimit)
-			break;
-	}
-	if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
-		rdp->blimit = blimit;
-	if (!rdp->donelist)
-		rdp->donetail = &rdp->donelist;
-	else
-		tasklet_schedule(&per_cpu(rcu_tasklet, rdp->cpu));
-}
-
-/*
- * Grace period handling:
- * The grace period handling consists out of two steps:
- * - A new grace period is started.
- *   This is done by rcu_start_batch. The start is not broadcasted to
- *   all cpus, they must pick this up by comparing rcp->cur with
- *   rdp->quiescbatch. All cpus are recorded  in the
- *   rcu_ctrlblk.cpumask bitmap.
- * - All cpus must go through a quiescent state.
- *   Since the start of the grace period is not broadcasted, at least two
- *   calls to rcu_check_quiescent_state are required:
- *   The first call just notices that a new grace period is running. The
- *   following calls check if there was a quiescent state since the beginning
- *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
- *   the bitmap is empty, then the grace period is completed.
- *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
- *   period (if necessary).
- */
-/*
- * Register a new batch of callbacks, and start it up if there is currently no
- * active batch and the batch to be registered has not already occurred.
- * Caller must hold rcu_ctrlblk.lock.
- */
-static void rcu_start_batch(struct rcu_ctrlblk *rcp)
-{
-	if (rcp->next_pending &&
-			rcp->completed == rcp->cur) {
-		rcp->next_pending = 0;
-		/*
-		 * next_pending == 0 must be visible in
-		 * __rcu_process_callbacks() before it can see new value of cur.
-		 */
-		smp_wmb();
-		rcp->cur++;
-
-		/*
-		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
-		 * Barrier  Otherwise it can cause tickless idle CPUs to be
-		 * included in rcp->cpumask, which will extend graceperiods
-		 * unnecessarily.
-		 */
-		smp_mb();
-		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
-
-	}
-}
-
-/*
- * cpu went through a quiescent state since the beginning of the grace period.
- * Clear it from the cpu mask and complete the grace period if it was the last
- * cpu. Start another grace period if someone has further entries pending
- */
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
-{
-	cpu_clear(cpu, rcp->cpumask);
-	if (cpus_empty(rcp->cpumask)) {
-		/* batch completed ! */
-		rcp->completed = rcp->cur;
-		rcu_start_batch(rcp);
-	}
-}
-
-/*
- * Check if the cpu has gone through a quiescent state (say context
- * switch). If so and if it already hasn't done so in this RCU
- * quiescent cycle, then indicate that it has done so.
- */
-static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
-					struct rcu_data *rdp)
-{
-	if (rdp->quiescbatch != rcp->cur) {
-		/* start new grace period: */
-		rdp->qs_pending = 1;
-		rdp->passed_quiesc = 0;
-		rdp->quiescbatch = rcp->cur;
-		return;
-	}
-
-	/* Grace period already completed for this cpu?
-	 * qs_pending is checked instead of the actual bitmap to avoid
-	 * cacheline trashing.
-	 */
-	if (!rdp->qs_pending)
-		return;
-
-	/* 
-	 * Was there a quiescent state since the beginning of the grace
-	 * period? If no, then exit and wait for the next call.
-	 */
-	if (!rdp->passed_quiesc)
-		return;
-	rdp->qs_pending = 0;
-
-	spin_lock(&rcp->lock);
-	/*
-	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
-	 * during cpu startup. Ignore the quiescent state.
-	 */
-	if (likely(rdp->quiescbatch == rcp->cur))
-		cpu_quiet(rdp->cpu, rcp);
-
-	spin_unlock(&rcp->lock);
-}
-
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
- * locking requirements, the list it's pulling from has to belong to a cpu
- * which is dead and hence not processing interrupts.
- */
-static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
-				struct rcu_head **tail)
-{
-	local_irq_disable();
-	*this_rdp->nxttail = list;
-	if (list)
-		this_rdp->nxttail = tail;
-	local_irq_enable();
-}
-
-static void __rcu_offline_cpu(struct rcu_data *this_rdp,
-				struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
-{
-	/* if the cpu going offline owns the grace period
-	 * we can block indefinitely waiting for it, so flush
-	 * it here
-	 */
-	spin_lock_bh(&rcp->lock);
-	if (rcp->cur != rcp->completed)
-		cpu_quiet(rdp->cpu, rcp);
-	spin_unlock_bh(&rcp->lock);
-	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
-	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
-	rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
-}
-
-static void rcu_offline_cpu(int cpu)
-{
-	struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
-	struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
-
-	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
-					&per_cpu(rcu_data, cpu));
-	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
-					&per_cpu(rcu_bh_data, cpu));
-	put_cpu_var(rcu_data);
-	put_cpu_var(rcu_bh_data);
-	tasklet_kill_immediate(&per_cpu(rcu_tasklet, cpu), cpu);
-}
-
-#else
-
-static void rcu_offline_cpu(int cpu)
-{
-}
-
-#endif
-
-/*
- * This does the RCU processing work from tasklet context. 
- */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
-					struct rcu_data *rdp)
-{
-	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
-		*rdp->donetail = rdp->curlist;
-		rdp->donetail = rdp->curtail;
-		rdp->curlist = NULL;
-		rdp->curtail = &rdp->curlist;
-	}
-
-	local_irq_disable();
-	if (rdp->nxtlist && !rdp->curlist) {
-		rdp->curlist = rdp->nxtlist;
-		rdp->curtail = rdp->nxttail;
-		rdp->nxtlist = NULL;
-		rdp->nxttail = &rdp->nxtlist;
-		local_irq_enable();
-
-		/*
-		 * start the next batch of callbacks
-		 */
-
-		/* determine batch number */
-		rdp->batch = rcp->cur + 1;
-		/* see the comment and corresponding wmb() in
-		 * the rcu_start_batch()
-		 */
-		smp_rmb();
-
-		if (!rcp->next_pending) {
-			/* and start it/schedule start if it's a new batch */
-			spin_lock(&rcp->lock);
-			rcp->next_pending = 1;
-			rcu_start_batch(rcp);
-			spin_unlock(&rcp->lock);
-		}
-	} else {
-		local_irq_enable();
-	}
-	rcu_check_quiescent_state(rcp, rdp);
-	if (rdp->donelist)
-		rcu_do_batch(rdp);
-}
-
-static void rcu_process_callbacks(unsigned long unused)
-{
-	__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
-	__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
-}
-
-static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
-{
-	/* This cpu has pending rcu entries and the grace period
-	 * for them has completed.
-	 */
-	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
-		return 1;
-
-	/* This cpu has no pending entries, but there are new entries */
-	if (!rdp->curlist && rdp->nxtlist)
-		return 1;
-
-	/* This cpu has finished callbacks to invoke */
-	if (rdp->donelist)
-		return 1;
-
-	/* The rcu core waits for a quiescent state from the cpu */
-	if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
-		return 1;
-
-	/* nothing to do */
-	return 0;
-}
-
-int rcu_pending(int cpu)
-{
-	return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
-		__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
-}
-
-void rcu_check_callbacks(int cpu, int user)
-{
-	if (user || 
-	    (idle_cpu(cpu) && !in_softirq() && 
-				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
-		rcu_qsctr_inc(cpu);
-		rcu_bh_qsctr_inc(cpu);
-	} else if (!in_softirq())
-		rcu_bh_qsctr_inc(cpu);
-	tasklet_schedule(&per_cpu(rcu_tasklet, cpu));
-}
-
-static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
-						struct rcu_data *rdp)
-{
-	memset(rdp, 0, sizeof(*rdp));
-	rdp->curtail = &rdp->curlist;
-	rdp->nxttail = &rdp->nxtlist;
-	rdp->donetail = &rdp->donelist;
-	rdp->quiescbatch = rcp->completed;
-	rdp->qs_pending = 0;
-	rdp->cpu = cpu;
-	rdp->blimit = blimit;
-}
-
-static void __devinit rcu_online_cpu(int cpu)
-{
-	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
-	struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
-
-	rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
-	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
-	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
-}
-
-static int __devinit rcu_cpu_notify(struct notifier_block *self, 
-				unsigned long action, void *hcpu)
-{
-	long cpu = (long)hcpu;
-	switch (action) {
-	case CPU_UP_PREPARE:
-		rcu_online_cpu(cpu);
-		break;
-	case CPU_DEAD:
-		rcu_offline_cpu(cpu);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __devinitdata rcu_nb = {
-	.notifier_call	= rcu_cpu_notify,
-};
-
-/*
- * Initializes rcu mechanism.  Assumed to be called early.
- * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
- * Note that rcu_qsctr and friends are implicitly
- * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
- */
 void __init rcu_init(void)
 {
 	sema_init(&rcu_barrier_sema, 1);
-	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
-			(void *)(long)smp_processor_id());
-	/* Register notifier for non-boot CPUs */
-	register_cpu_notifier(&rcu_nb);
+	__rcu_init();
 }
 
-module_param(blimit, int, 0);
-module_param(qhimark, int, 0);
-module_param(qlowmark, int, 0);
-#ifdef CONFIG_SMP
-module_param(rsinterval, int, 0);
-#endif
-EXPORT_SYMBOL_GPL(call_rcu);
-EXPORT_SYMBOL_GPL(call_rcu_bh);
 EXPORT_SYMBOL_GPL(synchronize_rcu);
+EXPORT_SYMBOL_GPL(rcu_barrier);
diff -puN kernel/rcupreempt.c~reorg-rcu-code kernel/rcupreempt.c
--- linux-2.6.16-rt20-rcu/kernel/rcupreempt.c~reorg-rcu-code	2006-07-04 11:33:34.000000000 +0530
+++ linux-2.6.16-rt20-rcu-dipankar/kernel/rcupreempt.c	2006-07-04 11:33:34.000000000 +0530
@@ -46,7 +46,6 @@
 #include <linux/random.h>
 #include <linux/delay.h>
 #include <linux/byteorder/swabb.h>
-#include "rcucommon.h"
 
 /*
  * PREEMPT_RCU data structures.
@@ -101,8 +100,7 @@ long rcu_batches_completed(void)
 	return rcu_ctrlblk.completed;
 }
 
-void
-rcu_read_lock(void)
+void __rcu_read_lock(void)
 {
 	int flipctr;
 	unsigned long oldirq;
@@ -146,8 +144,7 @@ rcu_read_lock(void)
 	local_irq_restore(oldirq);
 }
 
-void
-rcu_read_unlock(void)
+void __rcu_read_unlock(void)
 {
 	unsigned long oldirq;
 
@@ -172,8 +169,7 @@ rcu_read_unlock(void)
 	local_irq_restore(oldirq);
 }
 
-static void
-__rcu_advance_callbacks(void)
+static void __rcu_advance_callbacks(void)
 {
 
 	if (rcu_data.completed != rcu_ctrlblk.completed) {
@@ -213,8 +209,7 @@ __rcu_advance_callbacks(void)
  * on a large SMP, they might want to use a hierarchical organization of
  * the per-CPU-counter pairs.
  */
-static void
-rcu_try_flip(void)
+static void rcu_try_flip(void)
 {
 	int cpu;
 	long flipctr;
@@ -271,8 +266,7 @@ rcu_try_flip(void)
 	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, oldirq);
 }
 
-void
-rcu_check_callbacks(int cpu, int user)
+void rcu_check_callbacks(int cpu, int user)
 {
 	unsigned long oldirq;
 
@@ -295,8 +289,7 @@ rcu_check_callbacks(int cpu, int user)
 	}
 }
 
-static
-void rcu_process_callbacks(unsigned long data)
+static void rcu_process_callbacks(unsigned long data)
 {
 	unsigned long flags;
 	struct rcu_head *next, *list;
@@ -324,9 +317,8 @@ void rcu_process_callbacks(unsigned long
 	}
 }
 
-void fastcall
-call_rcu(struct rcu_head *head,
-	 void (*func)(struct rcu_head *rcu))
+void fastcall call_rcu(struct rcu_head *head, 
+				void (*func)(struct rcu_head *rcu))
 {
 	unsigned long flags;
 
@@ -349,8 +341,7 @@ call_rcu(struct rcu_head *head,
  * switch to eliminate possibility of failure.  (Maybe just crank
  * priority down...)
  */
-void
-synchronize_sched(void)
+void __synchronize_sched(void)
 {
 	cpumask_t oldmask;
 	int cpu;
@@ -365,15 +356,14 @@ synchronize_sched(void)
 	sched_setaffinity(0, oldmask);
 }
 
-int
-rcu_pending(int cpu)
+int rcu_pending(int cpu)
 {
 	return (rcu_data.donelist != NULL ||
 		rcu_data.waitlist != NULL ||
 		rcu_data.nextlist != NULL);
 }
 
-void __init rcu_init(void)
+void __init __rcu_init(void)
 {
 /*&&&&*/printk("WARNING: experimental RCU implementation.\n");
 	spin_lock_init(&rcu_data.lock);
@@ -457,6 +447,6 @@ int rcu_read_proc_ctrs_data(char *page)
 EXPORT_SYMBOL_GPL(call_rcu);
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 EXPORT_SYMBOL_GPL(synchronize_rcu);
-EXPORT_SYMBOL_GPL(synchronize_sched);
-EXPORT_SYMBOL_GPL(rcu_read_lock);
-EXPORT_SYMBOL_GPL(rcu_read_unlock);
+EXPORT_SYMBOL_GPL(__synchronize_sched);
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
diff -puN -L kernel/rcucommon.h kernel/rcucommon.h~reorg-rcu-code /dev/null
--- linux-2.6.16-rt20-rcu/kernel/rcucommon.h
+++ /dev/null	2006-03-26 18:34:52.000000000 +0530
@@ -1,71 +0,0 @@
-/*
- * Read-Copy Update mechanism for mutual exclusion, definitions common
- * to all implementations.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2001
- *
- * Authors: Dipankar Sarma <dipankar@in.ibm.com>
- *	    Manfred Spraul <manfred@colorfullife.com>
- *          Paul E. McKenney <paulmck@us.ibm.com>
- *
- * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
- * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
- *
- * Papers:  http://www.rdrop.com/users/paulmck/RCU
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU/ *.txt
- *
- */
-
-struct rcu_synchronize {
-	struct rcu_head head;
-	struct completion completion;
-};
-
-/* Because of FASTCALL declaration of complete, we use this wrapper */
-static void wakeme_after_rcu(struct rcu_head  *head)
-{
-	struct rcu_synchronize *rcu;
-
-	rcu = container_of(head, struct rcu_synchronize, head);
-	complete(&rcu->completion);
-}
-
-/**
- * synchronize_rcu - wait until a grace period has elapsed.
- *
- * Control will return to the caller some time after a full grace
- * period has elapsed, in other words after all currently executing RCU
- * read-side critical sections have completed.  RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- *
- * If your read-side code is not protected by rcu_read_lock(), do -not-
- * use synchronize_rcu().
- */
-void synchronize_rcu(void)
-{
-	struct rcu_synchronize rcu;
-
-	init_completion(&rcu.completion);
-	/* Will wake me after RCU finished */
-	call_rcu(&rcu.head, wakeme_after_rcu);
-
-	/* Wait for it */
-	wait_for_completion(&rcu.completion);
-}

_

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-07-04  6:50                     ` Ingo Molnar
  2006-07-05  9:11                       ` Dipankar Sarma
@ 2006-07-06 20:06                       ` Paul E. McKenney
  1 sibling, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2006-07-06 20:06 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Dipankar Sarma, linux-kernel, john stultz

On Tue, Jul 04, 2006 at 08:50:24AM +0200, Ingo Molnar wrote:
> 
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > > Ingo, do you have a suspect ?
> > 
> > I suspect it's the patch below. That patch (from John) relaxes the 
> > affinities of IRQ threads: if there are /proc/irq/*/smp_affinity 
> > entries that have multiple bits set an IRQ thread is allowed to jump 
> > from one CPU to another while it is executing a IRQ-handler. It 
> > _should_ be fine but i'd not be surprised if that caused breakage ...
> 
> the patch below is against 2.6.17-rt5, does this solve the crashes?

And I also still get a segmentation fault when modprobing rcutorture
with this patch.  :-/

The segfault is bizarre -- it is in the module symbol-lookup code.  I get
the segfault regardless of what module parameters I specify, in fact,
it shows up even if I comment out all the module parameters in
rcutorture.c.  Tiny modules, such as hello.c from "Linux Device Drivers",
work just fine -- but the size of rcutorture.ko is -way- below the
64k limit, and, as I mentioned, I get the oops even if I comment out
all of rcutorture's module parameters.

See oops below.

Any enlightenment available?

							Thanx, Paul

BUG: unable to handle kernel paging request at virtual address 75010000
 printing eip:
c0133941
*pde = 00000000
Oops: 0000 [#1]
PREEMPT SMP 
Modules linked in:
CPU:    0
EIP:    0060:[<c0133941>]    Not tainted VLI
EFLAGS: 00010297   (2.6.17-rt5-autokern1 #1) 
EIP is at lookup_symbol+0x16/0x3b
eax: ffffffff   ebx: c0345a5c   ecx: c0343e90   edx: c0343e98
esi: 75010000   edi: f8ded2a6   ebp: f1a61edc   esp: f1a61e98
ds: 007b   es: 007b   ss: 0068   preempt: 00000001
Process modprobe (pid: 3663, threadinfo=f1a60000 task=f12160d0 stack_left=7780 worst_left=-1)
Stack: f8df02d0 f8df1380 f8ded2a6 c013398c f8ded2a6 c03419f0 c0345a5c f8df02d0 
       f8df1380 0000008e 00000062 c01345b5 f8ded2a6 f1a61ed8 f1a61edc 00000001 
       00000000 f8dc2e31 f8df02d0 00000620 c0134b49 f8de2214 00000000 f8ded2a6 
Call Trace:
 [<c013398c>] __find_symbol+0x26/0x158 (16)
 [<c01345b5>] resolve_symbol+0x21/0x46 (32)
 [<c0134b49>] simplify_symbols+0x88/0x101 (36)
 [<c0135707>] load_module+0x5f0/0x913 (40)
 [<c0135a8e>] sys_init_module+0x41/0x1c5 (144)
 [<c0102a03>] syscall_call+0x7/0xb (24)
---------------------------
| preempt count: 00000001 ]
| 1-level deep critical section nesting:
----------------------------------------
.. [<c02f17f7>] .... _raw_spin_lock_irqsave+0xe/0x35
.....[<00000000>] ..   ( <= _stext+0x3feffd64/0x41)

Code: 43 83 c1 28 0f b7 42 30 39 c3 72 c7 31 d2 5b 89 d0 5e 5f 5d c3 57 56 53 8b 5c 24 18 8b 54 24 14 39 da 73 24 8b 72 04 8b 7c 24 10 <ac> ae 75 08 84 c0 75 f8 31 c0 eb 04 19 c0 0c 01 85 c0 89 d1 74 
EIP: [<c0133941>] lookup_symbol+0x16/0x3b SS:ESP 0068:f1a61e98

> 	Ingo
> 
> Index: linux-rt.q/kernel/irq/manage.c
> ===================================================================
> --- linux-rt.q.orig/kernel/irq/manage.c
> +++ linux-rt.q/kernel/irq/manage.c
> @@ -645,17 +645,24 @@ extern asmlinkage void __do_softirq(void
>  
>  static int curr_irq_prio = 49;
>  
> -static int do_irqd(void * __desc)
> +static void follow_irq_affinity(struct irq_desc *desc)
>  {
> -	struct sched_param param = { 0, };
> -	struct irq_desc *desc = __desc;
>  #ifdef CONFIG_SMP
> -	int irq = desc - irq_desc;
>  	cpumask_t mask;
>  
> -	mask = cpumask_of_cpu(any_online_cpu(irq_desc[irq].affinity));
> +	if (cpus_equal(current->cpus_allowed, desc->affinity))
> +		return;
> +	mask = cpumask_of_cpu(any_online_cpu(desc->affinity));
>  	set_cpus_allowed(current, mask);
>  #endif
> +}
> +
> +static int do_irqd(void * __desc)
> +{
> +	struct sched_param param = { 0, };
> +	struct irq_desc *desc = __desc;
> +
> +	follow_irq_affinity(desc);
>  	current->flags |= PF_NOFREEZE | PF_HARDIRQ;
>  
>  	/*
> @@ -674,13 +681,7 @@ static int do_irqd(void * __desc)
>  		local_irq_disable();
>  		__do_softirq();
>  		local_irq_enable();
> -#ifdef CONFIG_SMP
> -		/*
> -		 * Did IRQ affinities change?
> -		 */
> -		if (!cpus_equal(current->cpus_allowed, irq_desc[irq].affinity))
> -			set_cpus_allowed(current, irq_desc[irq].affinity);
> -#endif
> +		follow_irq_affinity(desc);
>  		schedule();
>  	}
>  	__set_current_state(TASK_RUNNING);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] 2.6.17-rt1 : fix x86_64 oops
  2006-07-05  9:11                       ` Dipankar Sarma
@ 2006-07-26  7:36                         ` Dipankar Sarma
  0 siblings, 0 replies; 16+ messages in thread
From: Dipankar Sarma @ 2006-07-26  7:36 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul E. McKenney, linux-kernel, john stultz

On Wed, Jul 05, 2006 at 02:41:57PM +0530, Dipankar Sarma wrote:
> On Tue, Jul 04, 2006 at 08:50:24AM +0200, Ingo Molnar wrote:
> > 
> > * Ingo Molnar <mingo@elte.hu> wrote:
> > 
> > > > Ingo, do you have a suspect ?
> > > 
> > > I suspect it's the patch below. That patch (from John) relaxes the 
> > > affinities of IRQ threads: if there are /proc/irq/*/smp_affinity 
> > > entries that have multiple bits set an IRQ thread is allowed to jump 
> > > from one CPU to another while it is executing a IRQ-handler. It 
> > > _should_ be fine but i'd not be surprised if that caused breakage ...
> > 
> > the patch below is against 2.6.17-rt5, does this solve the crashes?
> > 
> 
> I tried this patch but I still oops quickly after starting rcutorture.
> 
> There is some additional information - my -rt20 directory had
> another patch which re-organized RCU code to cleanly have multiple
> RCU implementations (rcuclassic and rcupreempt for now). That
> kernel ran fine with rcutorture, but when I removed that
> reorg-rcu-code patch to go to standard -rt20, I started seeing
> the same oops. This is bizarre because the reorg-rcu-code
> patch isn't supposed to change any logic. I am still investigating
> this, but the patch is included below for your reference.

Hello Ingo,

Finally, I got around to debugging this a little bit and I have
figured out why I get this oops. In the oops I see that
I am advancing a list of rcu callbacks to the done list
but the last element of the done list has already been
freed. This made me suspect the rcutorture module and I remembered
that rcu_barrier() is a NOP in rcupreempt. In my rcu
code reorganization patchset, I fixed that (rcu_barrier()
is a common primitive on top of both classic and preemptible
rcu). That is why I wasn't seeing the crash with my
patchset applied.

Anyway, the following patch fixes this problem in my
x86_64 box (64-bit kernel) and I can run rcutorture.
However, I would request not applying this for the moment
since this would get fixed in the RCU cleanup that
is to follow. I am working on your suggestion at Ottawa of
merging as much as possible into the mainline itself.
The patch below is only for those who want to temporarily work around
this for running rcutorture.

Thanks
Dipankar


Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>

diff -puN kernel/rcupreempt.c~fix-rcu-barrier-in-preempt kernel/rcupreempt.c
--- linux-2.6.17-rt7-rcu/kernel/rcupreempt.c~fix-rcu-barrier-in-preempt	2006-07-26 12:12:46.000000000 +0530
+++ linux-2.6.17-rt7-rcu-dipankar/kernel/rcupreempt.c	2006-07-26 12:17:19.000000000 +0530
@@ -93,6 +93,11 @@ static struct rcu_ctrlblk rcu_ctrlblk = 
 static DEFINE_PER_CPU(atomic_t [2], rcu_flipctr) =
 	{ ATOMIC_INIT(0), ATOMIC_INIT(0) };
 
+static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head);
+static atomic_t rcu_barrier_cpu_count;
+static DEFINE_MUTEX(rcu_barrier_mutex);
+static struct completion rcu_barrier_completion;
+
 /*
  * Return the number of RCU batches processed thus far.  Useful
  * for debug and statistics.
@@ -388,6 +393,39 @@ rcu_pending(int cpu)
 		rcu_data.nextlist != NULL);
 }
 
+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+        if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+                complete(&rcu_barrier_completion);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *notused)
+{
+        int cpu = smp_processor_id();
+        struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
+
+        atomic_inc(&rcu_barrier_cpu_count);
+        call_rcu(head, rcu_barrier_callback);
+}
+
+/**
+ * rcu_barrier - Wait until all the in-flight RCUs are complete.
+ */
+void rcu_barrier(void)
+{
+        BUG_ON(in_interrupt());
+        /* Take cpucontrol mutex to protect against CPU hotplug */
+        mutex_lock(&rcu_barrier_mutex);
+        init_completion(&rcu_barrier_completion);
+        atomic_set(&rcu_barrier_cpu_count, 0);
+        on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+        wait_for_completion(&rcu_barrier_completion);
+        mutex_unlock(&rcu_barrier_mutex);
+}
+
 void __init rcu_init(void)
 {
 /*&&&&*/printk("WARNING: experimental RCU implementation.\n");
@@ -477,6 +515,7 @@ int rcu_read_proc_ctrs_data(char *page)
 
 #endif /* #ifdef CONFIG_RCU_STATS */
 
+EXPORT_SYMBOL_GPL(rcu_barrier);
 EXPORT_SYMBOL_GPL(call_rcu);
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 EXPORT_SYMBOL_GPL(synchronize_rcu);
diff -puN include/linux/rcupdate.h~fix-rcu-barrier-in-preempt include/linux/rcupdate.h
--- linux-2.6.17-rt7-rcu/include/linux/rcupdate.h~fix-rcu-barrier-in-preempt	2006-07-26 12:18:00.000000000 +0530
+++ linux-2.6.17-rt7-rcu-dipankar/include/linux/rcupdate.h	2006-07-26 12:18:38.000000000 +0530
@@ -275,12 +275,11 @@ extern int rcu_pending(int cpu);
  */
 #ifndef CONFIG_PREEMPT_RCU
 #define synchronize_sched() synchronize_rcu()
-extern void rcu_barrier(void);
 #else /* #ifndef CONFIG_PREEMPT_RCU */
 extern void synchronize_sched(void);
-#define rcu_barrier() do {} while(0)
 #endif /* #else #ifndef CONFIG_PREEMPT_RCU */
 
+extern void rcu_barrier(void);
 extern void rcu_init(void);
 extern void rcu_check_callbacks(int cpu, int user);
 extern void rcu_restart_cpu(int cpu);

_




^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2006-07-26  7:39 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-06-27 20:01 2.6.17-rt1 : x86_64 oops Dipankar Sarma
2006-06-28 18:21 ` [PATCH] 2.6.17-rt1 : fix " Dipankar Sarma
2006-06-28 19:32   ` Ingo Molnar
2006-06-28 20:02     ` Dipankar Sarma
2006-06-29 14:24       ` Ingo Molnar
2006-06-29 16:32         ` Paul E. McKenney
2006-06-29 19:41           ` Paul E. McKenney
2006-06-29 20:11             ` Ingo Molnar
2006-06-29 21:35               ` Paul E. McKenney
2006-07-03 16:57               ` Dipankar Sarma
2006-07-04  4:15                 ` Dipankar Sarma
2006-07-04  6:43                   ` Ingo Molnar
2006-07-04  6:50                     ` Ingo Molnar
2006-07-05  9:11                       ` Dipankar Sarma
2006-07-26  7:36                         ` Dipankar Sarma
2006-07-06 20:06                       ` Paul E. McKenney

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).