All of lore.kernel.org
 help / color / mirror / Atom feed
* EIP is at device_shutdown+0x32/0x60
@ 2007-11-15  9:11 Alexey Dobriyan
  2007-11-15  9:44 ` Andrew Morton
  0 siblings, 1 reply; 18+ messages in thread
From: Alexey Dobriyan @ 2007-11-15  9:11 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel

Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
(1) and during 2.6.24 cycle (2):

	kernel_restart
	sys_reboot
	[garbage]
Code: 8b 88 a8 00 00 00 85 c9 74 04 89
EIP is at device_shutdown+0x32/0x60

which corresponds to the following place:

c110659c <device_shutdown>:
c110659c:	a1 d0 47 4d c1       	mov    0xc14d47d0,%eax
c11065a1:	53                   	push   %ebx
c11065a2:	8b 50 04             	mov    0x4(%eax),%edx
c11065a5:	81 ea f0 00 00 00    	sub    $0xf0,%edx
c11065ab:	8b 9a f4 00 00 00    	mov    0xf4(%edx),%ebx
c11065b1:	eb 31                	jmp    c11065e4 <device_shutdown+0x48>
c11065b3:	8b 82 54 01 00 00    	mov    0x154(%edx),%eax
c11065b9:	85 c0                	test   %eax,%eax
c11065bb:	74 07                	je     c11065c4 <device_shutdown+0x28>
c11065bd:	8b 48 20             	mov    0x20(%eax),%ecx
c11065c0:	85 c9                	test   %ecx,%ecx
c11065c2:	75 14                	jne    c11065d8 <device_shutdown+0x3c>
c11065c4:	8b 82 58 01 00 00    	mov    0x158(%edx),%eax
c11065ca:	85 c0                	test   %eax,%eax
c11065cc:	74 0e                	je     c11065dc <device_shutdown+0x40>
c11065ce: ===>	8b 88 a8 00 00 00    	mov    0xa8(%eax),%ecx	<===
c11065d4:	85 c9                	test   %ecx,%ecx
c11065d6:	74 04                	je     c11065dc <device_shutdown+0x40>
c11065d8:	89 d0                	mov    %edx,%eax
c11065da:	ff d1                	call   *%ecx
c11065dc:	89 da                	mov    %ebx,%edx
c11065de:	8b 9b f4 00 00 00    	mov    0xf4(%ebx),%ebx
c11065e4:	81 eb f0 00 00 00    	sub    $0xf0,%ebx
c11065ea:	8d 82 f0 00 00 00    	lea    0xf0(%edx),%eax
c11065f0:	3b 05 d0 47 4d c1    	cmp    0xc14d47d0,%eax
c11065f6:	75 bb                	jne    c11065b3 <device_shutdown+0x17>
c11065f8:	5b                   	pop    %ebx
c11065f9:	c3                   	ret    


.config of one of them:

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.24-rc2-mm1
# Wed Nov 14 13:27:44 2007
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_X86=y
CONFIG_FAST_CMPXCHG_LOCAL=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_QUICKLIST=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_BUG=y
# CONFIG_GENERIC_GPIO is not set
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"

#
# General setup
#
CONFIG_EXPERIMENTAL=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
# CONFIG_POSIX_MQUEUE is not set
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_TASKSTATS is not set
# CONFIG_USER_NS is not set
CONFIG_PID_NS=y
# CONFIG_AUDIT is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=17
# CONFIG_CGROUPS is not set
# CONFIG_FAIR_GROUP_SCHED is not set
CONFIG_SYSFS_DEPRECATED=y
# CONFIG_RELAY is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_SYSCTL=y
CONFIG_EMBEDDED=y
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
# CONFIG_EVENTFD is not set
CONFIG_SHMEM=y
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_SLUB_DEBUG=y
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_PROC_PAGE_MONITOR=y
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
# CONFIG_LBD is not set
# CONFIG_BLK_DEV_IO_TRACE is not set
# CONFIG_LSF is not set
# CONFIG_BLK_DEV_BSG is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
# CONFIG_IOSCHED_AS is not set
# CONFIG_IOSCHED_DEADLINE is not set
CONFIG_IOSCHED_CFQ=y
# CONFIG_DEFAULT_AS is not set
# CONFIG_DEFAULT_DEADLINE is not set
CONFIG_DEFAULT_CFQ=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="cfq"

#
# Processor type and features
#
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_NUMAQ is not set
# CONFIG_X86_RDC321X is not set
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
# CONFIG_X86_ES7000 is not set
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT_GUEST is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
# CONFIG_MCORE2 is not set
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
CONFIG_MK8=y
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_X86_GENERIC is not set
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_XADD=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
CONFIG_X86_MINIMUM_CPU_FAMILY=4
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_NR_CPUS=2
# CONFIG_SCHED_SMT is not set
# CONFIG_SCHED_MC is not set
# CONFIG_PREEMPT_NONE is not set
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_BKL=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_MCE=y
# CONFIG_X86_MCE_NONFATAL is not set
# CONFIG_X86_MCE_P4THERMAL is not set
CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
# CONFIG_X86_REBOOTFIXUPS is not set
# CONFIG_MICROCODE is not set
# CONFIG_X86_MSR is not set
# CONFIG_X86_CPUID is not set

#
# Firmware Drivers
#
# CONFIG_EDD is not set
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
# CONFIG_DMIID is not set
# CONFIG_NOHIGHMEM is not set
# CONFIG_HIGHMEM4G is not set
CONFIG_HIGHMEM64G=y
CONFIG_VMSPLIT_3G=y
# CONFIG_VMSPLIT_3G_OPT is not set
# CONFIG_VMSPLIT_2G is not set
# CONFIG_VMSPLIT_2G_OPT is not set
# CONFIG_VMSPLIT_1G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
CONFIG_X86_PAE=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_ARCH_POPULATES_NODE_MAP=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_SPARSEMEM_STATIC=y
# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_RESOURCES_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_NR_QUICK=1
CONFIG_VIRT_TO_BUS=y
CONFIG_HIGHPTE=y
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
# CONFIG_EFI is not set
# CONFIG_IRQBALANCE is not set
# CONFIG_SECCOMP is not set
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000
# CONFIG_KEXEC is not set
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x1000000
# CONFIG_RELOCATABLE is not set
CONFIG_PHYSICAL_ALIGN=0x400000
# CONFIG_HOTPLUG_CPU is not set
# CONFIG_COMPAT_VDSO is not set
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y

#
# Power management options (ACPI, APM)
#
CONFIG_PM=y
# CONFIG_PM_LEGACY is not set
# CONFIG_PM_DEBUG is not set
CONFIG_SUSPEND_SMP_POSSIBLE=y
# CONFIG_SUSPEND is not set
CONFIG_HIBERNATION_SMP_POSSIBLE=y
# CONFIG_HIBERNATION is not set
CONFIG_ACPI=y
# CONFIG_ACPI_PROCFS is not set
# CONFIG_ACPI_PROC_EVENT is not set
# CONFIG_ACPI_AC is not set
# CONFIG_ACPI_BATTERY is not set
# CONFIG_ACPI_BUTTON is not set
CONFIG_ACPI_FAN=y
# CONFIG_ACPI_DOCK is not set
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_THERMAL=y
# CONFIG_ACPI_ASUS is not set
# CONFIG_ACPI_TOSHIBA is not set
CONFIG_ACPI_BLACKLIST_YEAR=1999
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_EC=y
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
CONFIG_X86_PM_TIMER=y
# CONFIG_ACPI_CONTAINER is not set
# CONFIG_ACPI_SBS is not set

#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_TABLE=y
# CONFIG_CPU_FREQ_DEBUG is not set
CONFIG_CPU_FREQ_STAT=m
CONFIG_CPU_FREQ_STAT_DETAILS=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=m
# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set

#
# CPUFreq processor drivers
#
# CONFIG_X86_ACPI_CPUFREQ is not set
# CONFIG_X86_POWERNOW_K6 is not set
# CONFIG_X86_POWERNOW_K7 is not set
CONFIG_X86_POWERNOW_K8=y
CONFIG_X86_POWERNOW_K8_ACPI=y
# CONFIG_X86_GX_SUSPMOD is not set
# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
# CONFIG_X86_SPEEDSTEP_ICH is not set
# CONFIG_X86_SPEEDSTEP_SMI is not set
# CONFIG_X86_P4_CLOCKMOD is not set
# CONFIG_X86_CPUFREQ_NFORCE2 is not set
# CONFIG_X86_LONGRUN is not set
# CONFIG_X86_LONGHAUL is not set
# CONFIG_X86_E_POWERSAVER is not set

#
# shared options
#
# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set
# CONFIG_X86_SPEEDSTEP_LIB is not set
# CONFIG_CPU_IDLE is not set

#
# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
#
CONFIG_PCI=y
# CONFIG_PCI_GOBIOS is not set
# CONFIG_PCI_GOMMCONFIG is not set
# CONFIG_PCI_GODIRECT is not set
CONFIG_PCI_GOANY=y
CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCIEAER=y
CONFIG_ARCH_SUPPORTS_MSI=y
# CONFIG_PCI_MSI is not set
# CONFIG_PCI_LEGACY is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_HT_IRQ is not set
CONFIG_ISA_DMA_API=y
# CONFIG_ISA is not set
# CONFIG_MCA is not set
# CONFIG_SCx200 is not set
# CONFIG_PCCARD is not set
# CONFIG_HOTPLUG_PCI is not set

#
# Executable file formats
#
CONFIG_BINFMT_ELF=y
# CONFIG_BINFMT_AOUT is not set
# CONFIG_BINFMT_MISC is not set

#
# Networking
#
CONFIG_NET=y

#
# Networking options
#
CONFIG_PACKET=m
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
# CONFIG_NET_KEY is not set
CONFIG_INET=y
# CONFIG_IP_MULTICAST is not set
# CONFIG_IP_ADVANCED_ROUTER is not set
CONFIG_IP_FIB_HASH=y
# CONFIG_IP_PNP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
# CONFIG_ARPD is not set
CONFIG_SYN_COOKIES=y
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_LRO is not set
CONFIG_INET_DIAG=m
CONFIG_INET_TCP_DIAG=m
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_CUBIC=y
CONFIG_DEFAULT_TCP_CONG="cubic"
# CONFIG_TCP_MD5SIG is not set
CONFIG_IPV6=m
# CONFIG_IPV6_PRIVACY is not set
# CONFIG_IPV6_ROUTER_PREF is not set
# CONFIG_IPV6_OPTIMISTIC_DAD is not set
# CONFIG_INET6_AH is not set
# CONFIG_INET6_ESP is not set
# CONFIG_INET6_IPCOMP is not set
# CONFIG_IPV6_MIP6 is not set
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
# CONFIG_INET6_XFRM_MODE_BEET is not set
# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
# CONFIG_IPV6_SIT is not set
# CONFIG_IPV6_TUNNEL is not set
# CONFIG_IPV6_MULTIPLE_TABLES is not set
# CONFIG_NETWORK_SECMARK is not set
# CONFIG_NETFILTER is not set
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
# CONFIG_NET_SCHED is not set

#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set

#
# Wireless
#
# CONFIG_CFG80211 is not set
# CONFIG_WIRELESS_EXT is not set
# CONFIG_MAC80211 is not set
# CONFIG_IEEE80211 is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set

#
# Device Drivers
#

#
# Generic Driver Options
#
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
# CONFIG_FW_LOADER is not set
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_SYS_HYPERVISOR is not set
# CONFIG_CONNECTOR is not set
# CONFIG_MTD is not set
# CONFIG_PARPORT is not set
CONFIG_PNP=y
# CONFIG_PNP_DEBUG is not set

#
# Protocols
#
CONFIG_PNPACPI=y
CONFIG_BLK_DEV=y
# CONFIG_BLK_DEV_FD is not set
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=m
# CONFIG_BLK_DEV_CRYPTOLOOP is not set
# CONFIG_BLK_DEV_NBD is not set
# CONFIG_BLK_DEV_SX8 is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_RAM_BLOCKSIZE=4096
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
# CONFIG_MISC_DEVICES is not set
# CONFIG_IDE is not set

#
# SCSI device support
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
# CONFIG_SCSI_TGT is not set
# CONFIG_SCSI_NETLINK is not set
# CONFIG_SCSI_PROC_FS is not set

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=m
# CONFIG_BLK_DEV_SR_VENDOR is not set
# CONFIG_CHR_DEV_SG is not set
# CONFIG_CHR_DEV_SCH is not set

#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
#
# CONFIG_SCSI_MULTI_LUN is not set
# CONFIG_SCSI_CONSTANTS is not set
# CONFIG_SCSI_LOGGING is not set
# CONFIG_SCSI_SCAN_ASYNC is not set
CONFIG_SCSI_WAIT_SCAN=m

#
# SCSI Transports
#
# CONFIG_SCSI_SPI_ATTRS is not set
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_ACPI=y
# CONFIG_SATA_AHCI is not set
# CONFIG_SATA_SVW is not set
# CONFIG_ATA_PIIX is not set
# CONFIG_SATA_MV is not set
# CONFIG_SATA_NV is not set
# CONFIG_PDC_ADMA is not set
# CONFIG_SATA_QSTOR is not set
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_SX4 is not set
CONFIG_SATA_SIL=y
# CONFIG_SATA_SIL24 is not set
# CONFIG_SATA_SIS is not set
# CONFIG_SATA_ULI is not set
# CONFIG_SATA_VIA is not set
# CONFIG_SATA_VITESSE is not set
# CONFIG_SATA_INIC162X is not set
# CONFIG_PATA_ACPI is not set
# CONFIG_PATA_ALI is not set
CONFIG_PATA_AMD=y
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CS5520 is not set
# CONFIG_PATA_CS5530 is not set
# CONFIG_PATA_CS5535 is not set
# CONFIG_PATA_CS5536 is not set
# CONFIG_PATA_CYPRESS is not set
# CONFIG_PATA_EFAR is not set
# CONFIG_ATA_GENERIC is not set
# CONFIG_PATA_HPT366 is not set
# CONFIG_PATA_HPT37X is not set
# CONFIG_PATA_HPT3X2N is not set
# CONFIG_PATA_HPT3X3 is not set
# CONFIG_PATA_IT821X is not set
# CONFIG_PATA_IT8213 is not set
# CONFIG_PATA_JMICRON is not set
# CONFIG_PATA_TRIFLEX is not set
# CONFIG_PATA_MARVELL is not set
# CONFIG_PATA_MPIIX is not set
# CONFIG_PATA_OLDPIIX is not set
# CONFIG_PATA_NETCELL is not set
# CONFIG_PATA_NS87410 is not set
# CONFIG_PATA_NS87415 is not set
# CONFIG_PATA_OPTI is not set
# CONFIG_PATA_OPTIDMA is not set
# CONFIG_PATA_PDC_OLD is not set
# CONFIG_PATA_RADISYS is not set
# CONFIG_PATA_RZ1000 is not set
# CONFIG_PATA_SC1200 is not set
# CONFIG_PATA_SERVERWORKS is not set
# CONFIG_PATA_PDC2027X is not set
# CONFIG_PATA_SIL680 is not set
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set
# CONFIG_PATA_PLATFORM is not set
# CONFIG_MD is not set
# CONFIG_FUSION is not set

#
# IEEE 1394 (FireWire) support
#
# CONFIG_FIREWIRE is not set
# CONFIG_IEEE1394 is not set
# CONFIG_I2O is not set
# CONFIG_MACINTOSH_DRIVERS is not set
CONFIG_NETDEVICES=y
# CONFIG_NETDEVICES_MULTIQUEUE is not set
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
# CONFIG_MACVLAN is not set
# CONFIG_EQUALIZER is not set
# CONFIG_TUN is not set
# CONFIG_VETH is not set
# CONFIG_NET_SB1000 is not set
# CONFIG_IP1000 is not set
# CONFIG_ARCNET is not set
# CONFIG_NET_ETHERNET is not set
CONFIG_NETDEV_1000=y
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
# CONFIG_E1000 is not set
# CONFIG_E1000E is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
# CONFIG_SKY2 is not set
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
# CONFIG_BNX2 is not set
# CONFIG_QLA3XXX is not set
# CONFIG_ATL1 is not set
# CONFIG_NETDEV_10000 is not set
# CONFIG_TR is not set

#
# Wireless LAN
#
# CONFIG_WLAN_PRE80211 is not set
# CONFIG_WLAN_80211 is not set
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
# CONFIG_SHAPER is not set
CONFIG_NETCONSOLE=y
# CONFIG_NETCONSOLE_DYNAMIC is not set
CONFIG_NETPOLL=y
# CONFIG_NETPOLL_TRAP is not set
CONFIG_NET_POLL_CONTROLLER=y
# CONFIG_ISDN is not set
# CONFIG_PHONE is not set

#
# Input device support
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_MEMLESS is not set
# CONFIG_INPUT_POLLDEV is not set

#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
# CONFIG_KEYBOARD_NEWTON is not set
# CONFIG_KEYBOARD_STOWAWAY is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=m
# CONFIG_MOUSE_PS2_ALPS is not set
# CONFIG_MOUSE_PS2_LOGIPS2PP is not set
# CONFIG_MOUSE_PS2_SYNAPTICS is not set
# CONFIG_MOUSE_PS2_LIFEBOOK is not set
# CONFIG_MOUSE_PS2_TRACKPOINT is not set
# CONFIG_MOUSE_PS2_TOUCHKIT is not set
# CONFIG_MOUSE_PS2_ELANTECH is not set
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_VSXXXAA is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TABLET is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
# CONFIG_INPUT_MISC is not set

#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
CONFIG_SERIO_SERPORT=y
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
CONFIG_SERIO_RAW=m
# CONFIG_GAMEPORT is not set

#
# Character devices
#
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
# CONFIG_VT_HW_CONSOLE_BINDING is not set
# CONFIG_SERIAL_NONSTANDARD is not set
# CONFIG_NOZOMI is not set

#
# Serial drivers
#
# CONFIG_SERIAL_8250 is not set
CONFIG_FIX_EARLYCON_MEM=y

#
# Non-8250 serial port support
#
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_INTEL is not set
CONFIG_HW_RANDOM_AMD=y
# CONFIG_HW_RANDOM_GEODE is not set
# CONFIG_HW_RANDOM_VIA is not set
CONFIG_NVRAM=y
CONFIG_RTC=y
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
# CONFIG_SONYPI is not set
# CONFIG_MWAVE is not set
# CONFIG_PC8736x_GPIO is not set
# CONFIG_NSC_GPIO is not set
# CONFIG_CS5535_GPIO is not set
# CONFIG_RAW_DRIVER is not set
CONFIG_HPET=y
# CONFIG_HPET_RTC_IRQ is not set
# CONFIG_HPET_MMAP is not set
# CONFIG_HANGCHECK_TIMER is not set
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
# CONFIG_I2C is not set

#
# SPI support
#
# CONFIG_SPI is not set
# CONFIG_SPI_MASTER is not set
# CONFIG_W1 is not set
# CONFIG_POWER_SUPPLY is not set
CONFIG_HWMON=m
# CONFIG_HWMON_VID is not set
# CONFIG_SENSORS_ABITUGURU is not set
# CONFIG_SENSORS_ABITUGURU3 is not set
CONFIG_SENSORS_K8TEMP=m
# CONFIG_SENSORS_I5K_AMB is not set
# CONFIG_SENSORS_F71805F is not set
# CONFIG_SENSORS_F71882FG is not set
# CONFIG_SENSORS_CORETEMP is not set
# CONFIG_SENSORS_IT87 is not set
# CONFIG_SENSORS_PC87360 is not set
# CONFIG_SENSORS_PC87427 is not set
# CONFIG_SENSORS_SIS5595 is not set
# CONFIG_SENSORS_SMSC47M1 is not set
# CONFIG_SENSORS_SMSC47B397 is not set
# CONFIG_SENSORS_VIA686A is not set
# CONFIG_SENSORS_VT1211 is not set
# CONFIG_SENSORS_VT8231 is not set
# CONFIG_SENSORS_W83627HF is not set
# CONFIG_SENSORS_W83627EHF is not set
# CONFIG_SENSORS_HDAPS is not set
# CONFIG_SENSORS_APPLESMC is not set
# CONFIG_HWMON_DEBUG_CHIP is not set
# CONFIG_WATCHDOG is not set

#
# Sonics Silicon Backplane
#
CONFIG_SSB_POSSIBLE=y
# CONFIG_SSB is not set

#
# Multifunction device drivers
#
# CONFIG_MFD_SM501 is not set

#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
# CONFIG_DVB_CORE is not set
# CONFIG_DAB is not set

#
# Graphics support
#
# CONFIG_AGP is not set
# CONFIG_DRM is not set
# CONFIG_VGASTATE is not set
# CONFIG_VIDEO_OUTPUT_CONTROL is not set
# CONFIG_FB is not set
# CONFIG_BACKLIGHT_LCD_SUPPORT is not set

#
# Display device support
#
# CONFIG_DISPLAY_SUPPORT is not set

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_VIDEO_SELECT=y
CONFIG_DUMMY_CONSOLE=y

#
# Sound
#
# CONFIG_SOUND is not set
# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_MMC is not set
# CONFIG_NEW_LEDS is not set
# CONFIG_INFINIBAND is not set
# CONFIG_EDAC is not set
# CONFIG_RTC_CLASS is not set
# CONFIG_DMADEVICES is not set
# CONFIG_VIRTUALIZATION is not set

#
# Userspace I/O
#
# CONFIG_UIO is not set

#
# File systems
#
CONFIG_EXT2_FS=m
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
# CONFIG_EXT2_FS_XIP is not set
CONFIG_EXT3_FS=y
# CONFIG_EXT3_FS_XATTR is not set
# CONFIG_EXT4DEV_FS is not set
CONFIG_JBD=y
CONFIG_FS_MBCACHE=y
# CONFIG_REISER4_FS is not set
CONFIG_REISERFS_FS=m
# CONFIG_REISERFS_CHECK is not set
# CONFIG_REISERFS_PROC_INFO is not set
CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
# CONFIG_JFS_DEBUG is not set
# CONFIG_JFS_STATISTICS is not set
CONFIG_FS_POSIX_ACL=y
CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
CONFIG_XFS_SECURITY=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY is not set
# CONFIG_QUOTA is not set
CONFIG_QUOTACTL=y
# CONFIG_AUTOFS_FS is not set
# CONFIG_AUTOFS4_FS is not set
# CONFIG_FUSE_FS is not set

#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_UDF_NLS=y

#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
# CONFIG_NTFS_FS is not set

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_PROC_SYSCTL=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
# CONFIG_TMPFS_POSIX_ACL is not set
# CONFIG_HUGETLBFS is not set
# CONFIG_HUGETLB_PAGE is not set
# CONFIG_CONFIGFS_FS is not set

#
# Layered filesystems
#
# CONFIG_UNION_FS is not set

#
# Miscellaneous filesystems
#
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set

#
# Partition Types
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y
CONFIG_NLS=m
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=m
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
CONFIG_NLS_CODEPAGE_855=m
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
CONFIG_NLS_CODEPAGE_866=m
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
CONFIG_NLS_CODEPAGE_1251=m
CONFIG_NLS_ASCII=m
CONFIG_NLS_ISO8859_1=m
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
CONFIG_NLS_ISO8859_5=m
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
# CONFIG_DLM is not set
# CONFIG_INSTRUMENTATION is not set

#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
# CONFIG_PAGE_OWNER is not set
# CONFIG_DEBUG_FS is not set
# CONFIG_HEADERS_CHECK is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHED_DEBUG is not set
# CONFIG_SCHEDSTATS is not set
# CONFIG_TIMER_STATS is not set
CONFIG_SLUB_DEBUG_ON=y
CONFIG_DEBUG_PREEMPT=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_PI_LIST=y
# CONFIG_RT_MUTEX_TESTER is not set
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCKDEP=y
# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_LOCKDEP is not set
CONFIG_TRACE_IRQFLAGS=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_STACKTRACE=y
# CONFIG_DEBUG_KOBJECT is not set
CONFIG_DEBUG_HIGHMEM=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
# CONFIG_FRAME_POINTER is not set
# CONFIG_PROFILE_LIKELY is not set
# CONFIG_FORCED_INLINING is not set
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_DEBUG_SYNCHRO_TEST is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_SAMPLES is not set
# CONFIG_EARLY_PRINTK is not set
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_DEBUG_STACK_USAGE is not set
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DEBUG_RODATA=y
CONFIG_4KSTACKS=y
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
CONFIG_DOUBLEFAULT=y

#
# Security options
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY is not set
# CONFIG_SECURITY_FILE_CAPABILITIES is not set
# CONFIG_CRYPTO is not set

#
# Library routines
#
CONFIG_BITREVERSE=y
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
CONFIG_ZLIB_INFLATE=m
CONFIG_PLIST=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_DMA=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_X86_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
CONFIG_KTIME_SCALAR=y


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15  9:11 EIP is at device_shutdown+0x32/0x60 Alexey Dobriyan
@ 2007-11-15  9:44 ` Andrew Morton
  2007-11-15  9:59   ` Alexey Dobriyan
                     ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Andrew Morton @ 2007-11-15  9:44 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: linux-kernel

On Thu, 15 Nov 2007 12:11:58 +0300 Alexey Dobriyan <adobriyan@sw.ru> wrote:

> Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
> (1) and during 2.6.24 cycle (2):
> 
> 	kernel_restart
> 	sys_reboot
> 	[garbage]
> Code: 8b 88 a8 00 00 00 85 c9 74 04 89
> EIP is at device_shutdown+0x32/0x60

Yes, all my test boxes did that - it's what I referred to in the release
notes.  Greg is pondering the problem - seems he's the only person who
cannot reproduce it ;)

But what does "during 2.6.24 cycle (2)" mean?  Some kernel other than
2.6.24-rc2-mm1 is crashing?


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15  9:44 ` Andrew Morton
@ 2007-11-15  9:59   ` Alexey Dobriyan
  2007-11-15 12:55   ` Yasunori Goto
  2007-11-15 14:40   ` Jeff Dike
  2 siblings, 0 replies; 18+ messages in thread
From: Alexey Dobriyan @ 2007-11-15  9:59 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Thu, Nov 15, 2007 at 01:44:46AM -0800, Andrew Morton wrote:
> On Thu, 15 Nov 2007 12:11:58 +0300 Alexey Dobriyan <adobriyan@sw.ru> wrote:
> 
> > Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
> > (1) and during 2.6.24 cycle (2):
> > 
> > 	kernel_restart
> > 	sys_reboot
> > 	[garbage]
> > Code: 8b 88 a8 00 00 00 85 c9 74 04 89
> > EIP is at device_shutdown+0x32/0x60
> 
> Yes, all my test boxes did that - it's what I referred to in the release
> notes.  Greg is pondering the problem - seems he's the only person who
> cannot reproduce it ;)
> 
> But what does "during 2.6.24 cycle (2)" mean?  Some kernel other than
> 2.6.24-rc2-mm1 is crashing?

2.6.24-something also rarely crashes on reboot for me. In fact it can very
well be a different crash. I never managed to catch an oops message with
mainline. Netconsole doesn't show anything, replugging the monitor cable
doesn't help either, and the bug disappears if I try to catch it. :)


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15  9:44 ` Andrew Morton
  2007-11-15  9:59   ` Alexey Dobriyan
@ 2007-11-15 12:55   ` Yasunori Goto
  2007-11-15 13:15     ` Cornelia Huck
  2007-11-15 16:34     ` Greg KH
  2007-11-15 14:40   ` Jeff Dike
  2 siblings, 2 replies; 18+ messages in thread
From: Yasunori Goto @ 2007-11-15 12:55 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alexey Dobriyan, linux-kernel, Greg Kroah-Hartman

> On Thu, 15 Nov 2007 12:11:58 +0300 Alexey Dobriyan <adobriyan@sw.ru> wrote:
> 
> > Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
> > (1) and during 2.6.24 cycle (2):
> > 
> > 	kernel_restart
> > 	sys_reboot
> > 	[garbage]
> > Code: 8b 88 a8 00 00 00 85 c9 74 04 89
> > EIP is at device_shutdown+0x32/0x60
> 
> > Yes, all my test boxes did that - it's what I referred to in the release
> > notes.  Greg is pondering the problem - seems he's the only person who
> cannot reproduce it ;)

Fortunately, my ia64 box reproduces this oops "every time". 
So, I could chase it.

The device_shutdown() function in drivers/base/power/shutdown.c
is as follows.
-----------
/**
 * device_shutdown - call ->shutdown() on each device to shutdown.
 */
void device_shutdown(void)
{
        struct device * dev, *devn;

        list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
                                kobj.entry) {
                if (dev->bus && dev->bus->shutdown) {
                        dev_dbg(dev, "shutdown\n");
                        dev->bus->shutdown(dev);
                } else if (dev->driver && dev->driver->shutdown) {
                        dev_dbg(dev, "shutdown\n");
                        dev->driver->shutdown(dev);
                }
        }
}
--------
When the oops occurred, dev->driver pointed to kset_ktype's address,
and dev->driver->shutdown was the address of bus_type_list.
So, the oops was caused by an "Illegal operation fault".
kset_ktype is pointed to by system_kset.

If my understanding is correct, this loop can't distinguish between
struct device and struct kset, but both are connected in this list,
right? It may be the cause of this.

Bye.

-- 
Yasunori Goto 



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 12:55   ` Yasunori Goto
@ 2007-11-15 13:15     ` Cornelia Huck
  2007-11-15 16:34     ` Greg KH
  1 sibling, 0 replies; 18+ messages in thread
From: Cornelia Huck @ 2007-11-15 13:15 UTC (permalink / raw)
  To: Yasunori Goto
  Cc: Andrew Morton, Alexey Dobriyan, linux-kernel, Greg Kroah-Hartman

On Thu, 15 Nov 2007 21:55:34 +0900,
Yasunori Goto <y-goto@jp.fujitsu.com> wrote:

> /**
>  * device_shutdown - call ->shutdown() on each device to shutdown.
>  */
> void device_shutdown(void)
> {
>         struct device * dev, *devn;
> 
>         list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
>                                 kobj.entry) {
>                 if (dev->bus && dev->bus->shutdown) {
>                         dev_dbg(dev, "shutdown\n");
>                         dev->bus->shutdown(dev);
>                 } else if (dev->driver && dev->driver->shutdown) {
>                         dev_dbg(dev, "shutdown\n");
>                         dev->driver->shutdown(dev);
>                 }
>         }
> }
> --------
> When oops occured, dev->driver pointed kset_ktype's address,
> and dev->driver->shutdown was the address of bus_type_list.
> So, Oops was caused by "Illegal operation fault".
> kset_ktypes is pointed by system_kset.
> 
> If my understanding is correct, this loop can't distinguish between
> struct device and struct kset, but both are connected in this list,
> right? It may be the cause of this.

Uh. This fits with my observations on s390 as well (the list is not
corrupted, but it contains entities that are not devices...)

The other entries under /sys/devices (on my system css0, platform,
qeth) are all struct devices. This sysdev stuff is headache-inducing :(

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15  9:44 ` Andrew Morton
  2007-11-15  9:59   ` Alexey Dobriyan
  2007-11-15 12:55   ` Yasunori Goto
@ 2007-11-15 14:40   ` Jeff Dike
  2 siblings, 0 replies; 18+ messages in thread
From: Jeff Dike @ 2007-11-15 14:40 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alexey Dobriyan, linux-kernel

On Thu, Nov 15, 2007 at 01:44:46AM -0800, Andrew Morton wrote:
> Yes, all my test boxes did that - it's what I referred to in the releaee
> notes.  Greg is pondering the problem - seem he's the only person who
> cannot reproduce it ;)

UML does it reliably too, in case Greg is still looking for a way to
reproduce it...

				Jeff

-- 
Work email - jdike at linux dot intel dot com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 12:55   ` Yasunori Goto
  2007-11-15 13:15     ` Cornelia Huck
@ 2007-11-15 16:34     ` Greg KH
  2007-11-15 17:07       ` Mark Lord
  2007-11-15 17:50       ` Kay Sievers
  1 sibling, 2 replies; 18+ messages in thread
From: Greg KH @ 2007-11-15 16:34 UTC (permalink / raw)
  To: Yasunori Goto; +Cc: Andrew Morton, Alexey Dobriyan, linux-kernel

On Thu, Nov 15, 2007 at 09:55:34PM +0900, Yasunori Goto wrote:
> > On Thu, 15 Nov 2007 12:11:58 +0300 Alexey Dobriyan <adobriyan@sw.ru> wrote:
> > 
> > > Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
> > > (1) and during 2.6.24 cycle (2):
> > > 
> > > 	kernel_restart
> > > 	sys_reboot
> > > 	[garbage]
> > > Code: 8b 88 a8 00 00 00 85 c9 74 04 89
> > > EIP is at device_shutdown+0x32/0x60
> > 
> > Yes, all my test boxes did that - it's what I referred to in the releaee
> > notes.  Greg is pondering the problem - seem he's the only person who
> > cannot reproduce it ;)
> 
> Fortunately, my ia64 box reproduces this oops "every time". 
> So, I could chase it.
> 
> device_shutdown() function in drivers/base/power/shutdown.c
> is followings.
> -----------
> /**
>  * device_shutdown - call ->shutdown() on each device to shutdown.
>  */
> void device_shutdown(void)
> {
>         struct device * dev, *devn;
> 
>         list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
>                                 kobj.entry) {
>                 if (dev->bus && dev->bus->shutdown) {
>                         dev_dbg(dev, "shutdown\n");
>                         dev->bus->shutdown(dev);
>                 } else if (dev->driver && dev->driver->shutdown) {
>                         dev_dbg(dev, "shutdown\n");
>                         dev->driver->shutdown(dev);
>                 }
>         }
> }
> --------
> When oops occured, dev->driver pointed kset_ktype's address,
> and dev->driver->shutdown was the address of bus_type_list.
> So, Oops was caused by "Illegal operation fault".
> kset_ktypes is pointed by system_kset.
> 
> If my understanding is correct, this loop can't distinguish between
> struct device and struct kset, but both are connected in this list,
> right? It may be the cause of this.

Hm, no, it should just be a list of devices for the kset, but I'll go
verify that this is correct.

And yeah, I can duplicate this problem here too...

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 16:34     ` Greg KH
@ 2007-11-15 17:07       ` Mark Lord
  2007-11-15 18:23         ` Greg KH
  2007-11-15 17:50       ` Kay Sievers
  1 sibling, 1 reply; 18+ messages in thread
From: Mark Lord @ 2007-11-15 17:07 UTC (permalink / raw)
  To: Greg KH; +Cc: Yasunori Goto, Andrew Morton, Alexey Dobriyan, linux-kernel

Greg KH wrote:
> On Thu, Nov 15, 2007 at 09:55:34PM +0900, Yasunori Goto wrote:
>>> On Thu, 15 Nov 2007 12:11:58 +0300 Alexey Dobriyan <adobriyan@sw.ru> wrote:
>>>
>>>> Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
>>>> (1) and during 2.6.24 cycle (2):
>>>>
>>>> 	kernel_restart
>>>> 	sys_reboot
>>>> 	[garbage]
>>>> Code: 8b 88 a8 00 00 00 85 c9 74 04 89
>>>> EIP is at device_shutdown+0x32/0x60
>>> Yes, all my test boxes did that - it's what I referred to in the releaee
>>> notes.  Greg is pondering the problem - seem he's the only person who
>>> cannot reproduce it ;)
>> Fortunately, my ia64 box reproduces this oops "every time". 
>> So, I could chase it.
>>
>> device_shutdown() function in drivers/base/power/shutdown.c
>> is followings.
>> -----------
>> /**
>>  * device_shutdown - call ->shutdown() on each device to shutdown.
>>  */
>> void device_shutdown(void)
>> {
>>         struct device * dev, *devn;
>>
>>         list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
>>                                 kobj.entry) {
>>                 if (dev->bus && dev->bus->shutdown) {
>>                         dev_dbg(dev, "shutdown\n");
>>                         dev->bus->shutdown(dev);
>>                 } else if (dev->driver && dev->driver->shutdown) {
>>                         dev_dbg(dev, "shutdown\n");
>>                         dev->driver->shutdown(dev);
>>                 }
>>         }
>> }
>> --------
>> When oops occured, dev->driver pointed kset_ktype's address,
>> and dev->driver->shutdown was the address of bus_type_list.
>> So, Oops was caused by "Illegal operation fault".
>> kset_ktypes is pointed by system_kset.
>>
>> If my understanding is correct, this loop can't distinguish between
>> struct device and struct kset, but both are connected in this list,
>> right? It may be the cause of this.
> 
> Hm, no, it should just be a list of devices for the kset, but I'll go
> verify that this is correct.
> 
> And yeah, I can duplicate this problem here too...
..

Greg, I don't know if this is relevant or not,
but x86 has bugs in the halt/reboot code for SMP.

Specifically, in native_smp_send_stop() the code now uses
spin_trylock() to "lock" the shared call buffers,
but then ignores the result.

This means that multiple CPUs can/will clobber each other
in that code.

The second bug is that this code does not wait for the
target CPUs to actually stop before it continues.

This was the real cause of the failure-to-poweroff problems
I was having with 2.6.23, which we fixed by using CPU hotplug
to disable_nonboot_cpus() before the above code ever got run.

Maybe it's related, maybe not.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 16:34     ` Greg KH
  2007-11-15 17:07       ` Mark Lord
@ 2007-11-15 17:50       ` Kay Sievers
  2007-11-15 18:18         ` Greg KH
  2007-11-15 19:01         ` Greg KH
  1 sibling, 2 replies; 18+ messages in thread
From: Kay Sievers @ 2007-11-15 17:50 UTC (permalink / raw)
  To: Greg KH; +Cc: Yasunori Goto, Andrew Morton, Alexey Dobriyan, linux-kernel

On Nov 15, 2007 5:34 PM, Greg KH <gregkh@suse.de> wrote:
> On Thu, Nov 15, 2007 at 09:55:34PM +0900, Yasunori Goto wrote:
> > > On Thu, 15 Nov 2007 12:11:58 +0300 Alexey Dobriyan <adobriyan@sw.ru> wrote:
> > >
> > > > Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
> > > > (1) and during 2.6.24 cycle (2):
> > > >
> > > >   kernel_restart
> > > >   sys_reboot
> > > >   [garbage]
> > > > Code: 8b 88 a8 00 00 00 85 c9 74 04 89
> > > > EIP is at device_shutdown+0x32/0x60
> > >
> > > Yes, all my test boxes did that - it's what I referred to in the releaee
> > > notes.  Greg is pondering the problem - seem he's the only person who
> > > cannot reproduce it ;)
> >
> > Fortunately, my ia64 box reproduces this oops "every time".
> > So, I could chase it.
> >
> > device_shutdown() function in drivers/base/power/shutdown.c
> > is followings.
> > -----------
> > /**
> >  * device_shutdown - call ->shutdown() on each device to shutdown.
> >  */
> > void device_shutdown(void)
> > {
> >         struct device * dev, *devn;
> >
> >         list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
> >                                 kobj.entry) {
> >                 if (dev->bus && dev->bus->shutdown) {
> >                         dev_dbg(dev, "shutdown\n");
> >                         dev->bus->shutdown(dev);
> >                 } else if (dev->driver && dev->driver->shutdown) {
> >                         dev_dbg(dev, "shutdown\n");
> >                         dev->driver->shutdown(dev);
> >                 }
> >         }
> > }
> > --------
> > When oops occured, dev->driver pointed kset_ktype's address,
> > and dev->driver->shutdown was the address of bus_type_list.
> > So, Oops was caused by "Illegal operation fault".
> > kset_ktypes is pointed by system_kset.
> >
> > If my understanding is correct, this loop can't distinguish between
> > struct device and struct kset, but both are connected in this list,
> > right? It may be the cause of this.
>
> Hm, no, it should just be a list of devices for the kset, but I'll go
> verify that this is correct.

Care to try this:
  +       system_kset = kset_create_and_register("system", NULL,
  +                                              &devices_kset->kobj, NULL);

We should not join the kset, only use it as a parent.

Kay

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 17:50       ` Kay Sievers
@ 2007-11-15 18:18         ` Greg KH
  2007-11-15 19:01         ` Greg KH
  1 sibling, 0 replies; 18+ messages in thread
From: Greg KH @ 2007-11-15 18:18 UTC (permalink / raw)
  To: Kay Sievers; +Cc: Yasunori Goto, Andrew Morton, Alexey Dobriyan, linux-kernel

On Thu, Nov 15, 2007 at 06:50:06PM +0100, Kay Sievers wrote:
> On Nov 15, 2007 5:34 PM, Greg KH <gregkh@suse.de> wrote:
> > On Thu, Nov 15, 2007 at 09:55:34PM +0900, Yasunori Goto wrote:
> > > > On Thu, 15 Nov 2007 12:11:58 +0300 Alexey Dobriyan <adobriyan@sw.ru> wrote:
> > > >
> > > > > Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
> > > > > (1) and during 2.6.24 cycle (2):
> > > > >
> > > > >   kernel_restart
> > > > >   sys_reboot
> > > > >   [garbage]
> > > > > Code: 8b 88 a8 00 00 00 85 c9 74 04 89
> > > > > EIP is at device_shutdown+0x32/0x60
> > > >
> > > > Yes, all my test boxes did that - it's what I referred to in the releaee
> > > > notes.  Greg is pondering the problem - seem he's the only person who
> > > > cannot reproduce it ;)
> > >
> > > Fortunately, my ia64 box reproduces this oops "every time".
> > > So, I could chase it.
> > >
> > > device_shutdown() function in drivers/base/power/shutdown.c
> > > is followings.
> > > -----------
> > > /**
> > >  * device_shutdown - call ->shutdown() on each device to shutdown.
> > >  */
> > > void device_shutdown(void)
> > > {
> > >         struct device * dev, *devn;
> > >
> > >         list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
> > >                                 kobj.entry) {
> > >                 if (dev->bus && dev->bus->shutdown) {
> > >                         dev_dbg(dev, "shutdown\n");
> > >                         dev->bus->shutdown(dev);
> > >                 } else if (dev->driver && dev->driver->shutdown) {
> > >                         dev_dbg(dev, "shutdown\n");
> > >                         dev->driver->shutdown(dev);
> > >                 }
> > >         }
> > > }
> > > --------
> > > When oops occured, dev->driver pointed kset_ktype's address,
> > > and dev->driver->shutdown was the address of bus_type_list.
> > > So, Oops was caused by "Illegal operation fault".
> > > kset_ktypes is pointed by system_kset.
> > >
> > > If my understanding is correct, this loop can't distinguish between
> > > struct device and struct kset, but both are connected in this list,
> > > right? It may be the cause of this.
> >
> > Hm, no, it should just be a list of devices for the kset, but I'll go
> > verify that this is correct.
> 
> Care to try this:
>   +       system_kset = kset_create_and_register("system", NULL,
>   +                                              &devices_kset->kobj, NULL);
> 
> We should not join the kset, only use it as a parent.

ARGH!

<snip loads of curse words...>

that should do it, let me go test.

Actually, once that is changed, the whole kset_create_and_register can
drop that last argument, we never want to create a kset as part of
another one...

The kset's kobject should not belong to any kset at all, I really messed
that one up.

Yet another reason why this patchset really matters, this crap isn't
even understood well by the people trying to maintain it...

greg k-h

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 17:07       ` Mark Lord
@ 2007-11-15 18:23         ` Greg KH
  2007-11-15 18:29           ` Mark Lord
  0 siblings, 1 reply; 18+ messages in thread
From: Greg KH @ 2007-11-15 18:23 UTC (permalink / raw)
  To: Mark Lord; +Cc: Yasunori Goto, Andrew Morton, Alexey Dobriyan, linux-kernel

On Thu, Nov 15, 2007 at 12:07:48PM -0500, Mark Lord wrote:
> Greg KH wrote:
>> On Thu, Nov 15, 2007 at 09:55:34PM +0900, Yasunori Goto wrote:
>>>> On Thu, 15 Nov 2007 12:11:58 +0300 Alexey Dobriyan <adobriyan@sw.ru> 
>>>> wrote:
>>>>
>>>>> Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
>>>>> (1) and during 2.6.24 cycle (2):
>>>>>
>>>>> 	kernel_restart
>>>>> 	sys_reboot
>>>>> 	[garbage]
>>>>> Code: 8b 88 a8 00 00 00 85 c9 74 04 89
>>>>> EIP is at device_shutdown+0x32/0x60
>>>> Yes, all my test boxes did that - it's what I referred to in the releaee
>>>> notes.  Greg is pondering the problem - seem he's the only person who
>>>> cannot reproduce it ;)
>>> Fortunately, my ia64 box reproduces this oops "every time". So, I could 
>>> chase it.
>>>
>>> device_shutdown() function in drivers/base/power/shutdown.c
>>> is followings.
>>> -----------
>>> /**
>>>  * device_shutdown - call ->shutdown() on each device to shutdown.
>>>  */
>>> void device_shutdown(void)
>>> {
>>>         struct device * dev, *devn;
>>>
>>>         list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
>>>                                 kobj.entry) {
>>>                 if (dev->bus && dev->bus->shutdown) {
>>>                         dev_dbg(dev, "shutdown\n");
>>>                         dev->bus->shutdown(dev);
>>>                 } else if (dev->driver && dev->driver->shutdown) {
>>>                         dev_dbg(dev, "shutdown\n");
>>>                         dev->driver->shutdown(dev);
>>>                 }
>>>         }
>>> }
>>> --------
>>> When oops occured, dev->driver pointed kset_ktype's address,
>>> and dev->driver->shutdown was the address of bus_type_list.
>>> So, Oops was caused by "Illegal operation fault".
>>> kset_ktypes is pointed by system_kset.
>>>
>>> If my understanding is correct, this loop can't distinguish between
>>> struct device and struct kset, but both are connected in this list,
>>> right? It may be the cause of this.
>> Hm, no, it should just be a list of devices for the kset, but I'll go
>> verify that this is correct.
>> And yeah, I can duplicate this problem here too...
> ..
>
> Greg, I don't know if this is relevant or not,
> but x86 has bugs in the halt/reboot code for SMP.
>
> Specifically, in native_smp_send_stop() the code now uses
> spin_trylock() to "lock" the shared call buffers,
> but then ignores the result.
>
> This means that multiple CPUs can/will clobber each other
> in that code.
>
> The second bug, is that this code does not wait for the
> target CPUs to actually stop before it continues.
>
> This was the real cause of the failure-to-poweroff problems
> I was having with 2.6.23, which we fixed by using CPU hotplug
> to disable_nonboot_cpus() before the above code ever got run.

I have noticed that the shutdown path is quite weird, shutting down
sysdev devices differently depending on the type of shutdown, which is
probably not good.

But what change are you talking about for the poweroff problem?  I have
a _lot_ of people reporting that 2.6.22 is not powering off for them and
I can't seem to figure it out.  Do you have a changeset for something
that went in to fix this issue?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 18:23         ` Greg KH
@ 2007-11-15 18:29           ` Mark Lord
  2007-11-15 18:48             ` Greg KH
  0 siblings, 1 reply; 18+ messages in thread
From: Mark Lord @ 2007-11-15 18:29 UTC (permalink / raw)
  To: Greg KH; +Cc: Yasunori Goto, Andrew Morton, Alexey Dobriyan, linux-kernel

Greg KH wrote:
> On Thu, Nov 15, 2007 at 12:07:48PM -0500, Mark Lord wrote:
..
>> Greg, I don't know if this is relevant or not,
>> but x86 has bugs in the halt/reboot code for SMP.
>>
>> Specifically, in native_smp_send_stop() the code now uses
>> spin_trylock() to "lock" the shared call buffers,
>> but then ignores the result.
>>
>> This means that multiple CPUs can/will clobber each other
>> in that code.
>>
>> The second bug, is that this code does not wait for the
>> target CPUs to actually stop before it continues.
>>
>> This was the real cause of the failure-to-poweroff problems
>> I was having with 2.6.23, which we fixed by using CPU hotplug
>> to disable_nonboot_cpus() before the above code ever got run.
> 
> I have noticed that the shutdown path is quite weird, shutting down
> sysdev devices differently depending on the type of shutdown, which is
> probably not good.
> 
> But what change are you talking about for the poweroff problem?  I have
> a _lot_ of people reporting that 2.6.22 is not powering off for them and
> I can't seem to figure it out.  Do you have a changeset for something
> that went in to fix this issue?
..

Well, the real bugs that cause the problem are described by me above.
I don't have a fix for those, but the workaround is in 2.6.23
under git 4047727e5ae33f9b8d2b7766d1994ea6e5ec2991 Fix SMP poweroff hangs.

With that workaround, there's no more hanging on halt,
though there could still be a hang on reboot.

A problem with that workaround is that it has no effect unless
the CPU hotplug code is configured (CONFIG_PM_SLEEP_SMP and pals).

Cheers

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 18:29           ` Mark Lord
@ 2007-11-15 18:48             ` Greg KH
  2007-11-15 19:13               ` Mark Lord
  0 siblings, 1 reply; 18+ messages in thread
From: Greg KH @ 2007-11-15 18:48 UTC (permalink / raw)
  To: Mark Lord; +Cc: Yasunori Goto, Andrew Morton, Alexey Dobriyan, linux-kernel

On Thu, Nov 15, 2007 at 01:29:04PM -0500, Mark Lord wrote:
> Greg KH wrote:
>> On Thu, Nov 15, 2007 at 12:07:48PM -0500, Mark Lord wrote:
> ..
>>> Greg, I don't know if this is relevant or not,
>>> but x86 has bugs in the halt/reboot code for SMP.
>>>
>>> Specifically, in native_smp_send_stop() the code now uses
>>> spin_trylock() to "lock" the shared call buffers,
>>> but then ignores the result.
>>>
>>> This means that multiple CPUs can/will clobber each other
>>> in that code.
>>>
>>> The second bug, is that this code does not wait for the
>>> target CPUs to actually stop before it continues.
>>>
>>> This was the real cause of the failure-to-poweroff problems
>>> I was having with 2.6.23, which we fixed by using CPU hotplug
>>> to disable_nonboot_cpus() before the above code ever got run.
>> I have noticed that the shutdown path is quite weird, shutting down
>> sysdev devices differently depending on the type of shutdown, which is
>> probably not good.
>> But what change are you talking about for the poweroff problem?  I have
>> a _lot_ of people reporting that 2.6.22 is not powering off for them and
>> I can't seem to figure it out.  Do you have a changeset for something
>> that went in to fix this issue?
> ..
>
> Well, the real bugs that cause the problem are described by me above.
> I don't have a fix for those, but the workaround is in 2.6.23
> under git 4047727e5ae33f9b8d2b7766d1994ea6e5ec2991 Fix SMP poweroff hangs.
>
> With that workaround, there's no more hanging on halt,
> though there could still be a hang on reboot.
>
> A problem with that workaround is that it has no effect unless
> the CPU hotplug code is configured (CONFIG_PM_SLEEP_SMP and pals).

Hm, that's not going to be a fix for 2.6.22 as we already do a call to
that function in that call when shutting down, something in the
2.6.23-rc series must have changed that logic to make it required again.

Oh well, thanks anyway, I'll keep trying to track this down.

greg k-h

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 17:50       ` Kay Sievers
  2007-11-15 18:18         ` Greg KH
@ 2007-11-15 19:01         ` Greg KH
  2007-11-16  1:13           ` Yasunori Goto
  1 sibling, 1 reply; 18+ messages in thread
From: Greg KH @ 2007-11-15 19:01 UTC (permalink / raw)
  To: Kay Sievers; +Cc: Yasunori Goto, Andrew Morton, Alexey Dobriyan, linux-kernel

On Thu, Nov 15, 2007 at 06:50:06PM +0100, Kay Sievers wrote:
> On Nov 15, 2007 5:34 PM, Greg KH <gregkh@suse.de> wrote:
> > On Thu, Nov 15, 2007 at 09:55:34PM +0900, Yasunori Goto wrote:
> > > > On Thu, 15 Nov 2007 12:11:58 +0300 Alexey Dobriyan <adobriyan@sw.ru> wrote:
> > > >
> > > > > Three boxes rarely oops during reboot or poweroff with 2.6.24-rc2-mm1
> > > > > (1) and during 2.6.24 cycle (2):
> > > > >
> > > > >   kernel_restart
> > > > >   sys_reboot
> > > > >   [garbage]
> > > > > Code: 8b 88 a8 00 00 00 85 c9 74 04 89
> > > > > EIP is at device_shutdown+0x32/0x60
> > > >
> > > > Yes, all my test boxes did that - it's what I referred to in the releaee
> > > > notes.  Greg is pondering the problem - seem he's the only person who
> > > > cannot reproduce it ;)
> > >
> > > Fortunately, my ia64 box reproduces this oops "every time".
> > > So, I could chase it.
> > >
> > > device_shutdown() function in drivers/base/power/shutdown.c
> > > is followings.
> > > -----------
> > > /**
> > >  * device_shutdown - call ->shutdown() on each device to shutdown.
> > >  */
> > > void device_shutdown(void)
> > > {
> > >         struct device * dev, *devn;
> > >
> > >         list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
> > >                                 kobj.entry) {
> > >                 if (dev->bus && dev->bus->shutdown) {
> > >                         dev_dbg(dev, "shutdown\n");
> > >                         dev->bus->shutdown(dev);
> > >                 } else if (dev->driver && dev->driver->shutdown) {
> > >                         dev_dbg(dev, "shutdown\n");
> > >                         dev->driver->shutdown(dev);
> > >                 }
> > >         }
> > > }
> > > --------
> > > When oops occured, dev->driver pointed kset_ktype's address,
> > > and dev->driver->shutdown was the address of bus_type_list.
> > > So, Oops was caused by "Illegal operation fault".
> > > kset_ktypes is pointed by system_kset.
> > >
> > > If my understanding is correct, this loop can't distinguish between
> > > struct device and struct kset, but both are connected in this list,
> > > right? It may be the cause of this.
> >
> > Hm, no, it should just be a list of devices for the kset, but I'll go
> > verify that this is correct.
> 
> Care to try this:
>   +       system_kset = kset_create_and_register("system", NULL,
>   +                                              &devices_kset->kobj, NULL);
> 
> We should not join the kset, only use it as a parent.

Yes, that fixes the problem for me!

Can anyone else verify this?

I'll go rework the whole patch series now, as we should never be
allowing a kset as a parameter to that function; it's just wrong.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 18:48             ` Greg KH
@ 2007-11-15 19:13               ` Mark Lord
  0 siblings, 0 replies; 18+ messages in thread
From: Mark Lord @ 2007-11-15 19:13 UTC (permalink / raw)
  To: Greg KH; +Cc: Yasunori Goto, Andrew Morton, Alexey Dobriyan, linux-kernel

Greg KH wrote:
> On Thu, Nov 15, 2007 at 01:29:04PM -0500, Mark Lord wrote:
>> Greg KH wrote:
>>> On Thu, Nov 15, 2007 at 12:07:48PM -0500, Mark Lord wrote:
>> ..
>>>> Greg, I don't know if this is relevant or not,
>>>> but x86 has bugs in the halt/reboot code for SMP.
>>>>
>>>> Specifically, in native_smp_send_stop() the code now uses
>>>> spin_trylock() to "lock" the shared call buffers,
>>>> but then ignores the result.
>>>>
>>>> This means that multiple CPUs can/will clobber each other
>>>> in that code.
>>>>
>>>> The second bug, is that this code does not wait for the
>>>> target CPUs to actually stop before it continues.
>>>>
>>>> This was the real cause of the failure-to-poweroff problems
>>>> I was having with 2.6.23, which we fixed by using CPU hotplug
>>>> to disable_nonboot_cpus() before the above code ever got run.
>>> I have noticed that the shutdown path is quite weird, shutting down
>>> sysdev devices differently depending on the type of shutdown, which is
>>> probably not good.
>>> But what change are you talking about for the poweroff problem?  I have
>>> a _lot_ of people reporting that 2.6.22 is not powering off for them and
>>> I can't seem to figure it out.  Do you have a changeset for something
>>> that went in to fix this issue?
>> ..
>>
>> Well, the real bugs that cause the problem are described by me above.
>> I don't have a fix for those, but the workaround is in 2.6.23
>> under git 4047727e5ae33f9b8d2b7766d1994ea6e5ec2991 Fix SMP poweroff hangs.
>>
>> With that workaround, there's no more hanging on halt,
>> though there could still be a hang on reboot.
>>
>> A problem with that workaround is that it has no effect unless
>> the CPU hotplug code is configured (CONFIG_PM_SLEEP_SMP and pals).
> 
> Hm, that's not going to be a fix for 2.6.22 as we already do a call to
> that function in that call when shutting down, something in the
> 2.6.23-rc series must have changed that logic to make it required again.
..

But remember, it has no effect unless the kernel has suspend/resume
configured in.  Many desktops don't.

And the real unfixed bugs are still there in native_smp_send_stop()
as described above.  You could prod whoever takes care of that code
to fix them.

Cheers

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-15 19:01         ` Greg KH
@ 2007-11-16  1:13           ` Yasunori Goto
  2007-11-16  1:22             ` Greg KH
  0 siblings, 1 reply; 18+ messages in thread
From: Yasunori Goto @ 2007-11-16  1:13 UTC (permalink / raw)
  To: Greg KH; +Cc: Kay Sievers, Andrew Morton, Alexey Dobriyan, linux-kernel

> > 
> > Care to try this:
> >   +       system_kset = kset_create_and_register("system", NULL,
> >   +                                              &devices_kset->kobj, NULL);
> > 
> > We should not join the kset, only use it as a parent.
> 
> Yes, that fixes the problem for me!
> 
> Can anyone else verify this?

I confirmed it fixed the problem. :-)

Thanks.


-- 
Yasunori Goto 



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-16  1:13           ` Yasunori Goto
@ 2007-11-16  1:22             ` Greg KH
  2007-11-19  9:55               ` Cornelia Huck
  0 siblings, 1 reply; 18+ messages in thread
From: Greg KH @ 2007-11-16  1:22 UTC (permalink / raw)
  To: Yasunori Goto; +Cc: Kay Sievers, Andrew Morton, Alexey Dobriyan, linux-kernel

On Fri, Nov 16, 2007 at 10:13:42AM +0900, Yasunori Goto wrote:
> > > 
> > > Care to try this:
> > >   +       system_kset = kset_create_and_register("system", NULL,
> > >   +                                              &devices_kset->kobj, NULL);
> > > 
> > > We should not join the kset, only use it as a parent.
> > 
> > Yes, that fixes the problem for me!
> > 
> > Can anyone else verify this?
> 
> I confirmed it fixed the problem. :-)

Thanks for testing, the next -mm should have this fix.

greg k-h

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: EIP is at device_shutdown+0x32/0x60
  2007-11-16  1:22             ` Greg KH
@ 2007-11-19  9:55               ` Cornelia Huck
  0 siblings, 0 replies; 18+ messages in thread
From: Cornelia Huck @ 2007-11-19  9:55 UTC (permalink / raw)
  To: Greg KH
  Cc: Yasunori Goto, Kay Sievers, Andrew Morton, Alexey Dobriyan, linux-kernel

On Thu, 15 Nov 2007 17:22:11 -0800,
Greg KH <gregkh@suse.de> wrote:

> On Fri, Nov 16, 2007 at 10:13:42AM +0900, Yasunori Goto wrote:
> > > > 
> > > > Care to try this:
> > > >   +       system_kset = kset_create_and_register("system", NULL,
> > > >   +                                              &devices_kset->kobj, NULL);
> > > > 
> > > > We should not join the kset, only use it as a parent.
> > > 
> > > Yes, that fixes the problem for me!
> > > 
> > > Can anyone else verify this?
> > 
> > I confirmed it fixed the problem. :-)
> 
> Thanks for testing, the next -mm should have this fix.

Good, since this fixes things for me on s390 as well.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2007-11-19  9:56 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-11-15  9:11 EIP is at device_shutdown+0x32/0x60 Alexey Dobriyan
2007-11-15  9:44 ` Andrew Morton
2007-11-15  9:59   ` Alexey Dobriyan
2007-11-15 12:55   ` Yasunori Goto
2007-11-15 13:15     ` Cornelia Huck
2007-11-15 16:34     ` Greg KH
2007-11-15 17:07       ` Mark Lord
2007-11-15 18:23         ` Greg KH
2007-11-15 18:29           ` Mark Lord
2007-11-15 18:48             ` Greg KH
2007-11-15 19:13               ` Mark Lord
2007-11-15 17:50       ` Kay Sievers
2007-11-15 18:18         ` Greg KH
2007-11-15 19:01         ` Greg KH
2007-11-16  1:13           ` Yasunori Goto
2007-11-16  1:22             ` Greg KH
2007-11-19  9:55               ` Cornelia Huck
2007-11-15 14:40   ` Jeff Dike

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.