From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752988AbcG2Sjg (ORCPT ); Fri, 29 Jul 2016 14:39:36 -0400 Received: from arcturus.aphlor.org ([188.246.204.175]:45870 "EHLO arcturus.aphlor.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751602AbcG2Sjd (ORCPT ); Fri, 29 Jul 2016 14:39:33 -0400 Date: Fri, 29 Jul 2016 14:39:25 -0400 From: Dave Jones To: Andrey Ryabinin Cc: Linux Kernel , Linus Torvalds , "linux-mm@kvack.org" Subject: Re: [4.7+] various memory corruption reports. Message-ID: <20160729183925.GA28376@codemonkey.org.uk> Mail-Followup-To: Dave Jones , Andrey Ryabinin , Linux Kernel , Linus Torvalds , "linux-mm@kvack.org" References: <20160729150513.GB29545@codemonkey.org.uk> <20160729151907.GC29545@codemonkey.org.uk> <20160729154929.GA30611@codemonkey.org.uk> <579B9339.7030707@gmail.com> <579B98B8.40007@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <579B98B8.40007@gmail.com> User-Agent: Mutt/1.6.0 (2016-04-01) X-Spam-Flag: skipped (authorised relay user) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Fri, Jul 29, 2016 at 08:56:08PM +0300, Andrey Ryabinin wrote: > >> > I suspect this is false positives due to changes in KASAN. > >> > Bisection probably will point to > >> > 80a9201a5965f4715d5c09790862e0df84ce0614 ("mm, kasan: switch SLUB to > >> > stackdepot, enable memory quarantine for SLUB)" > >> > >> good call. reverting that changeset seems to have solved it. > > Could you please try with this? > Actually, this is not quite right, it should be like this: Seems to have stopped the corruption, but now I get NMI watchdog traces... 
[ 109.158553] NMI watchdog: Watchdog detected hard LOCKUP on cpu 2irq event stamp: 1411258 [ 109.158797] hardirqs last enabled at (1411257): [] get_page_from_freelist+0x897/0x1bb0 [ 109.159034] hardirqs last disabled at (1411258): [] _raw_spin_lock_irq+0x19/0x80 [ 109.159246] softirqs last enabled at (1411096): [] __do_softirq+0x66e/0x9a7 [ 109.159457] softirqs last disabled at (1411089): [] irq_exit+0x118/0x140 [ 109.159646] CPU: 2 PID: 2998 Comm: trinity-c6 Not tainted 4.7.0-think+ #12 [ 109.159883] ffff880461380434 00000000f39ba2d9 ffff88046880bab8 ffffffff98a48532 [ 109.160050] 0000000000000000 0000000000000002 ffff88046880bad8 ffffffff98357fbb [ 109.160218] ffff880461380008 ffff88046880bc00 ffff88046880bb20 ffffffff9842f7d1 [ 109.160385] Call Trace: [ 109.160439] [] dump_stack+0x68/0x96 [ 109.160568] [] watchdog_overflow_callback+0x15b/0x190 [ 109.160721] [] __perf_event_overflow+0x1b1/0x540 [ 109.172185] [] perf_event_overflow+0x14/0x20 [ 109.183591] [] intel_pmu_handle_irq+0x36a/0xad0 [ 109.194988] [] ? intel_pmu_save_and_restart+0xe0/0xe0 [ 109.206444] [] ? nmi_handle+0x2b9/0x480 [ 109.218066] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.229782] [] perf_event_nmi_handler+0x2c/0x50 [ 109.241370] [] nmi_handle+0x128/0x480 [ 109.252853] [] ? nmi_handle+0x5/0x480 [ 109.264293] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.275713] [] default_do_nmi+0xb2/0x210 [ 109.287158] [] do_nmi+0x1aa/0x220 [ 109.298562] [] end_repeat_nmi+0x1a/0x1e [ 109.309964] [] ? __add_to_page_cache_locked+0x335/0xaa0 [ 109.321366] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.332776] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.344091] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.355283] <> [] __kernel_text_address+0x86/0xb0 [ 109.366438] [] print_context_stack+0x7b/0x100 [ 109.377709] [] dump_trace+0x12b/0x350 [ 109.388863] [] ? 
qlist_free_all+0x42/0x100 [ 109.399976] [] save_stack_trace+0x2b/0x50 [ 109.411082] [] set_track+0x83/0x140 [ 109.422152] [] free_debug_processing+0x1aa/0x420 [ 109.433268] [] ? qlist_free_all+0x42/0x100 [ 109.444334] [] ? qlist_free_all+0x42/0x100 [ 109.455345] [] __slab_free+0x1d6/0x2e0 [ 109.466262] [] ? debug_smp_processor_id+0x17/0x20 [ 109.477239] [] ? get_lock_stats+0x1d/0x90 [ 109.487929] [] ? qlist_free_all+0x42/0x100 [ 109.498521] [] ___cache_free+0xb6/0xd0 [ 109.509125] [] qlist_free_all+0x83/0x100 [ 109.519652] [] quarantine_reduce+0x177/0x1b0 [ 109.530159] [] kasan_kmalloc+0xf3/0x100 [ 109.540676] [] ? trace_hardirqs_off+0xd/0x10 [ 109.551159] [] ? radix_tree_node_alloc+0x96/0x190 [ 109.561658] [] kasan_slab_alloc+0x12/0x20 [ 109.572120] [] kmem_cache_alloc+0x109/0x3e0 [ 109.582555] [] ? get_mem_cgroup_from_mm+0x3c1/0x4c0 [ 109.593006] [] radix_tree_node_alloc+0x96/0x190 [ 109.603406] [] __radix_tree_create+0x32b/0xa10 [ 109.613785] [] ? __add_to_page_cache_locked+0x300/0xaa0 [ 109.624148] [] __add_to_page_cache_locked+0x335/0xaa0 [ 109.634508] [] ? filemap_map_pages+0xcc0/0xcc0 [ 109.644814] [] ? gfp_pfmemalloc_allowed+0x130/0x130 [ 109.655110] [] ? debug_smp_processor_id+0x17/0x20 [ 109.665296] [] ? get_lock_stats+0x1d/0x90 [ 109.675494] [] ? jbd2_journal_stop+0x8f1/0x1390 [ 109.685622] [] add_to_page_cache_lru+0xdd/0x2c0 [ 109.695761] [] ? add_to_page_cache_locked+0x20/0x20 [ 109.705885] [] ? find_get_entry+0x259/0x490 [ 109.715981] [] ? find_get_entry+0x5/0x490 [ 109.726074] [] pagecache_get_page+0x191/0x620 [ 109.736160] [] grab_cache_page_write_begin+0x51/0x80 [ 109.746224] [] ? rcu_read_lock_sched_held+0xf0/0x130 [ 109.756277] [] ext4_da_write_begin+0x1c2/0xaa0 [ 109.766287] [] ? ext4_write_begin+0xe90/0xe90 [ 109.776312] [] ? balance_dirty_pages_ratelimited+0x498/0x14c0 [ 109.786347] [] generic_perform_write+0x290/0x520 [ 109.796322] [] ? rcu_read_lock_sched_held+0xf0/0x130 [ 109.806352] [] ? 
generic_file_readonly_mmap+0x1b0/0x1b0 [ 109.816359] [] ? __mnt_drop_write_file+0x31/0x40 [ 109.826345] [] ? file_update_time+0x24a/0x3a0 [ 109.836261] [] ? should_remove_suid+0xc0/0xc0 [ 109.846155] [] ? get_lock_stats+0x1d/0x90 [ 109.855976] [] __generic_file_write_iter+0x314/0x530 [ 109.865741] [] ext4_file_write_iter+0x1b4/0xf10 [ 109.875549] [] ? debug_smp_processor_id+0x17/0x20 [ 109.885328] [] ? get_lock_stats+0x1d/0x90 [ 109.895081] [] ? ext4_unwritten_wait+0x1e0/0x1e0 [ 109.904849] [] ? debug_check_no_locks_freed+0x280/0x280 [ 109.914646] [] ? __might_fault+0xf6/0x1b0 [ 109.924394] [] ? __might_fault+0x166/0x1b0 [ 109.934012] [] ? kasan_check_write+0x14/0x20 [ 109.943677] [] do_iter_readv_writev+0x23f/0x510 [ 109.953248] [] ? vfs_iter_write+0x550/0x550 [ 109.962810] [] ? percpu_down_read+0x57/0xa0 [ 109.972324] [] ? __sb_start_write+0xb4/0xf0 [ 109.981729] [] do_readv_writev+0x394/0x6a0 [ 109.991103] [] ? ext4_unwritten_wait+0x1e0/0x1e0 [ 110.000348] [] ? vfs_write+0x4c0/0x4c0 [ 110.009503] [] ? mark_held_locks+0xcf/0x130 [ 110.018660] [] ? mutex_lock_nested+0x4ed/0x8d0 [ 110.027762] [] ? mutex_lock_nested+0x508/0x8d0 [ 110.036778] [] ? __fdget_pos+0x92/0xc0 [ 110.045809] [] ? debug_check_no_locks_freed+0x280/0x280 [ 110.054841] [] ? do_setitimer+0x389/0x7f0 [ 110.063794] [] ? __fdget_pos+0x92/0xc0 [ 110.072727] [] ? trace_hardirqs_on_caller+0x3f9/0x580 [ 110.081643] [] ? mutex_lock_interruptible_nested+0x9e0/0x9e0 [ 110.090467] [] ? debug_smp_processor_id+0x17/0x20 [ 110.099135] [] ? get_lock_stats+0x1d/0x90 [ 110.107815] [] vfs_writev+0x75/0xb0 [ 110.116379] [] ? __fdget_pos+0x92/0xc0 [ 110.124868] [] do_writev+0xe5/0x280 [ 110.133279] [] ? vfs_writev+0xb0/0xb0 [ 110.141704] [] ? SyS_readv+0x20/0x20 [ 110.150088] [] SyS_writev+0x10/0x20 [ 110.158347] [] do_syscall_64+0x1a0/0x4e0 [ 110.166496] [] ? 
trace_hardirqs_on_thunk+0x1a/0x1c [ 110.174666] [] entry_SYSCALL64_slow_path+0x25/0x25 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-lf0-f69.google.com (mail-lf0-f69.google.com [209.85.215.69]) by kanga.kvack.org (Postfix) with ESMTP id ED7CF6B0253 for ; Fri, 29 Jul 2016 14:39:32 -0400 (EDT) Received: by mail-lf0-f69.google.com with SMTP id p85so40220163lfg.3 for ; Fri, 29 Jul 2016 11:39:32 -0700 (PDT) Received: from arcturus.aphlor.org (arcturus.ipv6.aphlor.org. [2a03:9800:10:4a::2]) by mx.google.com with ESMTPS id eo1si20110624wjb.236.2016.07.29.11.39.30 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Fri, 29 Jul 2016 11:39:30 -0700 (PDT) Date: Fri, 29 Jul 2016 14:39:25 -0400 From: Dave Jones Subject: Re: [4.7+] various memory corruption reports. Message-ID: <20160729183925.GA28376@codemonkey.org.uk> References: <20160729150513.GB29545@codemonkey.org.uk> <20160729151907.GC29545@codemonkey.org.uk> <20160729154929.GA30611@codemonkey.org.uk> <579B9339.7030707@gmail.com> <579B98B8.40007@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <579B98B8.40007@gmail.com> Sender: owner-linux-mm@kvack.org List-ID: To: Andrey Ryabinin Cc: Linux Kernel , Linus Torvalds , "linux-mm@kvack.org" On Fri, Jul 29, 2016 at 08:56:08PM +0300, Andrey Ryabinin wrote: > >> > I suspect this is false positives due to changes in KASAN. > >> > Bisection probably will point to > >> > 80a9201a5965f4715d5c09790862e0df84ce0614 ("mm, kasan: switch SLUB to > >> > stackdepot, enable memory quarantine for SLUB)" > >> > >> good call. reverting that changeset seems to have solved it. > > Could you please try with this? > Actually, this is not quite right, it should be like this: Seems to have stopped the corruption, but now I get NMI watchdog traces... 
[ 109.158553] NMI watchdog: Watchdog detected hard LOCKUP on cpu 2irq event stamp: 1411258 [ 109.158797] hardirqs last enabled at (1411257): [] get_page_from_freelist+0x897/0x1bb0 [ 109.159034] hardirqs last disabled at (1411258): [] _raw_spin_lock_irq+0x19/0x80 [ 109.159246] softirqs last enabled at (1411096): [] __do_softirq+0x66e/0x9a7 [ 109.159457] softirqs last disabled at (1411089): [] irq_exit+0x118/0x140 [ 109.159646] CPU: 2 PID: 2998 Comm: trinity-c6 Not tainted 4.7.0-think+ #12 [ 109.159883] ffff880461380434 00000000f39ba2d9 ffff88046880bab8 ffffffff98a48532 [ 109.160050] 0000000000000000 0000000000000002 ffff88046880bad8 ffffffff98357fbb [ 109.160218] ffff880461380008 ffff88046880bc00 ffff88046880bb20 ffffffff9842f7d1 [ 109.160385] Call Trace: [ 109.160439] [] dump_stack+0x68/0x96 [ 109.160568] [] watchdog_overflow_callback+0x15b/0x190 [ 109.160721] [] __perf_event_overflow+0x1b1/0x540 [ 109.172185] [] perf_event_overflow+0x14/0x20 [ 109.183591] [] intel_pmu_handle_irq+0x36a/0xad0 [ 109.194988] [] ? intel_pmu_save_and_restart+0xe0/0xe0 [ 109.206444] [] ? nmi_handle+0x2b9/0x480 [ 109.218066] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.229782] [] perf_event_nmi_handler+0x2c/0x50 [ 109.241370] [] nmi_handle+0x128/0x480 [ 109.252853] [] ? nmi_handle+0x5/0x480 [ 109.264293] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.275713] [] default_do_nmi+0xb2/0x210 [ 109.287158] [] do_nmi+0x1aa/0x220 [ 109.298562] [] end_repeat_nmi+0x1a/0x1e [ 109.309964] [] ? __add_to_page_cache_locked+0x335/0xaa0 [ 109.321366] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.332776] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.344091] [] ? is_ftrace_trampoline+0xa9/0x100 [ 109.355283] <> [] __kernel_text_address+0x86/0xb0 [ 109.366438] [] print_context_stack+0x7b/0x100 [ 109.377709] [] dump_trace+0x12b/0x350 [ 109.388863] [] ? 
qlist_free_all+0x42/0x100 [ 109.399976] [] save_stack_trace+0x2b/0x50 [ 109.411082] [] set_track+0x83/0x140 [ 109.422152] [] free_debug_processing+0x1aa/0x420 [ 109.433268] [] ? qlist_free_all+0x42/0x100 [ 109.444334] [] ? qlist_free_all+0x42/0x100 [ 109.455345] [] __slab_free+0x1d6/0x2e0 [ 109.466262] [] ? debug_smp_processor_id+0x17/0x20 [ 109.477239] [] ? get_lock_stats+0x1d/0x90 [ 109.487929] [] ? qlist_free_all+0x42/0x100 [ 109.498521] [] ___cache_free+0xb6/0xd0 [ 109.509125] [] qlist_free_all+0x83/0x100 [ 109.519652] [] quarantine_reduce+0x177/0x1b0 [ 109.530159] [] kasan_kmalloc+0xf3/0x100 [ 109.540676] [] ? trace_hardirqs_off+0xd/0x10 [ 109.551159] [] ? radix_tree_node_alloc+0x96/0x190 [ 109.561658] [] kasan_slab_alloc+0x12/0x20 [ 109.572120] [] kmem_cache_alloc+0x109/0x3e0 [ 109.582555] [] ? get_mem_cgroup_from_mm+0x3c1/0x4c0 [ 109.593006] [] radix_tree_node_alloc+0x96/0x190 [ 109.603406] [] __radix_tree_create+0x32b/0xa10 [ 109.613785] [] ? __add_to_page_cache_locked+0x300/0xaa0 [ 109.624148] [] __add_to_page_cache_locked+0x335/0xaa0 [ 109.634508] [] ? filemap_map_pages+0xcc0/0xcc0 [ 109.644814] [] ? gfp_pfmemalloc_allowed+0x130/0x130 [ 109.655110] [] ? debug_smp_processor_id+0x17/0x20 [ 109.665296] [] ? get_lock_stats+0x1d/0x90 [ 109.675494] [] ? jbd2_journal_stop+0x8f1/0x1390 [ 109.685622] [] add_to_page_cache_lru+0xdd/0x2c0 [ 109.695761] [] ? add_to_page_cache_locked+0x20/0x20 [ 109.705885] [] ? find_get_entry+0x259/0x490 [ 109.715981] [] ? find_get_entry+0x5/0x490 [ 109.726074] [] pagecache_get_page+0x191/0x620 [ 109.736160] [] grab_cache_page_write_begin+0x51/0x80 [ 109.746224] [] ? rcu_read_lock_sched_held+0xf0/0x130 [ 109.756277] [] ext4_da_write_begin+0x1c2/0xaa0 [ 109.766287] [] ? ext4_write_begin+0xe90/0xe90 [ 109.776312] [] ? balance_dirty_pages_ratelimited+0x498/0x14c0 [ 109.786347] [] generic_perform_write+0x290/0x520 [ 109.796322] [] ? rcu_read_lock_sched_held+0xf0/0x130 [ 109.806352] [] ? 
generic_file_readonly_mmap+0x1b0/0x1b0 [ 109.816359] [] ? __mnt_drop_write_file+0x31/0x40 [ 109.826345] [] ? file_update_time+0x24a/0x3a0 [ 109.836261] [] ? should_remove_suid+0xc0/0xc0 [ 109.846155] [] ? get_lock_stats+0x1d/0x90 [ 109.855976] [] __generic_file_write_iter+0x314/0x530 [ 109.865741] [] ext4_file_write_iter+0x1b4/0xf10 [ 109.875549] [] ? debug_smp_processor_id+0x17/0x20 [ 109.885328] [] ? get_lock_stats+0x1d/0x90 [ 109.895081] [] ? ext4_unwritten_wait+0x1e0/0x1e0 [ 109.904849] [] ? debug_check_no_locks_freed+0x280/0x280 [ 109.914646] [] ? __might_fault+0xf6/0x1b0 [ 109.924394] [] ? __might_fault+0x166/0x1b0 [ 109.934012] [] ? kasan_check_write+0x14/0x20 [ 109.943677] [] do_iter_readv_writev+0x23f/0x510 [ 109.953248] [] ? vfs_iter_write+0x550/0x550 [ 109.962810] [] ? percpu_down_read+0x57/0xa0 [ 109.972324] [] ? __sb_start_write+0xb4/0xf0 [ 109.981729] [] do_readv_writev+0x394/0x6a0 [ 109.991103] [] ? ext4_unwritten_wait+0x1e0/0x1e0 [ 110.000348] [] ? vfs_write+0x4c0/0x4c0 [ 110.009503] [] ? mark_held_locks+0xcf/0x130 [ 110.018660] [] ? mutex_lock_nested+0x4ed/0x8d0 [ 110.027762] [] ? mutex_lock_nested+0x508/0x8d0 [ 110.036778] [] ? __fdget_pos+0x92/0xc0 [ 110.045809] [] ? debug_check_no_locks_freed+0x280/0x280 [ 110.054841] [] ? do_setitimer+0x389/0x7f0 [ 110.063794] [] ? __fdget_pos+0x92/0xc0 [ 110.072727] [] ? trace_hardirqs_on_caller+0x3f9/0x580 [ 110.081643] [] ? mutex_lock_interruptible_nested+0x9e0/0x9e0 [ 110.090467] [] ? debug_smp_processor_id+0x17/0x20 [ 110.099135] [] ? get_lock_stats+0x1d/0x90 [ 110.107815] [] vfs_writev+0x75/0xb0 [ 110.116379] [] ? __fdget_pos+0x92/0xc0 [ 110.124868] [] do_writev+0xe5/0x280 [ 110.133279] [] ? vfs_writev+0xb0/0xb0 [ 110.141704] [] ? SyS_readv+0x20/0x20 [ 110.150088] [] SyS_writev+0x10/0x20 [ 110.158347] [] do_syscall_64+0x1a0/0x4e0 [ 110.166496] [] ? 
trace_hardirqs_on_thunk+0x1a/0x1c [ 110.174666] [] entry_SYSCALL64_slow_path+0x25/0x25 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org