* [RFC PATCHv2] mm: use stack_depot for recording kmemleak's backtrace
@ 2022-10-12 8:01 zhaoyang.huang
[not found] ` <202210131309.fe5427b-oliver.sang@intel.com>
0 siblings, 1 reply; 4+ messages in thread
From: zhaoyang.huang @ 2022-10-12 8:01 UTC (permalink / raw)
To: Andrew Morton, Matthew Wilcox, Vlastimil Babka, Zhaoyang Huang,
linux-mm, linux-block, linux-kernel, ke.wang, steve.kang
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Using stack_depot to record kmemleak's backtrace which has been implemented
on slub for reducing redundant information.
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
changes of v2: fix bugs of stack_depot_init related issue
---
---
mm/kmemleak.c | 45 +++++++++++++++++++++++++++++++++++----------
1 file changed, 35 insertions(+), 10 deletions(-)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 1eddc01..0f78774 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -79,6 +79,7 @@
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/stacktrace.h>
+#include <linux/stackdepot.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
@@ -159,8 +160,7 @@ struct kmemleak_object {
u32 checksum;
/* memory ranges to be scanned inside an object (empty for all) */
struct hlist_head area_list;
- unsigned long trace[MAX_TRACE];
- unsigned int trace_len;
+ depot_stack_handle_t trace_handle;
unsigned long jiffies; /* creation timestamp */
pid_t pid; /* pid of the current task */
char comm[TASK_COMM_LEN]; /* executable name */
@@ -346,8 +346,11 @@ static void print_unreferenced(struct seq_file *seq,
struct kmemleak_object *object)
{
int i;
+ unsigned long *entries;
+ unsigned int nr_entries;
unsigned int msecs_age = jiffies_to_msecs(jiffies - object->jiffies);
+ nr_entries = stack_depot_fetch(object->trace_handle, &entries);
warn_or_seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n",
object->pointer, object->size);
warn_or_seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu (age %d.%03ds)\n",
@@ -356,10 +359,10 @@ static void print_unreferenced(struct seq_file *seq,
hex_dump_object(seq, object);
warn_or_seq_printf(seq, " backtrace:\n");
- for (i = 0; i < object->trace_len; i++) {
- void *ptr = (void *)object->trace[i];
- warn_or_seq_printf(seq, " [<%p>] %pS\n", ptr, ptr);
- }
+ for (i = 0; i < nr_entries; i++) {
+ void *ptr = (void *)entries[i];
+ warn_or_seq_printf(seq, " [<%p>] %pS\n", ptr, ptr);
+ }
}
/*
@@ -378,7 +381,8 @@ static void dump_object_info(struct kmemleak_object *object)
pr_notice(" flags = 0x%x\n", object->flags);
pr_notice(" checksum = %u\n", object->checksum);
pr_notice(" backtrace:\n");
- stack_trace_print(object->trace, object->trace_len, 4);
+ if(object->trace_handle)
+ stack_depot_print(object->trace_handle);
}
/*
@@ -591,6 +595,27 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
return object;
}
+#ifdef CONFIG_STACKDEPOT
+static noinline depot_stack_handle_t set_track_prepare(void)
+{
+ depot_stack_handle_t trace_handle;
+ unsigned long entries[MAX_TRACE];
+ unsigned int nr_entries;
+
+ if (!kmemleak_initialized)
+ stack_depot_init();
+ nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
+ trace_handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+
+ return trace_handle;
+}
+#else
+static inline depot_stack_handle_t set_track_prepare(void)
+{
+ return 0;
+}
+#endif
+
/*
* Save stack trace to the given array of MAX_TRACE size.
*/
@@ -654,7 +679,7 @@ static struct kmemleak_object *__create_object(unsigned long ptr, size_t size,
}
/* kernel backtrace */
- object->trace_len = __save_stack_trace(object->trace);
+ object->trace_handle = set_track_prepare();
raw_spin_lock_irqsave(&kmemleak_lock, flags);
@@ -694,7 +719,6 @@ static struct kmemleak_object *__create_object(unsigned long ptr, size_t size,
rb_link_node(&object->rb_node, rb_parent, link);
rb_insert_color(&object->rb_node, is_phys ? &object_phys_tree_root :
&object_tree_root);
-
list_add_tail_rcu(&object->object_list, &object_list);
out:
raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
@@ -1094,7 +1118,7 @@ void __ref kmemleak_update_trace(const void *ptr)
}
raw_spin_lock_irqsave(&object->lock, flags);
- object->trace_len = __save_stack_trace(object->trace);
+ object->trace_handle = set_track_prepare();
raw_spin_unlock_irqrestore(&object->lock, flags);
put_object(object);
@@ -2064,6 +2088,7 @@ void __init kmemleak_init(void)
if (kmemleak_error)
return;
+ stack_depot_init();
jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [mm] db8d280d38: PANIC:early_exception
[not found] ` <202210131309.fe5427b-oliver.sang@intel.com>
@ 2022-10-13 8:38 ` Zhaoyang Huang
2022-10-13 8:51 ` Vlastimil Babka
0 siblings, 1 reply; 4+ messages in thread
From: Zhaoyang Huang @ 2022-10-13 8:38 UTC (permalink / raw)
To: kernel test robot
Cc: zhaoyang.huang, lkp, lkp, linux-mm, Andrew Morton,
Matthew Wilcox, Vlastimil Babka, linux-block, linux-kernel,
ke.wang, steve.kang
@Vlastimil Could you please have an eye on this series of robot
reports which are caused by stack_depot_init related issues. The
problem arises from a very early access of stack_depot_save/init by
kmemleak within setup_arch which happens even before
stack_depot_early_init and zone related data ready. I would like to
suggest adding a criteria at the entrance check of stack_depot_save
which help the stackdepot API more aggregation and the caller free to
call
[ 0.062350][ T0] ? stack_depot_init.cold+0x5/0xbd
[ 0.063072][ T0] ? set_track_prepare+0x6e/0x80
[ 0.063957][ T0] ?
__raw_callee_save___native_queued_spin_unlock+0x11/0x22
[ 0.064952][ T0] ? write_comp_data+0x2a/0x80
[ 0.065623][ T0] ? strncpy+0x2f/0x60
[ 0.066205][ T0] ? __create_object+0x10c/0x3c0
[ 0.066904][ T0] ? kmemleak_alloc_phys+0x6f/0x80
[ 0.067561][ T0] ? memblock_alloc_range_nid+0x274/0x28f
[ 0.068396][ T0] ? memblock_phys_alloc_range+0xa4/0xb3
[ 0.069200][ T0] ? reserve_real_mode+0x87/0xd7
[ 0.069895][ T0] ? setup_arch+0x6a9/0x995
[ 0.070526][ T0] ? start_kernel+0x7c/0x854
[ 0.071195][ T0] ? load_ucode_bsp+0x1bb/0x1c6
[ 0.071875][ T0] ? secondary_startup_64_no_verify+0xe0/0xeb
[ 0.072682][ T0] </TASK>
On Thu, Oct 13, 2022 at 1:58 PM kernel test robot <oliver.sang@intel.com> wrote:
>
>
> Hi zhaoyang.huang,
>
> seems this is the fix based on our report
> "[mm] 0e949320db: BUG:kernel_NULL_pointer_dereference,address"
> at
> https://lore.kernel.org/all/202210121406.d4ebc9bc-oliver.sang@intel.com/
> but now it seems have new issue. report FYI
>
>
> Greeting,
>
> FYI, we noticed the following commit (built with gcc-11):
>
> commit: db8d280d38efb061ad1a57ce060cbb917a4cf503 ("[RFC PATCHv2] mm: use stack_depot for recording kmemleak's backtrace")
> url: https://github.com/intel-lab-lkp/linux/commits/zhaoyang-huang/mm-use-stack_depot-for-recording-kmemleak-s-backtrace/20221012-160458
> base: https://git.kernel.org/cgit/linux/kernel/git/akpm/mm.git mm-everything
> patch link: https://lore.kernel.org/linux-mm/1665561689-29498-1-git-send-email-zhaoyang.huang@unisoc.com
> patch subject: [RFC PATCHv2] mm: use stack_depot for recording kmemleak's backtrace
>
> in testcase: boot
>
> on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>
>
> +-------------------------------+------------+------------+
> | | 95f1b43741 | db8d280d38 |
> +-------------------------------+------------+------------+
> | boot_successes | 20 | 0 |
> | boot_failures | 0 | 18 |
> | PANIC:early_exception | 0 | 18 |
> | RIP:nr_free_zone_pages | 0 | 18 |
> | BUG:kernel_hang_in_boot_stage | 0 | 18 |
> +-------------------------------+------------+------------+
>
>
> If you fix the issue, kindly add following tag
> | Reported-by: kernel test robot <oliver.sang@intel.com>
> | Link: https://lore.kernel.org/r/202210131309.fe5427b-oliver.sang@intel.com
>
>
> [ 0.029254][ T0] Scan for SMP in [mem 0x00000000-0x000003ff]
> [ 0.030178][ T0] Scan for SMP in [mem 0x0009fc00-0x0009ffff]
> [ 0.031080][ T0] Scan for SMP in [mem 0x000f0000-0x000fffff]
> [ 0.043370][ T0] found SMP MP-table at [mem 0x000f5ba0-0x000f5baf]
> [ 0.044301][ T0] mpc: f5bb0-f5c80
> PANIC: early exception 0x0e IP 10:ffffffff8149c282 error 0 cr2 0x1e08
> [ 0.045770][ T0] CPU: 0 PID: 0 Comm: swapper Not tainted 6.0.0-rc3-00711-gdb8d280d38ef #5
> [ 0.046970][ T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014
> [ 0.048356][ T0] RIP: 0010:nr_free_zone_pages (kbuild/src/x86_64-2/include/linux/mmzone.h:1478 kbuild/src/x86_64-2/include/linux/mmzone.h:1504 kbuild/src/x86_64-2/mm/page_alloc.c:5886)
> [ 0.049158][ T0] Code: e9 a3 5c 0b 00 0f 1f 00 e8 9b cd be ff 65 8b 05 14 b8 b8 7e 48 98 41 54 48 8b 04 c5 e0 e4 bc 83 53 89 fb 4c 8d 80 00 1e 00 00 <3b> b8 08 1e 00 00 72 5b 49 8b 10 45 31 e4 48 85 d2 75 05 eb 35 49
> All code
> ========
> 0: e9 a3 5c 0b 00 jmpq 0xb5ca8
> 5: 0f 1f 00 nopl (%rax)
> 8: e8 9b cd be ff callq 0xffffffffffbecda8
> d: 65 8b 05 14 b8 b8 7e mov %gs:0x7eb8b814(%rip),%eax # 0x7eb8b828
> 14: 48 98 cltq
> 16: 41 54 push %r12
> 18: 48 8b 04 c5 e0 e4 bc mov -0x7c431b20(,%rax,8),%rax
> 1f: 83
> 20: 53 push %rbx
> 21: 89 fb mov %edi,%ebx
> 23: 4c 8d 80 00 1e 00 00 lea 0x1e00(%rax),%r8
> 2a:* 3b b8 08 1e 00 00 cmp 0x1e08(%rax),%edi <-- trapping instruction
> 30: 72 5b jb 0x8d
> 32: 49 8b 10 mov (%r8),%rdx
> 35: 45 31 e4 xor %r12d,%r12d
> 38: 48 85 d2 test %rdx,%rdx
> 3b: 75 05 jne 0x42
> 3d: eb 35 jmp 0x74
> 3f: 49 rex.WB
>
> Code starting with the faulting instruction
> ===========================================
> 0: 3b b8 08 1e 00 00 cmp 0x1e08(%rax),%edi
> 6: 72 5b jb 0x63
> 8: 49 8b 10 mov (%r8),%rdx
> b: 45 31 e4 xor %r12d,%r12d
> e: 48 85 d2 test %rdx,%rdx
> 11: 75 05 jne 0x18
> 13: eb 35 jmp 0x4a
> 15: 49 rex.WB
> [ 0.051803][ T0] RSP: 0000:ffffffff83603d18 EFLAGS: 00010046 ORIG_RAX: 0000000000000000
> [ 0.052881][ T0] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0001ffff84b317e0
> [ 0.053932][ T0] RDX: 0000000000000485 RSI: 0001ffffffffffff RDI: 0000000000000002
> [ 0.055007][ T0] RBP: ffffffff84b077d0 R08: 0000000000001e00 R09: 0000000000000000
> [ 0.056054][ T0] R10: ffffffff81b16d30 R11: 0001ffff84b317e8 R12: 0000000000000001
> [ 0.057088][ T0] R13: ffffffff84b077d8 R14: 0000000000098000 R15: 0000000000007000
> [ 0.058160][ T0] FS: 0000000000000000(0000) GS:ffffffff842c9000(0000) knlGS:0000000000000000
> [ 0.059440][ T0] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 0.060381][ T0] CR2: 0000000000001e08 CR3: 00000000043b6000 CR4: 00000000000406a0
> [ 0.061516][ T0] Call Trace:
> [ 0.061969][ T0] <TASK>
> [ 0.062350][ T0] ? stack_depot_init.cold (kbuild/src/x86_64-2/lib/stackdepot.c:258)
> [ 0.063072][ T0] ? set_track_prepare (kbuild/src/x86_64-2/mm/slub.c:752)
> [ 0.063957][ T0] ? __raw_callee_save___native_queued_spin_unlock (??:?)
> [ 0.064952][ T0] ? write_comp_data (kbuild/src/x86_64-2/kernel/kcov.c:236)
>
>
> To reproduce:
>
> # build kernel
> cd linux
> cp config-6.0.0-rc3-00711-gdb8d280d38ef .config
> make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
> make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
> cd <mod-install-dir>
> find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz
>
>
> git clone https://github.com/intel/lkp-tests.git
> cd lkp-tests
> bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email
>
> # if come across any failure that blocks the test,
> # please remove ~/.lkp and /lkp dir to run from a clean state.
>
>
>
> --
> 0-DAY CI Kernel Test Service
> https://01.org/lkp
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [mm] db8d280d38: PANIC:early_exception
2022-10-13 8:38 ` [mm] db8d280d38: PANIC:early_exception Zhaoyang Huang
@ 2022-10-13 8:51 ` Vlastimil Babka
2022-10-13 8:59 ` Zhaoyang Huang
0 siblings, 1 reply; 4+ messages in thread
From: Vlastimil Babka @ 2022-10-13 8:51 UTC (permalink / raw)
To: Zhaoyang Huang, kernel test robot
Cc: zhaoyang.huang, lkp, lkp, linux-mm, Andrew Morton,
Matthew Wilcox, linux-block, linux-kernel, ke.wang, steve.kang
On 10/13/22 10:38, Zhaoyang Huang wrote:
> @Vlastimil Could you please have an eye on this series of robot
> reports which are caused by stack_depot_init related issues. The
> problem arises from a very early access of stack_depot_save/init by
> kmemleak within setup_arch which happens even before
> stack_depot_early_init and zone related data ready. I would like to
> suggest adding a criteria at the entrance check of stack_depot_save
> which help the stackdepot API more aggregation and the caller free to
> call
From the report it seems to me the problem is in your set_track_prepare():
+ if (!kmemleak_initialized)
+ stack_depot_init();
stack_depot_init() should be called only once and from well defined places,
this looks like neither condition is met.
> [ 0.062350][ T0] ? stack_depot_init.cold+0x5/0xbd
> [ 0.063072][ T0] ? set_track_prepare+0x6e/0x80
> [ 0.063957][ T0] ?
> __raw_callee_save___native_queued_spin_unlock+0x11/0x22
> [ 0.064952][ T0] ? write_comp_data+0x2a/0x80
> [ 0.065623][ T0] ? strncpy+0x2f/0x60
> [ 0.066205][ T0] ? __create_object+0x10c/0x3c0
> [ 0.066904][ T0] ? kmemleak_alloc_phys+0x6f/0x80
> [ 0.067561][ T0] ? memblock_alloc_range_nid+0x274/0x28f
> [ 0.068396][ T0] ? memblock_phys_alloc_range+0xa4/0xb3
> [ 0.069200][ T0] ? reserve_real_mode+0x87/0xd7
> [ 0.069895][ T0] ? setup_arch+0x6a9/0x995
> [ 0.070526][ T0] ? start_kernel+0x7c/0x854
> [ 0.071195][ T0] ? load_ucode_bsp+0x1bb/0x1c6
> [ 0.071875][ T0] ? secondary_startup_64_no_verify+0xe0/0xeb
> [ 0.072682][ T0] </TASK>
>
> On Thu, Oct 13, 2022 at 1:58 PM kernel test robot <oliver.sang@intel.com> wrote:
>>
>>
>> Hi zhaoyang.huang,
>>
>> seems this is the fix based on our report
>> "[mm] 0e949320db: BUG:kernel_NULL_pointer_dereference,address"
>> at
>> https://lore.kernel.org/all/202210121406.d4ebc9bc-oliver.sang@intel.com/
>> but now it seems have new issue. report FYI
>>
>>
>> Greeting,
>>
>> FYI, we noticed the following commit (built with gcc-11):
>>
>> commit: db8d280d38efb061ad1a57ce060cbb917a4cf503 ("[RFC PATCHv2] mm: use stack_depot for recording kmemleak's backtrace")
>> url: https://github.com/intel-lab-lkp/linux/commits/zhaoyang-huang/mm-use-stack_depot-for-recording-kmemleak-s-backtrace/20221012-160458
>> base: https://git.kernel.org/cgit/linux/kernel/git/akpm/mm.git mm-everything
>> patch link: https://lore.kernel.org/linux-mm/1665561689-29498-1-git-send-email-zhaoyang.huang@unisoc.com
>> patch subject: [RFC PATCHv2] mm: use stack_depot for recording kmemleak's backtrace
>>
>> in testcase: boot
>>
>> on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
>>
>> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>>
>>
>> +-------------------------------+------------+------------+
>> | | 95f1b43741 | db8d280d38 |
>> +-------------------------------+------------+------------+
>> | boot_successes | 20 | 0 |
>> | boot_failures | 0 | 18 |
>> | PANIC:early_exception | 0 | 18 |
>> | RIP:nr_free_zone_pages | 0 | 18 |
>> | BUG:kernel_hang_in_boot_stage | 0 | 18 |
>> +-------------------------------+------------+------------+
>>
>>
>> If you fix the issue, kindly add following tag
>> | Reported-by: kernel test robot <oliver.sang@intel.com>
>> | Link: https://lore.kernel.org/r/202210131309.fe5427b-oliver.sang@intel.com
>>
>>
>> [ 0.029254][ T0] Scan for SMP in [mem 0x00000000-0x000003ff]
>> [ 0.030178][ T0] Scan for SMP in [mem 0x0009fc00-0x0009ffff]
>> [ 0.031080][ T0] Scan for SMP in [mem 0x000f0000-0x000fffff]
>> [ 0.043370][ T0] found SMP MP-table at [mem 0x000f5ba0-0x000f5baf]
>> [ 0.044301][ T0] mpc: f5bb0-f5c80
>> PANIC: early exception 0x0e IP 10:ffffffff8149c282 error 0 cr2 0x1e08
>> [ 0.045770][ T0] CPU: 0 PID: 0 Comm: swapper Not tainted 6.0.0-rc3-00711-gdb8d280d38ef #5
>> [ 0.046970][ T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014
>> [ 0.048356][ T0] RIP: 0010:nr_free_zone_pages (kbuild/src/x86_64-2/include/linux/mmzone.h:1478 kbuild/src/x86_64-2/include/linux/mmzone.h:1504 kbuild/src/x86_64-2/mm/page_alloc.c:5886)
>> [ 0.049158][ T0] Code: e9 a3 5c 0b 00 0f 1f 00 e8 9b cd be ff 65 8b 05 14 b8 b8 7e 48 98 41 54 48 8b 04 c5 e0 e4 bc 83 53 89 fb 4c 8d 80 00 1e 00 00 <3b> b8 08 1e 00 00 72 5b 49 8b 10 45 31 e4 48 85 d2 75 05 eb 35 49
>> All code
>> ========
>> 0: e9 a3 5c 0b 00 jmpq 0xb5ca8
>> 5: 0f 1f 00 nopl (%rax)
>> 8: e8 9b cd be ff callq 0xffffffffffbecda8
>> d: 65 8b 05 14 b8 b8 7e mov %gs:0x7eb8b814(%rip),%eax # 0x7eb8b828
>> 14: 48 98 cltq
>> 16: 41 54 push %r12
>> 18: 48 8b 04 c5 e0 e4 bc mov -0x7c431b20(,%rax,8),%rax
>> 1f: 83
>> 20: 53 push %rbx
>> 21: 89 fb mov %edi,%ebx
>> 23: 4c 8d 80 00 1e 00 00 lea 0x1e00(%rax),%r8
>> 2a:* 3b b8 08 1e 00 00 cmp 0x1e08(%rax),%edi <-- trapping instruction
>> 30: 72 5b jb 0x8d
>> 32: 49 8b 10 mov (%r8),%rdx
>> 35: 45 31 e4 xor %r12d,%r12d
>> 38: 48 85 d2 test %rdx,%rdx
>> 3b: 75 05 jne 0x42
>> 3d: eb 35 jmp 0x74
>> 3f: 49 rex.WB
>>
>> Code starting with the faulting instruction
>> ===========================================
>> 0: 3b b8 08 1e 00 00 cmp 0x1e08(%rax),%edi
>> 6: 72 5b jb 0x63
>> 8: 49 8b 10 mov (%r8),%rdx
>> b: 45 31 e4 xor %r12d,%r12d
>> e: 48 85 d2 test %rdx,%rdx
>> 11: 75 05 jne 0x18
>> 13: eb 35 jmp 0x4a
>> 15: 49 rex.WB
>> [ 0.051803][ T0] RSP: 0000:ffffffff83603d18 EFLAGS: 00010046 ORIG_RAX: 0000000000000000
>> [ 0.052881][ T0] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0001ffff84b317e0
>> [ 0.053932][ T0] RDX: 0000000000000485 RSI: 0001ffffffffffff RDI: 0000000000000002
>> [ 0.055007][ T0] RBP: ffffffff84b077d0 R08: 0000000000001e00 R09: 0000000000000000
>> [ 0.056054][ T0] R10: ffffffff81b16d30 R11: 0001ffff84b317e8 R12: 0000000000000001
>> [ 0.057088][ T0] R13: ffffffff84b077d8 R14: 0000000000098000 R15: 0000000000007000
>> [ 0.058160][ T0] FS: 0000000000000000(0000) GS:ffffffff842c9000(0000) knlGS:0000000000000000
>> [ 0.059440][ T0] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [ 0.060381][ T0] CR2: 0000000000001e08 CR3: 00000000043b6000 CR4: 00000000000406a0
>> [ 0.061516][ T0] Call Trace:
>> [ 0.061969][ T0] <TASK>
>> [ 0.062350][ T0] ? stack_depot_init.cold (kbuild/src/x86_64-2/lib/stackdepot.c:258)
>> [ 0.063072][ T0] ? set_track_prepare (kbuild/src/x86_64-2/mm/slub.c:752)
>> [ 0.063957][ T0] ? __raw_callee_save___native_queued_spin_unlock (??:?)
>> [ 0.064952][ T0] ? write_comp_data (kbuild/src/x86_64-2/kernel/kcov.c:236)
>>
>>
>> To reproduce:
>>
>> # build kernel
>> cd linux
>> cp config-6.0.0-rc3-00711-gdb8d280d38ef .config
>> make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
>> make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
>> cd <mod-install-dir>
>> find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz
>>
>>
>> git clone https://github.com/intel/lkp-tests.git
>> cd lkp-tests
>> bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email
>>
>> # if come across any failure that blocks the test,
>> # please remove ~/.lkp and /lkp dir to run from a clean state.
>>
>>
>>
>> --
>> 0-DAY CI Kernel Test Service
>> https://01.org/lkp
>>
>>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [mm] db8d280d38: PANIC:early_exception
2022-10-13 8:51 ` Vlastimil Babka
@ 2022-10-13 8:59 ` Zhaoyang Huang
0 siblings, 0 replies; 4+ messages in thread
From: Zhaoyang Huang @ 2022-10-13 8:59 UTC (permalink / raw)
To: Vlastimil Babka
Cc: kernel test robot, zhaoyang.huang, lkp, lkp, linux-mm,
Andrew Morton, Matthew Wilcox, linux-block, linux-kernel,
ke.wang, steve.kang
On Thu, Oct 13, 2022 at 4:51 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 10/13/22 10:38, Zhaoyang Huang wrote:
> > @Vlastimil Could you please have an eye on this series of robot
> > reports which are caused by stack_depot_init related issues. The
> > problem arises from a very early access of stack_depot_save/init by
> > kmemleak within setup_arch which happens even before
> > stack_depot_early_init and zone related data ready. I would like to
> > suggest adding a criteria at the entrance check of stack_depot_save
> > which help the stackdepot API more aggregation and the caller free to
> > call
>
> From the report it seems to me the problem is in your set_track_prepare():
>
> + if (!kmemleak_initialized)
> + stack_depot_init();
>
> stack_depot_init() should be called only once and from well defined places,
> this looks like neither condition is met.
yes, but this is not the key point for this series problem as kmemleak
wants to access stack_depot_save during setup_arch and lead to NULL
stack_table access. To my understanding, there is no way to provide a
valid stack_table so far until stack_depot_early_init called. kmemleak
could take another action if it get a vacant trace_handle from
stack_depot_save.
>
> > [ 0.062350][ T0] ? stack_depot_init.cold+0x5/0xbd
> > [ 0.063072][ T0] ? set_track_prepare+0x6e/0x80
> > [ 0.063957][ T0] ?
> > __raw_callee_save___native_queued_spin_unlock+0x11/0x22
> > [ 0.064952][ T0] ? write_comp_data+0x2a/0x80
> > [ 0.065623][ T0] ? strncpy+0x2f/0x60
> > [ 0.066205][ T0] ? __create_object+0x10c/0x3c0
> > [ 0.066904][ T0] ? kmemleak_alloc_phys+0x6f/0x80
> > [ 0.067561][ T0] ? memblock_alloc_range_nid+0x274/0x28f
> > [ 0.068396][ T0] ? memblock_phys_alloc_range+0xa4/0xb3
> > [ 0.069200][ T0] ? reserve_real_mode+0x87/0xd7
> > [ 0.069895][ T0] ? setup_arch+0x6a9/0x995
> > [ 0.070526][ T0] ? start_kernel+0x7c/0x854
> > [ 0.071195][ T0] ? load_ucode_bsp+0x1bb/0x1c6
> > [ 0.071875][ T0] ? secondary_startup_64_no_verify+0xe0/0xeb
> > [ 0.072682][ T0] </TASK>
> >
> > On Thu, Oct 13, 2022 at 1:58 PM kernel test robot <oliver.sang@intel.com> wrote:
> >>
> >>
> >> Hi zhaoyang.huang,
> >>
> >> seems this is the fix based on our report
> >> "[mm] 0e949320db: BUG:kernel_NULL_pointer_dereference,address"
> >> at
> >> https://lore.kernel.org/all/202210121406.d4ebc9bc-oliver.sang@intel.com/
> >> but now it seems have new issue. report FYI
> >>
> >>
> >> Greeting,
> >>
> >> FYI, we noticed the following commit (built with gcc-11):
> >>
> >> commit: db8d280d38efb061ad1a57ce060cbb917a4cf503 ("[RFC PATCHv2] mm: use stack_depot for recording kmemleak's backtrace")
> >> url: https://github.com/intel-lab-lkp/linux/commits/zhaoyang-huang/mm-use-stack_depot-for-recording-kmemleak-s-backtrace/20221012-160458
> >> base: https://git.kernel.org/cgit/linux/kernel/git/akpm/mm.git mm-everything
> >> patch link: https://lore.kernel.org/linux-mm/1665561689-29498-1-git-send-email-zhaoyang.huang@unisoc.com
> >> patch subject: [RFC PATCHv2] mm: use stack_depot for recording kmemleak's backtrace
> >>
> >> in testcase: boot
> >>
> >> on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
> >>
> >> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> >>
> >>
> >> +-------------------------------+------------+------------+
> >> | | 95f1b43741 | db8d280d38 |
> >> +-------------------------------+------------+------------+
> >> | boot_successes | 20 | 0 |
> >> | boot_failures | 0 | 18 |
> >> | PANIC:early_exception | 0 | 18 |
> >> | RIP:nr_free_zone_pages | 0 | 18 |
> >> | BUG:kernel_hang_in_boot_stage | 0 | 18 |
> >> +-------------------------------+------------+------------+
> >>
> >>
> >> If you fix the issue, kindly add following tag
> >> | Reported-by: kernel test robot <oliver.sang@intel.com>
> >> | Link: https://lore.kernel.org/r/202210131309.fe5427b-oliver.sang@intel.com
> >>
> >>
> >> [ 0.029254][ T0] Scan for SMP in [mem 0x00000000-0x000003ff]
> >> [ 0.030178][ T0] Scan for SMP in [mem 0x0009fc00-0x0009ffff]
> >> [ 0.031080][ T0] Scan for SMP in [mem 0x000f0000-0x000fffff]
> >> [ 0.043370][ T0] found SMP MP-table at [mem 0x000f5ba0-0x000f5baf]
> >> [ 0.044301][ T0] mpc: f5bb0-f5c80
> >> PANIC: early exception 0x0e IP 10:ffffffff8149c282 error 0 cr2 0x1e08
> >> [ 0.045770][ T0] CPU: 0 PID: 0 Comm: swapper Not tainted 6.0.0-rc3-00711-gdb8d280d38ef #5
> >> [ 0.046970][ T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014
> >> [ 0.048356][ T0] RIP: 0010:nr_free_zone_pages (kbuild/src/x86_64-2/include/linux/mmzone.h:1478 kbuild/src/x86_64-2/include/linux/mmzone.h:1504 kbuild/src/x86_64-2/mm/page_alloc.c:5886)
> >> [ 0.049158][ T0] Code: e9 a3 5c 0b 00 0f 1f 00 e8 9b cd be ff 65 8b 05 14 b8 b8 7e 48 98 41 54 48 8b 04 c5 e0 e4 bc 83 53 89 fb 4c 8d 80 00 1e 00 00 <3b> b8 08 1e 00 00 72 5b 49 8b 10 45 31 e4 48 85 d2 75 05 eb 35 49
> >> All code
> >> ========
> >> 0: e9 a3 5c 0b 00 jmpq 0xb5ca8
> >> 5: 0f 1f 00 nopl (%rax)
> >> 8: e8 9b cd be ff callq 0xffffffffffbecda8
> >> d: 65 8b 05 14 b8 b8 7e mov %gs:0x7eb8b814(%rip),%eax # 0x7eb8b828
> >> 14: 48 98 cltq
> >> 16: 41 54 push %r12
> >> 18: 48 8b 04 c5 e0 e4 bc mov -0x7c431b20(,%rax,8),%rax
> >> 1f: 83
> >> 20: 53 push %rbx
> >> 21: 89 fb mov %edi,%ebx
> >> 23: 4c 8d 80 00 1e 00 00 lea 0x1e00(%rax),%r8
> >> 2a:* 3b b8 08 1e 00 00 cmp 0x1e08(%rax),%edi <-- trapping instruction
> >> 30: 72 5b jb 0x8d
> >> 32: 49 8b 10 mov (%r8),%rdx
> >> 35: 45 31 e4 xor %r12d,%r12d
> >> 38: 48 85 d2 test %rdx,%rdx
> >> 3b: 75 05 jne 0x42
> >> 3d: eb 35 jmp 0x74
> >> 3f: 49 rex.WB
> >>
> >> Code starting with the faulting instruction
> >> ===========================================
> >> 0: 3b b8 08 1e 00 00 cmp 0x1e08(%rax),%edi
> >> 6: 72 5b jb 0x63
> >> 8: 49 8b 10 mov (%r8),%rdx
> >> b: 45 31 e4 xor %r12d,%r12d
> >> e: 48 85 d2 test %rdx,%rdx
> >> 11: 75 05 jne 0x18
> >> 13: eb 35 jmp 0x4a
> >> 15: 49 rex.WB
> >> [ 0.051803][ T0] RSP: 0000:ffffffff83603d18 EFLAGS: 00010046 ORIG_RAX: 0000000000000000
> >> [ 0.052881][ T0] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0001ffff84b317e0
> >> [ 0.053932][ T0] RDX: 0000000000000485 RSI: 0001ffffffffffff RDI: 0000000000000002
> >> [ 0.055007][ T0] RBP: ffffffff84b077d0 R08: 0000000000001e00 R09: 0000000000000000
> >> [ 0.056054][ T0] R10: ffffffff81b16d30 R11: 0001ffff84b317e8 R12: 0000000000000001
> >> [ 0.057088][ T0] R13: ffffffff84b077d8 R14: 0000000000098000 R15: 0000000000007000
> >> [ 0.058160][ T0] FS: 0000000000000000(0000) GS:ffffffff842c9000(0000) knlGS:0000000000000000
> >> [ 0.059440][ T0] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >> [ 0.060381][ T0] CR2: 0000000000001e08 CR3: 00000000043b6000 CR4: 00000000000406a0
> >> [ 0.061516][ T0] Call Trace:
> >> [ 0.061969][ T0] <TASK>
> >> [ 0.062350][ T0] ? stack_depot_init.cold (kbuild/src/x86_64-2/lib/stackdepot.c:258)
> >> [ 0.063072][ T0] ? set_track_prepare (kbuild/src/x86_64-2/mm/slub.c:752)
> >> [ 0.063957][ T0] ? __raw_callee_save___native_queued_spin_unlock (??:?)
> >> [ 0.064952][ T0] ? write_comp_data (kbuild/src/x86_64-2/kernel/kcov.c:236)
> >>
> >>
> >> To reproduce:
> >>
> >> # build kernel
> >> cd linux
> >> cp config-6.0.0-rc3-00711-gdb8d280d38ef .config
> >> make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
> >> make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
> >> cd <mod-install-dir>
> >> find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz
> >>
> >>
> >> git clone https://github.com/intel/lkp-tests.git
> >> cd lkp-tests
> >> bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email
> >>
> >> # if come across any failure that blocks the test,
> >> # please remove ~/.lkp and /lkp dir to run from a clean state.
> >>
> >>
> >>
> >> --
> >> 0-DAY CI Kernel Test Service
> >> https://01.org/lkp
> >>
> >>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-10-13 9:00 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-12 8:01 [RFC PATCHv2] mm: use stack_depot for recording kmemleak's backtrace zhaoyang.huang
[not found] ` <202210131309.fe5427b-oliver.sang@intel.com>
2022-10-13 8:38 ` [mm] db8d280d38: PANIC:early_exception Zhaoyang Huang
2022-10-13 8:51 ` Vlastimil Babka
2022-10-13 8:59 ` Zhaoyang Huang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).