All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition
@ 2024-01-09 13:02 Vimal Kumar
  2024-01-29 14:08 ` kernel test robot
  0 siblings, 1 reply; 5+ messages in thread
From: Vimal Kumar @ 2024-01-09 13:02 UTC (permalink / raw)
  To: Rafael J. Wysocki, Len Brown, Pavel Machek, Greg Kroah-Hartman,
	linux-pm, linux-kernel
  Cc: chinmoyghosh2001, badolevishal1116, mintupatel89, Vimal Kumar

Sometimes kernel suspend transitions can be aborted unconditionally by
manipulating pm_abort_suspend value using "hard" wakeup triggers or
through "pm_system_wakeup()".

There is no way to trace the source path of module or subsystem which
aborted the suspend transitions. This change will create a list of
wakeup sources aborting suspend in progress through "hard" events as
well as subsytems aborting suspend using "pm_system_wakeup()".

Example: Existing suspend failure logs:
[  349.708359] PM: Some devices failed to suspend, or early wake event detected
[  350.327842] PM: suspend exit

Suspend failure logs with this change:
[  518.761835] PM: Some devices failed to suspend, or early wake event detected
[  519.486939] PM: wakeup source or subsystem uart_suspend_port aborted suspend
[  519.500594] PM: suspend exit

Here we can clearly identify the module triggerring abort suspend.

Co-developed-by: Chinmoy Ghosh <chinmoyghosh2001@gmail.com>
Signed-off-by: Chinmoy Ghosh <chinmoyghosh2001@gmail.com>
Co-developed-by: Mintu Patel <mintupatel89@gmail.com>
Signed-off-by: Mintu Patel <mintupatel89@gmail.com>
Co-developed-by: Vishal Badole <badolevishal1116@gmail.com>
Signed-off-by: Vishal Badole <badolevishal1116@gmail.com>
Signed-off-by: Vimal Kumar <vimal.kumar32@gmail.com>
---
Changes in v3:
 - Moved changes under PM_DEBUG config
 - Removed def for buffer size
 - Modified static buffer to dynamic allocation
 - Use mutex_lock instead of raw_spin_lock_irqsave
 - Clear the buffer after printing
---
 drivers/base/power/wakeup.c | 98 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 1 deletion(-)

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index a917219feea6..9bb32c21cfd1 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -73,6 +73,16 @@ static struct wakeup_source deleted_ws = {
 
 static DEFINE_IDA(wakeup_ida);
 
+#ifdef CONFIG_PM_DEBUG
+static DEFINE_MUTEX(pm_abort_suspend_list_lock);
+
+struct pm_abort_suspend_source {
+	struct list_head list;
+	char *source_triggering_abort_suspend;
+};
+static LIST_HEAD(pm_abort_suspend_list);
+#endif
+
 /**
  * wakeup_source_create - Create a struct wakeup_source object.
  * @name: Name of the new wakeup source.
@@ -575,6 +585,52 @@ static void wakeup_source_activate(struct wakeup_source *ws)
 	trace_wakeup_source_activate(ws->name, cec);
 }
 
+#ifdef CONFIG_PM_DEBUG
+/**
+ * pm_abort_suspend_list_clear - Clear pm_abort_suspend_list.
+ *
+ * The pm_abort_suspend_list will be cleared when system PM exits.
+ */
+static void pm_abort_suspend_list_clear(void)
+{
+	struct pm_abort_suspend_source *info, *tmp;
+
+	mutex_lock(&pm_abort_suspend_list_lock);
+	list_for_each_entry_safe(info, tmp, &pm_abort_suspend_list, list) {
+		list_del(&info->list);
+		kfree(info);
+	}
+	mutex_unlock(&pm_abort_suspend_list_lock);
+}
+
+/**
+ * pm_abort_suspend_source_add - Update pm_abort_suspend_list
+ * @source_name: Wakeup_source or function aborting suspend transitions.
+ *
+ * Add the source name responsible for updating the abort_suspend flag in the
+ * pm_abort_suspend_list.
+ */
+static void pm_abort_suspend_source_add(const char *source_name)
+{
+	struct pm_abort_suspend_source *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return;
+
+	INIT_LIST_HEAD(&info->list);
+	info->source_triggering_abort_suspend = kstrdup(source_name, GFP_KERNEL);
+	if (!info->source_triggering_abort_suspend) {
+		kfree(info);
+		return;
+	}
+
+	mutex_lock(&pm_abort_suspend_list_lock);
+	list_add_tail(&info->list, &pm_abort_suspend_list);
+	mutex_unlock(&pm_abort_suspend_list_lock);
+}
+#endif
+
 /**
  * wakeup_source_report_event - Report wakeup event using the given source.
  * @ws: Wakeup source to report the event for.
@@ -590,8 +646,13 @@ static void wakeup_source_report_event(struct wakeup_source *ws, bool hard)
 	if (!ws->active)
 		wakeup_source_activate(ws);
 
-	if (hard)
+	if (hard) {
+#ifdef CONFIG_PM_DEBUG
+		if (pm_suspend_target_state != PM_SUSPEND_ON)
+			pm_abort_suspend_source_add(ws->name);
+#endif
 		pm_system_wakeup();
+	}
 }
 
 /**
@@ -893,12 +954,47 @@ bool pm_wakeup_pending(void)
 		pm_print_active_wakeup_sources();
 	}
 
+#ifdef CONFIG_PM_DEBUG
+	if (atomic_read(&pm_abort_suspend) > 0) {
+		struct pm_abort_suspend_source *info;
+
+		mutex_lock(&pm_abort_suspend_list_lock);
+		list_for_each_entry(info, &pm_abort_suspend_list, list) {
+			pm_pr_dbg("wakeup source or subsystem %s aborted suspend\n",
+					info->source_triggering_abort_suspend);
+		}
+		mutex_unlock(&pm_abort_suspend_list_lock);
+		pm_abort_suspend_list_clear();
+	}
+#endif
+
 	return ret || atomic_read(&pm_abort_suspend) > 0;
 }
 EXPORT_SYMBOL_GPL(pm_wakeup_pending);
 
 void pm_system_wakeup(void)
 {
+
+#ifdef CONFIG_PM_DEBUG
+#ifdef CONFIG_DEBUG_INFO
+	if (pm_suspend_target_state != PM_SUSPEND_ON) {
+		char *source_name = kasprintf(GFP_KERNEL,
+					"%ps",
+					__builtin_return_address(0));
+		if (!source_name)
+			goto exit;
+
+		if (strcmp(source_name, "pm_wakeup_ws_event"))
+			pm_abort_suspend_source_add(source_name);
+
+		kfree(source_name);
+	}
+exit:
+#else
+	if (pm_suspend_target_state != PM_SUSPEND_ON)
+		pm_pr_dbg("Some wakeup source or subsystem aborted suspend\n");
+#endif
+#endif
 	atomic_inc(&pm_abort_suspend);
 	s2idle_wake();
 }
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition
  2024-01-09 13:02 [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition Vimal Kumar
@ 2024-01-29 14:08 ` kernel test robot
  2024-02-01  4:44   ` Vimal Kumar
  0 siblings, 1 reply; 5+ messages in thread
From: kernel test robot @ 2024-01-29 14:08 UTC (permalink / raw)
  To: Vimal Kumar
  Cc: oe-lkp, lkp, Chinmoy Ghosh, Mintu Patel, Vishal Badole, linux-pm,
	rui.zhang, yu.c.chen, Rafael J. Wysocki, Len Brown, Pavel Machek,
	Greg Kroah-Hartman, linux-kernel, Vimal Kumar, oliver.sang



Hello,

kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_include/linux/sched/mm.h" on:

commit: e2d228456d500f3b00ad746b353fa85eee235b7a ("[PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition")
url: https://github.com/intel-lab-lkp/linux/commits/Vimal-Kumar/PM-sleep-Mechanism-to-find-source-aborting-kernel-suspend-transition/20240109-210519
base: https://git.kernel.org/cgit/linux/kernel/git/rafael/linux-pm.git linux-next
patch link: https://lore.kernel.org/all/20240109130210.31938-1-vimal.kumar32@gmail.com/
patch subject: [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition

in testcase: suspend-stress
version: 
with following parameters:

	mode: freeze
	iterations: 10



compiler: gcc-12
test machine: 4 threads (Broadwell) with 8G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202401292109.f115a688-oliver.sang@intel.com


kern  :err   : [   97.935754] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306
kern  :err   : [   97.935886] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1
kern  :err   : [   97.936000] preempt_count: 10002, expected: 0
kern  :err   : [   97.936071] RCU nest depth: 0, expected: 0
kern  :warn  : [   97.936138] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.7.0-rc8-00169-ge2d228456d50 #1
kern  :warn  : [   97.936255] Hardware name:  /NUC5i3RYB, BIOS RYBDWi35.86A.0363.2017.0316.1028 03/16/2017
kern  :warn  : [   97.936371] Call Trace:
kern  :warn  : [   97.936419]  <IRQ>
kern :warn : [   97.936461] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) 
kern :warn : [   97.936529] __might_resched (kernel/sched/core.c:10152) 
kern :warn : [   97.936598] ? preempt_notifier_dec (kernel/sched/core.c:10106) 
kern :warn : [   97.936672] __kmem_cache_alloc_node (include/linux/kernel.h:107 include/linux/sched/mm.h:306 mm/slab.h:710 mm/slub.c:3460 mm/slub.c:3517) 
kern :warn : [   97.936750] ? kasprintf (lib/kasprintf.c:54) 
kern :warn : [   97.936812] ? pointer (lib/vsprintf.c:2755) 
kern :warn : [   97.936875] ? kasprintf (lib/kasprintf.c:54) 
kern :warn : [   97.936936] __kmalloc_node_track_caller (include/linux/kasan.h:198 mm/slab_common.c:1007 mm/slab_common.c:1027) 
kern :warn : [   97.937017] kvasprintf (lib/kasprintf.c:25) 
kern :warn : [   97.937079] ? bust_spinlocks (lib/kasprintf.c:16) 
kern :warn : [   97.937146] ? enqueue_hrtimer (kernel/time/hrtimer.c:1095 (discriminator 3)) 
kern :warn : [   97.937214] ? tick_nohz_highres_handler (kernel/time/tick-sched.c:1530) 
kern :warn : [   97.937295] kasprintf (lib/kasprintf.c:54) 
kern :warn : [   97.937354] ? kvasprintf_const (lib/kasprintf.c:54) 
kern :warn : [   97.937426] ? _raw_spin_lock_irqsave (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2164 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162) 
kern :warn : [   97.937501] ? _raw_read_unlock_irqrestore (kernel/locking/spinlock.c:161) 
kern :warn : [   97.937581] ? _raw_spin_lock (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2164 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:134 kernel/locking/spinlock.c:154) 
kern :warn : [   97.937648] ? _raw_write_lock_irq (kernel/locking/spinlock.c:153) 
kern :warn : [   97.937719] ? irq_pm_check_wakeup (kernel/irq/pm.c:24) 
kern :warn : [   97.937793] pm_system_wakeup (drivers/base/power/wakeup.c:981) 
kern :warn : [   97.937862] irq_pm_check_wakeup (kernel/irq/pm.c:24) 
kern :warn : [   97.937933] handle_fasteoi_irq (kernel/irq/chip.c:518 kernel/irq/chip.c:502 kernel/irq/chip.c:698) 
kern :warn : [   97.938006] __common_interrupt (arch/x86/kernel/irq.c:271 (discriminator 22)) 
kern :warn : [   97.938078] common_interrupt (arch/x86/kernel/irq.c:247 (discriminator 14)) 
kern  :warn  : [   97.938145]  </IRQ>
kern  :warn  : [   97.938187]  <TASK>
kern :warn : [   97.938230] asm_common_interrupt (arch/x86/include/asm/idtentry.h:640) 
kern :warn : [   97.938301] RIP: 0010:cpuidle_enter_s2idle (arch/x86/include/asm/irqflags.h:78 drivers/cpuidle/cpuidle.c:199) 
kern :warn : [ 97.938384] Code: d9 75 83 85 d2 7f 11 48 83 c4 20 89 d0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 4c 89 c6 48 89 cf 89 14 24 e8 fb 64 9b 00 fb 8b 14 24 <eb> db 31 d2 eb d7 48 89 0c 24 48 89 74 24 08 e8 03 51 9f fe 4c 8b
All code
========
   0:	d9 75 83             	fnstenv -0x7d(%rbp)
   3:	85 d2                	test   %edx,%edx
   5:	7f 11                	jg     0x18
   7:	48 83 c4 20          	add    $0x20,%rsp
   b:	89 d0                	mov    %edx,%eax
   d:	5b                   	pop    %rbx
   e:	5d                   	pop    %rbp
   f:	41 5c                	pop    %r12
  11:	41 5d                	pop    %r13
  13:	41 5e                	pop    %r14
  15:	41 5f                	pop    %r15
  17:	c3                   	retq   
  18:	4c 89 c6             	mov    %r8,%rsi
  1b:	48 89 cf             	mov    %rcx,%rdi
  1e:	89 14 24             	mov    %edx,(%rsp)
  21:	e8 fb 64 9b 00       	callq  0x9b6521
  26:	fb                   	sti    
  27:	8b 14 24             	mov    (%rsp),%edx
  2a:*	eb db                	jmp    0x7		<-- trapping instruction
  2c:	31 d2                	xor    %edx,%edx
  2e:	eb d7                	jmp    0x7
  30:	48 89 0c 24          	mov    %rcx,(%rsp)
  34:	48 89 74 24 08       	mov    %rsi,0x8(%rsp)
  39:	e8 03 51 9f fe       	callq  0xfffffffffe9f5141
  3e:	4c                   	rex.WR
  3f:	8b                   	.byte 0x8b

Code starting with the faulting instruction
===========================================
   0:	eb db                	jmp    0xffffffffffffffdd
   2:	31 d2                	xor    %edx,%edx
   4:	eb d7                	jmp    0xffffffffffffffdd
   6:	48 89 0c 24          	mov    %rcx,(%rsp)
   a:	48 89 74 24 08       	mov    %rsi,0x8(%rsp)
   f:	e8 03 51 9f fe       	callq  0xfffffffffe9f5117
  14:	4c                   	rex.WR
  15:	8b                   	.byte 0x8b
kern  :warn  : [   97.938632] RSP: 0018:ffffc90000177db8 EFLAGS: 00000286
kern  :warn  : [   97.938717] RAX: 00000000037a402c RBX: 0000000000000009 RCX: 0000000000000000
kern  :warn  : [   97.938823] RDX: 0000000000000008 RSI: 0000000937e23ad2 RDI: 0000000000000000
kern  :warn  : [   97.938927] RBP: ffffffff84f7f680 R08: 0000000000000000 R09: ffffed1021ad5530
kern  :warn  : [   97.939032] R10: ffff88810d6aa987 R11: 0000000000000000 R12: ffff88820decca10
kern  :warn  : [   97.939137] R13: dffffc0000000000 R14: 000000000027ac40 R15: 000000000027ac40
kern :warn : [   97.939244] ? cpuidle_enter_s2idle (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 drivers/cpuidle/cpuidle.c:199) 
kern :warn : [   97.939319] cpuidle_idle_call (kernel/sched/idle.c:190) 
kern :warn : [   97.939389] ? arch_cpu_idle_exit+0x40/0x40 
kern :warn : [   97.939459] ? mark_tsc_async_resets (arch/x86/kernel/tsc_sync.c:51) 
kern :warn : [   97.939536] do_idle (kernel/sched/idle.c:282) 
kern :warn : [   97.939594] cpu_startup_entry (kernel/sched/idle.c:379 (discriminator 1)) 
kern :warn : [   97.939662] start_secondary (arch/x86/kernel/smpboot.c:224 arch/x86/kernel/smpboot.c:304) 
kern :warn : [   97.939731] ? set_cpu_sibling_map (arch/x86/kernel/smpboot.c:254) 
kern :warn : [   97.939808] ? soft_restart_cpu (arch/x86/kernel/head_64.S:485) 
kern :warn : [   97.939878] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:448) 
kern  :warn  : [   97.939963]  </TASK>
kern :info : [   97.948803] bdw_uncore 0000:00:00.0: PM: calling pci_pm_resume_noirq+0x0/0x390 @ 85, parent: 00 
kern :info : [   97.948993] bdw_uncore 0000:00:00.0: PM: pci_pm_resume_noirq+0x0/0x390 returned 0 after 17 usecs 



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240129/202401292109.f115a688-oliver.sang@intel.com



-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition
  2024-01-29 14:08 ` kernel test robot
@ 2024-02-01  4:44   ` Vimal Kumar
  2024-02-01 13:59     ` Rafael J. Wysocki
  0 siblings, 1 reply; 5+ messages in thread
From: Vimal Kumar @ 2024-02-01  4:44 UTC (permalink / raw)
  To: kernel test robot
  Cc: oe-lkp, lkp, Chinmoy Ghosh, Mintu Patel, Vishal Badole, linux-pm,
	rui.zhang, yu.c.chen, Rafael J. Wysocki, Len Brown, Pavel Machek,
	Greg Kroah-Hartman, linux-kernel

On Mon, Jan 29, 2024 at 10:08:32PM +0800, kernel test robot wrote:
> 
> 
> Hello,
> 
> kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_include/linux/sched/mm.h" on:
> 
> commit: e2d228456d500f3b00ad746b353fa85eee235b7a ("[PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition")
> url: https://github.com/intel-lab-lkp/linux/commits/Vimal-Kumar/PM-sleep-Mechanism-to-find-source-aborting-kernel-suspend-transition/20240109-210519
> base: https://git.kernel.org/cgit/linux/kernel/git/rafael/linux-pm.git linux-next
> patch link: https://lore.kernel.org/all/20240109130210.31938-1-vimal.kumar32@gmail.com/
> patch subject: [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition
> 
> in testcase: suspend-stress
> version: 
> with following parameters:
> 
> 	mode: freeze
> 	iterations: 10
> 
> 
> 
> compiler: gcc-12
> test machine: 4 threads (Broadwell) with 8G memory
> 
> (please refer to attached dmesg/kmsg for entire log/backtrace)
> 
> 
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <oliver.sang@intel.com>
> | Closes: https://lore.kernel.org/oe-lkp/202401292109.f115a688-oliver.sang@intel.com
> 
> 
> kern  :err   : [   97.935754] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306
> kern  :err   : [   97.935886] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1
> kern  :err   : [   97.936000] preempt_count: 10002, expected: 0
> kern  :err   : [   97.936071] RCU nest depth: 0, expected: 0
> kern  :warn  : [   97.936138] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.7.0-rc8-00169-ge2d228456d50 #1
> kern  :warn  : [   97.936255] Hardware name:  /NUC5i3RYB, BIOS RYBDWi35.86A.0363.2017.0316.1028 03/16/2017
> kern  :warn  : [   97.936371] Call Trace:
> kern  :warn  : [   97.936419]  <IRQ>
> kern :warn : [   97.936461] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) 
> kern :warn : [   97.936529] __might_resched (kernel/sched/core.c:10152) 
> kern :warn : [   97.936598] ? preempt_notifier_dec (kernel/sched/core.c:10106) 
> kern :warn : [   97.936672] __kmem_cache_alloc_node (include/linux/kernel.h:107 include/linux/sched/mm.h:306 mm/slab.h:710 mm/slub.c:3460 mm/slub.c:3517) 
> kern :warn : [   97.936750] ? kasprintf (lib/kasprintf.c:54) 
> kern :warn : [   97.936812] ? pointer (lib/vsprintf.c:2755) 
> kern :warn : [   97.936875] ? kasprintf (lib/kasprintf.c:54) 
> kern :warn : [   97.936936] __kmalloc_node_track_caller (include/linux/kasan.h:198 mm/slab_common.c:1007 mm/slab_common.c:1027) 
> kern :warn : [   97.937017] kvasprintf (lib/kasprintf.c:25) 
> kern :warn : [   97.937079] ? bust_spinlocks (lib/kasprintf.c:16) 
> kern :warn : [   97.937146] ? enqueue_hrtimer (kernel/time/hrtimer.c:1095 (discriminator 3)) 
> kern :warn : [   97.937214] ? tick_nohz_highres_handler (kernel/time/tick-sched.c:1530) 
> kern :warn : [   97.937295] kasprintf (lib/kasprintf.c:54) 
> kern :warn : [   97.937354] ? kvasprintf_const (lib/kasprintf.c:54) 
> kern :warn : [   97.937426] ? _raw_spin_lock_irqsave (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2164 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162) 
> kern :warn : [   97.937501] ? _raw_read_unlock_irqrestore (kernel/locking/spinlock.c:161) 
> kern :warn : [   97.937581] ? _raw_spin_lock (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2164 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:134 kernel/locking/spinlock.c:154) 
> kern :warn : [   97.937648] ? _raw_write_lock_irq (kernel/locking/spinlock.c:153) 
> kern :warn : [   97.937719] ? irq_pm_check_wakeup (kernel/irq/pm.c:24) 
> kern :warn : [   97.937793] pm_system_wakeup (drivers/base/power/wakeup.c:981) 
> kern :warn : [   97.937862] irq_pm_check_wakeup (kernel/irq/pm.c:24) 
> kern :warn : [   97.937933] handle_fasteoi_irq (kernel/irq/chip.c:518 kernel/irq/chip.c:502 kernel/irq/chip.c:698) 
> kern :warn : [   97.938006] __common_interrupt (arch/x86/kernel/irq.c:271 (discriminator 22)) 
> kern :warn : [   97.938078] common_interrupt (arch/x86/kernel/irq.c:247 (discriminator 14)) 
> kern  :warn  : [   97.938145]  </IRQ>
> kern  :warn  : [   97.938187]  <TASK>
> kern :warn : [   97.938230] asm_common_interrupt (arch/x86/include/asm/idtentry.h:640) 
> kern :warn : [   97.938301] RIP: 0010:cpuidle_enter_s2idle (arch/x86/include/asm/irqflags.h:78 drivers/cpuidle/cpuidle.c:199) 
> kern :warn : [ 97.938384] Code: d9 75 83 85 d2 7f 11 48 83 c4 20 89 d0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 4c 89 c6 48 89 cf 89 14 24 e8 fb 64 9b 00 fb 8b 14 24 <eb> db 31 d2 eb d7 48 89 0c 24 48 89 74 24 08 e8 03 51 9f fe 4c 8b
> All code
> ========
>    0:	d9 75 83             	fnstenv -0x7d(%rbp)
>    3:	85 d2                	test   %edx,%edx
>    5:	7f 11                	jg     0x18
>    7:	48 83 c4 20          	add    $0x20,%rsp
>    b:	89 d0                	mov    %edx,%eax
>    d:	5b                   	pop    %rbx
>    e:	5d                   	pop    %rbp
>    f:	41 5c                	pop    %r12
>   11:	41 5d                	pop    %r13
>   13:	41 5e                	pop    %r14
>   15:	41 5f                	pop    %r15
>   17:	c3                   	retq   
>   18:	4c 89 c6             	mov    %r8,%rsi
>   1b:	48 89 cf             	mov    %rcx,%rdi
>   1e:	89 14 24             	mov    %edx,(%rsp)
>   21:	e8 fb 64 9b 00       	callq  0x9b6521
>   26:	fb                   	sti    
>   27:	8b 14 24             	mov    (%rsp),%edx
>   2a:*	eb db                	jmp    0x7		<-- trapping instruction
>   2c:	31 d2                	xor    %edx,%edx
>   2e:	eb d7                	jmp    0x7
>   30:	48 89 0c 24          	mov    %rcx,(%rsp)
>   34:	48 89 74 24 08       	mov    %rsi,0x8(%rsp)
>   39:	e8 03 51 9f fe       	callq  0xfffffffffe9f5141
>   3e:	4c                   	rex.WR
>   3f:	8b                   	.byte 0x8b
> 
> Code starting with the faulting instruction
> ===========================================
>    0:	eb db                	jmp    0xffffffffffffffdd
>    2:	31 d2                	xor    %edx,%edx
>    4:	eb d7                	jmp    0xffffffffffffffdd
>    6:	48 89 0c 24          	mov    %rcx,(%rsp)
>    a:	48 89 74 24 08       	mov    %rsi,0x8(%rsp)
>    f:	e8 03 51 9f fe       	callq  0xfffffffffe9f5117
>   14:	4c                   	rex.WR
>   15:	8b                   	.byte 0x8b
> kern  :warn  : [   97.938632] RSP: 0018:ffffc90000177db8 EFLAGS: 00000286
> kern  :warn  : [   97.938717] RAX: 00000000037a402c RBX: 0000000000000009 RCX: 0000000000000000
> kern  :warn  : [   97.938823] RDX: 0000000000000008 RSI: 0000000937e23ad2 RDI: 0000000000000000
> kern  :warn  : [   97.938927] RBP: ffffffff84f7f680 R08: 0000000000000000 R09: ffffed1021ad5530
> kern  :warn  : [   97.939032] R10: ffff88810d6aa987 R11: 0000000000000000 R12: ffff88820decca10
> kern  :warn  : [   97.939137] R13: dffffc0000000000 R14: 000000000027ac40 R15: 000000000027ac40
> kern :warn : [   97.939244] ? cpuidle_enter_s2idle (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 drivers/cpuidle/cpuidle.c:199) 
> kern :warn : [   97.939319] cpuidle_idle_call (kernel/sched/idle.c:190) 
> kern :warn : [   97.939389] ? arch_cpu_idle_exit+0x40/0x40 
> kern :warn : [   97.939459] ? mark_tsc_async_resets (arch/x86/kernel/tsc_sync.c:51) 
> kern :warn : [   97.939536] do_idle (kernel/sched/idle.c:282) 
> kern :warn : [   97.939594] cpu_startup_entry (kernel/sched/idle.c:379 (discriminator 1)) 
> kern :warn : [   97.939662] start_secondary (arch/x86/kernel/smpboot.c:224 arch/x86/kernel/smpboot.c:304) 
> kern :warn : [   97.939731] ? set_cpu_sibling_map (arch/x86/kernel/smpboot.c:254) 
> kern :warn : [   97.939808] ? soft_restart_cpu (arch/x86/kernel/head_64.S:485) 
> kern :warn : [   97.939878] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:448) 
> kern  :warn  : [   97.939963]  </TASK>
> kern :info : [   97.948803] bdw_uncore 0000:00:00.0: PM: calling pci_pm_resume_noirq+0x0/0x390 @ 85, parent: 00 
> kern :info : [   97.948993] bdw_uncore 0000:00:00.0: PM: pci_pm_resume_noirq+0x0/0x390 returned 0 after 17 usecs 
> 
> 
> 
> The kernel config and materials to reproduce are available at:
> https://download.01.org/0day-ci/archive/20240129/202401292109.f115a688-oliver.sang@intel.com
> 
> 
> 
> -- 
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki
> 

Hi Greg,

This is an example where "pm_system_wakeup" can be called from atomic context. It also gives an actual instance where we can utilise this feature to find the source.

The bug reported due to "GFP_KERNEL" flag, but it will be applicable for using "mutex_lock" as well.
I will change to ATOMIC flag in kmalloc, as well as use raw_spin_lock* instead of mutex_lock.


Warm regards,
Vimal Kumar  

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition
  2024-02-01  4:44   ` Vimal Kumar
@ 2024-02-01 13:59     ` Rafael J. Wysocki
  2024-02-05  4:54       ` Vimal Kumar
  0 siblings, 1 reply; 5+ messages in thread
From: Rafael J. Wysocki @ 2024-02-01 13:59 UTC (permalink / raw)
  To: Vimal Kumar
  Cc: kernel test robot, oe-lkp, lkp, Chinmoy Ghosh, Mintu Patel,
	Vishal Badole, linux-pm, rui.zhang, yu.c.chen, Rafael J. Wysocki,
	Len Brown, Pavel Machek, Greg Kroah-Hartman, linux-kernel

On Thu, Feb 1, 2024 at 5:44 AM Vimal Kumar <vimal.kumar32@gmail.com> wrote:
>
> On Mon, Jan 29, 2024 at 10:08:32PM +0800, kernel test robot wrote:
> >
> >
> > Hello,
> >
> > kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_include/linux/sched/mm.h" on:
> >
> > commit: e2d228456d500f3b00ad746b353fa85eee235b7a ("[PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition")
> > url: https://github.com/intel-lab-lkp/linux/commits/Vimal-Kumar/PM-sleep-Mechanism-to-find-source-aborting-kernel-suspend-transition/20240109-210519
> > base: https://git.kernel.org/cgit/linux/kernel/git/rafael/linux-pm.git linux-next
> > patch link: https://lore.kernel.org/all/20240109130210.31938-1-vimal.kumar32@gmail.com/
> > patch subject: [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition
> >
> > in testcase: suspend-stress
> > version:
> > with following parameters:
> >
> >       mode: freeze
> >       iterations: 10
> >
> >
> >
> > compiler: gcc-12
> > test machine: 4 threads (Broadwell) with 8G memory
> >
> > (please refer to attached dmesg/kmsg for entire log/backtrace)
> >
> >
> >
> > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > the same patch/commit), kindly add following tags
> > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > | Closes: https://lore.kernel.org/oe-lkp/202401292109.f115a688-oliver.sang@intel.com
> >
> >
> > kern  :err   : [   97.935754] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306
> > kern  :err   : [   97.935886] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1
> > kern  :err   : [   97.936000] preempt_count: 10002, expected: 0
> > kern  :err   : [   97.936071] RCU nest depth: 0, expected: 0
> > kern  :warn  : [   97.936138] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.7.0-rc8-00169-ge2d228456d50 #1
> > kern  :warn  : [   97.936255] Hardware name:  /NUC5i3RYB, BIOS RYBDWi35.86A.0363.2017.0316.1028 03/16/2017
> > kern  :warn  : [   97.936371] Call Trace:
> > kern  :warn  : [   97.936419]  <IRQ>
> > kern :warn : [   97.936461] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1))
> > kern :warn : [   97.936529] __might_resched (kernel/sched/core.c:10152)
> > kern :warn : [   97.936598] ? preempt_notifier_dec (kernel/sched/core.c:10106)
> > kern :warn : [   97.936672] __kmem_cache_alloc_node (include/linux/kernel.h:107 include/linux/sched/mm.h:306 mm/slab.h:710 mm/slub.c:3460 mm/slub.c:3517)
> > kern :warn : [   97.936750] ? kasprintf (lib/kasprintf.c:54)
> > kern :warn : [   97.936812] ? pointer (lib/vsprintf.c:2755)
> > kern :warn : [   97.936875] ? kasprintf (lib/kasprintf.c:54)
> > kern :warn : [   97.936936] __kmalloc_node_track_caller (include/linux/kasan.h:198 mm/slab_common.c:1007 mm/slab_common.c:1027)
> > kern :warn : [   97.937017] kvasprintf (lib/kasprintf.c:25)
> > kern :warn : [   97.937079] ? bust_spinlocks (lib/kasprintf.c:16)
> > kern :warn : [   97.937146] ? enqueue_hrtimer (kernel/time/hrtimer.c:1095 (discriminator 3))
> > kern :warn : [   97.937214] ? tick_nohz_highres_handler (kernel/time/tick-sched.c:1530)
> > kern :warn : [   97.937295] kasprintf (lib/kasprintf.c:54)
> > kern :warn : [   97.937354] ? kvasprintf_const (lib/kasprintf.c:54)
> > kern :warn : [   97.937426] ? _raw_spin_lock_irqsave (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2164 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162)
> > kern :warn : [   97.937501] ? _raw_read_unlock_irqrestore (kernel/locking/spinlock.c:161)
> > kern :warn : [   97.937581] ? _raw_spin_lock (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2164 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:134 kernel/locking/spinlock.c:154)
> > kern :warn : [   97.937648] ? _raw_write_lock_irq (kernel/locking/spinlock.c:153)
> > kern :warn : [   97.937719] ? irq_pm_check_wakeup (kernel/irq/pm.c:24)
> > kern :warn : [   97.937793] pm_system_wakeup (drivers/base/power/wakeup.c:981)
> > kern :warn : [   97.937862] irq_pm_check_wakeup (kernel/irq/pm.c:24)
> > kern :warn : [   97.937933] handle_fasteoi_irq (kernel/irq/chip.c:518 kernel/irq/chip.c:502 kernel/irq/chip.c:698)
> > kern :warn : [   97.938006] __common_interrupt (arch/x86/kernel/irq.c:271 (discriminator 22))
> > kern :warn : [   97.938078] common_interrupt (arch/x86/kernel/irq.c:247 (discriminator 14))
> > kern  :warn  : [   97.938145]  </IRQ>
> > kern  :warn  : [   97.938187]  <TASK>
> > kern :warn : [   97.938230] asm_common_interrupt (arch/x86/include/asm/idtentry.h:640)
> > kern :warn : [   97.938301] RIP: 0010:cpuidle_enter_s2idle (arch/x86/include/asm/irqflags.h:78 drivers/cpuidle/cpuidle.c:199)
> > kern :warn : [ 97.938384] Code: d9 75 83 85 d2 7f 11 48 83 c4 20 89 d0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 4c 89 c6 48 89 cf 89 14 24 e8 fb 64 9b 00 fb 8b 14 24 <eb> db 31 d2 eb d7 48 89 0c 24 48 89 74 24 08 e8 03 51 9f fe 4c 8b
> > All code
> > ========
> >    0: d9 75 83                fnstenv -0x7d(%rbp)
> >    3: 85 d2                   test   %edx,%edx
> >    5: 7f 11                   jg     0x18
> >    7: 48 83 c4 20             add    $0x20,%rsp
> >    b: 89 d0                   mov    %edx,%eax
> >    d: 5b                      pop    %rbx
> >    e: 5d                      pop    %rbp
> >    f: 41 5c                   pop    %r12
> >   11: 41 5d                   pop    %r13
> >   13: 41 5e                   pop    %r14
> >   15: 41 5f                   pop    %r15
> >   17: c3                      retq
> >   18: 4c 89 c6                mov    %r8,%rsi
> >   1b: 48 89 cf                mov    %rcx,%rdi
> >   1e: 89 14 24                mov    %edx,(%rsp)
> >   21: e8 fb 64 9b 00          callq  0x9b6521
> >   26: fb                      sti
> >   27: 8b 14 24                mov    (%rsp),%edx
> >   2a:*        eb db                   jmp    0x7              <-- trapping instruction
> >   2c: 31 d2                   xor    %edx,%edx
> >   2e: eb d7                   jmp    0x7
> >   30: 48 89 0c 24             mov    %rcx,(%rsp)
> >   34: 48 89 74 24 08          mov    %rsi,0x8(%rsp)
> >   39: e8 03 51 9f fe          callq  0xfffffffffe9f5141
> >   3e: 4c                      rex.WR
> >   3f: 8b                      .byte 0x8b
> >
> > Code starting with the faulting instruction
> > ===========================================
> >    0: eb db                   jmp    0xffffffffffffffdd
> >    2: 31 d2                   xor    %edx,%edx
> >    4: eb d7                   jmp    0xffffffffffffffdd
> >    6: 48 89 0c 24             mov    %rcx,(%rsp)
> >    a: 48 89 74 24 08          mov    %rsi,0x8(%rsp)
> >    f: e8 03 51 9f fe          callq  0xfffffffffe9f5117
> >   14: 4c                      rex.WR
> >   15: 8b                      .byte 0x8b
> > kern  :warn  : [   97.938632] RSP: 0018:ffffc90000177db8 EFLAGS: 00000286
> > kern  :warn  : [   97.938717] RAX: 00000000037a402c RBX: 0000000000000009 RCX: 0000000000000000
> > kern  :warn  : [   97.938823] RDX: 0000000000000008 RSI: 0000000937e23ad2 RDI: 0000000000000000
> > kern  :warn  : [   97.938927] RBP: ffffffff84f7f680 R08: 0000000000000000 R09: ffffed1021ad5530
> > kern  :warn  : [   97.939032] R10: ffff88810d6aa987 R11: 0000000000000000 R12: ffff88820decca10
> > kern  :warn  : [   97.939137] R13: dffffc0000000000 R14: 000000000027ac40 R15: 000000000027ac40
> > kern :warn : [   97.939244] ? cpuidle_enter_s2idle (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 drivers/cpuidle/cpuidle.c:199)
> > kern :warn : [   97.939319] cpuidle_idle_call (kernel/sched/idle.c:190)
> > kern :warn : [   97.939389] ? arch_cpu_idle_exit+0x40/0x40
> > kern :warn : [   97.939459] ? mark_tsc_async_resets (arch/x86/kernel/tsc_sync.c:51)
> > kern :warn : [   97.939536] do_idle (kernel/sched/idle.c:282)
> > kern :warn : [   97.939594] cpu_startup_entry (kernel/sched/idle.c:379 (discriminator 1))
> > kern :warn : [   97.939662] start_secondary (arch/x86/kernel/smpboot.c:224 arch/x86/kernel/smpboot.c:304)
> > kern :warn : [   97.939731] ? set_cpu_sibling_map (arch/x86/kernel/smpboot.c:254)
> > kern :warn : [   97.939808] ? soft_restart_cpu (arch/x86/kernel/head_64.S:485)
> > kern :warn : [   97.939878] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:448)
> > kern  :warn  : [   97.939963]  </TASK>
> > kern :info : [   97.948803] bdw_uncore 0000:00:00.0: PM: calling pci_pm_resume_noirq+0x0/0x390 @ 85, parent: 00
> > kern :info : [   97.948993] bdw_uncore 0000:00:00.0: PM: pci_pm_resume_noirq+0x0/0x390 returned 0 after 17 usecs
> >
> >
> >
> > The kernel config and materials to reproduce are available at:
> > https://download.01.org/0day-ci/archive/20240129/202401292109.f115a688-oliver.sang@intel.com
> >
> >
> >
> > --
> > 0-DAY CI Kernel Test Service
> > https://github.com/intel/lkp-tests/wiki
> >
>
> Hi Greg,
>
> This is an example where "pm_system_wakeup" can be called from atomic context. It also gives an actual instance where we can utilise this feature to find the source.
>
> The bug reported due to "GFP_KERNEL" flag, but it will be applicable for using "mutex_lock" as well.
> I will change to ATOMIC flag in kmalloc, as well as use raw_spin_lock* instead of mutex_lock.

Maybe instead of trying to add bandaids, you could work on making the
wakeup sources infrastructure suitable for addressing your use case?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition
  2024-02-01 13:59     ` Rafael J. Wysocki
@ 2024-02-05  4:54       ` Vimal Kumar
  0 siblings, 0 replies; 5+ messages in thread
From: Vimal Kumar @ 2024-02-05  4:54 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: kernel test robot, oe-lkp, lkp, Chinmoy Ghosh, Mintu Patel,
	Vishal Badole, linux-pm, rui.zhang, yu.c.chen, Len Brown,
	Pavel Machek, Greg Kroah-Hartman, linux-kernel

On Thu, Feb 01, 2024 at 02:59:00PM +0100, Rafael J. Wysocki wrote:
> On Thu, Feb 1, 2024 at 5:44 AM Vimal Kumar <vimal.kumar32@gmail.com> wrote:
> >
> > On Mon, Jan 29, 2024 at 10:08:32PM +0800, kernel test robot wrote:
> > >
> > >
> > > Hello,
> > >
B> > > kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_include/linux/sched/mm.h" on:
> > >
> > > commit: e2d228456d500f3b00ad746b353fa85eee235b7a ("[PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition")
> > > url: https://github.com/intel-lab-lkp/linux/commits/Vimal-Kumar/PM-sleep-Mechanism-to-find-source-aborting-kernel-suspend-transition/20240109-210519
> > > base: https://git.kernel.org/cgit/linux/kernel/git/rafael/linux-pm.git linux-next
> > > patch link: https://lore.kernel.org/all/20240109130210.31938-1-vimal.kumar32@gmail.com/
> > > patch subject: [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition
> > >
> > > in testcase: suspend-stress
> > > version:
> > > with following parameters:
> > >
> > >       mode: freeze
> > >       iterations: 10
> > >
> > >
> > >
> > > compiler: gcc-12
> > > test machine: 4 threads (Broadwell) with 8G memory
> > >
> > > (please refer to attached dmesg/kmsg for entire log/backtrace)
> > >
> > >
> > >
> > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > the same patch/commit), kindly add following tags
> > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > | Closes: https://lore.kernel.org/oe-lkp/202401292109.f115a688-oliver.sang@intel.com
> > >
> > >
> > > kern  :err   : [   97.935754] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306
> > > kern  :err   : [   97.935886] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1
> > > kern  :err   : [   97.936000] preempt_count: 10002, expected: 0
> > > kern  :err   : [   97.936071] RCU nest depth: 0, expected: 0
> > > kern  :warn  : [   97.936138] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.7.0-rc8-00169-ge2d228456d50 #1
> > > kern  :warn  : [   97.936255] Hardware name:  /NUC5i3RYB, BIOS RYBDWi35.86A.0363.2017.0316.1028 03/16/2017
> > > kern  :warn  : [   97.936371] Call Trace:
> > > kern  :warn  : [   97.936419]  <IRQ>
> > > kern :warn : [   97.936461] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1))
> > > kern :warn : [   97.936529] __might_resched (kernel/sched/core.c:10152)
> > > kern :warn : [   97.936598] ? preempt_notifier_dec (kernel/sched/core.c:10106)
> > > kern :warn : [   97.936672] __kmem_cache_alloc_node (include/linux/kernel.h:107 include/linux/sched/mm.h:306 mm/slab.h:710 mm/slub.c:3460 mm/slub.c:3517)
> > > kern :warn : [   97.936750] ? kasprintf (lib/kasprintf.c:54)
> > > kern :warn : [   97.936812] ? pointer (lib/vsprintf.c:2755)
> > > kern :warn : [   97.936875] ? kasprintf (lib/kasprintf.c:54)
> > > kern :warn : [   97.936936] __kmalloc_node_track_caller (include/linux/kasan.h:198 mm/slab_common.c:1007 mm/slab_common.c:1027)
> > > kern :warn : [   97.937017] kvasprintf (lib/kasprintf.c:25)
> > > kern :warn : [   97.937079] ? bust_spinlocks (lib/kasprintf.c:16)
> > > kern :warn : [   97.937146] ? enqueue_hrtimer (kernel/time/hrtimer.c:1095 (discriminator 3))
> > > kern :warn : [   97.937214] ? tick_nohz_highres_handler (kernel/time/tick-sched.c:1530)
> > > kern :warn : [   97.937295] kasprintf (lib/kasprintf.c:54)
> > > kern :warn : [   97.937354] ? kvasprintf_const (lib/kasprintf.c:54)
> > > kern :warn : [   97.937426] ? _raw_spin_lock_irqsave (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2164 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162)
> > > kern :warn : [   97.937501] ? _raw_read_unlock_irqrestore (kernel/locking/spinlock.c:161)
> > > kern :warn : [   97.937581] ? _raw_spin_lock (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2164 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:134 kernel/locking/spinlock.c:154)
> > > kern :warn : [   97.937648] ? _raw_write_lock_irq (kernel/locking/spinlock.c:153)
> > > kern :warn : [   97.937719] ? irq_pm_check_wakeup (kernel/irq/pm.c:24)
> > > kern :warn : [   97.937793] pm_system_wakeup (drivers/base/power/wakeup.c:981)
> > > kern :warn : [   97.937862] irq_pm_check_wakeup (kernel/irq/pm.c:24)
> > > kern :warn : [   97.937933] handle_fasteoi_irq (kernel/irq/chip.c:518 kernel/irq/chip.c:502 kernel/irq/chip.c:698)
> > > kern :warn : [   97.938006] __common_interrupt (arch/x86/kernel/irq.c:271 (discriminator 22))
> > > kern :warn : [   97.938078] common_interrupt (arch/x86/kernel/irq.c:247 (discriminator 14))
> > > kern  :warn  : [   97.938145]  </IRQ>
> > > kern  :warn  : [   97.938187]  <TASK>
> > > kern :warn : [   97.938230] asm_common_interrupt (arch/x86/include/asm/idtentry.h:640)
> > > kern :warn : [   97.938301] RIP: 0010:cpuidle_enter_s2idle (arch/x86/include/asm/irqflags.h:78 drivers/cpuidle/cpuidle.c:199)
> > > kern :warn : [ 97.938384] Code: d9 75 83 85 d2 7f 11 48 83 c4 20 89 d0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 4c 89 c6 48 89 cf 89 14 24 e8 fb 64 9b 00 fb 8b 14 24 <eb> db 31 d2 eb d7 48 89 0c 24 48 89 74 24 08 e8 03 51 9f fe 4c 8b
> > > All code
> > > ========
> > >    0: d9 75 83                fnstenv -0x7d(%rbp)
> > >    3: 85 d2                   test   %edx,%edx
> > >    5: 7f 11                   jg     0x18
> > >    7: 48 83 c4 20             add    $0x20,%rsp
> > >    b: 89 d0                   mov    %edx,%eax
> > >    d: 5b                      pop    %rbx
> > >    e: 5d                      pop    %rbp
> > >    f: 41 5c                   pop    %r12
> > >   11: 41 5d                   pop    %r13
> > >   13: 41 5e                   pop    %r14
> > >   15: 41 5f                   pop    %r15
> > >   17: c3                      retq
> > >   18: 4c 89 c6                mov    %r8,%rsi
> > >   1b: 48 89 cf                mov    %rcx,%rdi
> > >   1e: 89 14 24                mov    %edx,(%rsp)
> > >   21: e8 fb 64 9b 00          callq  0x9b6521
> > >   26: fb                      sti
> > >   27: 8b 14 24                mov    (%rsp),%edx
> > >   2a:*        eb db                   jmp    0x7              <-- trapping instruction
> > >   2c: 31 d2                   xor    %edx,%edx
> > >   2e: eb d7                   jmp    0x7
> > >   30: 48 89 0c 24             mov    %rcx,(%rsp)
> > >   34: 48 89 74 24 08          mov    %rsi,0x8(%rsp)
> > >   39: e8 03 51 9f fe          callq  0xfffffffffe9f5141
> > >   3e: 4c                      rex.WR
> > >   3f: 8b                      .byte 0x8b
> > >
> > > Code starting with the faulting instruction
> > > ===========================================
> > >    0: eb db                   jmp    0xffffffffffffffdd
> > >    2: 31 d2                   xor    %edx,%edx
> > >    4: eb d7                   jmp    0xffffffffffffffdd
> > >    6: 48 89 0c 24             mov    %rcx,(%rsp)
> > >    a: 48 89 74 24 08          mov    %rsi,0x8(%rsp)
> > >    f: e8 03 51 9f fe          callq  0xfffffffffe9f5117
> > >   14: 4c                      rex.WR
> > >   15: 8b                      .byte 0x8b
> > > kern  :warn  : [   97.938632] RSP: 0018:ffffc90000177db8 EFLAGS: 00000286
> > > kern  :warn  : [   97.938717] RAX: 00000000037a402c RBX: 0000000000000009 RCX: 0000000000000000
> > > kern  :warn  : [   97.938823] RDX: 0000000000000008 RSI: 0000000937e23ad2 RDI: 0000000000000000
> > > kern  :warn  : [   97.938927] RBP: ffffffff84f7f680 R08: 0000000000000000 R09: ffffed1021ad5530
> > > kern  :warn  : [   97.939032] R10: ffff88810d6aa987 R11: 0000000000000000 R12: ffff88820decca10
> > > kern  :warn  : [   97.939137] R13: dffffc0000000000 R14: 000000000027ac40 R15: 000000000027ac40
> > > kern :warn : [   97.939244] ? cpuidle_enter_s2idle (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 drivers/cpuidle/cpuidle.c:199)
> > > kern :warn : [   97.939319] cpuidle_idle_call (kernel/sched/idle.c:190)
> > > kern :warn : [   97.939389] ? arch_cpu_idle_exit+0x40/0x40
> > > kern :warn : [   97.939459] ? mark_tsc_async_resets (arch/x86/kernel/tsc_sync.c:51)
> > > kern :warn : [   97.939536] do_idle (kernel/sched/idle.c:282)
> > > kern :warn : [   97.939594] cpu_startup_entry (kernel/sched/idle.c:379 (discriminator 1))
> > > kern :warn : [   97.939662] start_secondary (arch/x86/kernel/smpboot.c:224 arch/x86/kernel/smpboot.c:304)
> > > kern :warn : [   97.939731] ? set_cpu_sibling_map (arch/x86/kernel/smpboot.c:254)
> > > kern :warn : [   97.939808] ? soft_restart_cpu (arch/x86/kernel/head_64.S:485)
> > > kern :warn : [   97.939878] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:448)
> > > kern  :warn  : [   97.939963]  </TASK>
> > > kern :info : [   97.948803] bdw_uncore 0000:00:00.0: PM: calling pci_pm_resume_noirq+0x0/0x390 @ 85, parent: 00
> > > kern :info : [   97.948993] bdw_uncore 0000:00:00.0: PM: pci_pm_resume_noirq+0x0/0x390 returned 0 after 17 usecs
> > >
> > >
> > >
> > > The kernel config and materials to reproduce are available at:
> > > https://download.01.org/0day-ci/archive/20240129/202401292109.f115a688-oliver.sang@intel.com
> > >
> > >
> > >
> > > --
> > > 0-DAY CI Kernel Test Service
> > > https://github.com/intel/lkp-tests/wiki
> > >
> >
> > Hi Greg,
> >
> > This is an example where "pm_system_wakeup" can be called from atomic context. It also gives an actual instance where we can utilise this feature to find the source.
> >
> > The bug reported due to "GFP_KERNEL" flag, but it will be applicable for using "mutex_lock" as well.
> > I will change to ATOMIC flag in kmalloc, as well as use raw_spin_lock* instead of mutex_lock.
> 
> Maybe instead of trying to add bandaids, you could work on making the
> wakeup sources infrastructure suitable for addressing your use case?

Sure, Rafael. Thanks for acknowledging it. We are making a plan to correct wakeup sources infrastructure but this patch will help those products
which are already released and seeking for "how to debug and fix".

As per my understanding, this current patch will help find the issues in projects that are based on <6.8 mainline, and
the developer/OEM/ODM can find and fix the issues easily.

Warm Regards,
Vimal Kumar

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-02-05  4:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-09 13:02 [PATCH v3] PM / sleep: Mechanism to find source aborting kernel suspend transition Vimal Kumar
2024-01-29 14:08 ` kernel test robot
2024-02-01  4:44   ` Vimal Kumar
2024-02-01 13:59     ` Rafael J. Wysocki
2024-02-05  4:54       ` Vimal Kumar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.