All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5] kthread: Add debugobject support
@ 2020-08-17  6:37 Qianli Zhao
  2020-09-07  8:02 ` qianli zhao
  2020-10-01 14:34 ` Thomas Gleixner
  0 siblings, 2 replies; 4+ messages in thread
From: Qianli Zhao @ 2020-08-17  6:37 UTC (permalink / raw)
  To: tglx, axboe, akpm, Felix.Kuehling, sboyd
  Cc: john.stultz, ben.dooks, bfields, cl, linux-kernel, zhaoqianli

From: Qianli Zhao <zhaoqianli@xiaomi.com>

Add debugobject support to track the life time of kthread_work
which is used to detect reinitialization/free active object problems
Add kthread_init_work_onstack()/kthread_init_delayed_work_onstack() for
kthread onstack support

If we reinitialize a kthread_work that has been activated,
this will cause delayed_work_list/work_list corruption.
enable this config,there is an chance to fixup these errors
or WARNING the wrong use of kthread_work

[30858.395766] list_del corruption. next->prev should be ffffffe388ebbf88, but was ffffffe388ebb588
[30858.395788] WARNING: CPU: 2 PID: 387 at kernel/msm-4.19/lib/list_debug.c:56 __list_del_entry_valid+0xc8/0xd0
...
[30858.395906] Call trace:
[30858.395909]  __list_del_entry_valid+0xc8/0xd0
[30858.395912]  __kthread_cancel_work_sync+0x98/0x138
[30858.395915]  kthread_cancel_delayed_work_sync+0x10/0x20
[30858.395917]  sde_encoder_resource_control+0xe8/0x12c0
[30858.395920]  sde_encoder_prepare_for_kickoff+0x5dc/0x2008
[30858.395923]  sde_crtc_commit_kickoff+0x280/0x890
[30858.395925]  sde_kms_commit+0x16c/0x278
[30858.395928]  complete_commit+0x3c4/0x760
[30858.395931]  _msm_drm_commit_work_cb+0xec/0x1e0
[30858.395933]  kthread_worker_fn+0xf8/0x190
[30858.395935]  kthread+0x118/0x128
[30858.395938]  ret_from_fork+0x10/0x18

crash> struct kthread_worker.delayed_work_list 0xffffffe3893925f0
 [ffffffe389392620] delayed_work_list = {
    next = 0xffffffe388ebbf88,
    prev = 0xffffffe388ebb588
  }
crash> list 0xffffffe388ebbf88
ffffffe388ebbf88

Signed-off-by: Qianli Zhao <zhaoqianli@xiaomi.com>
---
V5:
- Fix format error checked by checkpatch.pl 

V4:
- Changelog update
- Add comment for KWORK_ENTRY_STATIC
- Code format modification 
- Check worker availability early in kthread_flush_work

V3:
- changelog update
- add fixup_assert_init support
- move debug_kwork_activate/debug_kwork_deactivate before list operation
- name the kconfig CONFIG_DEBUG_OBJECTS_KTHREAD_WORK
- use kthread_init_work_onstack/destroy_kwork_on_stack when kthread_work used on stack
- __init_kwork before clear kthread_work in kthread_init_work
---
 include/linux/kthread.h |  30 ++++++++++-
 include/linux/poison.h  |   4 ++
 kernel/kthread.c        | 136 ++++++++++++++++++++++++++++++++++++++++++++++--
 lib/Kconfig.debug       |  10 ++++
 4 files changed, 174 insertions(+), 6 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 65b81e0..706302b 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -108,6 +108,17 @@ struct kthread_delayed_work {
 	struct timer_list timer;
 };
 
+#ifdef CONFIG_DEBUG_OBJECTS_KTHREAD_WORK
+extern void __init_kwork(struct kthread_work *kwork, int onstack);
+extern void destroy_kwork_on_stack(struct kthread_work *kwork);
+extern void destroy_delayed_kwork_on_stack(struct kthread_delayed_work *kdwork);
+#else
+static inline void __init_kwork(struct kthread_work *kwork, int onstack) { }
+static inline void destroy_kwork_on_stack(struct kthread_work *kwork) { }
+static inline void destroy_delayed_kwork_on_stack(struct kthread_delayed_work *kdwork) { }
+#endif
+
+
 #define KTHREAD_WORKER_INIT(worker)	{				\
 	.lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock),		\
 	.work_list = LIST_HEAD_INIT((worker).work_list),		\
@@ -115,7 +126,7 @@ struct kthread_delayed_work {
 	}
 
 #define KTHREAD_WORK_INIT(work, fn)	{				\
-	.node = LIST_HEAD_INIT((work).node),				\
+	.node = { .next = KWORK_ENTRY_STATIC },				\
 	.func = (fn),							\
 	}
 
@@ -159,6 +170,15 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
 
 #define kthread_init_work(work, fn)					\
 	do {								\
+		__init_kwork(work, 0);						\
+		memset((work), 0, sizeof(struct kthread_work));		\
+		INIT_LIST_HEAD(&(work)->node);				\
+		(work)->func = (fn);					\
+	} while (0)
+
+#define kthread_init_work_onstack(work, fn)					\
+	do {								\
+		__init_kwork(work, 1);						\
 		memset((work), 0, sizeof(struct kthread_work));		\
 		INIT_LIST_HEAD(&(work)->node);				\
 		(work)->func = (fn);					\
@@ -172,6 +192,14 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
 			     TIMER_IRQSAFE);				\
 	} while (0)
 
+#define kthread_init_delayed_work_onstack(dwork, fn)				\
+	do {								\
+		kthread_init_work_onstack(&(dwork)->work, (fn));		\
+		__init_timer_on_stack(&(dwork)->timer,				\
+			     kthread_delayed_work_timer_fn,		\
+			     TIMER_IRQSAFE);				\
+	} while (0)
+
 int kthread_worker_fn(void *worker_ptr);
 
 __printf(2, 3)
diff --git a/include/linux/poison.h b/include/linux/poison.h
index df34330..d9095ca 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -86,4 +86,8 @@
 /********** security/ **********/
 #define KEY_DESTROY		0xbd
 
+/********** kernel/kthread **********/
+/*indicate a static kthread_work initializer for the object debugging code.*/
+#define KWORK_ENTRY_STATIC	((void *) 0x600 + POISON_POINTER_DELTA)
+
 #endif
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 132f84a..c283b24 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -67,6 +67,119 @@ enum KTHREAD_BITS {
 	KTHREAD_SHOULD_PARK,
 };
 
+#ifdef CONFIG_DEBUG_OBJECTS_KTHREAD_WORK
+static struct debug_obj_descr kwork_debug_descr;
+
+static void *kwork_debug_hint(void *addr)
+{
+	return ((struct kthread_work *) addr)->func;
+}
+
+static bool kwork_is_static_object(void *addr)
+{
+	struct kthread_work *kwork = addr;
+
+	return (kwork->node.prev == NULL &&
+		kwork->node.next == KWORK_ENTRY_STATIC);
+}
+
+static bool kwork_fixup_init(void *addr, enum debug_obj_state state)
+{
+	struct kthread_work *kwork = addr;
+
+	switch (state) {
+	case ODEBUG_STATE_ACTIVE:
+		kthread_cancel_work_sync(kwork);
+		debug_object_init(kwork, &kwork_debug_descr);
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool kwork_fixup_free(void *addr, enum debug_obj_state state)
+{
+	struct kthread_work *kwork = addr;
+
+	switch (state) {
+	case ODEBUG_STATE_ACTIVE:
+		kthread_cancel_work_sync(kwork);
+		debug_object_free(kwork, &kwork_debug_descr);
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void stub_kthread_work(struct kthread_work *unuse)
+{
+	WARN_ON(1);
+}
+
+static bool kwork_fixup_assert_init(void *addr, enum debug_obj_state state)
+{
+	struct kthread_work *kwork = addr;
+
+	switch (state) {
+	case ODEBUG_STATE_NOTAVAILABLE:
+		kthread_init_work(kwork, stub_kthread_work);
+		return true;
+	default:
+		return false;
+	}
+}
+
+static struct debug_obj_descr kwork_debug_descr = {
+	.name			= "kthread_work",
+	.debug_hint		= kwork_debug_hint,
+	.is_static_object	= kwork_is_static_object,
+	.fixup_init		= kwork_fixup_init,
+	.fixup_free		= kwork_fixup_free,
+	.fixup_assert_init	= kwork_fixup_assert_init,
+};
+
+static inline void debug_kwork_activate(struct kthread_work *kwork)
+{
+	debug_object_activate(kwork, &kwork_debug_descr);
+}
+
+static inline void debug_kwork_deactivate(struct kthread_work *kwork)
+{
+	debug_object_deactivate(kwork, &kwork_debug_descr);
+}
+
+static inline void debug_kwork_assert_init(struct kthread_work *kwork)
+{
+	debug_object_assert_init(kwork, &kwork_debug_descr);
+}
+
+void __init_kwork(struct kthread_work *kwork, int onstack)
+{
+	if (onstack)
+		debug_object_init_on_stack(kwork, &kwork_debug_descr);
+	else
+		debug_object_init(kwork, &kwork_debug_descr);
+}
+EXPORT_SYMBOL_GPL(__init_kwork);
+
+void destroy_kwork_on_stack(struct kthread_work *kwork)
+{
+	debug_object_free(kwork, &kwork_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_kwork_on_stack);
+
+void destroy_delayed_kwork_on_stack(struct kthread_delayed_work *kdwork)
+{
+	destroy_timer_on_stack(&kdwork->timer);
+	debug_object_free(&kdwork->work, &kwork_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_delayed_kwork_on_stack);
+#else
+static inline void debug_kwork_activate(struct kthread_work *kwork) { }
+static inline void debug_kwork_deactivate(struct kthread_work *kwork) { }
+static inline void debug_kwork_assert_init(struct kthread_work *kwork) { }
+#endif
+
 static inline void set_kthread_struct(void *kthread)
 {
 	/*
@@ -697,6 +810,7 @@ int kthread_worker_fn(void *worker_ptr)
 	if (!list_empty(&worker->work_list)) {
 		work = list_first_entry(&worker->work_list,
 					struct kthread_work, node);
+		debug_kwork_deactivate(work);
 		list_del_init(&work->node);
 	}
 	worker->current_work = work;
@@ -833,6 +947,7 @@ static void kthread_insert_work(struct kthread_worker *worker,
 {
 	kthread_insert_work_sanity_check(worker, work);
 
+	debug_kwork_activate(work);
 	list_add_tail(&work->node, pos);
 	work->worker = worker;
 	if (!worker->current_work && likely(worker->task))
@@ -857,6 +972,7 @@ bool kthread_queue_work(struct kthread_worker *worker,
 	bool ret = false;
 	unsigned long flags;
 
+	debug_kwork_assert_init(work);
 	raw_spin_lock_irqsave(&worker->lock, flags);
 	if (!queuing_blocked(worker, work)) {
 		kthread_insert_work(worker, work, &worker->work_list);
@@ -895,6 +1011,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
 
 	/* Move the work from worker->delayed_work_list. */
 	WARN_ON_ONCE(list_empty(&work->node));
+	debug_kwork_deactivate(work);
 	list_del_init(&work->node);
 	kthread_insert_work(worker, work, &worker->work_list);
 
@@ -925,6 +1042,7 @@ static void __kthread_queue_delayed_work(struct kthread_worker *worker,
 	/* Be paranoid and try to detect possible races already now. */
 	kthread_insert_work_sanity_check(worker, work);
 
+	debug_kwork_activate(work);
 	list_add(&work->node, &worker->delayed_work_list);
 	work->worker = worker;
 	timer->expires = jiffies + delay;
@@ -954,6 +1072,7 @@ bool kthread_queue_delayed_work(struct kthread_worker *worker,
 	unsigned long flags;
 	bool ret = false;
 
+	debug_kwork_assert_init(work);
 	raw_spin_lock_irqsave(&worker->lock, flags);
 
 	if (!queuing_blocked(worker, work)) {
@@ -987,16 +1106,17 @@ static void kthread_flush_work_fn(struct kthread_work *work)
 void kthread_flush_work(struct kthread_work *work)
 {
 	struct kthread_flush_work fwork = {
-		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
-		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
+		.done = COMPLETION_INITIALIZER_ONSTACK(fwork.done),
 	};
 	struct kthread_worker *worker;
 	bool noop = false;
 
+	debug_kwork_assert_init(work);
 	worker = work->worker;
 	if (!worker)
 		return;
 
+	kthread_init_work_onstack(&fwork.work, kthread_flush_work_fn);
 	raw_spin_lock_irq(&worker->lock);
 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
 	WARN_ON_ONCE(work->worker != worker);
@@ -1013,6 +1133,7 @@ void kthread_flush_work(struct kthread_work *work)
 
 	if (!noop)
 		wait_for_completion(&fwork.done);
+	destroy_kwork_on_stack(&fwork.work);
 }
 EXPORT_SYMBOL_GPL(kthread_flush_work);
 
@@ -1053,6 +1174,7 @@ static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
 	 * be from worker->work_list or from worker->delayed_work_list.
 	 */
 	if (!list_empty(&work->node)) {
+		debug_kwork_deactivate(work);
 		list_del_init(&work->node);
 		return true;
 	}
@@ -1091,6 +1213,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
 	unsigned long flags;
 	int ret = false;
 
+	debug_kwork_assert_init(work);
 	raw_spin_lock_irqsave(&worker->lock, flags);
 
 	/* Do not bother with canceling when never queued. */
@@ -1115,10 +1238,12 @@ EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
 
 static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
 {
-	struct kthread_worker *worker = work->worker;
+	struct kthread_worker *worker;
 	unsigned long flags;
 	int ret = false;
 
+	debug_kwork_assert_init(work);
+	worker = work->worker;
 	if (!worker)
 		goto out;
 
@@ -1194,12 +1319,13 @@ EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync);
 void kthread_flush_worker(struct kthread_worker *worker)
 {
 	struct kthread_flush_work fwork = {
-		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
-		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
+		.done = COMPLETION_INITIALIZER_ONSTACK(fwork.done),
 	};
 
+	kthread_init_work_onstack(&fwork.work, kthread_flush_work_fn);
 	kthread_queue_work(worker, &fwork.work);
 	wait_for_completion(&fwork.done);
+	destroy_kwork_on_stack(&fwork.work);
 }
 EXPORT_SYMBOL_GPL(kthread_flush_worker);
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9ad9210..71d6696 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -540,6 +540,16 @@ config DEBUG_OBJECTS_WORK
 	  work queue routines to track the life time of work objects and
 	  validate the work operations.
 
+config DEBUG_OBJECTS_KTHREAD_WORK
+	bool "Debug kthread work objects"
+	depends on DEBUG_OBJECTS
+	help
+	  If you say Y here, additional code will be inserted into the
+	  kthread routines to track the life time of kthread_work objects
+	  and validate the kthread_work operations.
+
+	  If unsure, say N.
+
 config DEBUG_OBJECTS_RCU_HEAD
 	bool "Debug RCU callbacks objects"
 	depends on DEBUG_OBJECTS
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] kthread: Add debugobject support
  2020-08-17  6:37 [PATCH v5] kthread: Add debugobject support Qianli Zhao
@ 2020-09-07  8:02 ` qianli zhao
  2020-10-01 14:34 ` Thomas Gleixner
  1 sibling, 0 replies; 4+ messages in thread
From: qianli zhao @ 2020-09-07  8:02 UTC (permalink / raw)
  To: Thomas Gleixner, axboe, akpm, Felix.Kuehling, Stephen Boyd
  Cc: John Stultz, ben.dooks, bfields, cl, linux-kernel, Qianli Zhao

Dear maintainer

Is this change ignored or rejected?
I'm not sure who is the maintainer of kthread, please help give me a
definite reply

Thanks

On Mon, 17 Aug 2020 at 14:37, Qianli Zhao <zhaoqianligood@gmail.com> wrote:
>
> From: Qianli Zhao <zhaoqianli@xiaomi.com>
>
> Add debugobject support to track the life time of kthread_work
> which is used to detect reinitialization/free active object problems
> Add kthread_init_work_onstack()/kthread_init_delayed_work_onstack() for
> kthread onstack support
>
> If we reinitialize a kthread_work that has been activated,
> this will cause delayed_work_list/work_list corruption.
> enable this config,there is an chance to fixup these errors
> or WARNING the wrong use of kthread_work
>
> [30858.395766] list_del corruption. next->prev should be ffffffe388ebbf88, but was ffffffe388ebb588
> [30858.395788] WARNING: CPU: 2 PID: 387 at kernel/msm-4.19/lib/list_debug.c:56 __list_del_entry_valid+0xc8/0xd0
> ...
> [30858.395906] Call trace:
> [30858.395909]  __list_del_entry_valid+0xc8/0xd0
> [30858.395912]  __kthread_cancel_work_sync+0x98/0x138
> [30858.395915]  kthread_cancel_delayed_work_sync+0x10/0x20
> [30858.395917]  sde_encoder_resource_control+0xe8/0x12c0
> [30858.395920]  sde_encoder_prepare_for_kickoff+0x5dc/0x2008
> [30858.395923]  sde_crtc_commit_kickoff+0x280/0x890
> [30858.395925]  sde_kms_commit+0x16c/0x278
> [30858.395928]  complete_commit+0x3c4/0x760
> [30858.395931]  _msm_drm_commit_work_cb+0xec/0x1e0
> [30858.395933]  kthread_worker_fn+0xf8/0x190
> [30858.395935]  kthread+0x118/0x128
> [30858.395938]  ret_from_fork+0x10/0x18
>
> crash> struct kthread_worker.delayed_work_list 0xffffffe3893925f0
>  [ffffffe389392620] delayed_work_list = {
>     next = 0xffffffe388ebbf88,
>     prev = 0xffffffe388ebb588
>   }
> crash> list 0xffffffe388ebbf88
> ffffffe388ebbf88
>
> Signed-off-by: Qianli Zhao <zhaoqianli@xiaomi.com>
> ---
> V5:
> - Fix format error checked by checkpatch.pl
>
> V4:
> - Changelog update
> - Add comment for KWORK_ENTRY_STATIC
> - Code format modification
> - Check worker availability early in kthread_flush_work
>
> V3:
> - changelog update
> - add fixup_assert_init support
> - move debug_kwork_activate/debug_kwork_deactivate before list operation
> - name the kconfig CONFIG_DEBUG_OBJECTS_KTHREAD_WORK
> - use kthread_init_work_onstack/destroy_kwork_on_stack when kthread_work used on stack
> - __init_kwork before clear kthread_work in kthread_init_work
> ---
>  include/linux/kthread.h |  30 ++++++++++-
>  include/linux/poison.h  |   4 ++
>  kernel/kthread.c        | 136 ++++++++++++++++++++++++++++++++++++++++++++++--
>  lib/Kconfig.debug       |  10 ++++
>  4 files changed, 174 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/kthread.h b/include/linux/kthread.h
> index 65b81e0..706302b 100644
> --- a/include/linux/kthread.h
> +++ b/include/linux/kthread.h
> @@ -108,6 +108,17 @@ struct kthread_delayed_work {
>         struct timer_list timer;
>  };
>
> +#ifdef CONFIG_DEBUG_OBJECTS_KTHREAD_WORK
> +extern void __init_kwork(struct kthread_work *kwork, int onstack);
> +extern void destroy_kwork_on_stack(struct kthread_work *kwork);
> +extern void destroy_delayed_kwork_on_stack(struct kthread_delayed_work *kdwork);
> +#else
> +static inline void __init_kwork(struct kthread_work *kwork, int onstack) { }
> +static inline void destroy_kwork_on_stack(struct kthread_work *kwork) { }
> +static inline void destroy_delayed_kwork_on_stack(struct kthread_delayed_work *kdwork) { }
> +#endif
> +
> +
>  #define KTHREAD_WORKER_INIT(worker)    {                               \
>         .lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock),                \
>         .work_list = LIST_HEAD_INIT((worker).work_list),                \
> @@ -115,7 +126,7 @@ struct kthread_delayed_work {
>         }
>
>  #define KTHREAD_WORK_INIT(work, fn)    {                               \
> -       .node = LIST_HEAD_INIT((work).node),                            \
> +       .node = { .next = KWORK_ENTRY_STATIC },                         \
>         .func = (fn),                                                   \
>         }
>
> @@ -159,6 +170,15 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
>
>  #define kthread_init_work(work, fn)                                    \
>         do {                                                            \
> +               __init_kwork(work, 0);                                          \
> +               memset((work), 0, sizeof(struct kthread_work));         \
> +               INIT_LIST_HEAD(&(work)->node);                          \
> +               (work)->func = (fn);                                    \
> +       } while (0)
> +
> +#define kthread_init_work_onstack(work, fn)                                    \
> +       do {                                                            \
> +               __init_kwork(work, 1);                                          \
>                 memset((work), 0, sizeof(struct kthread_work));         \
>                 INIT_LIST_HEAD(&(work)->node);                          \
>                 (work)->func = (fn);                                    \
> @@ -172,6 +192,14 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
>                              TIMER_IRQSAFE);                            \
>         } while (0)
>
> +#define kthread_init_delayed_work_onstack(dwork, fn)                           \
> +       do {                                                            \
> +               kthread_init_work_onstack(&(dwork)->work, (fn));                \
> +               __init_timer_on_stack(&(dwork)->timer,                          \
> +                            kthread_delayed_work_timer_fn,             \
> +                            TIMER_IRQSAFE);                            \
> +       } while (0)
> +
>  int kthread_worker_fn(void *worker_ptr);
>
>  __printf(2, 3)
> diff --git a/include/linux/poison.h b/include/linux/poison.h
> index df34330..d9095ca 100644
> --- a/include/linux/poison.h
> +++ b/include/linux/poison.h
> @@ -86,4 +86,8 @@
>  /********** security/ **********/
>  #define KEY_DESTROY            0xbd
>
> +/********** kernel/kthread **********/
> +/*indicate a static kthread_work initializer for the object debugging code.*/
> +#define KWORK_ENTRY_STATIC     ((void *) 0x600 + POISON_POINTER_DELTA)
> +
>  #endif
> diff --git a/kernel/kthread.c b/kernel/kthread.c
> index 132f84a..c283b24 100644
> --- a/kernel/kthread.c
> +++ b/kernel/kthread.c
> @@ -67,6 +67,119 @@ enum KTHREAD_BITS {
>         KTHREAD_SHOULD_PARK,
>  };
>
> +#ifdef CONFIG_DEBUG_OBJECTS_KTHREAD_WORK
> +static struct debug_obj_descr kwork_debug_descr;
> +
> +static void *kwork_debug_hint(void *addr)
> +{
> +       return ((struct kthread_work *) addr)->func;
> +}
> +
> +static bool kwork_is_static_object(void *addr)
> +{
> +       struct kthread_work *kwork = addr;
> +
> +       return (kwork->node.prev == NULL &&
> +               kwork->node.next == KWORK_ENTRY_STATIC);
> +}
> +
> +static bool kwork_fixup_init(void *addr, enum debug_obj_state state)
> +{
> +       struct kthread_work *kwork = addr;
> +
> +       switch (state) {
> +       case ODEBUG_STATE_ACTIVE:
> +               kthread_cancel_work_sync(kwork);
> +               debug_object_init(kwork, &kwork_debug_descr);
> +               return true;
> +       default:
> +               return false;
> +       }
> +}
> +
> +static bool kwork_fixup_free(void *addr, enum debug_obj_state state)
> +{
> +       struct kthread_work *kwork = addr;
> +
> +       switch (state) {
> +       case ODEBUG_STATE_ACTIVE:
> +               kthread_cancel_work_sync(kwork);
> +               debug_object_free(kwork, &kwork_debug_descr);
> +               return true;
> +       default:
> +               return false;
> +       }
> +}
> +
> +static void stub_kthread_work(struct kthread_work *unuse)
> +{
> +       WARN_ON(1);
> +}
> +
> +static bool kwork_fixup_assert_init(void *addr, enum debug_obj_state state)
> +{
> +       struct kthread_work *kwork = addr;
> +
> +       switch (state) {
> +       case ODEBUG_STATE_NOTAVAILABLE:
> +               kthread_init_work(kwork, stub_kthread_work);
> +               return true;
> +       default:
> +               return false;
> +       }
> +}
> +
> +static struct debug_obj_descr kwork_debug_descr = {
> +       .name                   = "kthread_work",
> +       .debug_hint             = kwork_debug_hint,
> +       .is_static_object       = kwork_is_static_object,
> +       .fixup_init             = kwork_fixup_init,
> +       .fixup_free             = kwork_fixup_free,
> +       .fixup_assert_init      = kwork_fixup_assert_init,
> +};
> +
> +static inline void debug_kwork_activate(struct kthread_work *kwork)
> +{
> +       debug_object_activate(kwork, &kwork_debug_descr);
> +}
> +
> +static inline void debug_kwork_deactivate(struct kthread_work *kwork)
> +{
> +       debug_object_deactivate(kwork, &kwork_debug_descr);
> +}
> +
> +static inline void debug_kwork_assert_init(struct kthread_work *kwork)
> +{
> +       debug_object_assert_init(kwork, &kwork_debug_descr);
> +}
> +
> +void __init_kwork(struct kthread_work *kwork, int onstack)
> +{
> +       if (onstack)
> +               debug_object_init_on_stack(kwork, &kwork_debug_descr);
> +       else
> +               debug_object_init(kwork, &kwork_debug_descr);
> +}
> +EXPORT_SYMBOL_GPL(__init_kwork);
> +
> +void destroy_kwork_on_stack(struct kthread_work *kwork)
> +{
> +       debug_object_free(kwork, &kwork_debug_descr);
> +}
> +EXPORT_SYMBOL_GPL(destroy_kwork_on_stack);
> +
> +void destroy_delayed_kwork_on_stack(struct kthread_delayed_work *kdwork)
> +{
> +       destroy_timer_on_stack(&kdwork->timer);
> +       debug_object_free(&kdwork->work, &kwork_debug_descr);
> +}
> +EXPORT_SYMBOL_GPL(destroy_delayed_kwork_on_stack);
> +#else
> +static inline void debug_kwork_activate(struct kthread_work *kwork) { }
> +static inline void debug_kwork_deactivate(struct kthread_work *kwork) { }
> +static inline void debug_kwork_assert_init(struct kthread_work *kwork) { }
> +#endif
> +
>  static inline void set_kthread_struct(void *kthread)
>  {
>         /*
> @@ -697,6 +810,7 @@ int kthread_worker_fn(void *worker_ptr)
>         if (!list_empty(&worker->work_list)) {
>                 work = list_first_entry(&worker->work_list,
>                                         struct kthread_work, node);
> +               debug_kwork_deactivate(work);
>                 list_del_init(&work->node);
>         }
>         worker->current_work = work;
> @@ -833,6 +947,7 @@ static void kthread_insert_work(struct kthread_worker *worker,
>  {
>         kthread_insert_work_sanity_check(worker, work);
>
> +       debug_kwork_activate(work);
>         list_add_tail(&work->node, pos);
>         work->worker = worker;
>         if (!worker->current_work && likely(worker->task))
> @@ -857,6 +972,7 @@ bool kthread_queue_work(struct kthread_worker *worker,
>         bool ret = false;
>         unsigned long flags;
>
> +       debug_kwork_assert_init(work);
>         raw_spin_lock_irqsave(&worker->lock, flags);
>         if (!queuing_blocked(worker, work)) {
>                 kthread_insert_work(worker, work, &worker->work_list);
> @@ -895,6 +1011,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
>
>         /* Move the work from worker->delayed_work_list. */
>         WARN_ON_ONCE(list_empty(&work->node));
> +       debug_kwork_deactivate(work);
>         list_del_init(&work->node);
>         kthread_insert_work(worker, work, &worker->work_list);
>
> @@ -925,6 +1042,7 @@ static void __kthread_queue_delayed_work(struct kthread_worker *worker,
>         /* Be paranoid and try to detect possible races already now. */
>         kthread_insert_work_sanity_check(worker, work);
>
> +       debug_kwork_activate(work);
>         list_add(&work->node, &worker->delayed_work_list);
>         work->worker = worker;
>         timer->expires = jiffies + delay;
> @@ -954,6 +1072,7 @@ bool kthread_queue_delayed_work(struct kthread_worker *worker,
>         unsigned long flags;
>         bool ret = false;
>
> +       debug_kwork_assert_init(work);
>         raw_spin_lock_irqsave(&worker->lock, flags);
>
>         if (!queuing_blocked(worker, work)) {
> @@ -987,16 +1106,17 @@ static void kthread_flush_work_fn(struct kthread_work *work)
>  void kthread_flush_work(struct kthread_work *work)
>  {
>         struct kthread_flush_work fwork = {
> -               KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
> -               COMPLETION_INITIALIZER_ONSTACK(fwork.done),
> +               .done = COMPLETION_INITIALIZER_ONSTACK(fwork.done),
>         };
>         struct kthread_worker *worker;
>         bool noop = false;
>
> +       debug_kwork_assert_init(work);
>         worker = work->worker;
>         if (!worker)
>                 return;
>
> +       kthread_init_work_onstack(&fwork.work, kthread_flush_work_fn);
>         raw_spin_lock_irq(&worker->lock);
>         /* Work must not be used with >1 worker, see kthread_queue_work(). */
>         WARN_ON_ONCE(work->worker != worker);
> @@ -1013,6 +1133,7 @@ void kthread_flush_work(struct kthread_work *work)
>
>         if (!noop)
>                 wait_for_completion(&fwork.done);
> +       destroy_kwork_on_stack(&fwork.work);
>  }
>  EXPORT_SYMBOL_GPL(kthread_flush_work);
>
> @@ -1053,6 +1174,7 @@ static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
>          * be from worker->work_list or from worker->delayed_work_list.
>          */
>         if (!list_empty(&work->node)) {
> +               debug_kwork_deactivate(work);
>                 list_del_init(&work->node);
>                 return true;
>         }
> @@ -1091,6 +1213,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
>         unsigned long flags;
>         int ret = false;
>
> +       debug_kwork_assert_init(work);
>         raw_spin_lock_irqsave(&worker->lock, flags);
>
>         /* Do not bother with canceling when never queued. */
> @@ -1115,10 +1238,12 @@ EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
>
>  static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
>  {
> -       struct kthread_worker *worker = work->worker;
> +       struct kthread_worker *worker;
>         unsigned long flags;
>         int ret = false;
>
> +       debug_kwork_assert_init(work);
> +       worker = work->worker;
>         if (!worker)
>                 goto out;
>
> @@ -1194,12 +1319,13 @@ EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync);
>  void kthread_flush_worker(struct kthread_worker *worker)
>  {
>         struct kthread_flush_work fwork = {
> -               KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
> -               COMPLETION_INITIALIZER_ONSTACK(fwork.done),
> +               .done = COMPLETION_INITIALIZER_ONSTACK(fwork.done),
>         };
>
> +       kthread_init_work_onstack(&fwork.work, kthread_flush_work_fn);
>         kthread_queue_work(worker, &fwork.work);
>         wait_for_completion(&fwork.done);
> +       destroy_kwork_on_stack(&fwork.work);
>  }
>  EXPORT_SYMBOL_GPL(kthread_flush_worker);
>
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 9ad9210..71d6696 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -540,6 +540,16 @@ config DEBUG_OBJECTS_WORK
>           work queue routines to track the life time of work objects and
>           validate the work operations.
>
> +config DEBUG_OBJECTS_KTHREAD_WORK
> +       bool "Debug kthread work objects"
> +       depends on DEBUG_OBJECTS
> +       help
> +         If you say Y here, additional code will be inserted into the
> +         kthread routines to track the life time of kthread_work objects
> +         and validate the kthread_work operations.
> +
> +         If unsure, say N.
> +
>  config DEBUG_OBJECTS_RCU_HEAD
>         bool "Debug RCU callbacks objects"
>         depends on DEBUG_OBJECTS
> --
> 2.7.4
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] kthread: Add debugobject support
  2020-08-17  6:37 [PATCH v5] kthread: Add debugobject support Qianli Zhao
  2020-09-07  8:02 ` qianli zhao
@ 2020-10-01 14:34 ` Thomas Gleixner
  2020-10-09  7:26   ` qianli zhao
  1 sibling, 1 reply; 4+ messages in thread
From: Thomas Gleixner @ 2020-10-01 14:34 UTC (permalink / raw)
  To: Qianli Zhao, axboe, akpm, Felix.Kuehling, sboyd
  Cc: john.stultz, ben.dooks, bfields, cl, linux-kernel, zhaoqianli

On Mon, Aug 17 2020 at 14:37, Qianli Zhao wrote:
> From: Qianli Zhao <zhaoqianli@xiaomi.com>
>
> Add debugobject support to track the life time of kthread_work
> which is used to detect reinitialization/free active object problems
> Add kthread_init_work_onstack()/kthread_init_delayed_work_onstack() for
> kthread onstack support
>
> If we reinitialize a kthread_work that has been activated,
> this will cause delayed_work_list/work_list corruption.
> enable this config,there is an chance to fixup these errors
> or WARNING the wrong use of kthread_work
>
> [30858.395766] list_del corruption. next->prev should be ffffffe388ebbf88, but was ffffffe388ebb588
> [30858.395788] WARNING: CPU: 2 PID: 387 at kernel/msm-4.19/lib/list_debug.c:56 __list_del_entry_valid+0xc8/0xd0
> ...
> [30858.395906] Call trace:
> [30858.395909]  __list_del_entry_valid+0xc8/0xd0
> [30858.395912]  __kthread_cancel_work_sync+0x98/0x138
> [30858.395915]  kthread_cancel_delayed_work_sync+0x10/0x20
> [30858.395917]  sde_encoder_resource_control+0xe8/0x12c0
> [30858.395920]  sde_encoder_prepare_for_kickoff+0x5dc/0x2008
> [30858.395923]  sde_crtc_commit_kickoff+0x280/0x890
> [30858.395925]  sde_kms_commit+0x16c/0x278
> [30858.395928]  complete_commit+0x3c4/0x760
> [30858.395931]  _msm_drm_commit_work_cb+0xec/0x1e0
> [30858.395933]  kthread_worker_fn+0xf8/0x190
> [30858.395935]  kthread+0x118/0x128
> [30858.395938]  ret_from_fork+0x10/0x18
>
> crash> struct kthread_worker.delayed_work_list 0xffffffe3893925f0
>  [ffffffe389392620] delayed_work_list = {
>     next = 0xffffffe388ebbf88,
>     prev = 0xffffffe388ebb588
>   }
> crash> list 0xffffffe388ebbf88
> ffffffe388ebbf88

This changelog is confusing at best. Something like this perhaps?

  kthread_work is not covered by debug objects, but the same problems as with
  regular work objects apply.

  Some of the issues like reinitialization of an active kthread_work are hard
  to debug because the problem manifests itself later in a completely
  different context.

  Add debugobject support along with the necessary fixup functions to make
  debugging of these problems less tedious. 

> +static void stub_kthread_work(struct kthread_work *unuse)

unused?

> +{
> +	WARN_ON(1);
> +}
>  void kthread_flush_work(struct kthread_work *work)
>  {
>  	struct kthread_flush_work fwork = {
> -		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
> -		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
> +		.done = COMPLETION_INITIALIZER_ONSTACK(fwork.done),

Eew. Why is the completion initialized seperately instead of being
initialized as part of kthread_init_work_onstack() ?

>  	};
>  	struct kthread_worker *worker;
>  	bool noop = false;
>  
> +	debug_kwork_assert_init(work);
>  	worker = work->worker;
>  	if (!worker)
>  		return;
>  
> +	kthread_init_work_onstack(&fwork.work, kthread_flush_work_fn);
>  
> @@ -1194,12 +1319,13 @@ EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync);
>  void kthread_flush_worker(struct kthread_worker *worker)
>  {
>  	struct kthread_flush_work fwork = {
> -		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
> -		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
> +		.done = COMPLETION_INITIALIZER_ONSTACK(fwork.done),
>  	};

Ditto.
  
> +	kthread_init_work_onstack(&fwork.work, kthread_flush_work_fn);
>  	kthread_queue_work(worker, &fwork.work);
>  	wait_for_completion(&fwork.done);
> +	destroy_kwork_on_stack(&fwork.work);

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] kthread: Add debugobject support
  2020-10-01 14:34 ` Thomas Gleixner
@ 2020-10-09  7:26   ` qianli zhao
  0 siblings, 0 replies; 4+ messages in thread
From: qianli zhao @ 2020-10-09  7:26 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: axboe, akpm, Felix.Kuehling, Stephen Boyd, John Stultz,
	ben.dooks, bfields, cl, linux-kernel, Qianli Zhao

Hi,Thomas

Thanks for your reply


On Thu, 1 Oct 2020 at 22:34, Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Mon, Aug 17 2020 at 14:37, Qianli Zhao wrote:
> > From: Qianli Zhao <zhaoqianli@xiaomi.com>
> >
> > Add debugobject support to track the life time of kthread_work
> > which is used to detect reinitialization/free active object problems
> > Add kthread_init_work_onstack()/kthread_init_delayed_work_onstack() for
> > kthread onstack support
> >
> > If we reinitialize a kthread_work that has been activated,
> > this will cause delayed_work_list/work_list corruption.
> > enable this config,there is an chance to fixup these errors
> > or WARNING the wrong use of kthread_work
> >
> > [30858.395766] list_del corruption. next->prev should be ffffffe388ebbf88, but was ffffffe388ebb588
> > [30858.395788] WARNING: CPU: 2 PID: 387 at kernel/msm-4.19/lib/list_debug.c:56 __list_del_entry_valid+0xc8/0xd0
> > ...
> > [30858.395906] Call trace:
> > [30858.395909]  __list_del_entry_valid+0xc8/0xd0
> > [30858.395912]  __kthread_cancel_work_sync+0x98/0x138
> > [30858.395915]  kthread_cancel_delayed_work_sync+0x10/0x20
> > [30858.395917]  sde_encoder_resource_control+0xe8/0x12c0
> > [30858.395920]  sde_encoder_prepare_for_kickoff+0x5dc/0x2008
> > [30858.395923]  sde_crtc_commit_kickoff+0x280/0x890
> > [30858.395925]  sde_kms_commit+0x16c/0x278
> > [30858.395928]  complete_commit+0x3c4/0x760
> > [30858.395931]  _msm_drm_commit_work_cb+0xec/0x1e0
> > [30858.395933]  kthread_worker_fn+0xf8/0x190
> > [30858.395935]  kthread+0x118/0x128
> > [30858.395938]  ret_from_fork+0x10/0x18
> >
> > crash> struct kthread_worker.delayed_work_list 0xffffffe3893925f0
> >  [ffffffe389392620] delayed_work_list = {
> >     next = 0xffffffe388ebbf88,
> >     prev = 0xffffffe388ebb588
> >   }
> > crash> list 0xffffffe388ebbf88
> > ffffffe388ebbf88
>
> This changelog is confusing at best. Something like this perhaps?
>
>   kthread_work is not covered by debug objects, but the same problems as with
>   regular work objects apply.
>
>   Some of the issues like reinitialization of an active kthread_work are hard
>   to debug because the problem manifests itself later in a completely
>   different context.
>
>   Add debugobject support along with the necessary fixup functions to make
>   debugging of these problems less tedious.
>

Will follow your tips to improve.

> > +static void stub_kthread_work(struct kthread_work *unuse)
>
> unused?
>
> > +{
> > +     WARN_ON(1);
> > +}

When the kthread_work is not initialized, kwork_fixup_assert_init()
will call kthread_init_work() to fixup this kthread_work,and
kthread_init_work() needs a function as a parameter,so we have to
quote an empty function(refer to stub_timer()).

> >  void kthread_flush_work(struct kthread_work *work)
> >  {
> >       struct kthread_flush_work fwork = {
> > -             KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
> > -             COMPLETION_INITIALIZER_ONSTACK(fwork.done),
> > +             .done = COMPLETION_INITIALIZER_ONSTACK(fwork.done),
>
> Eew. Why is the completion initialized seperately instead of being
> initialized as part of kthread_init_work_onstack() ?
>

I just try to keep the same as before,how about change as below?
completion initialized as part of kthread_init_work_onstack()
@@ -1319,10 +1318,9 @@ bool kthread_cancel_delayed_work_sync(struct
kthread_delayed_work *dwork)
  */
 void kthread_flush_worker(struct kthread_worker *worker)
 {
-       struct kthread_flush_work fwork = {
-               .done = COMPLETION_INITIALIZER_ONSTACK(fwork.done),
-       };
+       struct kthread_flush_work fwork;

+       fwork.done = COMPLETION_INITIALIZER_ONSTACK(fwork.done);
        kthread_init_work_onstack(&fwork.work, kthread_flush_work_fn);

> >       };
> >       struct kthread_worker *worker;
> >       bool noop = false;
> >
> > +     debug_kwork_assert_init(work);
> >       worker = work->worker;
> >       if (!worker)
> >               return;
> >
> > +     kthread_init_work_onstack(&fwork.work, kthread_flush_work_fn);
> >
> > @@ -1194,12 +1319,13 @@ EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync);
> >  void kthread_flush_worker(struct kthread_worker *worker)
> >  {
> >       struct kthread_flush_work fwork = {
> > -             KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
> > -             COMPLETION_INITIALIZER_ONSTACK(fwork.done),
> > +             .done = COMPLETION_INITIALIZER_ONSTACK(fwork.done),
> >       };
>
> Ditto.
>
> > +     kthread_init_work_onstack(&fwork.work, kthread_flush_work_fn);
> >       kthread_queue_work(worker, &fwork.work);
> >       wait_for_completion(&fwork.done);
> > +     destroy_kwork_on_stack(&fwork.work);
>
> Thanks,
>
>         tglx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-10-09  7:26 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-17  6:37 [PATCH v5] kthread: Add debugobject support Qianli Zhao
2020-09-07  8:02 ` qianli zhao
2020-10-01 14:34 ` Thomas Gleixner
2020-10-09  7:26   ` qianli zhao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.