All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] RFC: debugobjects: capture stack traces at _init() time
@ 2018-03-20 16:02 Daniel Vetter
  2018-03-20 16:19 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Daniel Vetter @ 2018-03-20 16:02 UTC (permalink / raw)
  To: Intel Graphics Development
  Cc: LKML, Daniel Vetter, Philippe Ombredanne, Greg Kroah-Hartman,
	Thomas Gleixner, Kate Stewart, Waiman Long, Daniel Vetter

Sometimes it's really easy to know which object has gone boom and
where the offending code is, and sometimes it's really hard. One case
we're trying to hunt down is when module unload catches a live debug
object, with a module with lots of them.

Capture a full stack trace from debug_object_init() and dump it when
there's a problem.

FIXME: Should we have a separate Kconfig knob for the backtraces,
they're quite expensive? Atm I'm just selecting it for the general
debug object stuff.

v2: Drop the locks for gathering&storing the backtrace. This is
required because depot_save_stack can call free_pages (to drop it's
preallocation), which can call debug_check_no_obj_freed, which will
recurse into the db->lock spinlocks.

Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 include/linux/debugobjects.h |  2 ++
 lib/Kconfig.debug            |  1 +
 lib/debugobjects.c           | 57 +++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index afc416e5dcab..d3a6ca1a7756 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -4,6 +4,7 @@
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/stackdepot.h>
 
 enum debug_obj_state {
 	ODEBUG_STATE_NONE,
@@ -31,6 +32,7 @@ struct debug_obj {
 	unsigned int		astate;
 	void			*object;
 	struct debug_obj_descr	*descr;
+	depot_stack_handle_t	stack;
 };
 
 /**
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 64155e310a9f..894dd792e771 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -442,6 +442,7 @@ source mm/Kconfig.debug
 config DEBUG_OBJECTS
 	bool "Debug object operations"
 	depends on DEBUG_KERNEL
+	select STACKDEPOT
 	help
 	  If you say Y here, additional code will be inserted into the
 	  kernel to track the life time of various objects and validate
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 2f5349c6e81a..2acad7150bee 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <linux/hash.h>
 #include <linux/kmemleak.h>
+#include <linux/stacktrace.h>
 
 #define ODEBUG_HASH_BITS	14
 #define ODEBUG_HASH_SIZE	(1 << ODEBUG_HASH_BITS)
@@ -30,6 +31,8 @@
 #define ODEBUG_CHUNK_SIZE	(1 << ODEBUG_CHUNK_SHIFT)
 #define ODEBUG_CHUNK_MASK	(~(ODEBUG_CHUNK_SIZE - 1))
 
+#define ODEBUG_STACKDEPTH 32
+
 struct debug_bucket {
 	struct hlist_head	list;
 	raw_spinlock_t		lock;
@@ -280,15 +283,24 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
 {
 	struct debug_obj_descr *descr = obj->descr;
 	static int limit;
+	unsigned long entries[ODEBUG_STACKDEPTH];
+	struct stack_trace trace = {
+		.entries = entries,
+		.max_entries = ODEBUG_STACKDEPTH
+	};
+
 
 	if (limit < 5 && descr != descr_test) {
 		void *hint = descr->debug_hint ?
 			descr->debug_hint(obj->object) : NULL;
 		limit++;
+		depot_fetch_stack(obj->stack, &trace);
 		WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
 				 "object type: %s hint: %pS\n",
 			msg, obj_states[obj->state], obj->astate,
 			descr->name, hint);
+		pr_err("ODEBUG: debug object originally initialized at:\n");
+		print_stack_trace(&trace, 2);
 	}
 	debug_objects_warnings++;
 }
@@ -328,6 +340,24 @@ static void debug_object_is_on_stack(void *addr, int onstack)
 	WARN_ON(1);
 }
 
+static noinline depot_stack_handle_t save_stack(struct debug_obj *obj)
+{
+	unsigned long entries[ODEBUG_STACKDEPTH];
+	struct stack_trace trace = {
+		.entries = entries,
+		.max_entries = ODEBUG_STACKDEPTH,
+		.skip = 2
+	};
+
+	save_stack_trace(&trace);
+	if (trace.nr_entries != 0 &&
+	    trace.entries[trace.nr_entries-1] == ULONG_MAX)
+		trace.nr_entries--;
+
+	/* May be called under spinlock, so avoid sleeping */
+	return depot_save_stack(&trace, GFP_NOWAIT);
+}
+
 static void
 __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
 {
@@ -344,14 +374,29 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
 
 	obj = lookup_object(addr, db);
 	if (!obj) {
-		obj = alloc_object(addr, db, descr);
+		depot_stack_handle_t stack;
+
+		/*
+		 * must drop locks while storing the stack trace to avoid
+		 * recursive deadlock through depot_save_stack
+		 * allocating/freeing memory.
+		 */
+		raw_spin_unlock_irqrestore(&db->lock, flags);
+		stack = save_stack(obj);
+		raw_spin_lock_irqsave(&db->lock, flags);
+
+		obj = lookup_object(addr, db);
 		if (!obj) {
-			debug_objects_enabled = 0;
-			raw_spin_unlock_irqrestore(&db->lock, flags);
-			debug_objects_oom();
-			return;
+			obj = alloc_object(addr, db, descr);
+			if (!obj) {
+				debug_objects_enabled = 0;
+				raw_spin_unlock_irqrestore(&db->lock, flags);
+				debug_objects_oom();
+				return;
+			}
+			debug_object_is_on_stack(addr, onstack);
+			obj->stack = stack;
 		}
-		debug_object_is_on_stack(addr, onstack);
 	}
 
 	switch (obj->state) {
-- 
2.16.2

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for RFC: debugobjects: capture stack traces at _init() time
  2018-03-20 16:02 [PATCH] RFC: debugobjects: capture stack traces at _init() time Daniel Vetter
@ 2018-03-20 16:19 ` Patchwork
  2018-03-20 16:36 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2018-03-20 16:19 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

== Series Details ==

Series: RFC: debugobjects: capture stack traces at _init() time
URL   : https://patchwork.freedesktop.org/series/40294/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
bf60db68fbc5 RFC: debugobjects: capture stack traces at _init() time
-:124: CHECK:SPACING: spaces preferred around that '-' (ctx:VxV)
#124: FILE: lib/debugobjects.c:354:
+	    trace.entries[trace.nr_entries-1] == ULONG_MAX)
 	                                  ^

total: 0 errors, 0 warnings, 1 checks, 119 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* ✓ Fi.CI.BAT: success for RFC: debugobjects: capture stack traces at _init() time
  2018-03-20 16:02 [PATCH] RFC: debugobjects: capture stack traces at _init() time Daniel Vetter
  2018-03-20 16:19 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
@ 2018-03-20 16:36 ` Patchwork
  2018-03-20 19:18 ` ✓ Fi.CI.IGT: " Patchwork
  2018-03-20 19:44 ` [PATCH] " Thomas Gleixner
  3 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2018-03-20 16:36 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

== Series Details ==

Series: RFC: debugobjects: capture stack traces at _init() time
URL   : https://patchwork.freedesktop.org/series/40294/
State : success

== Summary ==

Series 40294v1 RFC: debugobjects: capture stack traces at _init() time
https://patchwork.freedesktop.org/api/1.0/series/40294/revisions/1/mbox/

---- Possible new issues:

Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                incomplete -> PASS       (fi-cfl-s2)

---- Known issues:

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup read-crc-pipe-b-frame-sequence:
                pass       -> FAIL       (fi-skl-guc) fdo#103191
        Subgroup suspend-read-crc-pipe-b:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713
        Subgroup suspend-read-crc-pipe-c:
                pass       -> FAIL       (fi-skl-guc) fdo#104108

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:442s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:439s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:382s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:543s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:299s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:518s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:513s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:519s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:510s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:410s
fi-cfl-s2        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:581s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:510s
fi-cnl-drrs      total:285  pass:254  dwarn:3   dfail:0   fail:0   skip:28  time:523s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:433s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:320s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:538s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:404s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:422s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:477s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:432s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:474s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:471s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:515s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:664s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:450s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:533s
fi-skl-6700hq    total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:544s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:506s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:493s
fi-skl-guc       total:285  pass:255  dwarn:0   dfail:0   fail:2   skip:28  time:429s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:448s
fi-snb-2520m     total:242  pass:208  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:403s

9d737cebc219c821989021a3115424165ff7b052 drm-tip: 2018y-03m-20d-14h-56m-05s UTC integration manifest
bf60db68fbc5 RFC: debugobjects: capture stack traces at _init() time

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8417/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* ✓ Fi.CI.IGT: success for RFC: debugobjects: capture stack traces at _init() time
  2018-03-20 16:02 [PATCH] RFC: debugobjects: capture stack traces at _init() time Daniel Vetter
  2018-03-20 16:19 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
  2018-03-20 16:36 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-03-20 19:18 ` Patchwork
  2018-03-20 19:44 ` [PATCH] " Thomas Gleixner
  3 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2018-03-20 19:18 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

== Series Details ==

Series: RFC: debugobjects: capture stack traces at _init() time
URL   : https://patchwork.freedesktop.org/series/40294/
State : success

== Summary ==

---- Known issues:

Test gem_exec_suspend:
        Subgroup basic-s3:
                pass       -> SKIP       (shard-snb) fdo#104311
Test gem_softpin:
        Subgroup noreloc-s3:
                pass       -> INCOMPLETE (shard-hsw) fdo#103540
Test kms_setmode:
        Subgroup basic:
                pass       -> FAIL       (shard-apl) fdo#99912
Test kms_vblank:
        Subgroup pipe-b-ts-continuation-dpms-suspend:
                incomplete -> PASS       (shard-hsw) fdo#105054

fdo#104311 https://bugs.freedesktop.org/show_bug.cgi?id=104311
fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
fdo#105054 https://bugs.freedesktop.org/show_bug.cgi?id=105054

shard-apl        total:3478 pass:1814 dwarn:1   dfail:0   fail:7   skip:1655 time:13155s
shard-hsw        total:3425 pass:1739 dwarn:1   dfail:0   fail:1   skip:1682 time:11182s
shard-snb        total:3478 pass:1357 dwarn:1   dfail:0   fail:2   skip:2118 time:7273s
Blacklisted hosts:
shard-kbl        total:3478 pass:1938 dwarn:1   dfail:0   fail:10  skip:1529 time:9866s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8417/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] RFC: debugobjects: capture stack traces at _init() time
  2018-03-20 16:02 [PATCH] RFC: debugobjects: capture stack traces at _init() time Daniel Vetter
                   ` (2 preceding siblings ...)
  2018-03-20 19:18 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-03-20 19:44 ` Thomas Gleixner
  2018-03-23 16:26     ` Daniel Vetter
  3 siblings, 1 reply; 7+ messages in thread
From: Thomas Gleixner @ 2018-03-20 19:44 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Intel Graphics Development, LKML, Philippe Ombredanne,
	Greg Kroah-Hartman, Kate Stewart, Waiman Long, Daniel Vetter

On Tue, 20 Mar 2018, Daniel Vetter wrote:

> Sometimes it's really easy to know which object has gone boom and
> where the offending code is, and sometimes it's really hard. One case
> we're trying to hunt down is when module unload catches a live debug
> object, with a module with lots of them.
> 
> Capture a full stack trace from debug_object_init() and dump it when
> there's a problem.
> 
> FIXME: Should we have a separate Kconfig knob for the backtraces,
> they're quite expensive? Atm I'm just selecting it for the general
> debug object stuff.

Just make it boot time enabled. We really want to be able to switch it off.

> +#define ODEBUG_STACKDEPTH 32

Do we really need it that deep? I doubt that.

> +static noinline depot_stack_handle_t save_stack(struct debug_obj *obj)
> +{
> +	unsigned long entries[ODEBUG_STACKDEPTH];
> +	struct stack_trace trace = {
> +		.entries = entries,
> +		.max_entries = ODEBUG_STACKDEPTH,
> +		.skip = 2
> +	};
> +
> +	save_stack_trace(&trace);
> +	if (trace.nr_entries != 0 &&
> +	    trace.entries[trace.nr_entries-1] == ULONG_MAX)
> +		trace.nr_entries--;
> +
> +	/* May be called under spinlock, so avoid sleeping */
> +	return depot_save_stack(&trace, GFP_NOWAIT);

I really don't like the idea of unconditional allocations in that code
path. We went great length to make it perform halfways sane with the
pool. Though we should actually have per cpu pools to avoid the lock
contention, but thats a different problem.

I'd rather make the storage a fixed size allocation which is appended
to the debug object. Something like this:

struct debug_obj_trace {
	struct stack_trace	trace;
	unsigned long		entries[ODEBUG_STACKDEPTH];
};	

struct debug_obj {
	unsigned int		astate;
	void			*object;
	struct debug_obj_descr	*descr;
	struct debug_obj_trace	trace[0];
};

void __init debug_objects_mem_init(void)
{
	unsigned long objsize = sizeof (struct debug_obj);

 	if (!debug_objects_enabled)
 		return;
 
	if (debug_objects_trace_stack)
		objsize += sizeof(struct debug_obj_trace);

	obj_cache = kmem_cache_create("debug_objects_cache", objsize, 0,
 				      SLAB_DEBUG_OBJECTS, NULL);

__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
{
	....
 	case ODEBUG_STATE_NONE:
 	case ODEBUG_STATE_INIT:
 	case ODEBUG_STATE_INACTIVE:
		if (debug_object_trace_stack) {
			obj->trace[0].trace.entries = obj->trace[0].entries;
			save_stack_trace(&trace[0].trace);
		}
 
That means we need to size the static objects which are used during early
boot with stack under the assumption that stack tracing is enabled, but
that's simple enough.

Hmm?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] RFC: debugobjects: capture stack traces at _init() time
  2018-03-20 19:44 ` [PATCH] " Thomas Gleixner
@ 2018-03-23 16:26     ` Daniel Vetter
  0 siblings, 0 replies; 7+ messages in thread
From: Daniel Vetter @ 2018-03-23 16:26 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Intel Graphics Development, LKML, Philippe Ombredanne,
	Greg Kroah-Hartman, Kate Stewart, Waiman Long, Daniel Vetter

On Tue, Mar 20, 2018 at 8:44 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Tue, 20 Mar 2018, Daniel Vetter wrote:
>
>> Sometimes it's really easy to know which object has gone boom and
>> where the offending code is, and sometimes it's really hard. One case
>> we're trying to hunt down is when module unload catches a live debug
>> object, with a module with lots of them.
>>
>> Capture a full stack trace from debug_object_init() and dump it when
>> there's a problem.
>>
>> FIXME: Should we have a separate Kconfig knob for the backtraces,
>> they're quite expensive? Atm I'm just selecting it for the general
>> debug object stuff.
>
> Just make it boot time enabled. We really want to be able to switch it off.
>
>> +#define ODEBUG_STACKDEPTH 32
>
> Do we really need it that deep? I doubt that.

Entirely arbitrary, I just needed this to start hunting a rare crash
we're seeing in CI and stitched something together. I agree we
probably don't need that much.

>> +static noinline depot_stack_handle_t save_stack(struct debug_obj *obj)
>> +{
>> +     unsigned long entries[ODEBUG_STACKDEPTH];
>> +     struct stack_trace trace = {
>> +             .entries = entries,
>> +             .max_entries = ODEBUG_STACKDEPTH,
>> +             .skip = 2
>> +     };
>> +
>> +     save_stack_trace(&trace);
>> +     if (trace.nr_entries != 0 &&
>> +         trace.entries[trace.nr_entries-1] == ULONG_MAX)
>> +             trace.nr_entries--;
>> +
>> +     /* May be called under spinlock, so avoid sleeping */
>> +     return depot_save_stack(&trace, GFP_NOWAIT);
>
> I really don't like the idea of unconditional allocations in that code
> path. We went great length to make it perform halfways sane with the
> pool. Though we should actually have per cpu pools to avoid the lock
> contention, but thats a different problem.
>
> I'd rather make the storage a fixed size allocation which is appended
> to the debug object. Something like this:
>
> struct debug_obj_trace {
>         struct stack_trace      trace;
>         unsigned long           entries[ODEBUG_STACKDEPTH];
> };
>
> struct debug_obj {
>         unsigned int            astate;
>         void                    *object;
>         struct debug_obj_descr  *descr;
>         struct debug_obj_trace  trace[0];
> };
>
> void __init debug_objects_mem_init(void)
> {
>         unsigned long objsize = sizeof (struct debug_obj);
>
>         if (!debug_objects_enabled)
>                 return;
>
>         if (debug_objects_trace_stack)
>                 objsize += sizeof(struct debug_obj_trace);
>
>         obj_cache = kmem_cache_create("debug_objects_cache", objsize, 0,
>                                       SLAB_DEBUG_OBJECTS, NULL);
>
> __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
> {
>         ....
>         case ODEBUG_STATE_NONE:
>         case ODEBUG_STATE_INIT:
>         case ODEBUG_STATE_INACTIVE:
>                 if (debug_object_trace_stack) {
>                         obj->trace[0].trace.entries = obj->trace[0].entries;
>                         save_stack_trace(&trace[0].trace);
>                 }
>
> That means we need to size the static objects which are used during early
> boot with stack under the assumption that stack tracing is enabled, but
> that's simple enough.
>
> Hmm?

Yeah looks reasonable, and gives us an easy Kconfig/boot-time option
to enable/disable it. I'm a bit swamped, but I'll try to respin.
Thanks very much for the look and suggesting a cleaner integration
approach - the allocations and recursion into the stack depot really
did not result in clean code.

Thanks, Daniel

> Thanks,
>
>         tglx



-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] RFC: debugobjects: capture stack traces at _init() time
@ 2018-03-23 16:26     ` Daniel Vetter
  0 siblings, 0 replies; 7+ messages in thread
From: Daniel Vetter @ 2018-03-23 16:26 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Kate Stewart, Daniel Vetter, Greg Kroah-Hartman,
	Intel Graphics Development, LKML, Philippe Ombredanne,
	Waiman Long

On Tue, Mar 20, 2018 at 8:44 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Tue, 20 Mar 2018, Daniel Vetter wrote:
>
>> Sometimes it's really easy to know which object has gone boom and
>> where the offending code is, and sometimes it's really hard. One case
>> we're trying to hunt down is when module unload catches a live debug
>> object, with a module with lots of them.
>>
>> Capture a full stack trace from debug_object_init() and dump it when
>> there's a problem.
>>
>> FIXME: Should we have a separate Kconfig knob for the backtraces,
>> they're quite expensive? Atm I'm just selecting it for the general
>> debug object stuff.
>
> Just make it boot time enabled. We really want to be able to switch it off.
>
>> +#define ODEBUG_STACKDEPTH 32
>
> Do we really need it that deep? I doubt that.

Entirely arbitrary, I just needed this to start hunting a rare crash
we're seeing in CI and stitched something together. I agree we
probably don't need that much.

>> +static noinline depot_stack_handle_t save_stack(struct debug_obj *obj)
>> +{
>> +     unsigned long entries[ODEBUG_STACKDEPTH];
>> +     struct stack_trace trace = {
>> +             .entries = entries,
>> +             .max_entries = ODEBUG_STACKDEPTH,
>> +             .skip = 2
>> +     };
>> +
>> +     save_stack_trace(&trace);
>> +     if (trace.nr_entries != 0 &&
>> +         trace.entries[trace.nr_entries-1] == ULONG_MAX)
>> +             trace.nr_entries--;
>> +
>> +     /* May be called under spinlock, so avoid sleeping */
>> +     return depot_save_stack(&trace, GFP_NOWAIT);
>
> I really don't like the idea of unconditional allocations in that code
> path. We went great length to make it perform halfways sane with the
> pool. Though we should actually have per cpu pools to avoid the lock
> contention, but thats a different problem.
>
> I'd rather make the storage a fixed size allocation which is appended
> to the debug object. Something like this:
>
> struct debug_obj_trace {
>         struct stack_trace      trace;
>         unsigned long           entries[ODEBUG_STACKDEPTH];
> };
>
> struct debug_obj {
>         unsigned int            astate;
>         void                    *object;
>         struct debug_obj_descr  *descr;
>         struct debug_obj_trace  trace[0];
> };
>
> void __init debug_objects_mem_init(void)
> {
>         unsigned long objsize = sizeof (struct debug_obj);
>
>         if (!debug_objects_enabled)
>                 return;
>
>         if (debug_objects_trace_stack)
>                 objsize += sizeof(struct debug_obj_trace);
>
>         obj_cache = kmem_cache_create("debug_objects_cache", objsize, 0,
>                                       SLAB_DEBUG_OBJECTS, NULL);
>
> __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
> {
>         ....
>         case ODEBUG_STATE_NONE:
>         case ODEBUG_STATE_INIT:
>         case ODEBUG_STATE_INACTIVE:
>                 if (debug_object_trace_stack) {
>                         obj->trace[0].trace.entries = obj->trace[0].entries;
>                         save_stack_trace(&trace[0].trace);
>                 }
>
> That means we need to size the static objects which are used during early
> boot with stack under the assumption that stack tracing is enabled, but
> that's simple enough.
>
> Hmm?

Yeah looks reasonable, and gives us an easy Kconfig/boot-time option
to enable/disable it. I'm a bit swamped, but I'll try to respin.
Thanks very much for the look and suggesting a cleaner integration
approach - the allocations and recursion into the stack depot really
did not result in clean code.

Thanks, Daniel

> Thanks,
>
>         tglx



-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-03-23 16:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-20 16:02 [PATCH] RFC: debugobjects: capture stack traces at _init() time Daniel Vetter
2018-03-20 16:19 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2018-03-20 16:36 ` ✓ Fi.CI.BAT: success " Patchwork
2018-03-20 19:18 ` ✓ Fi.CI.IGT: " Patchwork
2018-03-20 19:44 ` [PATCH] " Thomas Gleixner
2018-03-23 16:26   ` Daniel Vetter
2018-03-23 16:26     ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.