All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] lockdep: fix stack trace caching logic
@ 2016-02-04 13:40 Dmitry Vyukov
  2016-02-04 17:44 ` Peter Hurley
  2016-02-09 12:19 ` [tip:locking/core] locking/lockdep: Fix " tip-bot for Dmitry Vyukov
  0 siblings, 2 replies; 5+ messages in thread
From: Dmitry Vyukov @ 2016-02-04 13:40 UTC (permalink / raw)
  To: peterz, mingo
  Cc: linux-kernel, peter, kcc, glider, sasha.levin, Dmitry Vyukov

check_prev_add() caches saved stack trace in static trace variable
to avoid duplicate save_trace() calls in dependencies involving trylocks.
But that caching logic contains a bug. We may not save trace on first
iteration due to early return from check_prev_add(). Then on the
second iteration when we actually need the trace we don't save it
because we think that we've already saved it.

Let check_prev_add() itself control when stack is saved.

There is another bug. Trace variable is protected by graph lock.
But we can temporarily release graph lock during printing.

Fix this by invalidating cached stack trace when we release graph lock.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
---
 kernel/locking/lockdep.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 60ace56..c7710e4 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1822,7 +1822,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-	       struct held_lock *next, int distance, int trylock_loop)
+	       struct held_lock *next, int distance, int *stack_saved)
 {
 	struct lock_list *entry;
 	int ret;
@@ -1883,8 +1883,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 		}
 	}
 
-	if (!trylock_loop && !save_trace(&trace))
-		return 0;
+	if (!*stack_saved) {
+		if (!save_trace(&trace))
+			return 0;
+		*stack_saved = 1;
+	}
 
 	/*
 	 * Ok, all validations passed, add the new lock
@@ -1907,6 +1910,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	 * Debugging printouts:
 	 */
 	if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
+		/* We drop graph lock, so another thread can overwrite trace. */
+		*stack_saved = 0;
 		graph_unlock();
 		printk("\n new dependency: ");
 		print_lock_name(hlock_class(prev));
@@ -1929,7 +1934,7 @@ static int
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
 	int depth = curr->lockdep_depth;
-	int trylock_loop = 0;
+	int stack_saved = 0;
 	struct held_lock *hlock;
 
 	/*
@@ -1956,7 +1961,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		 */
 		if (hlock->read != 2 && hlock->check) {
 			if (!check_prev_add(curr, hlock, next,
-						distance, trylock_loop))
+						distance, &stack_saved))
 				return 0;
 			/*
 			 * Stop after the first non-trylock entry,
@@ -1979,7 +1984,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		if (curr->held_locks[depth].irq_context !=
 				curr->held_locks[depth-1].irq_context)
 			break;
-		trylock_loop = 1;
 	}
 	return 1;
 out_bug:
-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] lockdep: fix stack trace caching logic
  2016-02-04 13:40 [PATCH] lockdep: fix stack trace caching logic Dmitry Vyukov
@ 2016-02-04 17:44 ` Peter Hurley
  2016-02-04 17:50   ` Dmitry Vyukov
  2016-02-09 12:19 ` [tip:locking/core] locking/lockdep: Fix " tip-bot for Dmitry Vyukov
  1 sibling, 1 reply; 5+ messages in thread
From: Peter Hurley @ 2016-02-04 17:44 UTC (permalink / raw)
  To: Dmitry Vyukov, peterz, mingo; +Cc: linux-kernel, kcc, glider, sasha.levin

On 02/04/2016 05:40 AM, Dmitry Vyukov wrote:
> check_prev_add() caches saved stack trace in static trace variable
> to avoid duplicate save_trace() calls in dependencies involving trylocks.
> But that caching logic contains a bug.

> We may not save trace on first iteration due to early return from check_prev_add().

This commit log should identify the role test instrumentation plays in triggering
this bug: is it a recursive read lock dependency injected between existing lock
dependencies? What test component triggered this?


> Then on the second iteration when we actually need the trace we don't save it
> because we think that we've already saved it.
> 
> Let check_prev_add() itself control when stack is saved.
> 
> There is another bug. Trace variable is protected by graph lock.
> But we can temporary release graph lock during printing.
> 
> Fix this by invalidating cached stack trace when we release graph lock.
> 
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> ---
>  kernel/locking/lockdep.c | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index 60ace56..c7710e4 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -1822,7 +1822,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
>   */
>  static int
>  check_prev_add(struct task_struct *curr, struct held_lock *prev,
> -	       struct held_lock *next, int distance, int trylock_loop)
> +	       struct held_lock *next, int distance, int *stack_saved)
>  {
>  	struct lock_list *entry;
>  	int ret;
> @@ -1883,8 +1883,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
>  		}
>  	}
>  
> -	if (!trylock_loop && !save_trace(&trace))
> -		return 0;
> +	if (!*stack_saved) {
> +		if (!save_trace(&trace))
> +			return 0;
> +		*stack_saved = 1;
> +	}
>  
>  	/*
>  	 * Ok, all validations passed, add the new lock
> @@ -1907,6 +1910,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
>  	 * Debugging printouts:
>  	 */
>  	if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
> +		/* We drop graph lock, so another thread can overwrite trace. */
> +		*stack_saved = 0;
>  		graph_unlock();
>  		printk("\n new dependency: ");
>  		print_lock_name(hlock_class(prev));
> @@ -1929,7 +1934,7 @@ static int
>  check_prevs_add(struct task_struct *curr, struct held_lock *next)
>  {
>  	int depth = curr->lockdep_depth;
> -	int trylock_loop = 0;
> +	int stack_saved = 0;
>  	struct held_lock *hlock;
>  
>  	/*
> @@ -1956,7 +1961,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
>  		 */
>  		if (hlock->read != 2 && hlock->check) {
>  			if (!check_prev_add(curr, hlock, next,
> -						distance, trylock_loop))
> +						distance, &stack_saved))
>  				return 0;
>  			/*
>  			 * Stop after the first non-trylock entry,
> @@ -1979,7 +1984,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
>  		if (curr->held_locks[depth].irq_context !=
>  				curr->held_locks[depth-1].irq_context)
>  			break;
> -		trylock_loop = 1;
>  	}
>  	return 1;
>  out_bug:
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] lockdep: fix stack trace caching logic
  2016-02-04 17:44 ` Peter Hurley
@ 2016-02-04 17:50   ` Dmitry Vyukov
  2016-02-04 18:33     ` Peter Hurley
  0 siblings, 1 reply; 5+ messages in thread
From: Dmitry Vyukov @ 2016-02-04 17:50 UTC (permalink / raw)
  To: Peter Hurley
  Cc: Peter Zijlstra, Ingo Molnar, LKML, Kostya Serebryany,
	Alexander Potapenko, Sasha Levin

On Thu, Feb 4, 2016 at 6:44 PM, Peter Hurley <peter@hurleysoftware.com> wrote:
> On 02/04/2016 05:40 AM, Dmitry Vyukov wrote:
>> check_prev_add() caches saved stack trace in static trace variable
>> to avoid duplicate save_trace() calls in dependencies involving trylocks.
>> But that caching logic contains a bug.
>
>> We may not save trace on first iteration due to early return from check_prev_add().
>
> This commit log should identify the role test instrumentation plays in triggering
> this bug: is it a recursive read lock dependency injected between existing lock
> dependencies? What test component triggered this?


I don't think that it has anything to do with instrumentation.
check_prev_add() has explicitly coded early exit paths which don't
save stack trace.
For example, if first pair of locks is: try-read-lock vs read-lock, we
will not save stack trace; then next pair is: write-lock vs read-lock,
we will use bogus stack trace.




>> Then on the second iteration when we actually need the trace we don't save it
>> because we think that we've already saved it.
>>
>> Let check_prev_add() itself control when stack is saved.
>>
>> There is another bug. Trace variable is protected by graph lock.
>> But we can temporary release graph lock during printing.
>>
>> Fix this by invalidating cached stack trace when we release graph lock.
>>
>> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
>> ---
>>  kernel/locking/lockdep.c | 16 ++++++++++------
>>  1 file changed, 10 insertions(+), 6 deletions(-)
>>
>> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
>> index 60ace56..c7710e4 100644
>> --- a/kernel/locking/lockdep.c
>> +++ b/kernel/locking/lockdep.c
>> @@ -1822,7 +1822,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
>>   */
>>  static int
>>  check_prev_add(struct task_struct *curr, struct held_lock *prev,
>> -            struct held_lock *next, int distance, int trylock_loop)
>> +            struct held_lock *next, int distance, int *stack_saved)
>>  {
>>       struct lock_list *entry;
>>       int ret;
>> @@ -1883,8 +1883,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
>>               }
>>       }
>>
>> -     if (!trylock_loop && !save_trace(&trace))
>> -             return 0;
>> +     if (!*stack_saved) {
>> +             if (!save_trace(&trace))
>> +                     return 0;
>> +             *stack_saved = 1;
>> +     }
>>
>>       /*
>>        * Ok, all validations passed, add the new lock
>> @@ -1907,6 +1910,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
>>        * Debugging printouts:
>>        */
>>       if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
>> +             /* We drop graph lock, so another thread can overwrite trace. */
>> +             *stack_saved = 0;
>>               graph_unlock();
>>               printk("\n new dependency: ");
>>               print_lock_name(hlock_class(prev));
>> @@ -1929,7 +1934,7 @@ static int
>>  check_prevs_add(struct task_struct *curr, struct held_lock *next)
>>  {
>>       int depth = curr->lockdep_depth;
>> -     int trylock_loop = 0;
>> +     int stack_saved = 0;
>>       struct held_lock *hlock;
>>
>>       /*
>> @@ -1956,7 +1961,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
>>                */
>>               if (hlock->read != 2 && hlock->check) {
>>                       if (!check_prev_add(curr, hlock, next,
>> -                                             distance, trylock_loop))
>> +                                             distance, &stack_saved))
>>                               return 0;
>>                       /*
>>                        * Stop after the first non-trylock entry,
>> @@ -1979,7 +1984,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
>>               if (curr->held_locks[depth].irq_context !=
>>                               curr->held_locks[depth-1].irq_context)
>>                       break;
>> -             trylock_loop = 1;
>>       }
>>       return 1;
>>  out_bug:
>>
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] lockdep: fix stack trace caching logic
  2016-02-04 17:50   ` Dmitry Vyukov
@ 2016-02-04 18:33     ` Peter Hurley
  0 siblings, 0 replies; 5+ messages in thread
From: Peter Hurley @ 2016-02-04 18:33 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: Peter Zijlstra, Ingo Molnar, LKML, Kostya Serebryany,
	Alexander Potapenko, Sasha Levin

On 02/04/2016 09:50 AM, Dmitry Vyukov wrote:
> On Thu, Feb 4, 2016 at 6:44 PM, Peter Hurley <peter@hurleysoftware.com> wrote:
>> On 02/04/2016 05:40 AM, Dmitry Vyukov wrote:
>>> check_prev_add() caches saved stack trace in static trace variable
>>> to avoid duplicate save_trace() calls in dependencies involving trylocks.
>>> But that caching logic contains a bug.
>>
>>> We may not save trace on first iteration due to early return from check_prev_add().
>>
>> This commit log should identify the role test instrumentation plays in triggering
>> this bug: is it a recursive read lock dependency injected between existing lock
>> dependencies? What test component triggered this?
> 
> 
> I don't think that it has something to do with instrumentation.
> check_prev_add() has explicitly coded early exits paths which don't
> save stack trace.
> For example, if first pair of locks is: try-read-lock vs read-lock, we
> will not save stack trace; then next pair is: write-lock vs read-lock,
> we will use bogus stack trace.

Ok, but that isn't the case with the triggering stack traces we were getting.

But I see now that there's an existing lock dependency that check_prev_add()
is skipping. IOW, the other early-out path is being taken; lockdep has already seen:

 new dependency:  (&tty->ldisc_sem){++++..} =>  (&buf->lock){+.+...}

So yeah, not related to test instrumentation then.


>>> Then on the second iteration when we actually need the trace we don't save it
>>> because we think that we've already saved it.
>>>
>>> Let check_prev_add() itself control when stack is saved.
>>>
>>> There is another bug. Trace variable is protected by graph lock.
>>> But we can temporary release graph lock during printing.
>>>
>>> Fix this by invalidating cached stack trace when we release graph lock.
>>>
>>> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
>>> ---
>>>  kernel/locking/lockdep.c | 16 ++++++++++------
>>>  1 file changed, 10 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
>>> index 60ace56..c7710e4 100644
>>> --- a/kernel/locking/lockdep.c
>>> +++ b/kernel/locking/lockdep.c
>>> @@ -1822,7 +1822,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
>>>   */
>>>  static int
>>>  check_prev_add(struct task_struct *curr, struct held_lock *prev,
>>> -            struct held_lock *next, int distance, int trylock_loop)
>>> +            struct held_lock *next, int distance, int *stack_saved)
>>>  {
>>>       struct lock_list *entry;
>>>       int ret;
>>> @@ -1883,8 +1883,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
>>>               }
>>>       }
>>>
>>> -     if (!trylock_loop && !save_trace(&trace))
>>> -             return 0;
>>> +     if (!*stack_saved) {
>>> +             if (!save_trace(&trace))
>>> +                     return 0;
>>> +             *stack_saved = 1;
>>> +     }
>>>
>>>       /*
>>>        * Ok, all validations passed, add the new lock
>>> @@ -1907,6 +1910,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
>>>        * Debugging printouts:
>>>        */
>>>       if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
>>> +             /* We drop graph lock, so another thread can overwrite trace. */
>>> +             *stack_saved = 0;
>>>               graph_unlock();
>>>               printk("\n new dependency: ");
>>>               print_lock_name(hlock_class(prev));
>>> @@ -1929,7 +1934,7 @@ static int
>>>  check_prevs_add(struct task_struct *curr, struct held_lock *next)
>>>  {
>>>       int depth = curr->lockdep_depth;
>>> -     int trylock_loop = 0;
>>> +     int stack_saved = 0;
>>>       struct held_lock *hlock;
>>>
>>>       /*
>>> @@ -1956,7 +1961,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
>>>                */
>>>               if (hlock->read != 2 && hlock->check) {
>>>                       if (!check_prev_add(curr, hlock, next,
>>> -                                             distance, trylock_loop))
>>> +                                             distance, &stack_saved))
>>>                               return 0;
>>>                       /*
>>>                        * Stop after the first non-trylock entry,
>>> @@ -1979,7 +1984,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
>>>               if (curr->held_locks[depth].irq_context !=
>>>                               curr->held_locks[depth-1].irq_context)
>>>                       break;
>>> -             trylock_loop = 1;
>>>       }
>>>       return 1;
>>>  out_bug:
>>>
>>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [tip:locking/core] locking/lockdep: Fix stack trace caching logic
  2016-02-04 13:40 [PATCH] lockdep: fix stack trace caching logic Dmitry Vyukov
  2016-02-04 17:44 ` Peter Hurley
@ 2016-02-09 12:19 ` tip-bot for Dmitry Vyukov
  1 sibling, 0 replies; 5+ messages in thread
From: tip-bot for Dmitry Vyukov @ 2016-02-09 12:19 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, akpm, dvyukov, linux-kernel, torvalds, tglx, paulmck, mingo, peterz

Commit-ID:  8a5fd56431fe1682e870bd6ab0c276e74befbeb9
Gitweb:     http://git.kernel.org/tip/8a5fd56431fe1682e870bd6ab0c276e74befbeb9
Author:     Dmitry Vyukov <dvyukov@google.com>
AuthorDate: Thu, 4 Feb 2016 14:40:40 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 9 Feb 2016 11:06:08 +0100

locking/lockdep: Fix stack trace caching logic

check_prev_add() caches saved stack trace in static trace variable
to avoid duplicate save_trace() calls in dependencies involving trylocks.
But that caching logic contains a bug. We may not save trace on first
iteration due to early return from check_prev_add(). Then on the
second iteration when we actually need the trace we don't save it
because we think that we've already saved it.

Let check_prev_add() itself control when stack is saved.

There is another bug. Trace variable is protected by graph lock.
But we can temporarily release graph lock during printing.

Fix this by invalidating cached stack trace when we release graph lock.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: glider@google.com
Cc: kcc@google.com
Cc: peter@hurleysoftware.com
Cc: sasha.levin@oracle.com
Link: http://lkml.kernel.org/r/1454593240-121647-1-git-send-email-dvyukov@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/lockdep.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 60ace56..c7710e4 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1822,7 +1822,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-	       struct held_lock *next, int distance, int trylock_loop)
+	       struct held_lock *next, int distance, int *stack_saved)
 {
 	struct lock_list *entry;
 	int ret;
@@ -1883,8 +1883,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 		}
 	}
 
-	if (!trylock_loop && !save_trace(&trace))
-		return 0;
+	if (!*stack_saved) {
+		if (!save_trace(&trace))
+			return 0;
+		*stack_saved = 1;
+	}
 
 	/*
 	 * Ok, all validations passed, add the new lock
@@ -1907,6 +1910,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	 * Debugging printouts:
 	 */
 	if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
+		/* We drop graph lock, so another thread can overwrite trace. */
+		*stack_saved = 0;
 		graph_unlock();
 		printk("\n new dependency: ");
 		print_lock_name(hlock_class(prev));
@@ -1929,7 +1934,7 @@ static int
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
 	int depth = curr->lockdep_depth;
-	int trylock_loop = 0;
+	int stack_saved = 0;
 	struct held_lock *hlock;
 
 	/*
@@ -1956,7 +1961,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		 */
 		if (hlock->read != 2 && hlock->check) {
 			if (!check_prev_add(curr, hlock, next,
-						distance, trylock_loop))
+						distance, &stack_saved))
 				return 0;
 			/*
 			 * Stop after the first non-trylock entry,
@@ -1979,7 +1984,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		if (curr->held_locks[depth].irq_context !=
 				curr->held_locks[depth-1].irq_context)
 			break;
-		trylock_loop = 1;
 	}
 	return 1;
 out_bug:

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-02-09 12:21 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-04 13:40 [PATCH] lockdep: fix stack trace caching logic Dmitry Vyukov
2016-02-04 17:44 ` Peter Hurley
2016-02-04 17:50   ` Dmitry Vyukov
2016-02-04 18:33     ` Peter Hurley
2016-02-09 12:19 ` [tip:locking/core] locking/lockdep: Fix " tip-bot for Dmitry Vyukov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.