linux-kernel.vger.kernel.org archive mirror
* [PATCH] lockdep: Optimize the memory usage of circular queue
@ 2020-09-17  8:01 Boqun Feng
  2020-09-24 15:12 ` Boqun Feng
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Boqun Feng @ 2020-09-17  8:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: Waiman Long, Boqun Feng, Qian Cai, Peter Zijlstra, Ingo Molnar,
	Will Deacon

Qian Cai reported a BFS_EQUEUEFULL warning [1] after the read-recursive
deadlock detection was merged into the tip tree recently. Unlike the
previous lockdep graph search, which iterates every lock class (every
node in the graph) exactly once, the graph search for read-recursive
deadlock detection needs to iterate every lock dependency (every edge
in the graph) once. As a result, the maximum memory cost of the
circular queue changes from O(V), where V is the number of lock classes
(nodes or vertices) in the graph, to O(E), where E is the number of
lock dependencies (edges), because every lock class or dependency gets
enqueued once in the BFS. Therefore we hit the BFS_EQUEUEFULL case.

However, we don't actually need to enqueue all dependencies for the
BFS, because every time we enqueue a dependency, we almost always
enqueue all the other dependencies in the same dependency list
("almost" because we currently check before enqueueing, so a dependency
that doesn't pass the check stage is not enqueued; however, we can
always do these two steps in the reverse order). Based on this, we only
need to enqueue the first dependency from a dependency list, and every
time we want to fetch a new dependency to work on, we can either:

1)	fetch the dependency next to the current dependency in the
	dependency list
or
2)	if the dependency in 1) doesn't exist, fetch the dependency from
	the queue.
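
To make the scheme concrete, here is a toy user-space sketch (an
editorial illustration, not the kernel code). The node/edge structures,
the tiny ring buffer and the helper names are invented for the example;
in lockdep the corresponding roles are played by lock_list,
get_dep_list() and the circular_queue.

#include <stdbool.h>
#include <stdio.h>

struct node;

struct edge {
	struct edge *next;	/* sibling in the same dependency list */
	struct node *to;	/* node this dependency leads to */
	bool seen;		/* plays the role of lock_accessed() */
};

struct node {
	const char *name;
	struct edge *deps;	/* head of this node's dependency list */
};

#define QSZ 64			/* toy code: no overflow handling */
static struct edge *queue[QSZ];
static unsigned int qh, qt;

static void push(struct edge *e) { queue[qt++ % QSZ] = e; }
static struct edge *pop(void) { return qh == qt ? NULL : queue[qh++ % QSZ]; }

static void bfs(struct node *src)
{
	struct edge *e = src->deps;	/* first dependency of the source */

	while (e || (e = pop())) {	/* 2) fall back to the queue */
		if (!e->seen) {
			e->seen = true;
			printf("visit dependency to %s\n", e->to->name);

			/* enqueue only the *first* edge of the next list */
			if (e->to->deps)
				push(e->to->deps);
		}
		e = e->next;		/* 1) try the sibling first */
	}
}

int main(void)
{
	struct node a = { "A", NULL }, b = { "B", NULL }, c = { "C", NULL };
	struct edge ab = { NULL, &b, false }, ac = { NULL, &c, false };
	struct edge bc = { NULL, &c, false };

	a.deps = &ab; ab.next = &ac;	/* A -> B, A -> C */
	b.deps = &bc;			/* B -> C */

	bfs(&a);			/* each edge is visited exactly once */
	return 0;
}

Every dependency (edge) is still visited exactly once, but the queue
only ever holds the heads of dependency lists rather than all of their
entries, which is what brings the maximum queue depth down.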

With this approach, the "max bfs queue depth" for an x86_64_defconfig +
lockdep and selftest config kernel can be decreased from:

        max bfs queue depth:                   201

to (after applying this patch)

        max bfs queue depth:                   61
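
(As an aside, not part of the patch: the "max bfs queue depth" counter
quoted above is one of the statistics in /proc/lockdep_stats when
CONFIG_DEBUG_LOCKDEP is enabled. A configuration along the following
lines should expose it and also run the boot-time locking selftests;
the exact option list is an assumption of this note, not something
stated in the thread.)

	CONFIG_PROVE_LOCKING=y
	CONFIG_DEBUG_LOCKDEP=y
	CONFIG_DEBUG_LOCKING_API_SELFTESTS=y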

While I'm at it, clean up the code logic a little (e.g. return directly
rather than setting a "ret" value and jumping to the "exit" label).

[1]: https://lore.kernel.org/lkml/17343f6f7f2438fc376125384133c5ba70c2a681.camel@redhat.com/

Reported-by: Qian Cai <cai@redhat.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
---
 kernel/locking/lockdep.c | 108 ++++++++++++++++++++++++---------------
 1 file changed, 67 insertions(+), 41 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index cccf4bc759c6..761c2327e9cf 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1640,35 +1640,22 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
 			     int offset)
 {
 	struct lock_list *entry;
-	struct lock_list *lock;
+	struct lock_list *lock = NULL;
 	struct list_head *head;
 	struct circular_queue *cq = &lock_cq;
-	enum bfs_result ret = BFS_RNOMATCH;
 
 	lockdep_assert_locked();
 
-	if (match(source_entry, data)) {
-		*target_entry = source_entry;
-		ret = BFS_RMATCH;
-		goto exit;
-	}
-
-	head = get_dep_list(source_entry, offset);
-	if (list_empty(head))
-		goto exit;
-
 	__cq_init(cq);
 	__cq_enqueue(cq, source_entry);
 
-	while ((lock = __cq_dequeue(cq))) {
-		bool prev_only_xr;
-
-		if (!lock->class) {
-			ret = BFS_EINVALIDNODE;
-			goto exit;
-		}
+	while (lock || (lock = __cq_dequeue(cq))) {
+		if (!lock->class)
+			return BFS_EINVALIDNODE;
 
 		/*
+		 * Step 1: check whether we already finish on this one.
+		 *
 		 * If we have visited all the dependencies from this @lock to
 		 * others (iow, if we have visited all lock_list entries in
 		 * @lock->class->locks_{after,before}) we skip, otherwise go
@@ -1676,17 +1663,17 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
 		 * list accessed.
 		 */
 		if (lock_accessed(lock))
-			continue;
+			goto next;
 		else
 			mark_lock_accessed(lock);
 
-		head = get_dep_list(lock, offset);
-
-		prev_only_xr = lock->only_xr;
-
-		list_for_each_entry_rcu(entry, head, entry) {
-			unsigned int cq_depth;
-			u8 dep = entry->dep;
+		/*
+		 * Step 2: check whether prev dependency and this form a strong
+		 *         dependency path.
+		 */
+		if (lock->parent) { /* Parent exists, check prev dependency */
+			u8 dep = lock->dep;
+			bool prev_only_xr = lock->parent->only_xr;
 
 			/*
 			 * Mask out all -(S*)-> if we only have *R in previous
@@ -1698,29 +1685,68 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
 
 			/* If nothing left, we skip */
 			if (!dep)
-				continue;
+				goto next;
 
 			/* If there are only -(*R)-> left, set that for the next step */
-			entry->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
+			lock->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
+		}
 
-			visit_lock_entry(entry, lock);
-			if (match(entry, data)) {
-				*target_entry = entry;
-				ret = BFS_RMATCH;
-				goto exit;
-			}
+		/*
+		 * Step 3: we haven't visited this and there is a strong
+		 *         dependency path to this, so check with @match.
+		 */
+		if (match(lock, data)) {
+			*target_entry = lock;
+			return BFS_RMATCH;
+		}
+
+		/*
+		 * Step 4: if not match, expand the path by adding the
+		 *         afterwards or backwards dependencies in the search
+		 *
+		 * Note we only enqueue the first of the list into the queue,
+		 * because we can always find a sibling dependency from one
+		 * (see label 'next'), as a result the space of queue is saved.
+		 */
+		head = get_dep_list(lock, offset);
+		entry = list_first_or_null_rcu(head, struct lock_list, entry);
+		if (entry) {
+			unsigned int cq_depth;
+
+			if (__cq_enqueue(cq, entry))
+				return BFS_EQUEUEFULL;
 
-			if (__cq_enqueue(cq, entry)) {
-				ret = BFS_EQUEUEFULL;
-				goto exit;
-			}
 			cq_depth = __cq_get_elem_count(cq);
 			if (max_bfs_queue_depth < cq_depth)
 				max_bfs_queue_depth = cq_depth;
 		}
+
+		/*
+		 * Update the ->parent, so when @entry is iterated, we know the
+		 * previous dependency.
+		 */
+		list_for_each_entry_rcu(entry, head, entry)
+			visit_lock_entry(entry, lock);
+next:
+		/*
+		 * Step 5: fetch the next dependency to process.
+		 *
+		 * If there is a previous dependency, we fetch the sibling
+		 * dependency in the dep list of previous dependency.
+		 *
+		 * Otherwise set @lock to NULL to fetch the next entry from
+		 * queue.
+		 */
+		if (lock->parent) {
+			head = get_dep_list(lock->parent, offset);
+			lock = list_next_or_null_rcu(head, &lock->entry,
+						     struct lock_list, entry);
+		} else {
+			lock = NULL;
+		}
 	}
-exit:
-	return ret;
+
+	return BFS_RNOMATCH;
 }
 
 static inline enum bfs_result
-- 
2.28.0



* Re: [PATCH] lockdep: Optimize the memory usage of circular queue
  2020-09-17  8:01 [PATCH] lockdep: Optimize the memory usage of circular queue Boqun Feng
@ 2020-09-24 15:12 ` Boqun Feng
  2020-09-28  8:03   ` Dmitry Vyukov
  2020-09-28  8:51 ` Peter Zijlstra
  2020-09-30  8:55 ` [tip: locking/core] " tip-bot2 for Boqun Feng
  2 siblings, 1 reply; 9+ messages in thread
From: Boqun Feng @ 2020-09-24 15:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: Waiman Long, Qian Cai, Peter Zijlstra, Ingo Molnar, Will Deacon

Ping ;-)

Regards,
Boqun


* Re: [PATCH] lockdep: Optimize the memory usage of circular queue
  2020-09-24 15:12 ` Boqun Feng
@ 2020-09-28  8:03   ` Dmitry Vyukov
  2020-09-28  8:08     ` Boqun Feng
  0 siblings, 1 reply; 9+ messages in thread
From: Dmitry Vyukov @ 2020-09-28  8:03 UTC (permalink / raw)
  To: Boqun Feng
  Cc: LKML, Waiman Long, Qian Cai, Peter Zijlstra, Ingo Molnar, Will Deacon

On Thu, Sep 24, 2020 at 5:13 PM Boqun Feng <boqun.feng@gmail.com> wrote:
>
> Ping ;-)
>
> Regards,
> Boqun

Hi Boqun,

Peter says this may also fix this issue:
https://syzkaller.appspot.com/bug?extid=62ebe501c1ce9a91f68c
please add the following tag to the patch so that the bug report will
be closed on merge:
Reported-by: syzbot+62ebe501c1ce9a91f68c@syzkaller.appspotmail.com


* Re: [PATCH] lockdep: Optimize the memory usage of circular queue
  2020-09-28  8:03   ` Dmitry Vyukov
@ 2020-09-28  8:08     ` Boqun Feng
  2020-09-28  8:13       ` Dmitry Vyukov
  0 siblings, 1 reply; 9+ messages in thread
From: Boqun Feng @ 2020-09-28  8:08 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: LKML, Waiman Long, Qian Cai, Peter Zijlstra, Ingo Molnar, Will Deacon

On Mon, Sep 28, 2020 at 10:03:19AM +0200, Dmitry Vyukov wrote:
> On Thu, Sep 24, 2020 at 5:13 PM Boqun Feng <boqun.feng@gmail.com> wrote:
> >
> > Ping ;-)
> >
> > Regards,
> > Boqun
> 
> Hi Boqun,
> 
> Peter says this may also fix this issue:
> https://syzkaller.appspot.com/bug?extid=62ebe501c1ce9a91f68c
> please add the following tag to the patch so that the bug report will
> be closed on merge:
> Reported-by: syzbot+62ebe501c1ce9a91f68c@syzkaller.appspotmail.com
> 

Sure, I will if another version of this patch is required; otherwise (if
this one looks good to Peter), I will rely on Peter to add the tag ;-)
Works for you?

Regards,
Boqun


* Re: [PATCH] lockdep: Optimize the memory usage of circular queue
  2020-09-28  8:08     ` Boqun Feng
@ 2020-09-28  8:13       ` Dmitry Vyukov
  0 siblings, 0 replies; 9+ messages in thread
From: Dmitry Vyukov @ 2020-09-28  8:13 UTC (permalink / raw)
  To: Boqun Feng
  Cc: LKML, Waiman Long, Qian Cai, Peter Zijlstra, Ingo Molnar, Will Deacon

On Mon, Sep 28, 2020 at 10:08 AM Boqun Feng <boqun.feng@gmail.com> wrote:
>
> On Mon, Sep 28, 2020 at 10:03:19AM +0200, Dmitry Vyukov wrote:
> > On Thu, Sep 24, 2020 at 5:13 PM Boqun Feng <boqun.feng@gmail.com> wrote:
> > >
> > > Ping ;-)
> > >
> > > Regards,
> > > Boqun
> >
> > Hi Boqun,
> >
> > Peter says this may also fix this issue:
> > https://syzkaller.appspot.com/bug?extid=62ebe501c1ce9a91f68c
> > please add the following tag to the patch so that the bug report will
> > be closed on merge:
> > Reported-by: syzbot+62ebe501c1ce9a91f68c@syzkaller.appspotmail.com
> >
>
> Sure, I will if another version of this patch is required, otherwise (if
> this one looks good to Peter), I will rely on Peter to add the tag ;-)
> Works for you?

Yes, totally, thanks.


* Re: [PATCH] lockdep: Optimize the memory usage of circular queue
  2020-09-17  8:01 [PATCH] lockdep: Optimize the memory usage of circular queue Boqun Feng
  2020-09-24 15:12 ` Boqun Feng
@ 2020-09-28  8:51 ` Peter Zijlstra
  2020-09-28  9:47   ` Boqun Feng
  2020-09-30  8:55 ` [tip: locking/core] " tip-bot2 for Boqun Feng
  2 siblings, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2020-09-28  8:51 UTC (permalink / raw)
  To: Boqun Feng; +Cc: linux-kernel, Waiman Long, Qian Cai, Ingo Molnar, Will Deacon

On Thu, Sep 17, 2020 at 04:01:50PM +0800, Boqun Feng wrote:

>  	__cq_init(cq);
>  	__cq_enqueue(cq, source_entry);
>  
> +	while (lock || (lock = __cq_dequeue(cq))) {
> +		if (!lock->class)
> +			return BFS_EINVALIDNODE;
>  
>  		/*
> +		 * Step 1: check whether we already finish on this one.
> +		 *
>  		 * If we have visited all the dependencies from this @lock to
>  		 * others (iow, if we have visited all lock_list entries in
>  		 * @lock->class->locks_{after,before}) we skip, otherwise go

> @@ -1698,29 +1685,68 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
>  
>  			/* If nothing left, we skip */
>  			if (!dep)
> +				goto next;
>  
>  			/* If there are only -(*R)-> left, set that for the next step */
> +			lock->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
> +		}
>  
> +		/*
> +		 * Step 3: we haven't visited this and there is a strong
> +		 *         dependency path to this, so check with @match.
> +		 */
> +		if (match(lock, data)) {
> +			*target_entry = lock;
> +			return BFS_RMATCH;
> +		}
> +
> +		/*
> +		 * Step 4: if not match, expand the path by adding the
> +		 *         afterwards or backwards dependencis in the search
> +		 *
> +		 * Note we only enqueue the first of the list into the queue,
> +		 * because we can always find a sibling dependency from one
> +		 * (see label 'next'), as a result the space of queue is saved.
> +		 */
> +		head = get_dep_list(lock, offset);
> +		entry = list_first_or_null_rcu(head, struct lock_list, entry);
> +		if (entry) {
> +			unsigned int cq_depth;
> +
> +			if (__cq_enqueue(cq, entry))
> +				return BFS_EQUEUEFULL;
>  
>  			cq_depth = __cq_get_elem_count(cq);
>  			if (max_bfs_queue_depth < cq_depth)
>  				max_bfs_queue_depth = cq_depth;
>  		}
> +
> +		/*
> +		 * Update the ->parent, so when @entry is iterated, we know the
> +		 * previous dependency.
> +		 */
> +		list_for_each_entry_rcu(entry, head, entry)
> +			visit_lock_entry(entry, lock);

This confused me for a while. I think it might be a little clearer if we
put this inside the previous block.

Alternatively, we could try and write it something like so:

		/*
		 * Step 4: if not match, expand the path by adding the
		 *         afterwards or backwards dependencis in the search
		 */
		first = true;
		head = get_dep_list(lock, offset);
		list_for_each_entry_rcu(entry, head, entry) {
			visit_lock_entry(entry, lock);

			if (!first)
				continue;

			/*
			 * Only enqueue the first entry of the list,
			 * we'll iterate it's siblings at the next
			 * label.
			 */
			first = false;
			if (__cq_enqueue(cq, entry))
				return BFS_EQUEUEFULL;

			cq_depth = __cq_get_elem_count(cq);
			if (max_bfs_queue_depth < cq_depth)
				max_bfs_queue_depth = cq_depth;
		}

Hmm?

> +next:
> +		/*
> +		 * Step 5: fetch the next dependency to process.
> +		 *
> +		 * If there is a previous dependency, we fetch the sibling
> +		 * dependency in the dep list of previous dependency.
> +		 *
> +		 * Otherwise set @lock to NULL to fetch the next entry from
> +		 * queue.
> +		 */
> +		if (lock->parent) {
> +			head = get_dep_list(lock->parent, offset);
> +			lock = list_next_or_null_rcu(head, &lock->entry,
> +						     struct lock_list, entry);
> +		} else {
> +			lock = NULL;
> +		}

I think that if we hide this in a __bfs_next() helper, the iteration
becomes nicer.

>  	}
> -exit:
> +
> +	return BFS_RNOMATCH;
>  }

How's this?

---
Subject: lockdep: Optimize the memory usage of circular queue
From: Boqun Feng <boqun.feng@gmail.com>
Date: Thu, 17 Sep 2020 16:01:50 +0800

From: Boqun Feng <boqun.feng@gmail.com>

Qian Cai reported a BFS_EQUEUEFULL warning [1] after the read-recursive
deadlock detection was merged into the tip tree recently. Unlike the
previous lockdep graph search, which iterates every lock class (every
node in the graph) exactly once, the graph search for read-recursive
deadlock detection needs to iterate every lock dependency (every edge
in the graph) once. As a result, the maximum memory cost of the
circular queue changes from O(V), where V is the number of lock classes
(nodes or vertices) in the graph, to O(E), where E is the number of
lock dependencies (edges), because every lock class or dependency gets
enqueued once in the BFS. Therefore we hit the BFS_EQUEUEFULL case.

However, we don't actually need to enqueue all dependencies for the
BFS, because every time we enqueue a dependency, we almost always
enqueue all the other dependencies in the same dependency list
("almost" because we currently check before enqueueing, so a dependency
that doesn't pass the check stage is not enqueued; however, we can
always do these two steps in the reverse order). Based on this, we only
need to enqueue the first dependency from a dependency list, and every
time we want to fetch a new dependency to work on, we can either:

  1)	fetch the dependency next to the current dependency in the
	dependency list
or

  2)	if the dependency in 1) doesn't exist, fetch the dependency from
	the queue.

With this approach, the "max bfs queue depth" for an x86_64_defconfig +
lockdep and selftest config kernel can be decreased from:

        max bfs queue depth:                   201

to (after applying this patch)

        max bfs queue depth:                   61

While I'm at it, clean up the code logic a little (e.g. return directly
rather than setting a "ret" value and jumping to the "exit" label).

[1]: https://lore.kernel.org/lkml/17343f6f7f2438fc376125384133c5ba70c2a681.camel@redhat.com/

Reported-by: Qian Cai <cai@redhat.com>
Reported-by: syzbot+62ebe501c1ce9a91f68c@syzkaller.appspotmail.com
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200917080210.108095-1-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c |  101 ++++++++++++++++++++++++++++-------------------
 1 file changed, 61 insertions(+), 40 deletions(-)

--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1606,6 +1606,15 @@ static inline void bfs_init_rootb(struct
 	lock->only_xr = (hlock->read != 0);
 }
 
+static inline struct lock_list *__bfs_next(struct lock_list *lock, int offset)
+{
+	if (!lock || !lock->parent)
+		return NULL;
+
+	return list_next_or_null_rcu(get_dep_list(lock->parent, offset),
+				     &lock->entry, struct lock_list, entry);
+}
+
 /*
  * Breadth-First Search to find a strong path in the dependency graph.
  *
@@ -1639,36 +1648,25 @@ static enum bfs_result __bfs(struct lock
 			     struct lock_list **target_entry,
 			     int offset)
 {
+	struct circular_queue *cq = &lock_cq;
+	struct lock_list *lock = NULL;
 	struct lock_list *entry;
-	struct lock_list *lock;
 	struct list_head *head;
-	struct circular_queue *cq = &lock_cq;
-	enum bfs_result ret = BFS_RNOMATCH;
+	unsigned int cq_depth;
+	bool first;
 
 	lockdep_assert_locked();
 
-	if (match(source_entry, data)) {
-		*target_entry = source_entry;
-		ret = BFS_RMATCH;
-		goto exit;
-	}
-
-	head = get_dep_list(source_entry, offset);
-	if (list_empty(head))
-		goto exit;
-
 	__cq_init(cq);
 	__cq_enqueue(cq, source_entry);
 
-	while ((lock = __cq_dequeue(cq))) {
-		bool prev_only_xr;
-
-		if (!lock->class) {
-			ret = BFS_EINVALIDNODE;
-			goto exit;
-		}
+	while ((lock = __bfs_next(lock, offset)) || (lock = __cq_dequeue(cq))) {
+		if (!lock->class)
+			return BFS_EINVALIDNODE;
 
 		/*
+		 * Step 1: check whether we already finish on this one.
+		 *
 		 * If we have visited all the dependencies from this @lock to
 		 * others (iow, if we have visited all lock_list entries in
 		 * @lock->class->locks_{after,before}) we skip, otherwise go
@@ -1680,13 +1678,13 @@ static enum bfs_result __bfs(struct lock
 		else
 			mark_lock_accessed(lock);
 
-		head = get_dep_list(lock, offset);
-
-		prev_only_xr = lock->only_xr;
-
-		list_for_each_entry_rcu(entry, head, entry) {
-			unsigned int cq_depth;
-			u8 dep = entry->dep;
+		/*
+		 * Step 2: check whether prev dependency and this form a strong
+		 *         dependency path.
+		 */
+		if (lock->parent) { /* Parent exists, check prev dependency */
+			u8 dep = lock->dep;
+			bool prev_only_xr = lock->parent->only_xr;
 
 			/*
 			 * Mask out all -(S*)-> if we only have *R in previous
@@ -1701,26 +1699,49 @@ static enum bfs_result __bfs(struct lock
 				continue;
 
 			/* If there are only -(*R)-> left, set that for the next step */
-			entry->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
+			lock->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
+		}
+
+		/*
+		 * Step 3: we haven't visited this and there is a strong
+		 *         dependency path to this, so check with @match.
+		 */
+		if (match(lock, data)) {
+			*target_entry = lock;
+			return BFS_RMATCH;
+		}
 
+		/*
+		 * Step 4: if not match, expand the path by adding the
+		 *         afterwards or backwards dependencies in the search
+		 *
+		 */
+		first = true;
+		head = get_dep_list(lock, offset);
+		list_for_each_entry_rcu(entry, head, entry) {
 			visit_lock_entry(entry, lock);
-			if (match(entry, data)) {
-				*target_entry = entry;
-				ret = BFS_RMATCH;
-				goto exit;
-			}
-
-			if (__cq_enqueue(cq, entry)) {
-				ret = BFS_EQUEUEFULL;
-				goto exit;
-			}
+
+			/*
+			 * Note we only enqueue the first of the list into the
+			 * queue, because we can always find a sibling
+			 * dependency from one (see __bfs_next()), as a result
+			 * the space of queue is saved.
+			 */
+			if (!first)
+				continue;
+
+			first = false;
+
+			if (__cq_enqueue(cq, entry))
+				return BFS_EQUEUEFULL;
+
 			cq_depth = __cq_get_elem_count(cq);
 			if (max_bfs_queue_depth < cq_depth)
 				max_bfs_queue_depth = cq_depth;
 		}
 	}
-exit:
-	return ret;
+
+	return BFS_RNOMATCH;
 }
 
 static inline enum bfs_result


* Re: [PATCH] lockdep: Optimize the memory usage of circular queue
  2020-09-28  8:51 ` Peter Zijlstra
@ 2020-09-28  9:47   ` Boqun Feng
  2020-09-28  9:55     ` Peter Zijlstra
  0 siblings, 1 reply; 9+ messages in thread
From: Boqun Feng @ 2020-09-28  9:47 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Waiman Long, Qian Cai, Ingo Molnar, Will Deacon

On Mon, Sep 28, 2020 at 10:51:04AM +0200, Peter Zijlstra wrote:
> On Thu, Sep 17, 2020 at 04:01:50PM +0800, Boqun Feng wrote:
> 
> >  	__cq_init(cq);
> >  	__cq_enqueue(cq, source_entry);
> >  
> > +	while (lock || (lock = __cq_dequeue(cq))) {
> > +		if (!lock->class)
> > +			return BFS_EINVALIDNODE;
> >  
> >  		/*
> > +		 * Step 1: check whether we already finish on this one.
> > +		 *
> >  		 * If we have visited all the dependencies from this @lock to
> >  		 * others (iow, if we have visited all lock_list entries in
> >  		 * @lock->class->locks_{after,before}) we skip, otherwise go
> 
> > @@ -1698,29 +1685,68 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
> >  
> >  			/* If nothing left, we skip */
> >  			if (!dep)
> > +				goto next;
> >  
> >  			/* If there are only -(*R)-> left, set that for the next step */
> > +			lock->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
> > +		}
> >  
> > +		/*
> > +		 * Step 3: we haven't visited this and there is a strong
> > +		 *         dependency path to this, so check with @match.
> > +		 */
> > +		if (match(lock, data)) {
> > +			*target_entry = lock;
> > +			return BFS_RMATCH;
> > +		}
> > +
> > +		/*
> > +		 * Step 4: if not match, expand the path by adding the
> > +		 *         afterwards or backwards dependencis in the search
> > +		 *
> > +		 * Note we only enqueue the first of the list into the queue,
> > +		 * because we can always find a sibling dependency from one
> > +		 * (see label 'next'), as a result the space of queue is saved.
> > +		 */
> > +		head = get_dep_list(lock, offset);
> > +		entry = list_first_or_null_rcu(head, struct lock_list, entry);
> > +		if (entry) {
> > +			unsigned int cq_depth;
> > +
> > +			if (__cq_enqueue(cq, entry))
> > +				return BFS_EQUEUEFULL;
> >  
> >  			cq_depth = __cq_get_elem_count(cq);
> >  			if (max_bfs_queue_depth < cq_depth)
> >  				max_bfs_queue_depth = cq_depth;
> >  		}
> > +
> > +		/*
> > +		 * Update the ->parent, so when @entry is iterated, we know the
> > +		 * previous dependency.
> > +		 */
> > +		list_for_each_entry_rcu(entry, head, entry)
> > +			visit_lock_entry(entry, lock);
> 
> This confused me for a while. I think it might be a little clearer if we
> put this inside the previous block.
> 
> Alternatively, we could try and write it something like so:
> 
> 		/*
> 		 * Step 4: if not match, expand the path by adding the
> 		 *         afterwards or backwards dependencis in the search
> 		 */
> 		first = true;
> 		head = get_dep_list(lock, offset);
> 		list_for_each_entry_rcu(entry, head, entry) {
> 			visit_lock_entry(entry, lock);
> 
> 			if (!first)
> 				continue;
> 
> 			/*
> 			 * Only enqueue the first entry of the list,
> 			 * we'll iterate it's siblings at the next
> 			 * label.
> 			 */
> 			first = false;
> 			if (__cq_enqueue(cq, entry))
> 				return BFS_EQUEUEFULL;
> 
> 			cq_depth = __cq_get_elem_count(cq);
> 			if (max_bfs_queue_depth < cq_depth)
> 				max_bfs_queue_depth = cq_depth;
> 		}
> 
> Hmm?
> 

Better than mine ;-)

> > +next:
> > +		/*
> > +		 * Step 5: fetch the next dependency to process.
> > +		 *
> > +		 * If there is a previous dependency, we fetch the sibling
> > +		 * dependency in the dep list of previous dependency.
> > +		 *
> > +		 * Otherwise set @lock to NULL to fetch the next entry from
> > +		 * queue.
> > +		 */
> > +		if (lock->parent) {
> > +			head = get_dep_list(lock->parent, offset);
> > +			lock = list_next_or_null_rcu(head, &lock->entry,
> > +						     struct lock_list, entry);
> > +		} else {
> > +			lock = NULL;
> > +		}
> 
> I think that if we hide this in a __bfs_next() helper, the iteration
> becomes nicer.
> 
> >  	}
> > -exit:
> > +
> > +	return BFS_RNOMATCH;
> >  }
> 
> How's this?
> 

I think your version is better, and it should be functionally identical to
mine. Also, FWIW, I tested it with the lockdep boot selftests and everything
worked fine.

Regards,
Boqun

> ---
> Subject: lockdep: Optimize the memory usage of circular queue
> From: Boqun Feng <boqun.feng@gmail.com>
> Date: Thu, 17 Sep 2020 16:01:50 +0800
> 
> From: Boqun Feng <boqun.feng@gmail.com>
> 
> Qian Cai reported a BFS_EQUEUEFULL warning [1] after read recursive
> deadlock detection merged into tip tree recently. Unlike the previous
> lockep graph searching, which iterate every lock class (every node in
> the graph) exactly once, the graph searching for read recurisve deadlock
> detection needs to iterate every lock dependency (every edge in the
> graph) once, as a result, the maximum memory cost of the circular queue
> changes from O(V), where V is the number of lock classes (nodes or
> vertices) in the graph, to O(E), where E is the number of lock
> dependencies (edges), because every lock class or dependency gets
> enqueued once in the BFS. Therefore we hit the BFS_EQUEUEFULL case.
> 
> However, actually we don't need to enqueue all dependencies for the BFS,
> because every time we enqueue a dependency, we almostly enqueue all
> other dependencies in the same dependency list ("almostly" is because
> we currently check before enqueue, so if a dependency doesn't pass the
> check stage we won't enqueue it, however, we can always do in reverse
> ordering), based on this, we can only enqueue the first dependency from
> a dependency list and every time we want to fetch a new dependency to
> work, we can either:
> 
>   1)	fetch the dependency next to the current dependency in the
> 	dependency list
> or
> 
>   2)	if the dependency in 1) doesn't exist, fetch the dependency from
> 	the queue.
> 
> With this approach, the "max bfs queue depth" for a x86_64_defconfig +
> lockdep and selftest config kernel can get descreased from:
> 
>         max bfs queue depth:                   201
> 
> to (after apply this patch)
> 
>         max bfs queue depth:                   61
> 
> While I'm at it, clean up the code logic a little (e.g. directly return
> other than set a "ret" value and goto the "exit" label).
> 
> [1]: https://lore.kernel.org/lkml/17343f6f7f2438fc376125384133c5ba70c2a681.camel@redhat.com/
> 
> Reported-by: Qian Cai <cai@redhat.com>
> Reported-by: syzbot+62ebe501c1ce9a91f68c@syzkaller.appspotmail.com
> Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Link: https://lkml.kernel.org/r/20200917080210.108095-1-boqun.feng@gmail.com
> ---
>  kernel/locking/lockdep.c |  101 ++++++++++++++++++++++++++++-------------------
>  1 file changed, 61 insertions(+), 40 deletions(-)
> 
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -1606,6 +1606,15 @@ static inline void bfs_init_rootb(struct
>  	lock->only_xr = (hlock->read != 0);
>  }
>  
> +static inline struct lock_list *__bfs_next(struct lock_list *lock, int offset)
> +{
> +	if (!lock || !lock->parent)
> +		return NULL;
> +
> +	return list_next_or_null_rcu(get_dep_list(lock->parent, offset),
> +				     &lock->entry, struct lock_list, entry);
> +}
> +
>  /*
>   * Breadth-First Search to find a strong path in the dependency graph.
>   *
> @@ -1639,36 +1648,25 @@ static enum bfs_result __bfs(struct lock
>  			     struct lock_list **target_entry,
>  			     int offset)
>  {
> +	struct circular_queue *cq = &lock_cq;
> +	struct lock_list *lock = NULL;
>  	struct lock_list *entry;
> -	struct lock_list *lock;
>  	struct list_head *head;
> -	struct circular_queue *cq = &lock_cq;
> -	enum bfs_result ret = BFS_RNOMATCH;
> +	unsigned int cq_depth;
> +	bool first;
>  
>  	lockdep_assert_locked();
>  
> -	if (match(source_entry, data)) {
> -		*target_entry = source_entry;
> -		ret = BFS_RMATCH;
> -		goto exit;
> -	}
> -
> -	head = get_dep_list(source_entry, offset);
> -	if (list_empty(head))
> -		goto exit;
> -
>  	__cq_init(cq);
>  	__cq_enqueue(cq, source_entry);
>  
> -	while ((lock = __cq_dequeue(cq))) {
> -		bool prev_only_xr;
> -
> -		if (!lock->class) {
> -			ret = BFS_EINVALIDNODE;
> -			goto exit;
> -		}
> +	while ((lock = __bfs_next(lock, offset)) || (lock = __cq_dequeue(cq))) {
> +		if (!lock->class)
> +			return BFS_EINVALIDNODE;
>  
>  		/*
> +		 * Step 1: check whether we already finish on this one.
> +		 *
>  		 * If we have visited all the dependencies from this @lock to
>  		 * others (iow, if we have visited all lock_list entries in
>  		 * @lock->class->locks_{after,before}) we skip, otherwise go
> @@ -1680,13 +1678,13 @@ static enum bfs_result __bfs(struct lock
>  		else
>  			mark_lock_accessed(lock);
>  
> -		head = get_dep_list(lock, offset);
> -
> -		prev_only_xr = lock->only_xr;
> -
> -		list_for_each_entry_rcu(entry, head, entry) {
> -			unsigned int cq_depth;
> -			u8 dep = entry->dep;
> +		/*
> +		 * Step 2: check whether prev dependency and this form a strong
> +		 *         dependency path.
> +		 */
> +		if (lock->parent) { /* Parent exists, check prev dependency */
> +			u8 dep = lock->dep;
> +			bool prev_only_xr = lock->parent->only_xr;
>  
>  			/*
>  			 * Mask out all -(S*)-> if we only have *R in previous
> @@ -1701,26 +1699,49 @@ static enum bfs_result __bfs(struct lock
>  				continue;
>  
>  			/* If there are only -(*R)-> left, set that for the next step */
> -			entry->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
> +			lock->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
> +		}
> +
> +		/*
> +		 * Step 3: we haven't visited this and there is a strong
> +		 *         dependency path to this, so check with @match.
> +		 */
> +		if (match(lock, data)) {
> +			*target_entry = lock;
> +			return BFS_RMATCH;
> +		}
>  
> +		/*
> +		 * Step 4: if not match, expand the path by adding the
> +		 *         afterwards or backwards dependencis in the search
> +		 *
> +		 */
> +		first = true;
> +		head = get_dep_list(lock, offset);
> +		list_for_each_entry_rcu(entry, head, entry) {
>  			visit_lock_entry(entry, lock);
> -			if (match(entry, data)) {
> -				*target_entry = entry;
> -				ret = BFS_RMATCH;
> -				goto exit;
> -			}
> -
> -			if (__cq_enqueue(cq, entry)) {
> -				ret = BFS_EQUEUEFULL;
> -				goto exit;
> -			}
> +
> +			/*
> +			 * Note we only enqueue the first of the list into the
> +			 * queue, because we can always find a sibling
> +			 * dependency from one (see _bfs_next()), as a result
> +			 * the space of queue is saved.
> +			 */
> +			if (!first)
> +				continue;
> +
> +			first = false;
> +
> +			if (__cq_enqueue(cq, entry))
> +				return BFS_EQUEUEFULL;
> +
>  			cq_depth = __cq_get_elem_count(cq);
>  			if (max_bfs_queue_depth < cq_depth)
>  				max_bfs_queue_depth = cq_depth;
>  		}
>  	}
> -exit:
> -	return ret;
> +
> +	return BFS_RNOMATCH;
>  }
>  
>  static inline enum bfs_result


* Re: [PATCH] lockdep: Optimize the memory usage of circular queue
  2020-09-28  9:47   ` Boqun Feng
@ 2020-09-28  9:55     ` Peter Zijlstra
  0 siblings, 0 replies; 9+ messages in thread
From: Peter Zijlstra @ 2020-09-28  9:55 UTC (permalink / raw)
  To: Boqun Feng; +Cc: linux-kernel, Waiman Long, Qian Cai, Ingo Molnar, Will Deacon

On Mon, Sep 28, 2020 at 05:47:38PM +0800, Boqun Feng wrote:
> I think your version is better and should be functionally identical to
> mine, also FWIW, I tested with a lockdep boot selftests, everything
> worked fine.

Excellent, thanks! I'll go feed it to the robots.


* [tip: locking/core] lockdep: Optimize the memory usage of circular queue
  2020-09-17  8:01 [PATCH] lockdep: Optimize the memory usage of circular queue Boqun Feng
  2020-09-24 15:12 ` Boqun Feng
  2020-09-28  8:51 ` Peter Zijlstra
@ 2020-09-30  8:55 ` tip-bot2 for Boqun Feng
  2 siblings, 0 replies; 9+ messages in thread
From: tip-bot2 for Boqun Feng @ 2020-09-30  8:55 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Qian Cai, syzbot+62ebe501c1ce9a91f68c, Boqun Feng,
	Peter Zijlstra (Intel),
	x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     6d1823ccc480866e571ab1206665d693aeb600cf
Gitweb:        https://git.kernel.org/tip/6d1823ccc480866e571ab1206665d693aeb600cf
Author:        Boqun Feng <boqun.feng@gmail.com>
AuthorDate:    Thu, 17 Sep 2020 16:01:50 +08:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 29 Sep 2020 09:56:59 +02:00

lockdep: Optimize the memory usage of circular queue

Qian Cai reported a BFS_EQUEUEFULL warning [1] after read recursive
deadlock detection was merged into the tip tree recently. Unlike the
previous lockdep graph search, which iterates every lock class (every
node in the graph) exactly once, the graph search for read recursive
deadlock detection needs to iterate every lock dependency (every edge
in the graph) once. As a result, the maximum memory cost of the
circular queue changes from O(V), where V is the number of lock classes
(nodes or vertices) in the graph, to O(E), where E is the number of
lock dependencies (edges), because every lock class or dependency gets
enqueued once in the BFS. Therefore we hit the BFS_EQUEUEFULL case.

However, we don't actually need to enqueue all dependencies for the
BFS, because every time we enqueue a dependency, we almost always
enqueue all the other dependencies in the same dependency list ("almost"
because we currently check before enqueueing, so a dependency that
doesn't pass the check stage is not enqueued; however, we can always
reverse the order and do the check after fetching). Based on this, we
can enqueue only the first dependency from a dependency list, and every
time we want to fetch a new dependency to work on, we can either (a C
sketch follows the list below):

  1)	fetch the dependency next to the current dependency in the
	dependency list
or

  2)	if the dependency in 1) doesn't exist, fetch the dependency from
	the queue.
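
For illustration, a minimal userspace sketch of that fetch order (the
dependency list, the queue and all names here are invented for the
example; in the kernel the same policy is implemented by the
__bfs_next() helper and the __cq_dequeue() fallback in the patch below):

	#include <stdio.h>

	static const int deps_of_A[] = { 10, 11, 12 };	/* a toy dependency list */
	static int queue[4], qhead, qtail;		/* a toy work queue */

	/* Return the next dependency to work on, or -1 when both are empty. */
	static int fetch_next(const int *list, int len, int *idx)
	{
		if (*idx + 1 < len)		/* 1) the sibling next to the current one */
			return list[++*idx];
		if (qhead != qtail)		/* 2) otherwise fall back to the queue    */
			return queue[qhead++];
		return -1;
	}

	int main(void)
	{
		int idx = 0, item;

		queue[qtail++] = 99;		/* something enqueued earlier */

		printf("work on %d\n", deps_of_A[idx]);
		while ((item = fetch_next(deps_of_A, 3, &idx)) != -1)
			printf("work on %d\n", item);	/* 11, 12, then 99 from the queue */
		return 0;
	}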

With this approach, the "max bfs queue depth" for an x86_64_defconfig +
lockdep and selftest config kernel is decreased from:

        max bfs queue depth:                   201

to (after applying this patch)

        max bfs queue depth:                   61
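
As a reproduction aid, here is a small sketch that reads the counter after
boot; it assumes a lockdep-enabled kernel (CONFIG_PROVE_LOCKING, plus
CONFIG_DEBUG_LOCKING_API_SELFTESTS for the boot-time selftests), on which
the statistic shows up in /proc/lockdep_stats:

	#include <stdio.h>
	#include <string.h>

	/* Print lockdep's BFS queue depth statistic, if the kernel exposes it. */
	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/lockdep_stats", "r");

		if (!f) {
			perror("/proc/lockdep_stats");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			if (strstr(line, "max bfs queue depth"))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}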

While I'm at it, clean up the code logic a little (e.g. return directly
rather than setting a "ret" value and jumping to the "exit" label).

[1]: https://lore.kernel.org/lkml/17343f6f7f2438fc376125384133c5ba70c2a681.camel@redhat.com/

Reported-by: Qian Cai <cai@redhat.com>
Reported-by: syzbot+62ebe501c1ce9a91f68c@syzkaller.appspotmail.com
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200917080210.108095-1-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c |  99 +++++++++++++++++++++++---------------
 1 file changed, 60 insertions(+), 39 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index cccf4bc..9560a4e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1606,6 +1606,15 @@ static inline void bfs_init_rootb(struct lock_list *lock,
 	lock->only_xr = (hlock->read != 0);
 }
 
+static inline struct lock_list *__bfs_next(struct lock_list *lock, int offset)
+{
+	if (!lock || !lock->parent)
+		return NULL;
+
+	return list_next_or_null_rcu(get_dep_list(lock->parent, offset),
+				     &lock->entry, struct lock_list, entry);
+}
+
 /*
  * Breadth-First Search to find a strong path in the dependency graph.
  *
@@ -1639,36 +1648,25 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
 			     struct lock_list **target_entry,
 			     int offset)
 {
+	struct circular_queue *cq = &lock_cq;
+	struct lock_list *lock = NULL;
 	struct lock_list *entry;
-	struct lock_list *lock;
 	struct list_head *head;
-	struct circular_queue *cq = &lock_cq;
-	enum bfs_result ret = BFS_RNOMATCH;
+	unsigned int cq_depth;
+	bool first;
 
 	lockdep_assert_locked();
 
-	if (match(source_entry, data)) {
-		*target_entry = source_entry;
-		ret = BFS_RMATCH;
-		goto exit;
-	}
-
-	head = get_dep_list(source_entry, offset);
-	if (list_empty(head))
-		goto exit;
-
 	__cq_init(cq);
 	__cq_enqueue(cq, source_entry);
 
-	while ((lock = __cq_dequeue(cq))) {
-		bool prev_only_xr;
-
-		if (!lock->class) {
-			ret = BFS_EINVALIDNODE;
-			goto exit;
-		}
+	while ((lock = __bfs_next(lock, offset)) || (lock = __cq_dequeue(cq))) {
+		if (!lock->class)
+			return BFS_EINVALIDNODE;
 
 		/*
+		 * Step 1: check whether we have already finished with this one.
+		 *
 		 * If we have visited all the dependencies from this @lock to
 		 * others (iow, if we have visited all lock_list entries in
 		 * @lock->class->locks_{after,before}) we skip, otherwise go
@@ -1680,13 +1678,13 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
 		else
 			mark_lock_accessed(lock);
 
-		head = get_dep_list(lock, offset);
-
-		prev_only_xr = lock->only_xr;
-
-		list_for_each_entry_rcu(entry, head, entry) {
-			unsigned int cq_depth;
-			u8 dep = entry->dep;
+		/*
+		 * Step 2: check whether prev dependency and this form a strong
+		 *         dependency path.
+		 */
+		if (lock->parent) { /* Parent exists, check prev dependency */
+			u8 dep = lock->dep;
+			bool prev_only_xr = lock->parent->only_xr;
 
 			/*
 			 * Mask out all -(S*)-> if we only have *R in previous
@@ -1701,26 +1699,49 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
 				continue;
 
 			/* If there are only -(*R)-> left, set that for the next step */
-			entry->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
+			lock->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
+		}
 
+		/*
+		 * Step 3: we haven't visited this and there is a strong
+		 *         dependency path to this, so check with @match.
+		 */
+		if (match(lock, data)) {
+			*target_entry = lock;
+			return BFS_RMATCH;
+		}
+
+		/*
+		 * Step 4: if not match, expand the path by adding the
+		 *         forward or backward dependencies in the search
+		 *
+		 */
+		first = true;
+		head = get_dep_list(lock, offset);
+		list_for_each_entry_rcu(entry, head, entry) {
 			visit_lock_entry(entry, lock);
-			if (match(entry, data)) {
-				*target_entry = entry;
-				ret = BFS_RMATCH;
-				goto exit;
-			}
 
-			if (__cq_enqueue(cq, entry)) {
-				ret = BFS_EQUEUEFULL;
-				goto exit;
-			}
+			/*
+			 * Note we only enqueue the first of the list into the
+			 * queue, because we can always find a sibling
+			 * dependency from one (see __bfs_next()), as a result
+			 * the space of queue is saved.
+			 */
+			if (!first)
+				continue;
+
+			first = false;
+
+			if (__cq_enqueue(cq, entry))
+				return BFS_EQUEUEFULL;
+
 			cq_depth = __cq_get_elem_count(cq);
 			if (max_bfs_queue_depth < cq_depth)
 				max_bfs_queue_depth = cq_depth;
 		}
 	}
-exit:
-	return ret;
+
+	return BFS_RNOMATCH;
 }
 
 static inline enum bfs_result


end of thread, other threads:[~2020-09-30  8:55 UTC | newest]

Thread overview: 9+ messages
2020-09-17  8:01 [PATCH] lockdep: Optimize the memory usage of circular queue Boqun Feng
2020-09-24 15:12 ` Boqun Feng
2020-09-28  8:03   ` Dmitry Vyukov
2020-09-28  8:08     ` Boqun Feng
2020-09-28  8:13       ` Dmitry Vyukov
2020-09-28  8:51 ` Peter Zijlstra
2020-09-28  9:47   ` Boqun Feng
2020-09-28  9:55     ` Peter Zijlstra
2020-09-30  8:55 ` [tip: locking/core] " tip-bot2 for Boqun Feng
