From: Peter Zijlstra <peterz@infradead.org>
To: Bart Van Assche <bvanassche@acm.org>
Cc: mingo@redhat.com, tj@kernel.org, longman@redhat.com,
	johannes.berg@intel.com, linux-kernel@vger.kernel.org,
	Johannes Berg <johannes@sipsolutions.net>
Subject: Re: [PATCH v5 07/15] locking/lockdep: Free lock classes that are no longer in use
Date: Thu, 10 Jan 2019 20:44:59 +0100
Message-ID: <20190110194459.GD2861@worktop.programming.kicks-ass.net>
In-Reply-To: <1547146287.83374.49.camel@acm.org>

On Thu, Jan 10, 2019 at 10:51:27AM -0800, Bart Van Assche wrote:
> On Thu, 2019-01-10 at 16:24 +0100, Peter Zijlstra wrote:
> >  /*
> >   * A data structure for delayed freeing of data structures that may be
> > - * accessed by RCU readers at the time these were freed. The size of the array
> > - * is a compromise between minimizing the amount of memory used by this array
> > - * and minimizing the number of wait_event() calls by get_pending_free_lock().
> > + * accessed by RCU readers at the time these were freed.
> >   */
> >  static struct pending_free {
> > -	struct list_head zapped_classes;
> >  	struct rcu_head	 rcu_head;
> > +	int		 index;
> >  	int		 pending;
> > -} pending_free[2];
> > -static DECLARE_WAIT_QUEUE_HEAD(rcu_cb);
> > +	struct list_head zapped[2];
> > +} pending_free;
> 
> Hi Peter,
> 
> If the zapped[] array only has two elements there is no guarantee that an
> element will be free when zap_class() is called. I think we need at least
> num_online_cpus() elements to guarantee that at least one element is free
> when zap_class() is called. So removing the wait loop from
> get_pending_free_lock() seems wrong to me. Have you tried to run a workload
> that keeps all CPUs busy and that triggers get_pending_free_lock()
> frequently?

I have not run it (yet); but I do not quite follow your argument. There is
only a single rcu_head, yes? Therefore only a single list can be pending
at any one time, and the other list is free to be appended to during
this time -- everything is serialized by the graph lock after all.

When the RCU callback happens, we flush the list we started the QS for,
which then becomes empty, and if the open list contains entries, we
flip the two lists and requeue the rcu_head for another QS.

Therefore we only ever need two lists: one closed, with entries waiting for
the callback, and one open, to which we can append all newly freed entries.
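
To make that concrete, below is a minimal userspace sketch of the flip --
plain counters stand in for the zapped lists and an explicit
toy_grace_period_elapsed() call stands in for call_rcu(); all the toy_*
names are made up for illustration, the real thing is in the patch below.

#include <stdio.h>

/* Simplified stand-in for struct pending_head from the patch. */
struct toy_pending_head {
	int index;	/* which of the two lists is open for additions  */
	int pending;	/* a grace-period callback has been queued       */
	int count[2];	/* counters standing in for the two zapped lists */
};

static struct toy_pending_head ph;

/* Close the open list and queue a callback, unless one is already pending. */
static void toy_put_pf(void)
{
	if (!ph.count[ph.index])
		return;			/* nothing was zapped */
	if (ph.pending)
		return;			/* the closed list is still waiting for its QS */

	ph.pending = 1;
	ph.index ^= 1;			/* flip: the (empty) other list is now open */
	printf("queued callback for list %d\n", ph.index ^ 1);
}

/* Models the RCU callback running once the grace period has elapsed. */
static void toy_grace_period_elapsed(void)
{
	int closed = ph.index ^ 1;

	printf("freeing %d entries from list %d\n", ph.count[closed], closed);
	ph.count[closed] = 0;
	ph.pending = 0;

	toy_put_pf();	/* if the open list gathered entries meanwhile, flip again */
}

int main(void)
{
	ph.count[ph.index] += 3;	/* zap_class() queued three entries         */
	toy_put_pf();			/* list 0 closed, callback queued           */
	ph.count[ph.index] += 2;	/* new zaps land on the now-open list 1     */
	toy_put_pf();			/* a callback is pending: nothing to do     */
	toy_grace_period_elapsed();	/* frees list 0, then flips to close list 1 */
	toy_grace_period_elapsed();	/* frees list 1                             */
	return 0;
}

Two lists suffice because we only flip when no callback is pending, so the
list we open is always the one the previous callback just emptied.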

Below is a version of the patch that should apply on top of the first 13
patches. I'll try to run the whole set tomorrow morning or so.

---
Subject: lockdep: A wait-free RCU zapping
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu Jan 10 16:16:33 CET 2019

Implement a version of the RCU zapped_classes list that doesn't
require waiting.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/locking/lockdep.c |  257 +++++++++++++++++++++++------------------------
 1 file changed, 127 insertions(+), 130 deletions(-)

--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -292,18 +292,22 @@ LIST_HEAD(all_lock_classes);
 static LIST_HEAD(free_lock_classes);
 /*
  * A data structure for delayed freeing of data structures that may be
- * accessed by RCU readers at the time these were freed. The size of the array
- * is a compromise between minimizing the amount of memory used by this array
- * and minimizing the number of wait_event() calls by get_pending_free_lock().
+ * accessed by RCU readers at the time these were freed.
  */
-static struct pending_free {
+
+struct pending_free {
+	int empty;
 	struct list_head zapped_classes;
-	struct rcu_head	 rcu_head;
-	bool		 scheduled;
-	DECLARE_BITMAP(list_entries_being_freed, MAX_LOCKDEP_ENTRIES);
-	DECLARE_BITMAP(lock_chains_being_freed, MAX_LOCKDEP_CHAINS);
-} pending_free[2];
-static DECLARE_WAIT_QUEUE_HEAD(rcu_cb);
+	DECLARE_BITMAP(zapped_list_entries, MAX_LOCKDEP_ENTRIES);
+	DECLARE_BITMAP(zapped_lock_chains,  MAX_LOCKDEP_CHAINS);
+};
+
+static struct pending_head {
+	struct rcu_head rcu_head;
+	int index;
+	int pending;
+	struct pending_free pf[2];
+} pending_head;

 /*
  * The lockdep classes are in a hash-table as well, for fast lookup:
@@ -853,8 +857,9 @@ static bool list_entry_being_freed(int l
 	struct pending_free *pf;
 	int i;

-	for (i = 0, pf = pending_free; i < ARRAY_SIZE(pending_free); i++, pf++) {
-		if (test_bit(list_entry_idx, pf->list_entries_being_freed))
+	for (i = 0; i < ARRAY_SIZE(pending_head.pf); i++) {
+		pf = &pending_head.pf[i];
+		if (test_bit(list_entry_idx, pf->zapped_list_entries))
 			return true;
 	}

@@ -866,7 +871,8 @@ static bool in_any_zapped_class_list(str
 	struct pending_free *pf;
 	int i;

-	for (i = 0, pf = pending_free; i < ARRAY_SIZE(pending_free); i++, pf++) {
+	for (i = 0; i < ARRAY_SIZE(pending_head.pf); i++) {
+		pf = &pending_head.pf[i];
 		if (in_list(&class->lock_entry, &pf->zapped_classes))
 			return true;
 	}
@@ -958,7 +964,6 @@ static bool check_data_structures(void)
 static void init_data_structures_once(void)
 {
 	static bool initialization_happened;
-	struct pending_free *pf;
 	int i;

 	if (likely(initialization_happened))
@@ -966,10 +971,10 @@ static void init_data_structures_once(vo

 	initialization_happened = true;

-	for (i = 0, pf = pending_free; i < ARRAY_SIZE(pending_free);
-	     i++, pf++) {
+	init_rcu_head(&pending_head.rcu_head);
+	for (i = 0; i < ARRAY_SIZE(pending_head.pf); i++) {
+		struct pending_free *pf = &pending_head.pf[i];
 		INIT_LIST_HEAD(&pf->zapped_classes);
-		init_rcu_head(&pf->rcu_head);
 	}

 	for (i = 0; i < ARRAY_SIZE(lock_classes); i++) {
@@ -4406,10 +4411,11 @@ static void remove_class_from_lock_chain
 		if (chain_hlocks[i] != class - lock_classes)
 			continue;
 		/* The code below leaks one chain_hlock[] entry. */
-		if (--chain->depth > 0)
+		if (--chain->depth > 0) {
 			memmove(&chain_hlocks[i], &chain_hlocks[i + 1],
 				(chain->base + chain->depth - i) *
 				sizeof(chain_hlocks[0]));
+		}
 		/*
 		 * Each lock class occurs at most once in a lock chain so once
 		 * we found a match we can break out of this loop.
@@ -4432,7 +4438,8 @@ static void remove_class_from_lock_chain
 	 * hlist_for_each_entry_rcu() loop is safe.
 	 */
 	hlist_del_rcu(&chain->entry);
-	__set_bit(chain - lock_chains, pf->lock_chains_being_freed);
+	pf->empty = 0;
+	__set_bit(chain - lock_chains, pf->zapped_lock_chains);
 	if (chain->depth == 0)
 		return;
 	/*
@@ -4485,13 +4492,15 @@ static void zap_class(struct pending_fre
 		entry = list_entries + i;
 		if (entry->class != class && entry->links_to != class)
 			continue;
-		if (__test_and_set_bit(i, pf->list_entries_being_freed))
+		pf->empty = 0;
+		if (__test_and_set_bit(i, pf->zapped_list_entries))
 			continue;
 		nr_list_entries--;
 		list_del_rcu(&entry->entry);
 	}
 	if (list_empty(&class->locks_after) &&
 	    list_empty(&class->locks_before)) {
+		pf->empty = 0;
 		list_move_tail(&class->lock_entry, &pf->zapped_classes);
 		hlist_del_rcu(&class->hash_entry);
 		WRITE_ONCE(class->key, NULL);
@@ -4529,124 +4538,100 @@ static bool inside_selftest(void)
 	return current == lockdep_selftest_task_struct;
 }

-/*
- * Free all lock classes that are on the pf->zapped_classes list and also all
- * list entries that have been marked as being freed. Called as an RCU
- * callback function. May be called from RCU callback context.
- */
-static void free_zapped_classes(struct rcu_head *ch)
+/* must hold graph lock */
+struct pending_free *get_pf(void)
 {
-	struct pending_free *pf = container_of(ch, typeof(*pf), rcu_head);
-	struct lock_class *class;
-	unsigned long flags;
-
-	raw_local_irq_save(flags);
-	if (!graph_lock())
-		goto restore_irqs;
-	pf->scheduled = false;
-	if (check_data_structure_consistency)
-		WARN_ON_ONCE(!check_data_structures());
-	list_for_each_entry(class, &pf->zapped_classes, lock_entry) {
-		reinit_class(class);
-	}
-	list_splice_init(&pf->zapped_classes, &free_lock_classes);
-	bitmap_andnot(list_entries_in_use, list_entries_in_use,
-		      pf->list_entries_being_freed, ARRAY_SIZE(list_entries));
-	bitmap_clear(pf->list_entries_being_freed, 0, ARRAY_SIZE(list_entries));
-#ifdef CONFIG_PROVE_LOCKING
-	bitmap_andnot(lock_chains_in_use, lock_chains_in_use,
-		      pf->lock_chains_being_freed, ARRAY_SIZE(lock_chains));
-	bitmap_clear(pf->lock_chains_being_freed, 0, ARRAY_SIZE(lock_chains));
-#endif
-	graph_unlock();
-restore_irqs:
-	raw_local_irq_restore(flags);
+	struct pending_head *ph = &pending_head;

-	wake_up(&rcu_cb);
+	return ph->pf + ph->index;
 }

+static void pending_free_rcu(struct rcu_head *ch);
+
 /* Schedule an RCU callback. Must be called with the graph lock held. */
-static void schedule_free_zapped_classes(struct pending_free *pf)
+static void put_pf(struct pending_free *pf)
 {
+	struct pending_head *ph = &pending_head;
+
 	WARN_ON_ONCE(inside_selftest());
-	pf->scheduled = true;
-	call_rcu(&pf->rcu_head, free_zapped_classes);
-}
+	WARN_ON_ONCE(pf != get_pf());

-/*
- * Find an element in the pending_free[] array for which no RCU callback is
- * pending.
- */
-static struct pending_free *get_pending_free(void)
-{
-	struct pending_free *pf;
-	int i;
+	if (pf->empty)
+		return;
+
+	if (ph->pending)
+		return;

-	for (i = 0, pf = pending_free; i < ARRAY_SIZE(pending_free);
-	     i++, pf++)
-		if (!pf->scheduled)
-			return pf;
+	ph->pending = 1;
+	ph->index ^= 1;
+	/* Verify the list we just opened is empty. */
+	WARN_ON_ONCE(!get_pf()->empty);

-	return NULL;
+	call_rcu(&ph->rcu_head, pending_free_rcu);
 }

-/*
- * Find an element in the pending_free[] array for which no RCU callback is
- * pending and obtain the graph lock. May sleep.
- */
-static struct pending_free *get_pending_free_lock(unsigned long *flags)
+static void __free_pending(struct pending_free *pf)
 {
-	struct pending_free *pf;
+	struct lock_class *class;

-	WARN_ON_ONCE(inside_selftest());
+	if (check_data_structure_consistency)
+		WARN_ON_ONCE(!check_data_structures());

-	while (true) {
-		raw_local_irq_save(*flags);
-		if (!graph_lock()) {
-			raw_local_irq_restore(*flags);
-			return NULL;
-		}
-		pf = get_pending_free();
-		if (pf)
-			break;
-		graph_unlock();
-		raw_local_irq_restore(*flags);
+	list_for_each_entry(class, &pf->zapped_classes, lock_entry)
+		reinit_class(class);

-		wait_event(rcu_cb, get_pending_free() != NULL);
-	}
+	list_splice_init(&pf->zapped_classes, &free_lock_classes);
+
+	bitmap_andnot(list_entries_in_use, list_entries_in_use,
+		      pf->zapped_list_entries, ARRAY_SIZE(list_entries));
+	bitmap_clear(pf->zapped_list_entries, 0, ARRAY_SIZE(list_entries));
+#ifdef CONFIG_PROVE_LOCKING
+	bitmap_andnot(lock_chains_in_use, lock_chains_in_use,
+		      pf->zapped_lock_chains, ARRAY_SIZE(lock_chains));
+	bitmap_clear(pf->zapped_lock_chains, 0, ARRAY_SIZE(lock_chains));
+#endif

-	return pf;
+	pf->empty = 1;
 }

 /*
- * Find an element in the pending_free[] array for which no RCU callback is
- * pending and obtain the graph lock. Ignores debug_locks. Does not sleep.
+ * ph->pf + ph->index       := head open for addition
+ * ph->pf + (ph->index ^ 1) := closed head, waiting for RCU QS
  */
-static struct pending_free *get_pending_free_lock_imm(unsigned long *flags)
+static void pending_free_rcu(struct rcu_head *ch)
 {
+	struct pending_head *ph = container_of(ch, typeof(*ph), rcu_head);
 	struct pending_free *pf;
+	unsigned long flags;

-	WARN_ON_ONCE(!inside_selftest());
+	if (WARN_ON_ONCE(ph != &pending_head))
+		return;

-	raw_local_irq_save(*flags);
-	arch_spin_lock(&lockdep_lock);
-	pf = get_pending_free();
-	if (!pf) {
-		arch_spin_unlock(&lockdep_lock);
-		raw_local_irq_restore(*flags);
-	}
+	raw_local_irq_save(flags);
+	if (!graph_lock())
+		goto out_irq;
+
+	pf = ph->pf + (ph->index ^ 1); /* closed head */
+	__free_pending(pf); /* empty now */
+	ph->pending = 0;
+
+	/* if there's anything on the open list, close and start a new callback */
+	put_pf(get_pf());

-	return pf;
+	graph_unlock();
+out_irq:
+	raw_local_irq_restore(flags);
 }

+
 /*
  * Remove all lock classes from the class hash table and from the
  * all_lock_classes list whose key or name is in the address range [start,
  * start + size). Move these lock classes to the zapped_classes list. Must
  * be called with the graph lock held.
  */
-static void __lockdep_free_key_range(struct pending_free *pf, void *start,
-				     unsigned long size)
+static void
+__lockdep_free_key_range(struct pending_free *pf, void *start, unsigned long size)
 {
 	struct lock_class *class;
 	struct hlist_head *head;
@@ -4683,12 +4668,16 @@ static void lockdep_free_key_range_reg(v

 	init_data_structures_once();

-	pf = get_pending_free_lock(&flags);
-	if (!pf)
-		return;
+	raw_local_irq_save(flags);
+	if (!graph_lock())
+		goto out_irq;
+
+	pf = get_pf();
 	__lockdep_free_key_range(pf, start, size);
-	schedule_free_zapped_classes(pf);
+	put_pf(pf);
+
 	graph_unlock();
+out_irq:
 	raw_local_irq_restore(flags);

 	/*
@@ -4702,23 +4691,25 @@ static void lockdep_free_key_range_reg(v

 /*
  * Free all lockdep keys in the range [start, start+size). Does not sleep.
- * Ignores debug_locks. Must only be used by the lockdep selftests.
+ * Must only be used by the lockdep selftests.
  */
 static void lockdep_free_key_range_imm(void *start, unsigned long size)
 {
-	struct pending_free *pf;
+	struct pending_free *pf = get_pf();
 	unsigned long flags;

 	init_data_structures_once();

-	pf = get_pending_free_lock_imm(&flags);
-	if (!pf)
-		return;
+	raw_local_irq_save(flags);
+	if (!graph_lock())
+		goto out_irq;
+
 	__lockdep_free_key_range(pf, start, size);
-	arch_spin_unlock(&lockdep_lock);
-	raw_local_irq_restore(flags);
+	__free_pending(pf);

-	free_zapped_classes(&pf->rcu_head);
+	graph_unlock();
+out_irq:
+	raw_local_irq_restore(flags);
 }

 void lockdep_free_key_range(void *start, unsigned long size)
@@ -4754,8 +4745,8 @@ static bool lock_class_cache_is_register
 }

 /* The caller must hold the graph lock. Does not sleep. */
-static void __lockdep_reset_lock(struct pending_free *pf,
-				 struct lockdep_map *lock)
+static void
+__lockdep_reset_lock(struct pending_free *pf, struct lockdep_map *lock)
 {
 	struct lock_class *class;
 	int j;
@@ -4790,32 +4781,38 @@ static void lockdep_reset_lock_reg(struc

 	might_sleep();

-	pf = get_pending_free_lock(&flags);
-	if (!pf)
-		return;
+	raw_local_irq_save(flags);
+	if (!graph_lock())
+		goto out_irq;
+
+	pf = get_pf();
 	__lockdep_reset_lock(pf, lock);
-	schedule_free_zapped_classes(pf);
+	put_pf(pf);
+
 	graph_unlock();
+out_irq:
 	raw_local_irq_restore(flags);
 }

 /*
- * Reset a lock. Does not sleep. Ignores debug_locks. Must only be used by the
+ * Reset a lock. Does not sleep. Must only be used by the
  * lockdep selftests.
  */
 static void lockdep_reset_lock_imm(struct lockdep_map *lock)
 {
-	struct pending_free *pf;
+	struct pending_free *pf = get_pf();
 	unsigned long flags;

-	pf = get_pending_free_lock_imm(&flags);
-	if (!pf)
-		return;
+	raw_local_irq_save(flags);
+	if (!graph_lock())
+		goto out_irq;
+
 	__lockdep_reset_lock(pf, lock);
-	arch_spin_unlock(&lockdep_lock);
-	raw_local_irq_restore(flags);
+	__free_pending(pf);

-	free_zapped_classes(&pf->rcu_head);
+	graph_unlock();
+out_irq:
+	raw_local_irq_restore(flags);
 }

 void lockdep_reset_lock(struct lockdep_map *lock)



Thread overview: 29+ messages
2018-12-17 21:29 [PATCH v5 00/15] locking/lockdep: Add support for dynamic keys Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 01/15] locking/lockdep: Fix required memory size reported if CONFIG_PROVE_LOCKING=n Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 02/15] locking/lockdep: Make zap_class() remove all matching lock order entries Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 03/15] locking/lockdep: Reorder struct lock_class members Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 04/15] locking/lockdep: Initialize the locks_before and locks_after lists earlier Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 05/15] locking/lockdep: Split lockdep_free_key_range() and lockdep_reset_lock() Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 06/15] locking/lockdep: Make it easy to detect whether or not inside a selftest Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 07/15] locking/lockdep: Free lock classes that are no longer in use Bart Van Assche
2019-01-10 15:20   ` Peter Zijlstra
2019-01-10 15:45     ` Peter Zijlstra
2019-01-10 15:24   ` Peter Zijlstra
2019-01-10 18:51     ` Bart Van Assche
2019-01-10 19:44       ` Peter Zijlstra [this message]
2019-01-10 21:56         ` Bart Van Assche
2019-01-11  8:35           ` Peter Zijlstra
2019-02-13  0:42     ` Bart Van Assche
2019-01-10 15:28   ` Peter Zijlstra
2019-01-10 19:31     ` Bart Van Assche
2019-01-10 19:47       ` Peter Zijlstra
2018-12-17 21:29 ` [PATCH v5 08/15] locking/lockdep: Reuse list entries " Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 09/15] locking/lockdep: Introduce lockdep_next_lockchain() and lock_chain_count() Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 10/15] locking/lockdep: Reuse lock chains that have been freed Bart Van Assche
2018-12-17 21:29 ` [PATCH v5 11/15] locking/lockdep: Check data structure consistency Bart Van Assche
2019-01-10 15:51   ` Peter Zijlstra
2019-01-10 15:55   ` Peter Zijlstra
2018-12-17 21:29 ` [PATCH v5 12/15] locking/lockdep: Verify whether lock objects are small enough to be used as class keys Bart Van Assche
2018-12-17 21:30 ` [PATCH v5 13/15] locking/lockdep: Add support for dynamic keys Bart Van Assche
2018-12-17 21:30 ` [PATCH v5 14/15] kernel/workqueue: Use dynamic lockdep keys for workqueues Bart Van Assche
2018-12-17 21:30 ` [PATCH v5 15/15] lockdep tests: Test dynamic key registration Bart Van Assche
