linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.com>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Thomas Graf <tgraf@suug.ch>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Eric Dumazet <eric.dumazet@gmail.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH resend] rhashtable: detect when object movement might have invalidated a lookup
Date: Fri, 06 Jul 2018 17:08:35 +1000	[thread overview]
Message-ID: <87k1q8yh70.fsf@notabene.neil.brown.name> (raw)
In-Reply-To: <20180601160613.7ud25g2ux55k3bma@gondor.apana.org.au>

[-- Attachment #1: Type: text/plain, Size: 5600 bytes --]


Some users of rhashtable might need to change the key
of an object and move it to a different location in the table.
Other users might want to allocate objects using
SLAB_TYPESAFE_BY_RCU which can result in the same memory allocation
being used for a different (type-compatible) purpose and similarly
end up in a different hash-chain.

To support these, we store a unique NULLS_MARKER at the end of
each chain, and when a search fails to find a match, we check
if the NULLS marker found was the expected one.  If not,
the search is repeated.

The unique NULLS_MARKER is derived from the address of the
head of the chain.

If an object is removed and re-added to the same hash chain, we won't
notice by looking that the NULLS marker.  In this case we must be sure
that it was not re-added *after* its original location, or a lookup may
incorrectly fail.  The easiest solution is to ensure it is inserted at
the start of the chain.  insert_slow() already does that,
insert_fast() does not.  So this patch changes insert_fast to always
insert at the head of the chain.

Note that such a user must do their own double-checking of
the object found by rhashtable_lookup_fast() after ensuring
mutual exclusion which anything that might change the key, such as
successfully taking a new reference.

Signed-off-by: NeilBrown <neilb@suse.com>
---

I'm resending this unchanged.  Herbert wasn't sure if we needed all the
functionality provided.  I explained that it was useful when
SLAB_TYPESAFE_BY_RCU slabs are used.  No further discussion happened.

Thanks,
NeilBrown



 include/linux/rhashtable.h | 35 +++++++++++++++++++++++------------
 lib/rhashtable.c           |  8 +++++---
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index eb7111039247..10435a77b156 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -75,8 +75,10 @@ struct bucket_table {
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
+#define	RHT_NULLS_MARKER(ptr)	\
+	((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
 #define INIT_RHT_NULLS_HEAD(ptr)	\
-	((ptr) = (typeof(ptr)) NULLS_MARKER(0))
+	((ptr) = RHT_NULLS_MARKER(&(ptr)))
 
 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
 {
@@ -471,6 +473,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
+	struct rhash_head __rcu * const *head;
 	struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
@@ -478,13 +481,19 @@ static inline struct rhash_head *__rhashtable_lookup(
 	tbl = rht_dereference_rcu(ht->tbl, ht);
 restart:
 	hash = rht_key_hashfn(ht, tbl, key, params);
-	rht_for_each_rcu(he, tbl, hash) {
-		if (params.obj_cmpfn ?
-		    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
-		    rhashtable_compare(&arg, rht_obj(ht, he)))
-			continue;
-		return he;
-	}
+	head = rht_bucket(tbl, hash);
+	do {
+		rht_for_each_rcu_continue(he, *head, tbl, hash) {
+			if (params.obj_cmpfn ?
+			    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+			    rhashtable_compare(&arg, rht_obj(ht, he)))
+				continue;
+			return he;
+		}
+		/* An object might have been moved to a different hash chain,
+		 * while we walk along it - better check and retry.
+		 */
+	} while (he != RHT_NULLS_MARKER(head));
 
 	/* Ensure we see any new tables. */
 	smp_rmb();
@@ -580,6 +589,7 @@ static inline void *__rhashtable_insert_fast(
 		.ht = ht,
 		.key = key,
 	};
+	struct rhash_head __rcu **headp;
 	struct rhash_head __rcu **pprev;
 	struct bucket_table *tbl;
 	struct rhash_head *head;
@@ -603,12 +613,13 @@ static inline void *__rhashtable_insert_fast(
 	}
 
 	elasticity = RHT_ELASTICITY;
-	pprev = rht_bucket_insert(ht, tbl, hash);
+	headp = rht_bucket_insert(ht, tbl, hash);
+	pprev = headp;
 	data = ERR_PTR(-ENOMEM);
 	if (!pprev)
 		goto out;
 
-	rht_for_each_continue(head, *pprev, tbl, hash) {
+	rht_for_each_continue(head, *headp, tbl, hash) {
 		struct rhlist_head *plist;
 		struct rhlist_head *list;
 
@@ -648,7 +659,7 @@ static inline void *__rhashtable_insert_fast(
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		goto slow_path;
 
-	head = rht_dereference_bucket(*pprev, tbl, hash);
+	head = rht_dereference_bucket(*headp, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (rhlist) {
@@ -658,7 +669,7 @@ static inline void *__rhashtable_insert_fast(
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(*pprev, obj);
+	rcu_assign_pointer(*headp, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 0e04947b7e0c..f87af707f086 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1164,8 +1164,7 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
 					    unsigned int hash)
 {
 	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
-	static struct rhash_head __rcu *rhnull =
-		(struct rhash_head __rcu *)NULLS_MARKER(0);
+	static struct rhash_head __rcu *rhnull;
 	unsigned int index = hash & ((1 << tbl->nest) - 1);
 	unsigned int size = tbl->size >> tbl->nest;
 	unsigned int subhash = hash;
@@ -1183,8 +1182,11 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
 		subhash >>= shift;
 	}
 
-	if (!ntbl)
+	if (!ntbl) {
+		if (!rhnull)
+			INIT_RHT_NULLS_HEAD(rhnull, NULL, 0);
 		return &rhnull;
+	}
 
 	return &ntbl[subhash].bucket;
 
-- 
2.14.0.rc0.dirty


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

  parent reply	other threads:[~2018-07-06  7:08 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-01  4:44 [RFC PATCH 00/18] Assorted rhashtable improvements NeilBrown
2018-06-01  4:44 ` [PATCH 11/18] rhashtable: further improve stability of rhashtable_walk NeilBrown
2018-06-01  4:44 ` [PATCH 17/18] rhashtable: rename rht_for_each*continue as *from NeilBrown
2018-06-01  4:44 ` [PATCH 10/18] rhashtable: remove rhashtable_walk_peek() NeilBrown
2018-06-02 15:48   ` Herbert Xu
2018-06-04  0:30     ` NeilBrown
2018-06-04  1:18       ` Tom Herbert
2018-06-04  2:09         ` NeilBrown
2018-06-04 21:31           ` Tom Herbert
2018-06-04 22:13             ` Tom Herbert
2018-06-05  1:24               ` NeilBrown
2018-06-05  1:00             ` NeilBrown
     [not found]               ` <CALx6S36Ce-rXQMzmFYZVPGD10Bo6udvRAHiZ5gWwnzVwoTVv0w@mail.gmail.com>
2018-06-06  5:07                 ` NeilBrown
2018-06-07  2:45                   ` [PATCH - RFC] rhashtable: add rhashtable_walk_last_seen() NeilBrown
2018-06-07  2:46                     ` [PATCH - RFC] rhashtable: implement rhashtable_walk_peek() using rhashtable_walk_last_seen() NeilBrown
     [not found]                       ` <CALx6S35GgUOd0dPgv7P96wNNTv5pN7fij0pcAoccqcSWZhvY7Q@mail.gmail.com>
2018-06-12  2:48                         ` [PATCH RFC v2] " NeilBrown
2018-06-14 17:41                           ` Tom Herbert
2018-06-15  4:23                             ` Herbert Xu
2018-06-15  5:31                               ` NeilBrown
2018-06-01  4:44 ` [PATCH 18/18] rhashtable: add rhashtable_walk_delay_rehash() NeilBrown
2018-06-01  4:44 ` [PATCH 14/18] rhashtable: allow rht_bucket_var to return NULL NeilBrown
2018-06-01  4:44 ` [PATCH 15/18] rhashtable: use bit_spin_locks to protect hash bucket NeilBrown
2018-06-02  5:03   ` Herbert Xu
2018-06-02  9:53     ` Eric Dumazet
2018-06-04  0:25       ` NeilBrown
2018-06-04  2:52         ` [PATCH 15a/18] rhashtables: add lockdep tracking to bucket bit-spin-locks NeilBrown
2018-06-04 18:16           ` Simon Horman
2018-06-04 21:37             ` NeilBrown
2018-06-01  4:44 ` [PATCH 01/18] rhashtable: silence RCU warning in rhashtable_test NeilBrown
2018-06-01  4:44 ` [PATCH 07/18] rhashtable: use cmpxchg() to protect ->future_tbl NeilBrown
2018-06-01 16:44   ` Herbert Xu
2018-06-01  4:44 ` [PATCH 13/18] rhashtable: don't hold lock on first table throughout insertion NeilBrown
2018-06-01  4:44 ` [PATCH 16/18] rhashtable: allow percpu element counter NeilBrown
2018-06-01  4:44 ` [PATCH 03/18] rhashtable: remove nulls_base and related code NeilBrown
2018-06-07  2:49   ` NeilBrown
2018-06-13  6:25     ` Herbert Xu
2018-06-01  4:44 ` [PATCH 08/18] rhashtable: clean up dereference of ->future_tbl NeilBrown
2018-06-01 16:54   ` Herbert Xu
2018-06-01  4:44 ` [PATCH 06/18] rhashtable: simplify nested_table_alloc() and rht_bucket_nested_insert() NeilBrown
2018-06-01 16:28   ` Herbert Xu
2018-06-01  4:44 ` [PATCH 09/18] rhashtable: use cmpxchg() in nested_table_alloc() NeilBrown
2018-06-01  4:44 ` [PATCH 05/18] rhashtable: simplify INIT_RHT_NULLS_HEAD() NeilBrown
2018-06-01 16:24   ` Herbert Xu
2018-06-01  4:44 ` [PATCH 12/18] rhashtable: add rhashtable_walk_prev() NeilBrown
2018-06-01  4:44 ` [PATCH 02/18] rhashtable: split rhashtable.h NeilBrown
2018-06-01 10:48   ` Herbert Xu
2018-06-01  4:44 ` [PATCH 04/18] rhashtable: detect when object movement might have invalidated a lookup NeilBrown
2018-06-01 16:06   ` Herbert Xu
2018-06-04  3:38     ` NeilBrown
2018-07-06  7:08     ` NeilBrown [this message]
2018-07-12  5:46       ` [PATCH resend] " David Miller
2018-07-12  5:48         ` David Miller
2018-07-15 23:55           ` NeilBrown
2018-07-15 23:57           ` [PATCH - revised] " NeilBrown
2018-07-16  0:51             ` Herbert Xu
2018-07-16  1:23               ` NeilBrown
2018-07-16  2:16                 ` Herbert Xu
2018-07-16  3:26                   ` NeilBrown
2018-07-17  6:30                     ` Herbert Xu
2018-07-20  6:24                       ` NeilBrown
2018-07-18 20:14             ` David Miller
2018-07-20  6:30               ` NeilBrown
2018-07-20  6:43                 ` David Miller
2018-07-20  7:09                   ` NeilBrown
2018-07-23  1:56               ` [PATCH net-next] rhashtable: detect when object movement between tables " NeilBrown
2018-07-26 20:55                 ` David Miller
2018-07-26 22:04                   ` NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87k1q8yh70.fsf@notabene.neil.brown.name \
    --to=neilb@suse.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=tgraf@suug.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).