Some users of rhashtable might need to change the key of an object and
move it to a different location in the table.  Other users might want
to allocate objects using SLAB_TYPESAFE_BY_RCU which can result in the
same memory allocation being used for a different (type-compatible)
purpose and similarly end up in a different hash-chain.

To support these, we store a unique NULLS_MARKER at the end of each
chain, and when a search fails to find a match, we check if the NULLS
marker found was the expected one.  If not, the search is repeated.
The unique NULLS_MARKER is derived from the address of the head of the
chain.

If an object is removed and re-added to the same hash chain, we won't
notice by looking at the NULLS marker.  In this case we must be sure
that it was not re-added *after* its original location, or a lookup may
incorrectly fail.  The easiest solution is to ensure it is inserted at
the start of the chain.  insert_slow() already does that; insert_fast()
does not, so this patch changes insert_fast() to always insert at the
head of the chain.

Note that such a user must do their own double-checking of the object
found by rhashtable_lookup_fast() after ensuring mutual exclusion with
anything that might change the key, such as successfully taking a new
reference.
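
For illustration only, such a double-check might look roughly like the
sketch below.  The names struct my_obj, my_params, and my_obj_put() are
placeholders invented for this example, not part of this patch or of
the rhashtable API; the key comparison and refcounting are whatever the
caller already uses.

	#include <linux/rhashtable.h>
	#include <linux/refcount.h>

	struct my_obj {
		struct rhash_head node;
		u32 key;
		refcount_t ref;
	};

	/* my_params is the rhashtable_params used at insert time (not shown) */
	static struct my_obj *my_lookup(struct rhashtable *ht, u32 key)
	{
		struct my_obj *obj;

		rcu_read_lock();
		obj = rhashtable_lookup_fast(ht, &key, my_params);
		/* Pin the allocation before it can be freed or recycled. */
		if (obj && !refcount_inc_not_zero(&obj->ref))
			obj = NULL;
		/* With SLAB_TYPESAFE_BY_RCU the memory may meanwhile hold an
		 * object with a different key, so re-check it under the
		 * reference we now hold.
		 */
		if (obj && obj->key != key) {
			my_obj_put(obj);	/* placeholder: drops obj->ref */
			obj = NULL;
		}
		rcu_read_unlock();
		return obj;
	}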
Signed-off-by: NeilBrown
---
 include/linux/rhashtable.h | 35 +++++++++++++++++++++++------------
 lib/rhashtable.c           |  8 +++++---
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index eb7111039247..10435a77b156 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -75,8 +75,10 @@ struct bucket_table {
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
+#define RHT_NULLS_MARKER(ptr) \
+	((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
 #define INIT_RHT_NULLS_HEAD(ptr)	\
-	((ptr) = (typeof(ptr)) NULLS_MARKER(0))
+	((ptr) = RHT_NULLS_MARKER(&(ptr)))
 
 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
 {
@@ -471,6 +473,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
+	struct rhash_head __rcu * const *head;
 	struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
@@ -478,13 +481,19 @@ static inline struct rhash_head *__rhashtable_lookup(
 	tbl = rht_dereference_rcu(ht->tbl, ht);
 restart:
 	hash = rht_key_hashfn(ht, tbl, key, params);
-	rht_for_each_rcu(he, tbl, hash) {
-		if (params.obj_cmpfn ?
-		    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
-		    rhashtable_compare(&arg, rht_obj(ht, he)))
-			continue;
-		return he;
-	}
+	head = rht_bucket(tbl, hash);
+	do {
+		rht_for_each_rcu_continue(he, *head, tbl, hash) {
+			if (params.obj_cmpfn ?
+			    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+			    rhashtable_compare(&arg, rht_obj(ht, he)))
+				continue;
+			return he;
+		}
+		/* An object might have been moved to a different hash chain,
+		 * while we walk along it - better check and retry.
+		 */
+	} while (he != RHT_NULLS_MARKER(head));
 
 	/* Ensure we see any new tables. */
 	smp_rmb();
@@ -580,6 +589,7 @@ static inline void *__rhashtable_insert_fast(
 		.ht = ht,
 		.key = key,
 	};
+	struct rhash_head __rcu **headp;
 	struct rhash_head __rcu **pprev;
 	struct bucket_table *tbl;
 	struct rhash_head *head;
@@ -603,12 +613,13 @@ static inline void *__rhashtable_insert_fast(
 	}
 
 	elasticity = RHT_ELASTICITY;
-	pprev = rht_bucket_insert(ht, tbl, hash);
+	headp = rht_bucket_insert(ht, tbl, hash);
+	pprev = headp;
 	data = ERR_PTR(-ENOMEM);
 	if (!pprev)
 		goto out;
 
-	rht_for_each_continue(head, *pprev, tbl, hash) {
+	rht_for_each_continue(head, *headp, tbl, hash) {
 		struct rhlist_head *plist;
 		struct rhlist_head *list;
 
@@ -648,7 +659,7 @@ static inline void *__rhashtable_insert_fast(
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		goto slow_path;
 
-	head = rht_dereference_bucket(*pprev, tbl, hash);
+	head = rht_dereference_bucket(*headp, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (rhlist) {
@@ -658,7 +669,7 @@ static inline void *__rhashtable_insert_fast(
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(*pprev, obj);
+	rcu_assign_pointer(*headp, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 0e04947b7e0c..1737fbd049da 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1164,8 +1164,7 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
 					    unsigned int hash)
 {
 	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
-	static struct rhash_head __rcu *rhnull =
-		(struct rhash_head __rcu *)NULLS_MARKER(0);
+	static struct rhash_head __rcu *rhnull;
 	unsigned int index = hash & ((1 << tbl->nest) - 1);
 	unsigned int size = tbl->size >> tbl->nest;
 	unsigned int subhash = hash;
@@ -1183,8 +1182,11 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
 		subhash >>= shift;
 	}
 
-	if (!ntbl)
+	if (!ntbl) {
+		if (!rhnull)
+			INIT_RHT_NULLS_HEAD(rhnull);
 		return &rhnull;
+	}
 
 	return &ntbl[subhash].bucket;
 
-- 
2.14.0.rc0.dirty