linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andreas Dilger <adilger@dilger.ca>
To: Sultan Alsawaf <sultan@kerneltoast.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] mbcache: Speed up cache entry creation
Date: Tue, 23 Jul 2019 10:56:05 -0600	[thread overview]
Message-ID: <5EDDA127-031C-4F16-9B9B-8DBC94C7E471@dilger.ca> (raw)
In-Reply-To: <20190723053549.14465-1-sultan@kerneltoast.com>

[-- Attachment #1: Type: text/plain, Size: 7743 bytes --]

On Jul 22, 2019, at 11:35 PM, Sultan Alsawaf <sultan@kerneltoast.com> wrote:
> 
> From: Sultan Alsawaf <sultan@kerneltoast.com>
> 
> To prevent creating redundant entries when racing against itself,
> mb_cache_entry_create scans through a hash-list of all current entries
> to see if another allocation for the requested new entry has already
> been made. Furthermore, it allocates memory for a new entry before
> scanning through this hash-list, which results in that allocated memory
> being discarded when the requested new entry is already present.
> 
> Speed up cache entry creation by keeping a small linked list of
> requested new entries in progress, and scanning through that first
> instead of the large hash-list. Additionally, don't allocate memory for
> a new entry until it's known that the allocated memory will be used.

Do you have any kind of performance metrics that show this is an actual
improvement in performance?  This would be either macro-level benchmarks
(e.g. fio, but this seems unlikely to show any benefit), or micro-level
measurements (e.g. flame graph) that show a net reduction in CPU cycles,
lock contention, etc. in this part of the code.

Cheers, Andreas

> Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
> ---
> fs/mbcache.c | 82 ++++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 57 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/mbcache.c b/fs/mbcache.c
> index 97c54d3a2227..289f3664061e 100644
> --- a/fs/mbcache.c
> +++ b/fs/mbcache.c
> @@ -25,9 +25,14 @@
>  * size hash table is used for fast key lookups.
>  */
> 
> +struct mb_bucket {
> +	struct hlist_bl_head hash;
> +	struct list_head req_list;
> +};
> +
> struct mb_cache {
> 	/* Hash table of entries */
> -	struct hlist_bl_head	*c_hash;
> +	struct mb_bucket	*c_bucket;
> 	/* log2 of hash table size */
> 	int			c_bucket_bits;
> 	/* Maximum entries in cache to avoid degrading hash too much */
> @@ -42,15 +47,21 @@ struct mb_cache {
> 	struct work_struct	c_shrink_work;
> };
> 
> +struct mb_cache_req {
> +	struct list_head lnode;
> +	u32 key;
> +	u64 value;
> +};
> +
> static struct kmem_cache *mb_entry_cache;
> 
> static unsigned long mb_cache_shrink(struct mb_cache *cache,
> 				     unsigned long nr_to_scan);
> 
> -static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
> -							u32 key)
> +static inline struct mb_bucket *mb_cache_entry_bucket(struct mb_cache *cache,
> +						      u32 key)
> {
> -	return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
> +	return &cache->c_bucket[hash_32(key, cache->c_bucket_bits)];
> }
> 
> /*
> @@ -77,6 +88,8 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
> 	struct mb_cache_entry *entry, *dup;
> 	struct hlist_bl_node *dup_node;
> 	struct hlist_bl_head *head;
> +	struct mb_cache_req *tmp_req, req;
> +	struct mb_bucket *bucket;
> 
> 	/* Schedule background reclaim if there are too many entries */
> 	if (cache->c_entry_count >= cache->c_max_entries)
> @@ -85,9 +98,33 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
> 	if (cache->c_entry_count >= 2*cache->c_max_entries)
> 		mb_cache_shrink(cache, SYNC_SHRINK_BATCH);
> 
> +	bucket = mb_cache_entry_bucket(cache, key);
> +	head = &bucket->hash;
> +	hlist_bl_lock(head);
> +	list_for_each_entry(tmp_req, &bucket->req_list, lnode) {
> +		if (tmp_req->key == key && tmp_req->value == value) {
> +			hlist_bl_unlock(head);
> +			return -EBUSY;
> +		}
> +	}
> +	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
> +		if (dup->e_key == key && dup->e_value == value) {
> +			hlist_bl_unlock(head);
> +			return -EBUSY;
> +		}
> +	}
> +	req.key = key;
> +	req.value = value;
> +	list_add(&req.lnode, &bucket->req_list);
> +	hlist_bl_unlock(head);
> +
> 	entry = kmem_cache_alloc(mb_entry_cache, mask);
> -	if (!entry)
> +	if (!entry) {
> +		hlist_bl_lock(head);
> +		list_del(&req.lnode);
> +		hlist_bl_unlock(head);
> 		return -ENOMEM;
> +	}
> 
> 	INIT_LIST_HEAD(&entry->e_list);
> 	/* One ref for hash, one ref returned */
> @@ -96,15 +133,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
> 	entry->e_value = value;
> 	entry->e_reusable = reusable;
> 	entry->e_referenced = 0;
> -	head = mb_cache_entry_head(cache, key);
> +
> 	hlist_bl_lock(head);
> -	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
> -		if (dup->e_key == key && dup->e_value == value) {
> -			hlist_bl_unlock(head);
> -			kmem_cache_free(mb_entry_cache, entry);
> -			return -EBUSY;
> -		}
> -	}
> +	list_del(&req.lnode);
> 	hlist_bl_add_head(&entry->e_hash_list, head);
> 	hlist_bl_unlock(head);
> 
> @@ -133,7 +164,7 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
> 	struct hlist_bl_node *node;
> 	struct hlist_bl_head *head;
> 
> -	head = mb_cache_entry_head(cache, key);
> +	head = &mb_cache_entry_bucket(cache, key)->hash;
> 	hlist_bl_lock(head);
> 	if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
> 		node = entry->e_hash_list.next;
> @@ -202,7 +233,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
> 	struct hlist_bl_head *head;
> 	struct mb_cache_entry *entry;
> 
> -	head = mb_cache_entry_head(cache, key);
> +	head = &mb_cache_entry_bucket(cache, key)->hash;
> 	hlist_bl_lock(head);
> 	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
> 		if (entry->e_key == key && entry->e_value == value) {
> @@ -230,7 +261,7 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
> 	struct hlist_bl_head *head;
> 	struct mb_cache_entry *entry;
> 
> -	head = mb_cache_entry_head(cache, key);
> +	head = &mb_cache_entry_bucket(cache, key)->hash;
> 	hlist_bl_lock(head);
> 	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
> 		if (entry->e_key == key && entry->e_value == value) {
> @@ -300,7 +331,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
> 		 * from under us.
> 		 */
> 		spin_unlock(&cache->c_list_lock);
> -		head = mb_cache_entry_head(cache, entry->e_key);
> +		head = &mb_cache_entry_bucket(cache, entry->e_key)->hash;
> 		hlist_bl_lock(head);
> 		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
> 			hlist_bl_del_init(&entry->e_hash_list);
> @@ -354,21 +385,22 @@ struct mb_cache *mb_cache_create(int bucket_bits)
> 	cache->c_max_entries = bucket_count << 4;
> 	INIT_LIST_HEAD(&cache->c_list);
> 	spin_lock_init(&cache->c_list_lock);
> -	cache->c_hash = kmalloc_array(bucket_count,
> -				      sizeof(struct hlist_bl_head),
> -				      GFP_KERNEL);
> -	if (!cache->c_hash) {
> +	cache->c_bucket = kmalloc_array(bucket_count, sizeof(*cache->c_bucket),
> +					GFP_KERNEL);
> +	if (!cache->c_bucket) {
> 		kfree(cache);
> 		goto err_out;
> 	}
> -	for (i = 0; i < bucket_count; i++)
> -		INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
> +	for (i = 0; i < bucket_count; i++) {
> +		INIT_HLIST_BL_HEAD(&cache->c_bucket[i].hash);
> +		INIT_LIST_HEAD(&cache->c_bucket[i].req_list);
> +	}
> 
> 	cache->c_shrink.count_objects = mb_cache_count;
> 	cache->c_shrink.scan_objects = mb_cache_scan;
> 	cache->c_shrink.seeks = DEFAULT_SEEKS;
> 	if (register_shrinker(&cache->c_shrink)) {
> -		kfree(cache->c_hash);
> +		kfree(cache->c_bucket);
> 		kfree(cache);
> 		goto err_out;
> 	}
> @@ -409,7 +441,7 @@ void mb_cache_destroy(struct mb_cache *cache)
> 		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
> 		mb_cache_entry_put(cache, entry);
> 	}
> -	kfree(cache->c_hash);
> +	kfree(cache->c_bucket);
> 	kfree(cache);
> }
> EXPORT_SYMBOL(mb_cache_destroy);
> --
> 2.22.0
> 


Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 873 bytes --]

  reply	other threads:[~2019-07-23 16:56 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-23  5:35 [PATCH] mbcache: Speed up cache entry creation Sultan Alsawaf
2019-07-23 16:56 ` Andreas Dilger [this message]
2019-07-24  4:01   ` Sultan Alsawaf
2019-07-25 21:16     ` Andreas Dilger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5EDDA127-031C-4F16-9B9B-8DBC94C7E471@dilger.ca \
    --to=adilger@dilger.ca \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sultan@kerneltoast.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).