All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ramsay Jones <ramsay@ramsayjones.plus.com>
To: git@jeffhostetler.com, git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net,
	Jeff Hostetler <jeffhost@microsoft.com>
Subject: Re: [PATCH v2 5/7] name-hash: perf improvement for lazy_init_name_hash
Date: Thu, 23 Mar 2017 15:25:44 +0000	[thread overview]
Message-ID: <e987714e-473f-cad1-159b-18459ffeb241@ramsayjones.plus.com> (raw)
In-Reply-To: <1490276825-41544-6-git-send-email-git@jeffhostetler.com>



On 23/03/17 13:47, git@jeffhostetler.com wrote:
> From: Jeff Hostetler <jeffhost@microsoft.com>
> 
[snip]

> 
> diff --git a/name-hash.c b/name-hash.c
> index 3f7722a..71ef07e 100644
> --- a/name-hash.c
> +++ b/name-hash.c
> @@ -23,15 +23,21 @@ static int dir_entry_cmp(const struct dir_entry *e1,
>  			name ? name : e2->name, e1->namelen);
>  }
>  
> -static struct dir_entry *find_dir_entry(struct index_state *istate,
> -		const char *name, unsigned int namelen)
> +static struct dir_entry *find_dir_entry__hash(struct index_state *istate,
> +		const char *name, unsigned int namelen, unsigned int hash)
>  {
>  	struct dir_entry key;
> -	hashmap_entry_init(&key, memihash(name, namelen));
> +	hashmap_entry_init(&key, hash);
>  	key.namelen = namelen;
>  	return hashmap_get(&istate->dir_hash, &key, name);
>  }
>  
> +static struct dir_entry *find_dir_entry(struct index_state *istate,
> +		const char *name, unsigned int namelen)
> +{
> +	return find_dir_entry__hash(istate, name, namelen, memihash(name, namelen));
> +}
> +
>  static struct dir_entry *hash_dir_entry(struct index_state *istate,
>  		struct cache_entry *ce, int namelen)
>  {
> @@ -112,21 +118,493 @@ static int cache_entry_cmp(const struct cache_entry *ce1,
>  	return remove ? !(ce1 == ce2) : 0;
>  }
>  
> -static void lazy_init_name_hash(struct index_state *istate)
> +static int lazy_try_threaded = 1;
> +static int lazy_nr_dir_threads;
> +
> +#ifdef NO_PTHREADS
> +
> +static inline int lookup_lazy_params(struct index_state *istate)
>  {
> -	int nr;
> +	return 0;
> +}
> +
> +static inline void threaded_lazy_init_name_hash(
> +	struct index_state *istate)
> +{
> +}
> +
> +#else
> +
> +#include "thread-utils.h"
> +
> +/*
> + * Set a minimum number of cache_entries that we will handle per
> + * thread and use that to decide how many threads to run (upto
> + * the number on the system).
> + *
> + * For guidance setting the lower per-thread bound, see:
> + *     t/helper/test-lazy-init-name-hash --analyze
> + */
> +#define LAZY_THREAD_COST (2000)
> +
> +/*
> + * We use n mutexes to guard n partitions of the "istate->dir_hash"
> + * hashtable.  Since "find" and "insert" operations will hash to a
> + * particular bucket and modify/search a single chain, we can say
> + * that "all chains mod n" are guarded by the same mutex -- rather
> + * than having a single mutex to guard the entire table.  (This does
> + * require that we disable "rehashing" on the hashtable.)
> + *
> + * So, a larger value here decreases the probability of a collision
> + * and the time that each thread must wait for the mutex.
> + */
> +#define LAZY_MAX_MUTEX   (32)
> +
> +static pthread_mutex_t *lazy_dir_mutex_array;
> +
> +/*
> + * An array of lazy_entry items is used by the n threads in
> + * the directory parse (first) phase to (lock-free) store the
> + * intermediate results.  These values are then referenced by
> + * the 2 threads in the second phase.
> + */
> +struct lazy_entry {
> +	struct dir_entry *dir;
> +	unsigned int hash_dir;
> +	unsigned int hash_name;
> +};
> +
> +/*
> + * Decide if we want to use threads (if available) to load
> + * the hash tables.  We set "lazy_nr_dir_threads" to zero when
> + * it is not worth it.
> + */
> +static int lookup_lazy_params(struct index_state *istate)
> +{
> +	int nr_cpus;
> +
> +	lazy_nr_dir_threads = 0;
> +
> +	if (!lazy_try_threaded)
> +		return 0;
> +
> +	/*
> +	 * If we are respecting case, just use the original
> +	 * code to build the "istate->name_hash".  We don't
> +	 * need the complexity here.
> +	 */
> +	if (!ignore_case)
> +		return 0;
> +
> +	nr_cpus = online_cpus();
> +	if (nr_cpus < 2)
> +		return 0;
> +
> +	if (istate->cache_nr < 2 * LAZY_THREAD_COST)
> +		return 0;
> +
> +	if (istate->cache_nr < nr_cpus * LAZY_THREAD_COST)
> +		nr_cpus = istate->cache_nr / LAZY_THREAD_COST;
> +	lazy_nr_dir_threads = nr_cpus;
> +	return lazy_nr_dir_threads;
> +}
> +
> +/*
> + * Initialize n mutexes for use when searching and inserting
> + * into "istate->dir_hash".  All "dir" threads are trying
> + * to insert partial pathnames into the hash as they iterate
> + * over their portions of the index, so lock contention is
> + * high.
> + *
> + * However, the hashmap is going to put items into bucket
> + * chains based on their hash values.  Use that to create n
> + * mutexes and lock on mutex[bucket(hash) % n].  This will
> + * decrease the collision rate by (hopefully) by a factor of n.
> + */
> +static void init_dir_mutex(void)
> +{
> +	int j;
> +
> +	lazy_dir_mutex_array = xcalloc(LAZY_MAX_MUTEX, sizeof(pthread_mutex_t));
> +
> +	for (j = 0; j < LAZY_MAX_MUTEX; j++)
> +		init_recursive_mutex(&lazy_dir_mutex_array[j]);
> +}
> +
> +static void cleanup_dir_mutex(void)
> +{
> +	int j;
> +
> +	for (j = 0; j < LAZY_MAX_MUTEX; j++)
> +		pthread_mutex_destroy(&lazy_dir_mutex_array[j]);
> +
> +	free(lazy_dir_mutex_array);
> +}
> +
> +static void lock_dir_mutex(int j)
> +{
> +	pthread_mutex_lock(&lazy_dir_mutex_array[j]);
> +}
> +
> +static void unlock_dir_mutex(int j)
> +{
> +	pthread_mutex_unlock(&lazy_dir_mutex_array[j]);
> +}
> +
> +static inline int compute_dir_lock_nr(
> +	const struct hashmap *map,
> +	unsigned int hash)
> +{
> +	return hashmap_bucket(map, hash) % LAZY_MAX_MUTEX;
> +}
> +
> +static struct dir_entry *hash_dir_entry_with_parent_and_prefix(
> +	struct index_state *istate,
> +	struct dir_entry *parent,
> +	struct strbuf *prefix)
> +{
> +	struct dir_entry *dir;
> +	unsigned int hash;
> +	int lock_nr;
> +
> +	/*
> +	 * Either we have a parent directory and path with slash(es)
> +	 * or the directory is an immediate child of the root directory.
> +	 */
> +	assert((parent != NULL) ^ (strchr(prefix->buf, '/') == 0));

sparse complains about 'Using plain integer as a NULL pointer', (the
return from strchr() is NULL, if '/' is not found) so maybe:

+	assert((parent != NULL) ^ (strchr(prefix->buf, '/') == NULL));


ATB,
Ramsay Jones

  reply	other threads:[~2017-03-23 15:25 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-23 13:46 [PATCH v2 0/7] thread lazy_init_name_hash git
2017-03-23 13:46 ` [PATCH v2 1/7] name-hash: specify initial size for istate.dir_hash table git
2017-03-23 13:47 ` [PATCH v2 2/7] hashmap: allow memihash computation to be continued git
2017-03-23 13:47 ` [PATCH v2 3/7] hashmap: Add disallow_rehash setting git
2017-03-23 13:47 ` [PATCH v2 4/7] hashmap: document memihash_cont, hashmap_disallow_rehash api git
2017-03-23 13:47 ` [PATCH v2 5/7] name-hash: perf improvement for lazy_init_name_hash git
2017-03-23 15:25   ` Ramsay Jones [this message]
2017-03-23 17:45     ` Stefan Beller
2017-03-24 12:36       ` Jeff Hostetler
2017-03-27 20:24   ` Junio C Hamano
2017-03-27 20:50     ` Jeff Hostetler
2017-03-23 13:47 ` [PATCH v2 6/7] name-hash: add test-lazy-init-name-hash git
2017-03-23 15:29   ` Ramsay Jones
2017-03-23 13:47 ` [PATCH v2 7/7] name-hash: add perf test for lazy_init_name_hash git
2017-03-23 17:52 ` [PATCH v2 0/7] thread lazy_init_name_hash Junio C Hamano
2017-03-24 12:39   ` Jeff Hostetler
2017-03-24 16:36     ` Stefan Beller
2017-03-24 17:44     ` Junio C Hamano
2017-04-06  2:22 ` Duy Nguyen
2017-04-06 13:53   ` Jeff Hostetler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e987714e-473f-cad1-159b-18459ffeb241@ramsayjones.plus.com \
    --to=ramsay@ramsayjones.plus.com \
    --cc=git@jeffhostetler.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jeffhost@microsoft.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.