All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1] speed up refresh_index() by utilizing preload_index()
@ 2018-10-29 20:41 Ben Peart
  2018-10-30  2:28 ` Junio C Hamano
  0 siblings, 1 reply; 2+ messages in thread
From: Ben Peart @ 2018-10-29 20:41 UTC (permalink / raw)
  To: git; +Cc: gitster, pclouds, Ben Peart

From: Ben Peart <benpeart@microsoft.com>

Speed up refresh_index() by first calling preload_index(), which spreads most
of the work across multiple threads.  This works because preload_index() marks
most cache entries CE_UPTODATE, so refresh_cache_ent() can bail out early when
called from within refresh_index().

On a Windows repo with ~200K files, this drops refresh times from 6.64
seconds to 2.87 seconds for a savings of 57%.

Signed-off-by: Ben Peart <benpeart@microsoft.com>
---

Notes:
    Base Ref: master
    Web-Diff: https://github.com/benpeart/git/commit/feee1054c2
    Checkout: git fetch https://github.com/benpeart/git refresh-index-multithread-preload-v1 && git checkout feee1054c2

 cache.h         | 3 +++
 preload-index.c | 8 ++++----
 read-cache.c    | 6 ++++++
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/cache.h b/cache.h
index f7fabdde8f..883099db08 100644
--- a/cache.h
+++ b/cache.h
@@ -659,6 +659,9 @@ extern int daemonize(void);
 /* Initialize and use the cache information */
 struct lock_file;
 extern int read_index(struct index_state *);
+extern void preload_index(struct index_state *index,
+			  const struct pathspec *pathspec,
+			  unsigned int refresh_flags);
 extern int read_index_preload(struct index_state *,
 			      const struct pathspec *pathspec,
 			      unsigned int refresh_flags);
diff --git a/preload-index.c b/preload-index.c
index 9e7152ab14..222792ccbc 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -9,7 +9,7 @@
 #include "progress.h"
 
 #ifdef NO_PTHREADS
-static void preload_index(struct index_state *index,
+void preload_index(struct index_state *index,
 			  const struct pathspec *pathspec,
 			  unsigned int refresh_flags)
 {
@@ -100,9 +100,9 @@ static void *preload_thread(void *_data)
 	return NULL;
 }
 
-static void preload_index(struct index_state *index,
-			  const struct pathspec *pathspec,
-			  unsigned int refresh_flags)
+void preload_index(struct index_state *index,
+		   const struct pathspec *pathspec,
+		   unsigned int refresh_flags)
 {
 	int threads, i, work, offset;
 	struct thread_data data[MAX_PARALLEL];
diff --git a/read-cache.c b/read-cache.c
index d57958233e..53733d651d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1496,6 +1496,12 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 	typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
 	added_fmt = (in_porcelain ? "A\t%s\n" : "%s needs update\n");
 	unmerged_fmt = (in_porcelain ? "U\t%s\n" : "%s: needs merge\n");
+	/*
+	 * Use the multi-threaded preload_index() to refresh most of the
+	 * cache entries quickly then in the single threaded loop below,
+	 * we only have to do the special cases that are left.
+	 */
+	preload_index(istate, pathspec, 0);
 	for (i = 0; i < istate->cache_nr; i++) {
 		struct cache_entry *ce, *new_entry;
 		int cache_errno = 0;

base-commit: c670b1f876521c9f7cd40184bf7ed05aad843433
-- 
2.9.2.gvfs.2.27918.g0990287eef


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v1] speed up refresh_index() by utilizing preload_index()
  2018-10-29 20:41 [PATCH v1] speed up refresh_index() by utilizing preload_index() Ben Peart
@ 2018-10-30  2:28 ` Junio C Hamano
  0 siblings, 0 replies; 2+ messages in thread
From: Junio C Hamano @ 2018-10-30  2:28 UTC (permalink / raw)
  To: Ben Peart; +Cc: git, pclouds, Ben Peart

Ben Peart <peartben@gmail.com> writes:

> From: Ben Peart <benpeart@microsoft.com>
>
> Speed up refresh_index() by utilizing preload_index() to do most of the work
> spread across multiple threads.  This works because most cache entries will
> get marked CE_UPTODATE so that refresh_cache_ent() can bail out early when
> called from within refresh_index().
>
> On a Windows repo with ~200K files, this drops refresh times from 6.64
> seconds to 2.87 seconds for a savings of 57%.
>
> Signed-off-by: Ben Peart <benpeart@microsoft.com>
> ---

OK.  We used to only expose the whole "read the index file into an
istate, and then do the lstat() part in parallel", but now we also
make the "do the lstat() part" available separately.

Which makes sense.


> diff --git a/cache.h b/cache.h
> index f7fabdde8f..883099db08 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -659,6 +659,9 @@ extern int daemonize(void);
>  /* Initialize and use the cache information */
>  struct lock_file;
>  extern int read_index(struct index_state *);
> +extern void preload_index(struct index_state *index,
> +			  const struct pathspec *pathspec,
> +			  unsigned int refresh_flags);
>  extern int read_index_preload(struct index_state *,
>  			      const struct pathspec *pathspec,
>  			      unsigned int refresh_flags);
> diff --git a/preload-index.c b/preload-index.c
> index 9e7152ab14..222792ccbc 100644
> --- a/preload-index.c
> +++ b/preload-index.c
> @@ -9,7 +9,7 @@
>  #include "progress.h"
>  
>  #ifdef NO_PTHREADS
> -static void preload_index(struct index_state *index,
> +void preload_index(struct index_state *index,
>  			  const struct pathspec *pathspec,
>  			  unsigned int refresh_flags)
>  {
> @@ -100,9 +100,9 @@ static void *preload_thread(void *_data)
>  	return NULL;
>  }
>  
> -static void preload_index(struct index_state *index,
> -			  const struct pathspec *pathspec,
> -			  unsigned int refresh_flags)
> +void preload_index(struct index_state *index,
> +		   const struct pathspec *pathspec,
> +		   unsigned int refresh_flags)
>  {
>  	int threads, i, work, offset;
>  	struct thread_data data[MAX_PARALLEL];
> diff --git a/read-cache.c b/read-cache.c
> index d57958233e..53733d651d 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -1496,6 +1496,12 @@ int refresh_index(struct index_state *istate, unsigned int flags,
>  	typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
>  	added_fmt = (in_porcelain ? "A\t%s\n" : "%s needs update\n");
>  	unmerged_fmt = (in_porcelain ? "U\t%s\n" : "%s: needs merge\n");
> +	/*
> +	 * Use the multi-threaded preload_index() to refresh most of the
> +	 * cache entries quickly then in the single threaded loop below,
> +	 * we only have to do the special cases that are left.
> +	 */
> +	preload_index(istate, pathspec, 0);
>  	for (i = 0; i < istate->cache_nr; i++) {
>  		struct cache_entry *ce, *new_entry;
>  		int cache_errno = 0;
>
> base-commit: c670b1f876521c9f7cd40184bf7ed05aad843433

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2018-10-30  2:28 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-29 20:41 [PATCH v1] speed up refresh_index() by utilizing preload_index() Ben Peart
2018-10-30  2:28 ` Junio C Hamano

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.