All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH] status: show progress bar if refreshing the index takes too long
Date: Fri,  7 Sep 2018 17:51:33 +0200	[thread overview]
Message-ID: <20180907155133.27737-1-pclouds@gmail.com> (raw)

Refreshing the index is usually very fast, but it can still take a
long time sometimes. A cold cache is one cause, or something else
silly (*). In such cases, it's good to show something to let the user
know "git status" is not hanging; it's just busy doing something.

(*) I got called by my colleague because her "git status" took a very
    long time and looked pretty much like it was hanging. After a bit
    of strace, it looked to me like git was trying to rehash every
    single file, and this was a big repository. This process could
    take minutes.

    In this case, I think it was probably because she copied this
    repository to a new place and stat data did not match anymore. So
    git fell back to hashing.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 I need to get this out of my head before I forget. I obviously think
 this is a good idea and could be done in more places, even just to
 deal with cold cache. The hint about "git status -uno" for example,
 could be accompanied by a progress bar for scanning for untracked
 files...

 Another note about rehashing files as part of refresh. We probably
 could do better by hashing in parallel. Or perhaps not, because having
 a big lock around the object database pretty much kills performance,
 and if I remember correctly none of my cores was 100% consumed (a CPU
 bottleneck would have been an indication that multithreading is a
 good idea...)

 Anyway that's it! Weekend after a long week! I'll read mails and
 respond tomorrow.

 builtin/am.c     |  2 +-
 builtin/commit.c |  6 ++++--
 cache.h          |  7 +++++--
 preload-index.c  | 44 +++++++++++++++++++++++++++++++++++++++-----
 read-cache.c     | 10 ++++++++++
 sequencer.c      |  2 +-
 6 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index 5e866d17c7..22a93cfef3 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -2324,7 +2324,7 @@ int cmd_am(int argc, const char **argv, const char *prefix)
 	/* Ensure a valid committer ident can be constructed */
 	git_committer_info(IDENT_STRICT);
 
-	if (read_index_preload(&the_index, NULL) < 0)
+	if (read_index_preload(&the_index, NULL, 0) < 0)
 		die(_("failed to read the index"));
 
 	if (in_progress) {
diff --git a/builtin/commit.c b/builtin/commit.c
index 0d9828e29e..eaf639ece6 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1355,8 +1355,10 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 		       PATHSPEC_PREFER_FULL,
 		       prefix, argv);
 
-	read_cache_preload(&s.pathspec);
-	refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL);
+	read_index_preload(&the_index, &s.pathspec, REFRESH_PROGRESS);
+	refresh_index(&the_index,
+		      REFRESH_QUIET|REFRESH_UNMERGED|REFRESH_PROGRESS,
+		      &s.pathspec, NULL, NULL);
 
 	if (use_optional_locks())
 		fd = hold_locked_index(&index_lock, 0);
diff --git a/cache.h b/cache.h
index 4d014541ab..35da02be90 100644
--- a/cache.h
+++ b/cache.h
@@ -410,7 +410,7 @@ void validate_cache_entries(const struct index_state *istate);
 
 #define read_cache() read_index(&the_index)
 #define read_cache_from(path) read_index_from(&the_index, (path), (get_git_dir()))
-#define read_cache_preload(pathspec) read_index_preload(&the_index, (pathspec))
+#define read_cache_preload(pathspec) read_index_preload(&the_index, (pathspec), 0)
 #define is_cache_unborn() is_index_unborn(&the_index)
 #define read_cache_unmerged() read_index_unmerged(&the_index)
 #define discard_cache() discard_index(&the_index)
@@ -659,7 +659,9 @@ extern int daemonize(void);
 /* Initialize and use the cache information */
 struct lock_file;
 extern int read_index(struct index_state *);
-extern int read_index_preload(struct index_state *, const struct pathspec *pathspec);
+extern int read_index_preload(struct index_state *,
+			      const struct pathspec *pathspec,
+			      unsigned int refresh_flags);
 extern int do_read_index(struct index_state *istate, const char *path,
 			 int must_exist); /* for testting only! */
 extern int read_index_from(struct index_state *, const char *path,
@@ -814,6 +816,7 @@ extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
 #define REFRESH_IGNORE_MISSING	0x0008	/* ignore non-existent */
 #define REFRESH_IGNORE_SUBMODULES	0x0010	/* ignore submodules */
 #define REFRESH_IN_PORCELAIN	0x0020	/* user friendly output, not "needs update" */
+#define REFRESH_PROGRESS	0x0040  /* show progress bar if stderr is tty */
 extern int refresh_index(struct index_state *, unsigned int flags, const struct pathspec *pathspec, char *seen, const char *header_msg);
 extern struct cache_entry *refresh_cache_entry(struct index_state *, struct cache_entry *, unsigned int);
 
diff --git a/preload-index.c b/preload-index.c
index 71cd2437a3..bf7dbed779 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -5,10 +5,12 @@
 #include "pathspec.h"
 #include "dir.h"
 #include "fsmonitor.h"
+#include "progress.h"
 
 #ifdef NO_PTHREADS
 static void preload_index(struct index_state *index,
-			  const struct pathspec *pathspec)
+			  const struct pathspec *pathspec,
+			  unsigned int refresh_flags)
 {
 	; /* nothing */
 }
@@ -25,16 +27,23 @@ static void preload_index(struct index_state *index,
 #define MAX_PARALLEL (20)
 #define THREAD_COST (500)
 
+struct progress_data {
+	unsigned long n;
+	struct progress *progress;
+	pthread_mutex_t mutex;
+};
+
 struct thread_data {
 	pthread_t pthread;
 	struct index_state *index;
 	struct pathspec pathspec;
+	struct progress_data *progress;
 	int offset, nr;
 };
 
 static void *preload_thread(void *_data)
 {
-	int nr;
+	int nr, last_nr;
 	struct thread_data *p = _data;
 	struct index_state *index = p->index;
 	struct cache_entry **cep = index->cache + p->offset;
@@ -43,6 +52,7 @@ static void *preload_thread(void *_data)
 	nr = p->nr;
 	if (nr + p->offset > index->cache_nr)
 		nr = index->cache_nr - p->offset;
+	last_nr = nr;
 
 	do {
 		struct cache_entry *ce = *cep++;
@@ -58,6 +68,15 @@ static void *preload_thread(void *_data)
 			continue;
 		if (ce->ce_flags & CE_FSMONITOR_VALID)
 			continue;
+		if (p->progress && !(nr & 31)) {
+			struct progress_data *pd = p->progress;
+
+			pthread_mutex_lock(&pd->mutex);
+			pd->n += last_nr - nr;
+			display_progress(pd->progress, pd->n);
+			pthread_mutex_unlock(&pd->mutex);
+			last_nr = nr;
+		}
 		if (!ce_path_match(index, ce, &p->pathspec, NULL))
 			continue;
 		if (threaded_has_symlink_leading_path(&cache, ce->name, ce_namelen(ce)))
@@ -74,11 +93,13 @@ static void *preload_thread(void *_data)
 }
 
 static void preload_index(struct index_state *index,
-			  const struct pathspec *pathspec)
+			  const struct pathspec *pathspec,
+			  unsigned int refresh_flags)
 {
 	int threads, i, work, offset;
 	struct thread_data data[MAX_PARALLEL];
 	uint64_t start = getnanotime();
+	struct progress_data pd;
 
 	if (!core_preload_index)
 		return;
@@ -93,6 +114,13 @@ static void preload_index(struct index_state *index,
 	offset = 0;
 	work = DIV_ROUND_UP(index->cache_nr, threads);
 	memset(&data, 0, sizeof(data));
+
+	memset(&pd, 0, sizeof(pd));
+	if (refresh_flags & REFRESH_PROGRESS && isatty(2)) {
+		pd.progress = start_delayed_progress(_("Refreshing index"), index->cache_nr);
+		pthread_mutex_init(&pd.mutex, NULL);
+	}
+
 	for (i = 0; i < threads; i++) {
 		struct thread_data *p = data+i;
 		p->index = index;
@@ -100,6 +128,8 @@ static void preload_index(struct index_state *index,
 			copy_pathspec(&p->pathspec, pathspec);
 		p->offset = offset;
 		p->nr = work;
+		if (pd.progress)
+			p->progress = &pd;
 		offset += work;
 		if (pthread_create(&p->pthread, NULL, preload_thread, p))
 			die("unable to create threaded lstat");
@@ -109,15 +139,19 @@ static void preload_index(struct index_state *index,
 		if (pthread_join(p->pthread, NULL))
 			die("unable to join threaded lstat");
 	}
+	if (pd.progress)
+		stop_progress(&pd.progress);
+
 	trace_performance_since(start, "preload index");
 }
 #endif
 
 int read_index_preload(struct index_state *index,
-		       const struct pathspec *pathspec)
+		       const struct pathspec *pathspec,
+		       unsigned int refresh_flags)
 {
 	int retval = read_index(index);
 
-	preload_index(index, pathspec);
+	preload_index(index, pathspec, refresh_flags);
 	return retval;
 }
diff --git a/read-cache.c b/read-cache.c
index 7b1354d759..e8725a5162 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -23,6 +23,7 @@
 #include "split-index.h"
 #include "utf8.h"
 #include "fsmonitor.h"
+#include "progress.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -1477,6 +1478,11 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 	const char *added_fmt;
 	const char *unmerged_fmt;
 	uint64_t start = getnanotime();
+	struct progress *progress = NULL;
+
+	if (flags & REFRESH_PROGRESS && isatty(2))
+		progress = start_delayed_progress(_("Refresh index"),
+						  istate->cache_nr);
 
 	modified_fmt = (in_porcelain ? "M\t%s\n" : "%s: needs update\n");
 	deleted_fmt = (in_porcelain ? "D\t%s\n" : "%s: needs update\n");
@@ -1516,6 +1522,8 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 		new_entry = refresh_cache_ent(istate, ce, options, &cache_errno, &changed);
 		if (new_entry == ce)
 			continue;
+		if (progress)
+			display_progress(progress, i);
 		if (!new_entry) {
 			const char *fmt;
 
@@ -1547,6 +1555,8 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 
 		replace_index_entry(istate, i, new_entry);
 	}
+	if (progress)
+		stop_progress(&progress);
 	trace_performance_since(start, "refresh index");
 	return has_errors;
 }
diff --git a/sequencer.c b/sequencer.c
index dc2c58d464..e0cd17df70 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -1909,7 +1909,7 @@ static int read_and_refresh_cache(struct replay_opts *opts)
 {
 	struct lock_file index_lock = LOCK_INIT;
 	int index_fd = hold_locked_index(&index_lock, 0);
-	if (read_index_preload(&the_index, NULL) < 0) {
+	if (read_index_preload(&the_index, NULL, 0) < 0) {
 		rollback_lock_file(&index_lock);
 		return error(_("git %s: failed to read the index"),
 			_(action_name(opts)));
-- 
2.19.0.rc0.337.ge906d732e7


             reply	other threads:[~2018-09-07 15:51 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-07 15:51 Nguyễn Thái Ngọc Duy [this message]
2018-09-07 17:38 ` [PATCH] status: show progress bar if refreshing the index takes too long Eric Sunshine
2018-09-07 20:29   ` Derrick Stolee
2018-09-07 21:10     ` Eric Sunshine
2018-09-15 17:56 ` [PATCH v2] " Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180907155133.27737-1-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.