* [PATCH v4 1/4] p0004-read-tree: perf test to time read-tree
2017-04-04 21:08 [PATCH v4 0/4] read-cache: speed up add_index_entry git
@ 2017-04-04 21:08 ` git
2017-04-04 21:08 ` [PATCH v4 2/4] read-cache: add strcmp_offset function git
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: git @ 2017-04-04 21:08 UTC (permalink / raw)
To: git; +Cc: gitster, peff, Jeff Hostetler
From: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
t/perf/p0004-read-tree.sh | 84 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 84 insertions(+)
create mode 100755 t/perf/p0004-read-tree.sh
diff --git a/t/perf/p0004-read-tree.sh b/t/perf/p0004-read-tree.sh
new file mode 100755
index 0000000..ffbbf8e
--- /dev/null
+++ b/t/perf/p0004-read-tree.sh
@@ -0,0 +1,84 @@
+#!/bin/sh
+
+test_description="Tests performance of read-tree"
+
+. ./perf-lib.sh
+
+test_perf_default_repo
+test_checkout_worktree
+
+## usage: dir depth width files
+make_paths () {
+ for f in $(seq $4)
+ do
+ echo $1/file$f
+ done;
+ if test $2 -gt 0;
+ then
+ for w in $(seq $3)
+ do
+ make_paths $1/dir$w $(($2 - 1)) $3 $4
+ done
+ fi
+ return 0
+}
+
+fill_index () {
+ make_paths $1 $2 $3 $4 |
+ sed "s/^/100644 $EMPTY_BLOB /" |
+ git update-index --index-info
+ return 0
+}
+
+br_base=xxx_base_xxx
+br_work=xxx_work_xxx
+
+new_dir=xxx_dir_xxx
+
+## (5, 10, 9) will create 999,999 files.
+## (4, 10, 9) will create 99,999 files.
+depth=5
+width=10
+files=9
+
+export br_base
+export br_work
+export new_dir
+export depth
+export width
+export files
+
+## The number of files in the xxx_base_xxx branch.
+nr_base=$(git ls-files | wc -l)
+export nr_base
+
+## Inflate the index with thousands of empty files and commit it.
+## Turn on sparse-checkout so that we don't have to populate them
+## later when we start switching branches.
+test_expect_success 'inflate the index' '
+ git reset --hard &&
+ git branch $br_base &&
+ git branch $br_work &&
+ git checkout $br_work &&
+ fill_index $new_dir $depth $width $files &&
+ git commit -m $br_work &&
+ echo $new_dir/file1 >.git/info/sparse-checkout &&
+ git config --local core.sparsecheckout 1 &&
+ git reset --hard
+'
+
+## The number of files in the xxx_work_xxx branch.
+nr_work=$(git ls-files | wc -l)
+export nr_work
+
+test_perf "read-tree ($nr_work)" '
+ git read-tree -m $br_base $br_work -n
+'
+
+test_perf "switch branches ($nr_base $nr_work)" '
+ git checkout $br_base &&
+ git checkout $br_work
+'
+
+
+test_done
--
2.9.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v4 2/4] read-cache: add strcmp_offset function
2017-04-04 21:08 [PATCH v4 0/4] read-cache: speed up add_index_entry git
2017-04-04 21:08 ` [PATCH v4 1/4] p0004-read-tree: perf test to time read-tree git
@ 2017-04-04 21:08 ` git
2017-04-04 21:08 ` [PATCH v4 3/4] test-strcmp-offset: created test for strcmp_offset git
2017-04-04 21:08 ` [PATCH v4 4/4] read-cache: speed up add_index_entry during checkout git
3 siblings, 0 replies; 5+ messages in thread
From: git @ 2017-04-04 21:08 UTC (permalink / raw)
To: git; +Cc: gitster, peff, Jeff Hostetler
From: Jeff Hostetler <jeffhost@microsoft.com>
Add strcmp_offset() function to also return the offset of the
first change.
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
cache.h | 1 +
read-cache.c | 29 +++++++++++++++++++++++++++++
2 files changed, 30 insertions(+)
diff --git a/cache.h b/cache.h
index 80b6372..4d82490 100644
--- a/cache.h
+++ b/cache.h
@@ -574,6 +574,7 @@ extern int write_locked_index(struct index_state *, struct lock_file *lock, unsi
extern int discard_index(struct index_state *);
extern int unmerged_index(const struct index_state *);
extern int verify_path(const char *path);
+extern int strcmp_offset(const char *s1_in, const char *s2_in, int *first_change);
extern int index_dir_exists(struct index_state *istate, const char *name, int namelen);
extern void adjust_dirname_case(struct index_state *istate, char *name);
extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
diff --git a/read-cache.c b/read-cache.c
index 9054369..b3fc77d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -887,6 +887,35 @@ static int has_file_name(struct index_state *istate,
return retval;
}
+
+/*
+ * Like strcmp(), but also return the offset of the first change.
+ */
+int strcmp_offset(const char *s1_in, const char *s2_in, int *first_change)
+{
+ const unsigned char *s1 = (const unsigned char *)s1_in;
+ const unsigned char *s2 = (const unsigned char *)s2_in;
+ int diff = 0;
+ int k;
+
+ *first_change = 0;
+ for (k=0; s1[k]; k++)
+ if ((diff = (s1[k] - s2[k])))
+ goto found_it;
+ if (!s2[k])
+ return 0;
+ diff = -1;
+
+found_it:
+ *first_change = k;
+ if (diff > 0)
+ return 1;
+ else if (diff < 0)
+ return -1;
+ else
+ return 0;
+}
+
/*
* Do we have another file with a pathname that is a proper
* subset of the name we're trying to add?
--
2.9.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v4 3/4] test-strcmp-offset: created test for strcmp_offset
2017-04-04 21:08 [PATCH v4 0/4] read-cache: speed up add_index_entry git
2017-04-04 21:08 ` [PATCH v4 1/4] p0004-read-tree: perf test to time read-tree git
2017-04-04 21:08 ` [PATCH v4 2/4] read-cache: add strcmp_offset function git
@ 2017-04-04 21:08 ` git
2017-04-04 21:08 ` [PATCH v4 4/4] read-cache: speed up add_index_entry during checkout git
3 siblings, 0 replies; 5+ messages in thread
From: git @ 2017-04-04 21:08 UTC (permalink / raw)
To: git; +Cc: gitster, peff, Jeff Hostetler
From: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
Makefile | 1 +
t/helper/.gitignore | 1 +
t/helper/test-strcmp-offset.c | 64 +++++++++++++++++++++++++++++++++++++++++++
t/t0065-strcmp-offset.sh | 11 ++++++++
4 files changed, 77 insertions(+)
create mode 100644 t/helper/test-strcmp-offset.c
create mode 100755 t/t0065-strcmp-offset.sh
diff --git a/Makefile b/Makefile
index 9ec6065..4c4c246 100644
--- a/Makefile
+++ b/Makefile
@@ -631,6 +631,7 @@ TEST_PROGRAMS_NEED_X += test-scrap-cache-tree
TEST_PROGRAMS_NEED_X += test-sha1
TEST_PROGRAMS_NEED_X += test-sha1-array
TEST_PROGRAMS_NEED_X += test-sigchain
+TEST_PROGRAMS_NEED_X += test-strcmp-offset
TEST_PROGRAMS_NEED_X += test-string-list
TEST_PROGRAMS_NEED_X += test-submodule-config
TEST_PROGRAMS_NEED_X += test-subprocess
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index d6e8b36..0a89531 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -25,6 +25,7 @@
/test-sha1
/test-sha1-array
/test-sigchain
+/test-strcmp-offset
/test-string-list
/test-submodule-config
/test-subprocess
diff --git a/t/helper/test-strcmp-offset.c b/t/helper/test-strcmp-offset.c
new file mode 100644
index 0000000..fe01318
--- /dev/null
+++ b/t/helper/test-strcmp-offset.c
@@ -0,0 +1,64 @@
+#include "cache.h"
+
+struct test_data {
+ const char *s1;
+ const char *s2;
+ int first_change;
+};
+
+static struct test_data data[] = {
+ { "abc", "abc", 0 },
+ { "abc", "def", 0 },
+
+ { "abc", "abz", 2 },
+
+ { "abc", "abcdef", 3 },
+
+ { "abc\xF0zzz", "abc\xFFzzz", 3 },
+
+ { NULL, NULL, 0 }
+};
+
+int try_pair(const char *sa, const char *sb, int first_change)
+{
+ int failed = 0;
+ int offset, r_exp, r_tst;
+ int r_exp_sign, r_tst_sign;
+
+ /*
+ * Because differnt CRTs behave differently, only rely on signs
+ * of the result values.
+ */
+ r_exp = strcmp(sa, sb);
+ r_exp_sign = ((r_exp < 0) ? -1 : ((r_exp == 0) ? 0 : 1));
+
+ r_tst = strcmp_offset(sa, sb, &offset);
+ r_tst_sign = ((r_tst < 0) ? -1 : ((r_tst == 0) ? 0 : 1));
+
+ if (r_tst_sign != r_exp_sign) {
+ error("FAIL: '%s' vs '%s', result expect %d, observed %d\n",
+ sa, sb, r_exp_sign, r_tst_sign);
+ failed = 1;
+ }
+
+ if (offset != first_change) {
+ error("FAIL: '%s' vs '%s', offset expect %d, observed %d\n",
+ sa, sb, first_change, offset);
+ failed = 1;
+ }
+
+ return failed;
+}
+
+int cmd_main(int argc, const char **argv)
+{
+ int failed = 0;
+ int k;
+
+ for (k=0; data[k].s1; k++) {
+ failed += try_pair(data[k].s1, data[k].s2, data[k].first_change);
+ failed += try_pair(data[k].s2, data[k].s1, data[k].first_change);
+ }
+
+ return failed;
+}
diff --git a/t/t0065-strcmp-offset.sh b/t/t0065-strcmp-offset.sh
new file mode 100755
index 0000000..0176c8c
--- /dev/null
+++ b/t/t0065-strcmp-offset.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+test_description='Test strcmp_offset functionality'
+
+. ./test-lib.sh
+
+test_expect_success run_helper '
+ test-strcmp-offset
+'
+
+test_done
--
2.9.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v4 4/4] read-cache: speed up add_index_entry during checkout
2017-04-04 21:08 [PATCH v4 0/4] read-cache: speed up add_index_entry git
` (2 preceding siblings ...)
2017-04-04 21:08 ` [PATCH v4 3/4] test-strcmp-offset: created test for strcmp_offset git
@ 2017-04-04 21:08 ` git
3 siblings, 0 replies; 5+ messages in thread
From: git @ 2017-04-04 21:08 UTC (permalink / raw)
To: git; +Cc: gitster, peff, Jeff Hostetler
From: Jeff Hostetler <jeffhost@microsoft.com>
Teach add_index_entry_with_check() and has_dir_name()
to see if the path of the new item is greater than the
last path in the index array before attempting to search
for it.
During checkout, merge_working_tree() populates the new
index in sorted order, so this change saves at least 2
lookups per file.
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
read-cache.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 43 insertions(+), 1 deletion(-)
diff --git a/read-cache.c b/read-cache.c
index b3fc77d..bf9fc53 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -927,6 +927,21 @@ static int has_dir_name(struct index_state *istate,
int stage = ce_stage(ce);
const char *name = ce->name;
const char *slash = name + ce_namelen(ce);
+ int len_eq_last;
+ int cmp_last = 0;
+
+ if (istate->cache_nr > 0) {
+ /*
+ * Compare the entry's full path with the last path in the index.
+ * If it sorts AFTER the last entry in the index and they have no
+ * common prefix, then there cannot be any F/D name conflicts.
+ */
+ cmp_last = strcmp_offset(name,
+ istate->cache[istate->cache_nr-1]->name,
+ &len_eq_last);
+ if (cmp_last > 0 && len_eq_last == 0)
+ return retval;
+ }
for (;;) {
int len;
@@ -939,6 +954,24 @@ static int has_dir_name(struct index_state *istate,
}
len = slash - name;
+ if (cmp_last > 0) {
+ /*
+ * If this part of the directory prefix (including the trailing
+ * slash) already appears in the path of the last entry in the
+ * index, then we cannot also have a file with this prefix (or
+ * any parent directory prefix).
+ */
+ if (len+1 <= len_eq_last)
+ return retval;
+ /*
+ * If this part of the directory prefix (excluding the trailing
+ * slash) is longer than the known equal portions, then this part
+ * of the prefix cannot collide with a file. Go on to the parent.
+ */
+ if (len > len_eq_last)
+ continue;
+ }
+
pos = index_name_stage_pos(istate, name, len, stage);
if (pos >= 0) {
/*
@@ -1030,7 +1063,16 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
cache_tree_invalidate_path(istate, ce->name);
- pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
+
+ /*
+ * If this entry's path sorts after the last entry in the index,
+ * we can avoid searching for it.
+ */
+ if (istate->cache_nr > 0 &&
+ strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
+ pos = -istate->cache_nr - 1;
+ else
+ pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
/* existing match? Just replace it. */
if (pos >= 0) {
--
2.9.3
^ permalink raw reply related [flat|nested] 5+ messages in thread