All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH/RFC] Replace ce_namelen() with a ce_namelen field
@ 2012-07-04  9:18 Thomas Gummerer
  2012-07-04 10:00 ` Thomas Rast
                   ` (3 more replies)
  0 siblings, 4 replies; 18+ messages in thread
From: Thomas Gummerer @ 2012-07-04  9:18 UTC (permalink / raw)
  To: git; +Cc: trast, gitster, mhagger, pclouds, Thomas Gummerer

Replace the ce_namelen() function in cache.h with a ce_namelen
field in struct cache_entry. This will both give us a tiny bit
of a performance enhancement when working with long pathnames
and is part of the refactoring for the index-v5 file format.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 builtin/add.c            |    4 ++--
 builtin/apply.c          |    1 +
 builtin/blame.c          |    1 +
 builtin/checkout-index.c |    8 ++++----
 builtin/checkout.c       |    7 ++++---
 builtin/clean.c          |    2 +-
 builtin/commit.c         |    2 +-
 builtin/grep.c           |    2 +-
 builtin/ls-files.c       |    8 ++++----
 builtin/rm.c             |    2 +-
 builtin/update-index.c   |    9 ++++++---
 cache-tree.c             |    4 ++--
 cache.h                  |   11 ++---------
 diff-lib.c               |    6 +++---
 entry.c                  |    2 +-
 merge-recursive.c        |    2 +-
 name-hash.c              |    4 ++--
 preload-index.c          |    2 +-
 read-cache.c             |   43 ++++++++++++++++++++++++-------------------
 rerere.c                 |    2 +-
 resolve-undo.c           |    2 +-
 sha1_name.c              |    6 +++---
 submodule.c              |    2 +-
 tree.c                   |    1 +
 unpack-trees.c           |   31 ++++++++++++++++---------------
 25 files changed, 85 insertions(+), 79 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index b79336d..63ef9c5 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -114,7 +114,7 @@ static void fill_pathspec_matches(const char **pathspec, char *seen, int specs)
 		return;
 	for (i = 0; i < active_nr; i++) {
 		struct cache_entry *ce = active_cache[i];
-		match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, seen);
+		match_pathspec(pathspec, ce->name, ce->ce_namelen, 0, seen);
 	}
 }
 
@@ -163,7 +163,7 @@ static void treat_gitlinks(const char **pathspec)
 	for (i = 0; i < active_nr; i++) {
 		struct cache_entry *ce = active_cache[i];
 		if (S_ISGITLINK(ce->ce_mode)) {
-			int len = ce_namelen(ce), j;
+			int len = ce->ce_namelen, j;
 			for (j = 0; pathspec[j]; j++) {
 				int len2 = strlen(pathspec[j]);
 				if (len2 <= len || pathspec[j][len] != '/' ||
diff --git a/builtin/apply.c b/builtin/apply.c
index dda9ea0..c9fae60 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -3503,6 +3503,7 @@ static void add_index_file(const char *path, unsigned mode, void *buf, unsigned
 	memcpy(ce->name, path, namelen);
 	ce->ce_mode = create_ce_mode(mode);
 	ce->ce_flags = namelen;
+	ce->ce_namelen = namelen;
 	if (S_ISGITLINK(mode)) {
 		const char *s = buf;
 
diff --git a/builtin/blame.c b/builtin/blame.c
index 24d3dd5..72b2ed4 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2154,6 +2154,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
 	hashcpy(ce->sha1, origin->blob_sha1);
 	memcpy(ce->name, path, len);
 	ce->ce_flags = create_ce_flags(len, 0);
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 	add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 
diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c
index c16d82b..ecabe1a 100644
--- a/builtin/checkout-index.c
+++ b/builtin/checkout-index.c
@@ -55,7 +55,7 @@ static int checkout_file(const char *name, int prefix_length)
 
 	while (pos < active_nr) {
 		struct cache_entry *ce = active_cache[pos];
-		if (ce_namelen(ce) != namelen ||
+		if (ce->ce_namelen != namelen ||
 		    memcmp(ce->name, name, namelen))
 			break;
 		has_same_name = 1;
@@ -100,12 +100,12 @@ static void checkout_all(const char *prefix, int prefix_length)
 		    && (CHECKOUT_ALL != checkout_stage || !ce_stage(ce)))
 			continue;
 		if (prefix && *prefix &&
-		    (ce_namelen(ce) <= prefix_length ||
+		    (ce->ce_namelen <= prefix_length ||
 		     memcmp(prefix, ce->name, prefix_length)))
 			continue;
 		if (last_ce && to_tempfile) {
-			if (ce_namelen(last_ce) != ce_namelen(ce)
-			    || memcmp(last_ce->name, ce->name, ce_namelen(ce)))
+			if (last_ce->ce_namelen != ce->ce_namelen
+			    || memcmp(last_ce->name, ce->name, ce->ce_namelen))
 				write_tempfile_record(last_ce->name, prefix_length);
 		}
 		if (checkout_entry(ce, &state,
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 3ddda34..e05f89c 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -74,6 +74,7 @@ static int update_some(const unsigned char *sha1, const char *base, int baselen,
 	memcpy(ce->name, base, baselen);
 	memcpy(ce->name + baselen, pathname, len - baselen);
 	ce->ce_flags = create_ce_flags(len, 0) | CE_UPDATE;
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 	add_cache_entry(ce, ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
 	return 0;
@@ -244,7 +245,7 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec,
 		struct cache_entry *ce = active_cache[pos];
 		if (source_tree && !(ce->ce_flags & CE_UPDATE))
 			continue;
-		match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, ps_matched);
+		match_pathspec(pathspec, ce->name, ce->ce_namelen, 0, ps_matched);
 	}
 
 	if (report_path_error(ps_matched, pathspec, prefix))
@@ -257,7 +258,7 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec,
 	/* Any unmerged paths? */
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		if (match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (match_pathspec(pathspec, ce->name, ce->ce_namelen, 0, NULL)) {
 			if (!ce_stage(ce))
 				continue;
 			if (opts->force) {
@@ -284,7 +285,7 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec,
 		struct cache_entry *ce = active_cache[pos];
 		if (source_tree && !(ce->ce_flags & CE_UPDATE))
 			continue;
-		if (match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (match_pathspec(pathspec, ce->name, ce->ce_namelen, 0, NULL)) {
 			if (!ce_stage(ce)) {
 				errs |= checkout_entry(ce, &state, NULL);
 				continue;
diff --git a/builtin/clean.c b/builtin/clean.c
index 0c7b3d0..512d31c 100644
--- a/builtin/clean.c
+++ b/builtin/clean.c
@@ -128,7 +128,7 @@ int cmd_clean(int argc, const char **argv, const char *prefix)
 		pos = -pos - 1;
 		if (pos < active_nr) {
 			ce = active_cache[pos];
-			if (ce_namelen(ce) == len &&
+			if (ce->ce_namelen == len &&
 			    !memcmp(ce->name, ent->name, len))
 				continue; /* Yup, this one exists unmerged */
 		}
diff --git a/builtin/commit.c b/builtin/commit.c
index a2ec73d..e08b87e 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -200,7 +200,7 @@ static int list_paths(struct string_list *list, const char *with_tree,
 
 		if (ce->ce_flags & CE_UPDATE)
 			continue;
-		if (!match_pathspec(pattern, ce->name, ce_namelen(ce), 0, m))
+		if (!match_pathspec(pattern, ce->name, ce->ce_namelen, 0, m))
 			continue;
 		item = string_list_insert(list, ce->name);
 		if (ce_skip_worktree(ce))
diff --git a/builtin/grep.c b/builtin/grep.c
index 643938d..dd3b1cc 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -416,7 +416,7 @@ static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, int
 		struct cache_entry *ce = active_cache[nr];
 		if (!S_ISREG(ce->ce_mode))
 			continue;
-		if (!match_pathspec_depth(pathspec, ce->name, ce_namelen(ce), 0, NULL))
+		if (!match_pathspec_depth(pathspec, ce->name, ce->ce_namelen, 0, NULL))
 			continue;
 		/*
 		 * If CE_VALID is on, we assume worktree file and its cache entry
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 7cff175..64a3e6e 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -104,7 +104,7 @@ static void show_killed_files(struct dir_struct *dir)
 				 * ent->name in the cache.  Does it expect
 				 * ent->name to be a directory?
 				 */
-				len = ce_namelen(active_cache[pos]);
+				len = active_cache[pos]->ce_namelen;
 				if ((ent->len < len) &&
 				    !strncmp(active_cache[pos]->name,
 					     ent->name, ent->len) &&
@@ -130,10 +130,10 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce)
 {
 	int len = max_prefix_len;
 
-	if (len >= ce_namelen(ce))
+	if (len >= ce->ce_namelen)
 		die("git ls-files: internal error - cache entry not superset of prefix");
 
-	if (!match_pathspec(pathspec, ce->name, ce_namelen(ce), len, ps_matched))
+	if (!match_pathspec(pathspec, ce->name, ce->ce_namelen, len, ps_matched))
 		return;
 
 	if (tag && *tag && show_valid_bit &&
@@ -162,7 +162,7 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce)
 		       find_unique_abbrev(ce->sha1,abbrev),
 		       ce_stage(ce));
 	}
-	write_name(ce->name, ce_namelen(ce));
+	write_name(ce->name, ce->ce_namelen);
 	if (debug_mode) {
 		printf("  ctime: %d:%d\n", ce->ce_ctime.sec, ce->ce_ctime.nsec);
 		printf("  mtime: %d:%d\n", ce->ce_mtime.sec, ce->ce_mtime.nsec);
diff --git a/builtin/rm.c b/builtin/rm.c
index 90c8a50..9a636b7 100644
--- a/builtin/rm.c
+++ b/builtin/rm.c
@@ -171,7 +171,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
 
 	for (i = 0; i < active_nr; i++) {
 		struct cache_entry *ce = active_cache[i];
-		if (!match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, seen))
+		if (!match_pathspec(pathspec, ce->name, ce->ce_namelen, 0, seen))
 			continue;
 		ALLOC_GROW(list.name, list.nr + 1, list.alloc);
 		list.name[list.nr++] = ce->name;
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 5f038d6..784b52b 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -96,6 +96,7 @@ static int add_one_path(struct cache_entry *old, const char *path, int len, stru
 	ce = xcalloc(1, size);
 	memcpy(ce->name, path, len);
 	ce->ce_flags = len;
+	ce->ce_namelen = len;
 	fill_stat_cache_info(ce, st);
 	ce->ce_mode = ce_mode_from_stat(old, st->st_mode);
 
@@ -236,6 +237,7 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1,
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, path, len);
 	ce->ce_flags = create_ce_flags(len, stage);
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 	if (assume_unchanged)
 		ce->ce_flags |= CE_VALID;
@@ -434,6 +436,7 @@ static struct cache_entry *read_one_ent(const char *which,
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, path, namelen);
 	ce->ce_flags = create_ce_flags(namelen, stage);
+	ce->ce_namelen = namelen;
 	ce->ce_mode = create_ce_mode(mode);
 	return ce;
 }
@@ -453,7 +456,7 @@ static int unresolve_one(const char *path)
 		if (pos < active_nr) {
 			struct cache_entry *ce = active_cache[pos];
 			if (ce_stage(ce) &&
-			    ce_namelen(ce) == namelen &&
+			    ce->ce_namelen == namelen &&
 			    !memcmp(ce->name, path, namelen))
 				return 0;
 		}
@@ -466,7 +469,7 @@ static int unresolve_one(const char *path)
 		pos = -pos-1;
 		if (pos < active_nr) {
 			struct cache_entry *ce = active_cache[pos];
-			if (ce_namelen(ce) == namelen &&
+			if (ce->ce_namelen == namelen &&
 			    !memcmp(ce->name, path, namelen)) {
 				fprintf(stderr,
 					"%s: skipping still unmerged path.\n",
@@ -569,7 +572,7 @@ static int do_reupdate(int ac, const char **av,
 			continue;
 		if (has_head)
 			old = read_one_ent(NULL, head_sha1,
-					   ce->name, ce_namelen(ce), 0);
+					   ce->name, ce->ce_namelen, 0);
 		if (old && ce->ce_mode == old->ce_mode &&
 		    !hashcmp(ce->sha1, old->sha1)) {
 			free(old);
diff --git a/cache-tree.c b/cache-tree.c
index 28ed657..b5f5cd0 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -270,7 +270,7 @@ static int update_one(struct cache_tree *it,
 		int pathlen, sublen, subcnt;
 
 		path = ce->name;
-		pathlen = ce_namelen(ce);
+		pathlen = ce->ce_namelen;
 		if (pathlen <= baselen || memcmp(base, path, baselen))
 			break; /* at the end of this level */
 
@@ -313,7 +313,7 @@ static int update_one(struct cache_tree *it,
 		unsigned mode;
 
 		path = ce->name;
-		pathlen = ce_namelen(ce);
+		pathlen = ce->ce_namelen;
 		if (pathlen <= baselen || memcmp(base, path, baselen))
 			break; /* at the end of this level */
 
diff --git a/cache.h b/cache.h
index cc5048c..5f93f22 100644
--- a/cache.h
+++ b/cache.h
@@ -128,6 +128,7 @@ struct cache_entry {
 	unsigned int ce_gid;
 	unsigned int ce_size;
 	unsigned int ce_flags;
+	unsigned int ce_namelen;
 	unsigned char sha1[20];
 	struct cache_entry *next;
 	struct cache_entry *dir_next;
@@ -205,15 +206,7 @@ static inline unsigned create_ce_flags(size_t len, unsigned stage)
 	return (len | (stage << CE_STAGESHIFT));
 }
 
-static inline size_t ce_namelen(const struct cache_entry *ce)
-{
-	size_t len = ce->ce_flags & CE_NAMEMASK;
-	if (len < CE_NAMEMASK)
-		return len;
-	return strlen(ce->name + CE_NAMEMASK) + CE_NAMEMASK;
-}
-
-#define ce_size(ce) cache_entry_size(ce_namelen(ce))
+#define ce_size(ce) cache_entry_size(ce->ce_namelen)
 #define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT)
 #define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE)
 #define ce_skip_worktree(ce) ((ce)->ce_flags & CE_SKIP_WORKTREE)
diff --git a/diff-lib.c b/diff-lib.c
index fc0dff3..720391b 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -32,7 +32,7 @@ static int check_removed(const struct cache_entry *ce, struct stat *st)
 			return -1;
 		return 1;
 	}
-	if (has_symlink_leading_path(ce->name, ce_namelen(ce)))
+	if (has_symlink_leading_path(ce->name, ce->ce_namelen))
 		return 1;
 	if (S_ISDIR(st->st_mode)) {
 		unsigned char sub[20];
@@ -115,7 +115,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option)
 			int num_compare_stages = 0;
 			size_t path_len;
 
-			path_len = ce_namelen(ce);
+			path_len = ce->ce_namelen;
 
 			dpath = xmalloc(combine_diff_path_size(5, path_len));
 			dpath->path = (char *) &(dpath->parent[5]);
@@ -319,7 +319,7 @@ static int show_modified(struct rev_info *revs,
 	if (revs->combine_merges && !cached &&
 	    (hashcmp(sha1, old->sha1) || hashcmp(old->sha1, new->sha1))) {
 		struct combine_diff_path *p;
-		int pathlen = ce_namelen(new);
+		int pathlen = new->ce_namelen;
 
 		p = xmalloc(combine_diff_path_size(2, pathlen));
 		p->path = (char *) &p->parent[2];
diff --git a/entry.c b/entry.c
index 17a6bcc..57ee28a 100644
--- a/entry.c
+++ b/entry.c
@@ -242,7 +242,7 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t
 
 	memcpy(path, state->base_dir, len);
 	strcpy(path + len, ce->name);
-	len += ce_namelen(ce);
+	len += ce->ce_namelen;
 
 	if (!check_path(path, len, &st, state->base_dir_len)) {
 		unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE);
diff --git a/merge-recursive.c b/merge-recursive.c
index 680937c..9beed5d 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -254,7 +254,7 @@ struct tree *write_tree_from_memory(struct merge_options *o)
 			struct cache_entry *ce = active_cache[i];
 			if (ce_stage(ce))
 				fprintf(stderr, "BUG: %d %.*s\n", ce_stage(ce),
-					(int)ce_namelen(ce), ce->name);
+					(int)ce->ce_namelen, ce->name);
 		}
 		die("Bug in merge-recursive.c");
 	}
diff --git a/name-hash.c b/name-hash.c
index d8d25c2..5f0c998 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -75,7 +75,7 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
 		return;
 	ce->ce_flags |= CE_HASHED;
 	ce->next = ce->dir_next = NULL;
-	hash = hash_name(ce->name, ce_namelen(ce));
+	hash = hash_name(ce->name, ce->ce_namelen);
 	pos = insert_hash(hash, ce, &istate->name_hash);
 	if (pos) {
 		ce->next = *pos;
@@ -125,7 +125,7 @@ static int slow_same_name(const char *name1, int len1, const char *name2, int le
 
 static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase)
 {
-	int len = ce_namelen(ce);
+	int len = ce->ce_namelen;
 
 	/*
 	 * Always do exact compare, even if we want a case-ignoring comparison;
diff --git a/preload-index.c b/preload-index.c
index 49cb08d..95ca421 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -55,7 +55,7 @@ static void *preload_thread(void *_data)
 			continue;
 		if (!ce_path_match(ce, &pathspec))
 			continue;
-		if (threaded_has_symlink_leading_path(&cache, ce->name, ce_namelen(ce)))
+		if (threaded_has_symlink_leading_path(&cache, ce->name, ce->ce_namelen))
 			continue;
 		if (lstat(ce->name, &st))
 			continue;
diff --git a/read-cache.c b/read-cache.c
index ef355cc..01281ab 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -56,6 +56,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 	copy_cache_entry(new, old);
 	new->ce_flags &= ~(CE_STATE_MASK | CE_NAMEMASK);
 	new->ce_flags |= (namelen >= CE_NAMEMASK ? CE_NAMEMASK : namelen);
+	new->ce_namelen = namelen;
 	memcpy(new->name, new_name, namelen + 1);
 
 	cache_tree_invalidate_path(istate->cache_tree, old->name);
@@ -492,7 +493,7 @@ int remove_file_from_index(struct index_state *istate, const char *path)
 
 static int compare_name(struct cache_entry *ce, const char *path, int namelen)
 {
-	return namelen != ce_namelen(ce) || memcmp(path, ce->name, namelen);
+	return namelen != ce->ce_namelen || memcmp(path, ce->name, namelen);
 }
 
 static int index_name_pos_also_unmerged(struct index_state *istate,
@@ -520,8 +521,8 @@ static int index_name_pos_also_unmerged(struct index_state *istate,
 
 static int different_name(struct cache_entry *ce, struct cache_entry *alias)
 {
-	int len = ce_namelen(ce);
-	return ce_namelen(alias) != len || memcmp(ce->name, alias->name, len);
+	int len = ce->ce_namelen;
+	return alias->ce_namelen != len || memcmp(ce->name, alias->name, len);
 }
 
 /*
@@ -542,7 +543,7 @@ static struct cache_entry *create_alias_ce(struct cache_entry *ce, struct cache_
 		die("Will not add file alias '%s' ('%s' already exists in index)", ce->name, alias->name);
 
 	/* Ok, create the new entry using the name of the existing alias */
-	len = ce_namelen(alias);
+	len = alias->ce_namelen;
 	new = xcalloc(1, cache_entry_size(len));
 	memcpy(new->name, alias->name, len);
 	copy_cache_entry(new, ce);
@@ -582,6 +583,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st,
 	ce = xcalloc(1, size);
 	memcpy(ce->name, path, namelen);
 	ce->ce_flags = namelen;
+	ce->ce_namelen = namelen;
 	if (!intent_only)
 		fill_stat_cache_info(ce, st);
 	else
@@ -623,7 +625,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st,
 		}
 	}
 
-	alias = index_name_exists(istate, ce->name, ce_namelen(ce), ignore_case);
+	alias = index_name_exists(istate, ce->name, ce->ce_namelen, ignore_case);
 	if (alias && !ce_stage(alias) && !ie_match_stat(istate, alias, st, ce_option)) {
 		/* Nothing changed, really */
 		free(ce);
@@ -684,6 +686,7 @@ struct cache_entry *make_cache_entry(unsigned int mode,
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, path, len);
 	ce->ce_flags = create_ce_flags(len, stage);
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 
 	if (refresh)
@@ -694,13 +697,13 @@ struct cache_entry *make_cache_entry(unsigned int mode,
 
 int ce_same_name(struct cache_entry *a, struct cache_entry *b)
 {
-	int len = ce_namelen(a);
-	return ce_namelen(b) == len && !memcmp(a->name, b->name, len);
+	int len = a->ce_namelen;
+	return b->ce_namelen == len && !memcmp(a->name, b->name, len);
 }
 
 int ce_path_match(const struct cache_entry *ce, const struct pathspec *pathspec)
 {
-	return match_pathspec_depth(pathspec, ce->name, ce_namelen(ce), 0, NULL);
+	return match_pathspec_depth(pathspec, ce->name, ce->ce_namelen, 0, NULL);
 }
 
 /*
@@ -772,14 +775,14 @@ static int has_file_name(struct index_state *istate,
 			 const struct cache_entry *ce, int pos, int ok_to_replace)
 {
 	int retval = 0;
-	int len = ce_namelen(ce);
+	int len = ce->ce_namelen;
 	int stage = ce_stage(ce);
 	const char *name = ce->name;
 
 	while (pos < istate->cache_nr) {
 		struct cache_entry *p = istate->cache[pos++];
 
-		if (len >= ce_namelen(p))
+		if (len >= p->ce_namelen)
 			break;
 		if (memcmp(name, p->name, len))
 			break;
@@ -807,7 +810,7 @@ static int has_dir_name(struct index_state *istate,
 	int retval = 0;
 	int stage = ce_stage(ce);
 	const char *name = ce->name;
-	const char *slash = name + ce_namelen(ce);
+	const char *slash = name + ce->ce_namelen;
 
 	for (;;) {
 		int len;
@@ -848,7 +851,7 @@ static int has_dir_name(struct index_state *istate,
 		 */
 		while (pos < istate->cache_nr) {
 			struct cache_entry *p = istate->cache[pos];
-			if ((ce_namelen(p) <= len) ||
+			if ((p->ce_namelen <= len) ||
 			    (p->name[len] != '/') ||
 			    memcmp(p->name, name, len))
 				break; /* not our subdirectory */
@@ -1235,8 +1238,8 @@ struct ondisk_cache_entry_extended {
 #define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
 #define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
 #define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
-			    ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
-			    ondisk_cache_entry_size(ce_namelen(ce)))
+			    ondisk_cache_entry_extended_size(ce->ce_namelen) : \
+			    ondisk_cache_entry_size(ce->ce_namelen))
 
 static int verify_hdr(struct cache_header *hdr, unsigned long size)
 {
@@ -1320,6 +1323,7 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
 	ce->ce_gid   = ntoh_l(ondisk->gid);
 	ce->ce_size  = ntoh_l(ondisk->size);
 	ce->ce_flags = flags;
+	ce->ce_namelen = len;
 	hashcpy(ce->sha1, ondisk->sha1);
 	memcpy(ce->name, name, len);
 	ce->name[len] = '\0';
@@ -1681,7 +1685,7 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 		size = ondisk_ce_size(ce);
 		ondisk = xcalloc(1, size);
 		name = copy_cache_entry_to_ondisk(ondisk, ce);
-		memcpy(name, ce->name, ce_namelen(ce));
+		memcpy(name, ce->name, ce->ce_namelen);
 	} else {
 		int common, to_remove, prefix_size;
 		unsigned char to_remove_vi[16];
@@ -1698,15 +1702,15 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 			size = offsetof(struct ondisk_cache_entry_extended, name);
 		else
 			size = offsetof(struct ondisk_cache_entry, name);
-		size += prefix_size + (ce_namelen(ce) - common + 1);
+		size += prefix_size + (ce->ce_namelen - common + 1);
 
 		ondisk = xcalloc(1, size);
 		name = copy_cache_entry_to_ondisk(ondisk, ce);
 		memcpy(name, to_remove_vi, prefix_size);
-		memcpy(name + prefix_size, ce->name + common, ce_namelen(ce) - common);
+		memcpy(name + prefix_size, ce->name + common, ce->ce_namelen - common);
 
 		strbuf_splice(previous_name, common, to_remove,
-			      ce->name + common, ce_namelen(ce) - common);
+			      ce->name + common, ce->ce_namelen - common);
 	}
 
 	result = ce_write(c, fd, ondisk, size);
@@ -1846,6 +1850,7 @@ int read_index_unmerged(struct index_state *istate)
 		new_ce = xcalloc(1, size);
 		memcpy(new_ce->name, ce->name, len);
 		new_ce->ce_flags = create_ce_flags(len, 0) | CE_CONFLICTED;
+		new_ce->ce_namelen = len;
 		new_ce->ce_mode = ce->ce_mode;
 		if (add_index_entry(istate, new_ce, 0))
 			return error("%s: cannot drop to stage #0",
@@ -1876,7 +1881,7 @@ int index_name_is_other(const struct index_state *istate, const char *name,
 	pos = -pos - 1;
 	if (pos < istate->cache_nr) {
 		struct cache_entry *ce = istate->cache[pos];
-		if (ce_namelen(ce) == namelen &&
+		if (ce->ce_namelen == namelen &&
 		    !memcmp(ce->name, name, namelen))
 			return 0; /* Yup, this one exists unmerged */
 	}
diff --git a/rerere.c b/rerere.c
index dcb525a..044190d 100644
--- a/rerere.c
+++ b/rerere.c
@@ -320,7 +320,7 @@ static int handle_cache(const char *path, unsigned char *sha1, const char *outpu
 		if (active_nr <= pos)
 			break;
 		ce = active_cache[pos++];
-		if (ce_namelen(ce) != len || memcmp(ce->name, path, len)
+		if (ce->ce_namelen != len || memcmp(ce->name, path, len)
 		    || ce_stage(ce) != i + 1)
 			break;
 		mmfile[i].ptr = read_sha1_file(ce->sha1, &type, &size);
diff --git a/resolve-undo.c b/resolve-undo.c
index 72b4612..6fdee85 100644
--- a/resolve-undo.c
+++ b/resolve-undo.c
@@ -165,7 +165,7 @@ void unmerge_index(struct index_state *istate, const char **pathspec)
 
 	for (i = 0; i < istate->cache_nr; i++) {
 		struct cache_entry *ce = istate->cache[i];
-		if (!match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, NULL))
+		if (!match_pathspec(pathspec, ce->name, ce->ce_namelen, 0, NULL))
 			continue;
 		i = unmerge_index_entry_at(istate, i);
 	}
diff --git a/sha1_name.c b/sha1_name.c
index c633113..2c4af98 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -967,7 +967,7 @@ static void diagnose_invalid_index_path(int stage,
 		pos = -pos - 1;
 	if (pos < active_nr) {
 		ce = active_cache[pos];
-		if (ce_namelen(ce) == namelen &&
+		if (ce->ce_namelen == namelen &&
 		    !memcmp(ce->name, filename, namelen))
 			die("Path '%s' is in the index, but not at stage %d.\n"
 			    "Did you mean ':%d:%s'?",
@@ -985,7 +985,7 @@ static void diagnose_invalid_index_path(int stage,
 		pos = -pos - 1;
 	if (pos < active_nr) {
 		ce = active_cache[pos];
-		if (ce_namelen(ce) == fullnamelen &&
+		if (ce->ce_namelen == fullnamelen &&
 		    !memcmp(ce->name, fullname, fullnamelen))
 			die("Path '%s' is in the index, but not '%s'.\n"
 			    "Did you mean ':%d:%s' aka ':%d:./%s'?",
@@ -1087,7 +1087,7 @@ int get_sha1_with_context_1(const char *name, unsigned char *sha1,
 			pos = -pos - 1;
 		while (pos < active_nr) {
 			ce = active_cache[pos];
-			if (ce_namelen(ce) != namelen ||
+			if (ce->ce_namelen != namelen ||
 			    memcmp(ce->name, cp, namelen))
 				break;
 			if (ce_stage(ce) == stage) {
diff --git a/submodule.c b/submodule.c
index 959d349..c5f5f15 100644
--- a/submodule.c
+++ b/submodule.c
@@ -112,7 +112,7 @@ void gitmodules_config(void)
 			pos = -1 - pos;
 			if (active_nr > pos) {  /* there is a .gitmodules */
 				const struct cache_entry *ce = active_cache[pos];
-				if (ce_namelen(ce) == 11 &&
+				if (ce->ce_namelen == 11 &&
 				    !memcmp(ce->name, ".gitmodules", 11))
 					gitmodules_is_unmerged = 1;
 			}
diff --git a/tree.c b/tree.c
index 676e9f7..f2a66be 100644
--- a/tree.c
+++ b/tree.c
@@ -23,6 +23,7 @@ static int read_one_entry_opt(const unsigned char *sha1, const char *base, int b
 
 	ce->ce_mode = create_ce_mode(mode);
 	ce->ce_flags = create_ce_flags(baselen + len, stage);
+	ce->ce_namelen = baselen + len;
 	memcpy(ce->name, base, baselen);
 	memcpy(ce->name + baselen, pathname, len+1);
 	hashcpy(ce->sha1, sha1);
diff --git a/unpack-trees.c b/unpack-trees.c
index ad40109..e5e6fc5 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -175,11 +175,11 @@ static void display_error_msgs(struct unpack_trees_options *o)
  */
 static void unlink_entry(struct cache_entry *ce)
 {
-	if (!check_leading_path(ce->name, ce_namelen(ce)))
+	if (!check_leading_path(ce->name, ce->ce_namelen))
 		return;
 	if (remove_or_warn(ce->ce_mode, ce->name))
 		return;
-	schedule_dir_for_removal(ce->name, ce_namelen(ce));
+	schedule_dir_for_removal(ce->name, ce->ce_namelen);
 }
 
 static struct checkout state;
@@ -324,7 +324,7 @@ static int locate_in_src_index(struct cache_entry *ce,
 			       struct unpack_trees_options *o)
 {
 	struct index_state *index = o->src_index;
-	int len = ce_namelen(ce);
+	int len = ce->ce_namelen;
 	int pos = index_name_pos(index, ce->name, len);
 	if (pos < 0)
 		pos = -1 - pos;
@@ -340,12 +340,12 @@ static void mark_ce_used_same_name(struct cache_entry *ce,
 				   struct unpack_trees_options *o)
 {
 	struct index_state *index = o->src_index;
-	int len = ce_namelen(ce);
+	int len = ce->ce_namelen;
 	int pos;
 
 	for (pos = locate_in_src_index(ce, o); pos < index->cache_nr; pos++) {
 		struct cache_entry *next = index->cache[pos];
-		if (len != ce_namelen(next) ||
+		if (len != next->ce_namelen ||
 		    memcmp(ce->name, next->name, len))
 			break;
 		mark_ce_used(next, o);
@@ -370,14 +370,14 @@ static void add_same_unmerged(struct cache_entry *ce,
 			      struct unpack_trees_options *o)
 {
 	struct index_state *index = o->src_index;
-	int len = ce_namelen(ce);
+	int len = ce->ce_namelen;
 	int pos = index_name_pos(index, ce->name, len);
 
 	if (0 <= pos)
 		die("programming error in a caller of mark_ce_used_same_name");
 	for (pos = -pos - 1; pos < index->cache_nr; pos++) {
 		struct cache_entry *next = index->cache[pos];
-		if (len != ce_namelen(next) ||
+		if (len != next->ce_namelen ||
 		    memcmp(ce->name, next->name, len))
 			break;
 		add_entry(o, next, 0, 0);
@@ -493,7 +493,7 @@ static int do_compare_entry(const struct cache_entry *ce, const struct traverse_
 			return cmp;
 	}
 	pathlen = info->pathlen;
-	ce_len = ce_namelen(ce);
+	ce_len = ce->ce_namelen;
 
 	/* If ce_len < pathlen then we must have previously hit "name == directory" entry */
 	if (ce_len < pathlen)
@@ -516,7 +516,7 @@ static int compare_entry(const struct cache_entry *ce, const struct traverse_inf
 	 * Even if the beginning compared identically, the ce should
 	 * compare as bigger than a directory leading up to it!
 	 */
-	return ce_namelen(ce) > traverse_path_len(info, n);
+	return ce->ce_namelen > traverse_path_len(info, n);
 }
 
 static int ce_in_traverse_path(const struct cache_entry *ce,
@@ -530,7 +530,7 @@ static int ce_in_traverse_path(const struct cache_entry *ce,
 	 * If ce (blob) is the same name as the path (which is a tree
 	 * we will be descending into), it won't be inside it.
 	 */
-	return (info->pathlen < ce_namelen(ce));
+	return (info->pathlen < ce->ce_namelen);
 }
 
 static struct cache_entry *create_ce_entry(const struct traverse_info *info, const struct name_entry *n, int stage)
@@ -540,6 +540,7 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, con
 
 	ce->ce_mode = create_ce_mode(n->mode);
 	ce->ce_flags = create_ce_flags(len, stage);
+	ce->ce_namelen = len;
 	hashcpy(ce->sha1, n->sha1);
 	make_traverse_path(ce->name, info, n);
 
@@ -657,7 +658,7 @@ static int find_cache_pos(struct traverse_info *info,
 		if (ce_slash)
 			ce_len = ce_slash - ce_name;
 		else
-			ce_len = ce_namelen(ce) - pfxlen;
+			ce_len = ce->ce_namelen - pfxlen;
 		cmp = name_compare(p->path, p_len, ce_name, ce_len);
 		/*
 		 * Exact match; if we have a directory we need to
@@ -938,7 +939,7 @@ static int clear_ce_flags_1(struct cache_entry **cache, int nr,
 
 		/* Non-directory */
 		dtype = ce_to_dtype(ce);
-		ret = excluded_from_list(ce->name, ce_namelen(ce), name, &dtype, el);
+		ret = excluded_from_list(ce->name, ce->ce_namelen, name, &dtype, el);
 		if (ret < 0)
 			ret = defval;
 		if (ret > 0)
@@ -1293,7 +1294,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce,
 	     i < o->src_index->cache_nr;
 	     i++) {
 		struct cache_entry *ce2 = o->src_index->cache[i];
-		int len = ce_namelen(ce2);
+		int len = ce2->ce_namelen;
 		if (len < namelen ||
 		    strncmp(ce->name, ce2->name, namelen) ||
 		    ce2->name[namelen] != '/')
@@ -1411,7 +1412,7 @@ static int verify_absent_1(struct cache_entry *ce,
 	if (o->index_only || o->reset || !o->update)
 		return 0;
 
-	len = check_leading_path(ce->name, ce_namelen(ce));
+	len = check_leading_path(ce->name, ce->ce_namelen);
 	if (!len)
 		return 0;
 	else if (len > 0) {
@@ -1430,7 +1431,7 @@ static int verify_absent_1(struct cache_entry *ce,
 				     strerror(errno));
 		return 0;
 	} else {
-		return check_ok_to_remove(ce->name, ce_namelen(ce),
+		return check_ok_to_remove(ce->name, ce->ce_namelen,
 					  ce_to_dtype(ce), ce, &st,
 					  error_type, o);
 	}
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH/RFC] Replace ce_namelen() with a ce_namelen field
  2012-07-04  9:18 [PATCH/RFC] Replace ce_namelen() with a ce_namelen field Thomas Gummerer
@ 2012-07-04 10:00 ` Thomas Rast
  2012-07-04 10:01 ` Nguyen Thai Ngoc Duy
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 18+ messages in thread
From: Thomas Rast @ 2012-07-04 10:00 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, gitster, mhagger, pclouds

Thomas Gummerer <t.gummerer@gmail.com> writes:

> Replace the ce_namelen() function in cache.h with a ce_namelen
> field in struct cache_entry. This will both give us a tiny bit
> of a performance enhancement when working with long pathnames
> and is part of the refactoring for the index-v5 file format.

Expand this, at least proportionally to the damage incurred.

For example, I know that you know that there's a good reason why we want
to put it in a new field when reading from v5 :-)

> Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
> ---
>  builtin/add.c            |    4 ++--
>  builtin/apply.c          |    1 +
>  builtin/blame.c          |    1 +
>  builtin/checkout-index.c |    8 ++++----
>  builtin/checkout.c       |    7 ++++---
>  builtin/clean.c          |    2 +-
>  builtin/commit.c         |    2 +-
>  builtin/grep.c           |    2 +-
>  builtin/ls-files.c       |    8 ++++----
>  builtin/rm.c             |    2 +-
>  builtin/update-index.c   |    9 ++++++---
>  cache-tree.c             |    4 ++--
>  cache.h                  |   11 ++---------
>  diff-lib.c               |    6 +++---
>  entry.c                  |    2 +-
>  merge-recursive.c        |    2 +-
>  name-hash.c              |    4 ++--
>  preload-index.c          |    2 +-
>  read-cache.c             |   43 ++++++++++++++++++++++++-------------------
>  rerere.c                 |    2 +-
>  resolve-undo.c           |    2 +-
>  sha1_name.c              |    6 +++---
>  submodule.c              |    2 +-
>  tree.c                   |    1 +
>  unpack-trees.c           |   31 ++++++++++++++++---------------
>  25 files changed, 85 insertions(+), 79 deletions(-)
[...]
> diff --git a/cache.h b/cache.h
> index cc5048c..5f93f22 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -128,6 +128,7 @@ struct cache_entry {
>  	unsigned int ce_gid;
>  	unsigned int ce_size;
>  	unsigned int ce_flags;
> +	unsigned int ce_namelen;
>  	unsigned char sha1[20];
>  	struct cache_entry *next;
>  	struct cache_entry *dir_next;
> @@ -205,15 +206,7 @@ static inline unsigned create_ce_flags(size_t len, unsigned stage)
>  	return (len | (stage << CE_STAGESHIFT));
>  }
>  
> -static inline size_t ce_namelen(const struct cache_entry *ce)
> -{
> -	size_t len = ce->ce_flags & CE_NAMEMASK;
> -	if (len < CE_NAMEMASK)
> -		return len;
> -	return strlen(ce->name + CE_NAMEMASK) + CE_NAMEMASK;
> -}

AFAICT almost all of the changes relate to the change from ce_namelen(ce)
to ce->ce_namelen.  What's stopping you from defining

#define ce_namelen(ce) ((ce)->ce_namelen)

as a simple compatibility macro?

I'm not sure whether such internal backwards-compatibility is desirable,
but it would certainly make the patch much more reviewable.  If Junio
would rather see the conversion, maybe do it in a separate patch that
simply substitutes the ce->ce_namelen for all the ce_namelen(ce)?

-- 
Thomas Rast
trast@{inf,student}.ethz.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH/RFC] Replace ce_namelen() with a ce_namelen field
  2012-07-04  9:18 [PATCH/RFC] Replace ce_namelen() with a ce_namelen field Thomas Gummerer
  2012-07-04 10:00 ` Thomas Rast
@ 2012-07-04 10:01 ` Nguyen Thai Ngoc Duy
  2012-07-04 10:07 ` Nguyen Thai Ngoc Duy
  2012-07-04 19:26 ` Junio C Hamano
  3 siblings, 0 replies; 18+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-07-04 10:01 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, gitster, mhagger

On Wed, Jul 4, 2012 at 4:18 PM, Thomas Gummerer <t.gummerer@gmail.com> wrote:
> Replace the ce_namelen() function in cache.h with a ce_namelen
> field in struct cache_entry. This will both give us a tiny bit
> of a performance enhancement when working with long pathnames
> and is part of the refactoring for the index-v5 file format.

Can we just wrap ce->namelen in ce_namelen() instead? Less changes
this way. We can do the replacement once v5 patches come.

static inline int ce_namelen(struct cache_entry *ce)
{
   return ce->namelen;
}

something like that.
-- 
Duy

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH/RFC] Replace ce_namelen() with a ce_namelen field
  2012-07-04  9:18 [PATCH/RFC] Replace ce_namelen() with a ce_namelen field Thomas Gummerer
  2012-07-04 10:00 ` Thomas Rast
  2012-07-04 10:01 ` Nguyen Thai Ngoc Duy
@ 2012-07-04 10:07 ` Nguyen Thai Ngoc Duy
  2012-07-04 19:26 ` Junio C Hamano
  3 siblings, 0 replies; 18+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-07-04 10:07 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, gitster, mhagger

On Wed, Jul 4, 2012 at 4:18 PM, Thomas Gummerer <t.gummerer@gmail.com> wrote:
> Replace the ce_namelen() function in cache.h with a ce_namelen
> field in struct cache_entry. This will both give us a tiny bit
> of a performance enhancement when working with long pathnames
> and is part of the refactoring for the index-v5 file format.

In the name of performance maybe you want to replace strlen(ce->name)
with ce_namelen(ce) or ce->ce_namelen. git-grep says there are three
occurrences, but you may want to relax the search a bit because it's
not always "strlen(ce->name)".
-- 
Duy

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH/RFC] Replace ce_namelen() with a ce_namelen field
  2012-07-04  9:18 [PATCH/RFC] Replace ce_namelen() with a ce_namelen field Thomas Gummerer
                   ` (2 preceding siblings ...)
  2012-07-04 10:07 ` Nguyen Thai Ngoc Duy
@ 2012-07-04 19:26 ` Junio C Hamano
  2012-07-06 16:07   ` Introduction of " Thomas Gummerer
  3 siblings, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2012-07-04 19:26 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pclouds

Thomas Gummerer <t.gummerer@gmail.com> writes:

> Replace the ce_namelen() function in cache.h with a ce_namelen
> field in struct cache_entry.

NAK, at least in this form that duplicates the same information in
ce_namelen (new field) and bits in ce_flags, making it easier for
them to go out of sync with new bugs (or misconversions in this
patch, if there is one).

At least, keep the ce_namelen() inline used in the existing calling
site, as the whole point of it is that they do not have to care how
the name length is stored or computed.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Introduction of a ce_namelen field
  2012-07-04 19:26 ` Junio C Hamano
@ 2012-07-06 16:07   ` Thomas Gummerer
  2012-07-06 16:07     ` [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into " Thomas Gummerer
  2012-07-06 16:07     ` [PATCH/RFC v2 2/2] Replace strlen() with ce_namelen() Thomas Gummerer
  0 siblings, 2 replies; 18+ messages in thread
From: Thomas Gummerer @ 2012-07-06 16:07 UTC (permalink / raw)
  To: git; +Cc: trast, gitster, mhagger, pclouds, t.gummerer

Thanks to the review of Junio, Duy and Thomas here is a reroll of the
patches, where the name length is separated from the flags in the
in-memory format and which includes a little bit of a performance
optimization by using the ce_namelen field instead of strlen() in
a couple of places thanks to the suggestion of Duy.

[PATCH/RFC v2 1/2] Strip namelen out of ce_flags into a ce_namelen
[PATCH/RFC v2 2/2] Replace strlen(ce->name) with ce_namlen

builtin/apply.c        |    3 ++-
builtin/blame.c        |    3 ++-
builtin/checkout.c     |    3 ++-
builtin/update-index.c |    9 ++++++---
cache.h                |   18 ++++++------------
read-cache.c           |   58 +++++++++++++++++++++++++++++++++++-----------------------
tree.c                 |    7 ++++---
unpack-trees.c         |    5 +++--
8 files changed, 60 insertions(+), 46 deletions(-)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into a ce_namelen field
  2012-07-06 16:07   ` Introduction of " Thomas Gummerer
@ 2012-07-06 16:07     ` Thomas Gummerer
  2012-07-09  4:59       ` Junio C Hamano
  2012-07-06 16:07     ` [PATCH/RFC v2 2/2] Replace strlen() with ce_namelen() Thomas Gummerer
  1 sibling, 1 reply; 18+ messages in thread
From: Thomas Gummerer @ 2012-07-06 16:07 UTC (permalink / raw)
  To: git; +Cc: trast, gitster, mhagger, pclouds, t.gummerer

Strip the name length from the ce_flags field and move it
into its own ce_namelen field in struct cache_entry. This
will both give us a tiny bit of a performance enhancement
when working with long pathnames and is part of the
refactoring for the index-v5 file format.

Index-v5 won't store the name length in the on disk index
file, so storing it in the flags wouldn't make sense for
index-v5.

It also enhances readability, by making it more clear what
is a flag, and where the length is stored and make it clear
which functions use stages in comparisions and which only
use the length.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 builtin/apply.c        |    3 ++-
 builtin/blame.c        |    3 ++-
 builtin/checkout.c     |    3 ++-
 builtin/update-index.c |    9 +++++---
 cache.h                |   18 ++++++----------
 read-cache.c           |   54 +++++++++++++++++++++++++++++-------------------
 tree.c                 |    7 ++++---
 unpack-trees.c         |    3 ++-
 8 files changed, 57 insertions(+), 43 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index dda9ea0..10f83fc 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -3502,7 +3502,8 @@ static void add_index_file(const char *path, unsigned mode, void *buf, unsigned
 	ce = xcalloc(1, ce_size);
 	memcpy(ce->name, path, namelen);
 	ce->ce_mode = create_ce_mode(mode);
-	ce->ce_flags = namelen;
+	ce->ce_flags = 0;
+	ce->ce_namelen = namelen;
 	if (S_ISGITLINK(mode)) {
 		const char *s = buf;
 
diff --git a/builtin/blame.c b/builtin/blame.c
index 24d3dd5..e181368 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2153,7 +2153,8 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
 	ce = xcalloc(1, size);
 	hashcpy(ce->sha1, origin->blob_sha1);
 	memcpy(ce->name, path, len);
-	ce->ce_flags = create_ce_flags(len, 0);
+	ce->ce_flags = 0;
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 	add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 3ddda34..5c06444 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -73,7 +73,8 @@ static int update_some(const unsigned char *sha1, const char *base, int baselen,
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, base, baselen);
 	memcpy(ce->name + baselen, pathname, len - baselen);
-	ce->ce_flags = create_ce_flags(len, 0) | CE_UPDATE;
+	ce->ce_flags = CE_UPDATE;
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 	add_cache_entry(ce, ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
 	return 0;
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 5f038d6..911090f 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -95,7 +95,8 @@ static int add_one_path(struct cache_entry *old, const char *path, int len, stru
 	size = cache_entry_size(len);
 	ce = xcalloc(1, size);
 	memcpy(ce->name, path, len);
-	ce->ce_flags = len;
+	ce->ce_flags = 0;
+	ce->ce_namelen = len;
 	fill_stat_cache_info(ce, st);
 	ce->ce_mode = ce_mode_from_stat(old, st->st_mode);
 
@@ -235,7 +236,8 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1,
 
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, path, len);
-	ce->ce_flags = create_ce_flags(len, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 	if (assume_unchanged)
 		ce->ce_flags |= CE_VALID;
@@ -433,7 +435,8 @@ static struct cache_entry *read_one_ent(const char *which,
 
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, path, namelen);
-	ce->ce_flags = create_ce_flags(namelen, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = namelen;
 	ce->ce_mode = create_ce_mode(mode);
 	return ce;
 }
diff --git a/cache.h b/cache.h
index cc5048c..14160b7 100644
--- a/cache.h
+++ b/cache.h
@@ -128,6 +128,7 @@ struct cache_entry {
 	unsigned int ce_gid;
 	unsigned int ce_size;
 	unsigned int ce_flags;
+	unsigned int ce_namelen;
 	unsigned char sha1[20];
 	struct cache_entry *next;
 	struct cache_entry *dir_next;
@@ -198,21 +199,12 @@ static inline void copy_cache_entry(struct cache_entry *dst, struct cache_entry
 	dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state;
 }
 
-static inline unsigned create_ce_flags(size_t len, unsigned stage)
+static inline unsigned create_ce_flags(unsigned stage)
 {
-	if (len >= CE_NAMEMASK)
-		len = CE_NAMEMASK;
-	return (len | (stage << CE_STAGESHIFT));
-}
-
-static inline size_t ce_namelen(const struct cache_entry *ce)
-{
-	size_t len = ce->ce_flags & CE_NAMEMASK;
-	if (len < CE_NAMEMASK)
-		return len;
-	return strlen(ce->name + CE_NAMEMASK) + CE_NAMEMASK;
+	return (stage << CE_STAGESHIFT);
 }
 
+#define ce_namelen(ce) ((ce)->ce_namelen)
 #define ce_size(ce) cache_entry_size(ce_namelen(ce))
 #define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT)
 #define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE)
@@ -448,6 +440,7 @@ extern int discard_index(struct index_state *);
 extern int unmerged_index(const struct index_state *);
 extern int verify_path(const char *path);
 extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
+extern int index_name_stage_pos(const struct index_state *, const char *name, int stage, int namelen);
 extern int index_name_pos(const struct index_state *, const char *name, int namelen);
 #define ADD_CACHE_OK_TO_ADD 1		/* Ok to add */
 #define ADD_CACHE_OK_TO_REPLACE 2	/* Ok to replace file/directory */
@@ -857,6 +850,7 @@ extern int validate_headref(const char *ref);
 extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
 extern int df_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
 extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2);
+extern int cache_name_stage_compare(const char *name1, int stage1, int len1, const char *name2, int stage2, int len2);
 
 extern void *read_object_with_reference(const unsigned char *sha1,
 					const char *required_type,
diff --git a/read-cache.c b/read-cache.c
index ef355cc..ea75c89 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -54,8 +54,8 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 
 	new = xmalloc(cache_entry_size(namelen));
 	copy_cache_entry(new, old);
-	new->ce_flags &= ~(CE_STATE_MASK | CE_NAMEMASK);
-	new->ce_flags |= (namelen >= CE_NAMEMASK ? CE_NAMEMASK : namelen);
+	new->ce_flags &= ~CE_STATE_MASK;
+	new->ce_namelen = namelen;
 	memcpy(new->name, new_name, namelen + 1);
 
 	cache_tree_invalidate_path(istate->cache_tree, old->name);
@@ -395,10 +395,8 @@ int df_name_compare(const char *name1, int len1, int mode1,
 	return c1 - c2;
 }
 
-int cache_name_compare(const char *name1, int flags1, const char *name2, int flags2)
+int cache_name_stage_compare(const char *name1, int stage1, int len1, const char *name2, int stage2, int len2)
 {
-	int len1 = flags1 & CE_NAMEMASK;
-	int len2 = flags2 & CE_NAMEMASK;
 	int len = len1 < len2 ? len1 : len2;
 	int cmp;
 
@@ -410,18 +408,19 @@ int cache_name_compare(const char *name1, int flags1, const char *name2, int fla
 	if (len1 > len2)
 		return 1;
 
-	/* Compare stages  */
-	flags1 &= CE_STAGEMASK;
-	flags2 &= CE_STAGEMASK;
-
-	if (flags1 < flags2)
+	if (stage1 < stage2)
 		return -1;
-	if (flags1 > flags2)
+	if (stage1 > stage2)
 		return 1;
 	return 0;
 }
 
-int index_name_pos(const struct index_state *istate, const char *name, int namelen)
+int cache_name_compare(const char *name1, int len1, const char *name2, int len2)
+{
+	return cache_name_stage_compare(name1, 0, len1, name2, 0, len2);
+}
+
+int index_name_stage_pos(const struct index_state *istate, const char *name, int stage, int namelen)
 {
 	int first, last;
 
@@ -430,7 +429,7 @@ int index_name_pos(const struct index_state *istate, const char *name, int namel
 	while (last > first) {
 		int next = (last + first) >> 1;
 		struct cache_entry *ce = istate->cache[next];
-		int cmp = cache_name_compare(name, namelen, ce->name, ce->ce_flags);
+		int cmp = cache_name_stage_compare(name, stage, namelen, ce->name, ce_stage(ce), ce->ce_namelen);
 		if (!cmp)
 			return next;
 		if (cmp < 0) {
@@ -442,6 +441,11 @@ int index_name_pos(const struct index_state *istate, const char *name, int namel
 	return -first-1;
 }
 
+int index_name_pos(const struct index_state *istate, const char *name, int namelen)
+{
+	return index_name_stage_pos(istate, name, 0, namelen);
+}
+
 /* Remove entry, return true if there are more entries to go.. */
 int remove_index_entry_at(struct index_state *istate, int pos)
 {
@@ -581,7 +585,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st,
 	size = cache_entry_size(namelen);
 	ce = xcalloc(1, size);
 	memcpy(ce->name, path, namelen);
-	ce->ce_flags = namelen;
+	ce->ce_namelen = namelen;
 	if (!intent_only)
 		fill_stat_cache_info(ce, st);
 	else
@@ -683,7 +687,8 @@ struct cache_entry *make_cache_entry(unsigned int mode,
 
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, path, len);
-	ce->ce_flags = create_ce_flags(len, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 
 	if (refresh)
@@ -820,7 +825,7 @@ static int has_dir_name(struct index_state *istate,
 		}
 		len = slash - name;
 
-		pos = index_name_pos(istate, name, create_ce_flags(len, stage));
+		pos = index_name_stage_pos(istate, name, stage, len);
 		if (pos >= 0) {
 			/*
 			 * Found one, but not so fast.  This could
@@ -910,7 +915,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 	int new_only = option & ADD_CACHE_NEW_ONLY;
 
 	cache_tree_invalidate_path(istate->cache_tree, ce->name);
-	pos = index_name_pos(istate, ce->name, ce->ce_flags);
+	pos = index_name_stage_pos(istate, ce->name, ce_stage(ce), ce->ce_namelen);
 
 	/* existing match? Just replace it. */
 	if (pos >= 0) {
@@ -942,7 +947,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 		if (!ok_to_replace)
 			return error("'%s' appears as both a file and as a directory",
 				     ce->name);
-		pos = index_name_pos(istate, ce->name, ce->ce_flags);
+		pos = index_name_stage_pos(istate, ce->name, ce_stage(ce), ce->ce_namelen);
 		pos = -pos-1;
 	}
 	return pos + 1;
@@ -1319,7 +1324,8 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
 	ce->ce_uid   = ntoh_l(ondisk->uid);
 	ce->ce_gid   = ntoh_l(ondisk->gid);
 	ce->ce_size  = ntoh_l(ondisk->size);
-	ce->ce_flags = flags;
+	ce->ce_flags = flags & ~CE_NAMEMASK;
+	ce->ce_namelen = len;
 	hashcpy(ce->sha1, ondisk->sha1);
 	memcpy(ce->name, name, len);
 	ce->name[len] = '\0';
@@ -1743,7 +1749,7 @@ int write_index(struct index_state *istate, int newfd)
 {
 	git_SHA_CTX c;
 	struct cache_header hdr;
-	int i, err, removed, extended, hdr_version;
+	int i, err, removed, extended, hdr_version, len;
 	struct cache_entry **cache = istate->cache;
 	int entries = istate->cache_nr;
 	struct stat st;
@@ -1759,6 +1765,11 @@ int write_index(struct index_state *istate, int newfd)
 			extended++;
 			cache[i]->ce_flags |= CE_EXTENDED;
 		}
+		if (cache[i]->ce_namelen >= CE_NAMEMASK)
+			len = CE_NAMEMASK;
+		else
+			len = cache[i]->ce_namelen;
+		cache[i]->ce_flags |= len;
 	}
 
 	if (!istate->version)
@@ -1845,7 +1856,8 @@ int read_index_unmerged(struct index_state *istate)
 		size = cache_entry_size(len);
 		new_ce = xcalloc(1, size);
 		memcpy(new_ce->name, ce->name, len);
-		new_ce->ce_flags = create_ce_flags(len, 0) | CE_CONFLICTED;
+		new_ce->ce_flags = CE_CONFLICTED;
+		new_ce->ce_namelen = len;
 		new_ce->ce_mode = ce->ce_mode;
 		if (add_index_entry(istate, new_ce, 0))
 			return error("%s: cannot drop to stage #0",
diff --git a/tree.c b/tree.c
index 676e9f7..7ab02d8 100644
--- a/tree.c
+++ b/tree.c
@@ -22,7 +22,8 @@ static int read_one_entry_opt(const unsigned char *sha1, const char *base, int b
 	ce = xcalloc(1, size);
 
 	ce->ce_mode = create_ce_mode(mode);
-	ce->ce_flags = create_ce_flags(baselen + len, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = baselen + len;
 	memcpy(ce->name, base, baselen);
 	memcpy(ce->name + baselen, pathname, len+1);
 	hashcpy(ce->sha1, sha1);
@@ -133,8 +134,8 @@ static int cmp_cache_name_compare(const void *a_, const void *b_)
 
 	ce1 = *((const struct cache_entry **)a_);
 	ce2 = *((const struct cache_entry **)b_);
-	return cache_name_compare(ce1->name, ce1->ce_flags,
-				  ce2->name, ce2->ce_flags);
+	return cache_name_stage_compare(ce1->name, ce_stage(ce1), ce1->ce_namelen,
+				  ce2->name, ce_stage(ce2), ce2->ce_namelen);
 }
 
 int read_tree(struct tree *tree, int stage, struct pathspec *match)
diff --git a/unpack-trees.c b/unpack-trees.c
index ad40109..9981dd3 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -539,7 +539,8 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, con
 	struct cache_entry *ce = xcalloc(1, cache_entry_size(len));
 
 	ce->ce_mode = create_ce_mode(n->mode);
-	ce->ce_flags = create_ce_flags(len, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = len;
 	hashcpy(ce->sha1, n->sha1);
 	make_traverse_path(ce->name, info, n);
 
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH/RFC v2 2/2] Replace strlen() with ce_namelen()
  2012-07-06 16:07   ` Introduction of " Thomas Gummerer
  2012-07-06 16:07     ` [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into " Thomas Gummerer
@ 2012-07-06 16:07     ` Thomas Gummerer
  2012-07-09  2:49       ` Junio C Hamano
  1 sibling, 1 reply; 18+ messages in thread
From: Thomas Gummerer @ 2012-07-06 16:07 UTC (permalink / raw)
  To: git; +Cc: trast, gitster, mhagger, pclouds, t.gummerer

Replace strlen(ce->name) with ce_namelen() in a couple
of places which gives us some additional bits of
performance.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 read-cache.c   |    4 ++--
 unpack-trees.c |    2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index ea75c89..a77877a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1124,7 +1124,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char **p
 			continue;
 
 		if (pathspec &&
-		    !match_pathspec(pathspec, ce->name, strlen(ce->name), 0, seen))
+		    !match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, seen))
 			filtered = 1;
 
 		if (ce_stage(ce)) {
@@ -1852,7 +1852,7 @@ int read_index_unmerged(struct index_state *istate)
 		if (!ce_stage(ce))
 			continue;
 		unmerged = 1;
-		len = strlen(ce->name);
+		len = ce_namelen(ce);
 		size = cache_entry_size(len);
 		new_ce = xcalloc(1, size);
 		memcpy(new_ce->name, ce->name, len);
diff --git a/unpack-trees.c b/unpack-trees.c
index 9981dd3..abd0988 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1289,7 +1289,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce,
 	 * First let's make sure we do not have a local modification
 	 * in that directory.
 	 */
-	namelen = strlen(ce->name);
+	namelen = ce_namelen(ce);
 	for (i = locate_in_src_index(ce, o);
 	     i < o->src_index->cache_nr;
 	     i++) {
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH/RFC v2 2/2] Replace strlen() with ce_namelen()
  2012-07-06 16:07     ` [PATCH/RFC v2 2/2] Replace strlen() with ce_namelen() Thomas Gummerer
@ 2012-07-09  2:49       ` Junio C Hamano
  0 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2012-07-09  2:49 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pclouds

Thomas Gummerer <t.gummerer@gmail.com> writes:

> Replace strlen(ce->name) with ce_namelen() in a couple
> of places which gives us some additional bits of
> performance.
>
> Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>

Very sensible, with or without the previous patch.

I am kind of surprised that we are very good and have only these
three places that had these unnecessary pessimization.

> ---
>  read-cache.c   |    4 ++--
>  unpack-trees.c |    2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/read-cache.c b/read-cache.c
> index ea75c89..a77877a 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -1124,7 +1124,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char **p
>  			continue;
>  
>  		if (pathspec &&
> -		    !match_pathspec(pathspec, ce->name, strlen(ce->name), 0, seen))
> +		    !match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, seen))
>  			filtered = 1;
>  
>  		if (ce_stage(ce)) {
> @@ -1852,7 +1852,7 @@ int read_index_unmerged(struct index_state *istate)
>  		if (!ce_stage(ce))
>  			continue;
>  		unmerged = 1;
> -		len = strlen(ce->name);
> +		len = ce_namelen(ce);
>  		size = cache_entry_size(len);
>  		new_ce = xcalloc(1, size);
>  		memcpy(new_ce->name, ce->name, len);
> diff --git a/unpack-trees.c b/unpack-trees.c
> index 9981dd3..abd0988 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -1289,7 +1289,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce,
>  	 * First let's make sure we do not have a local modification
>  	 * in that directory.
>  	 */
> -	namelen = strlen(ce->name);
> +	namelen = ce_namelen(ce);
>  	for (i = locate_in_src_index(ce, o);
>  	     i < o->src_index->cache_nr;
>  	     i++) {

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into a ce_namelen field
  2012-07-06 16:07     ` [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into " Thomas Gummerer
@ 2012-07-09  4:59       ` Junio C Hamano
  2012-07-11  9:22         ` [PATCH v3 0/3] Introduction of " Thomas Gummerer
  2012-07-11 16:29         ` [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into a ce_namelen field Junio C Hamano
  0 siblings, 2 replies; 18+ messages in thread
From: Junio C Hamano @ 2012-07-09  4:59 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pclouds

Thomas Gummerer <t.gummerer@gmail.com> writes:

> Strip the name length from the ce_flags field and move it
> into its own ce_namelen field in struct cache_entry. This
> will both give us a tiny bit of a performance enhancement
> when working with long pathnames and is part of the
> refactoring for the index-v5 file format.

Careful.

I do not mind moving name length out of low bits of ce_flags in
principle, but if you claim this helps performance, you need to
substanticate it.

FYI, the current layout was designed for performance to take
advantage of the fact that most paths are short (among 39k paths in
the kernel tree, the longest seems to be 80 bytes long) and using
strlen() is rarely needed (the code even skips the first 4k bytes
when it has to use strlen()).  It of course also shaves a few
hundred kilobytes of memory necessary to hold the length separately
in cache entries).

> Index-v5 won't store the name length in the on disk index
> file, so storing it in the flags wouldn't make sense for
> index-v5.

I think this split could make sense even without a new on-disk
format.

On the other hand, even if the result of this patch were to prove
undesirable, it is still possible for the read_index code for v5
format to convert from its ondisk_cache_entry (which does not have
length anywhere) to the canonical cache_entry struct (which stores
length for common short path names in lower bits of flags field),
and write_index code could do the reverse.

Because of the above two points, what index-v5 has or does not have
is mostly immaterial when judging this patch, unless such a
conversion has very high cost.

> It also enhances readability, by making it more clear what
> is a flag, and where the length is stored and make it clear
> which functions use stages in comparisions and which only
> use the length.

That can be true, but if we were to go this route, the patch should
be able to make CE_NAMEMASK totally private to the codepath that
reads and writes v2/v3/v4 format and nowhere else.  The ce_flags
field in "struct cache_entry" that is visible to everybody from
cache.h shouldn't need the padding of 12-low bits that is not used
(I think CE_STAGESHIFT could even become 0, even though I do not see
an immediate need for such a change).

Shouldn't "struct ondisk_cache_entry" in read-cache.c the only place
that need to know about this old layout, no?

> diff --git a/builtin/apply.c b/builtin/apply.c
> index dda9ea0..10f83fc 100644
> --- a/builtin/apply.c
> +++ b/builtin/apply.c
> @@ -3502,7 +3502,8 @@ static void add_index_file(const char *path, unsigned mode, void *buf, unsigned
>  	ce = xcalloc(1, ce_size);
>  	memcpy(ce->name, path, namelen);
>  	ce->ce_mode = create_ce_mode(mode);
> -	ce->ce_flags = namelen;

I think this should have been create_ce_flags(namelen, 0)
originally, so I would prefer the new code to spell it as
create_ce_flags(0).

> +	ce->ce_flags = 0;
> +	ce->ce_namelen = namelen;

> diff --git a/builtin/blame.c b/builtin/blame.c
> index 24d3dd5..e181368 100644
> --- a/builtin/blame.c
> +++ b/builtin/blame.c
> @@ -2153,7 +2153,8 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
>  	ce = xcalloc(1, size);
>  	hashcpy(ce->sha1, origin->blob_sha1);
>  	memcpy(ce->name, path, len);
> -	ce->ce_flags = create_ce_flags(len, 0);
> +	ce->ce_flags = 0;

Likewise.  I think it is more in line with your "making it more
clear" if you said create_ce_flags(0) here.  I won't repeat this for
the remainder of the patch where a bare number is stored without
using create_ce_flags(stage).

> +#define ce_namelen(ce) ((ce)->ce_namelen)

Good.

> @@ -448,6 +440,7 @@ extern int discard_index(struct index_state *);
>  extern int unmerged_index(const struct index_state *);
>  extern int verify_path(const char *path);
>  extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
> +extern int index_name_stage_pos(const struct index_state *, const char *name, int stage, int namelen);

The name string and its length form a pair; keep them next to each
other (i.e. swap stage and namelen).

> @@ -857,6 +850,7 @@ extern int validate_headref(const char *ref);
>  extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
>  extern int df_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
>  extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2);
> +extern int cache_name_stage_compare(const char *name1, int stage1, int len1, const char *name2, int stage2, int len2);

Likewise.

> diff --git a/read-cache.c b/read-cache.c
> index ef355cc..ea75c89 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -395,10 +395,8 @@ int df_name_compare(const char *name1, int len1, int mode1,
>  	return c1 - c2;
>  }
>  
> -int cache_name_compare(const char *name1, int flags1, const char *name2, int flags2)
> +int cache_name_stage_compare(const char *name1, int stage1, int len1, const char *name2, int stage2, int len2)
>  {
> -	int len1 = flags1 & CE_NAMEMASK;
> -	int len2 = flags2 & CE_NAMEMASK;
>  	int len = len1 < len2 ? len1 : len2;
>  	int cmp;

Isn't this a _BUGFIX_?  It appears to me that the original code
would only compare the first 4k bytes and ignore the rest, if two
cache entries, both with overlong names, are compared.  Care to come
up with a test case to demonstrate the breakage and a bugfix without
the remainder of this patch, to be applied to 'master' and older
maintenance releases?  Perpahs something along the lines of this:

 read-cache.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 1df6adf..d226e5e 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -397,11 +397,15 @@ int df_name_compare(const char *name1, int len1, int mode1,
 
 int cache_name_compare(const char *name1, int flags1, const char *name2, int flags2)
 {
-	int len1 = flags1 & CE_NAMEMASK;
-	int len2 = flags2 & CE_NAMEMASK;
-	int len = len1 < len2 ? len1 : len2;
-	int cmp;
-
+	int len1, len2, len, cmp;
+
+	len1 = flags1 & CE_NAMEMASK;
+	if (len1 >= CE_NAMEMASK)
+		len1 = strlen(name1)
+	len2 = flags2 & CE_NAMEMASK;
+	if (len2 >= CE_NAMEMASK)
+		len2 = strlen(name2);
+	len = len1 < len2 ? len1 : len2;
 	cmp = memcmp(name1, name2, len);
 	if (cmp)
 		return cmp;

Pros and cons of adding a new ce_namelen field can be discussed
separately and built on top of such a bugfix.

> @@ -1319,7 +1324,8 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
>  	ce->ce_uid   = ntoh_l(ondisk->uid);
>  	ce->ce_gid   = ntoh_l(ondisk->gid);
>  	ce->ce_size  = ntoh_l(ondisk->size);
> -	ce->ce_flags = flags;
> +	ce->ce_flags = flags & ~CE_NAMEMASK;
> +	ce->ce_namelen = len;

OK, so this part wants to make (ce->ce_flags & CE_NAMEMASK) MBZ
(must be zero), which is a step in the right direction (provided
that the creation of ce_namelen field is the right thing to do, that
is).

> @@ -1743,7 +1749,7 @@ int write_index(struct index_state *istate, int newfd)
>  {
>  	git_SHA_CTX c;
>  	struct cache_header hdr;
> -	int i, err, removed, extended, hdr_version;
> +	int i, err, removed, extended, hdr_version, len;
>  	struct cache_entry **cache = istate->cache;
>  	int entries = istate->cache_nr;
>  	struct stat st;
> @@ -1759,6 +1765,11 @@ int write_index(struct index_state *istate, int newfd)
>  			extended++;
>  			cache[i]->ce_flags |= CE_EXTENDED;
>  		}
> +		if (cache[i]->ce_namelen >= CE_NAMEMASK)
> +			len = CE_NAMEMASK;
> +		else
> +			len = cache[i]->ce_namelen;
> +		cache[i]->ce_flags |= len;
>  	}

This is extremely dubious.  Didn't the earlier hunk at 1319 declare
that low bits of ce_flags is MBZ?  I would understand it if
something like this is done to ondisk in ce_write_entry(), but I do
not think it is consistent to contaminate the low bits of ce_flags
here while your cache_entry_from_ondisk() clears them.

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 0/3] Introduction of a ce_namelen field
  2012-07-09  4:59       ` Junio C Hamano
@ 2012-07-11  9:22         ` Thomas Gummerer
  2012-07-11  9:22           ` [PATCH v3 1/3] read-cache.c: Handle long filenames correctly Thomas Gummerer
                             ` (2 more replies)
  2012-07-11 16:29         ` [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into a ce_namelen field Junio C Hamano
  1 sibling, 3 replies; 18+ messages in thread
From: Thomas Gummerer @ 2012-07-11  9:22 UTC (permalink / raw)
  To: git; +Cc: gitster, trast, mhagger, pclouds, t.gummerer

Thanks to Junio for reviewing v2 of this patch series.

The speedup coming from introducing the field exists, but is minimal,
here are the times for 1000 runs of git ls-files on the Webkit index,
first without the ce_namelen field, and then with the ce_namelen field.
$ time ./test.sh

real 4m40.895s
user 3m39.642s
sys 0m40.159s

$ time ./test.sh

real 4m34.872s
user 3m37.176s
sys 0m40.401s

1/3 is a bugfix for very long files which share at least 4096
characters in their filename. It also includes a test, tough I'm
not sure I have put it in the right place.

The other two patches are rerolls from v2 of this series.

[PATCH v3 1/3] read-cache.c: Handle long filenames correctly
[PATCH v3 2/3] Strip namelen out of ce_flags into a ce_namelen field
[PATCH v3 3/3] Replace strlen() with ce_namelen()

builtin/apply.c           |    3 ++-
builtin/blame.c           |    3 ++-
builtin/checkout.c        |    3 ++-
builtin/update-index.c    |    9 ++++++---
cache.h                   |   19 ++++++-------------
read-cache.c              |   62 +++++++++++++++++++++++++++++++++++++++-----------------------
t/t0007-long-filenames.sh |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tree.c                    |    7 ++++---
unpack-trees.c            |    5 +++--
9 files changed, 126 insertions(+), 47 deletions(-)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3 1/3] read-cache.c: Handle long filenames correctly
  2012-07-11  9:22         ` [PATCH v3 0/3] Introduction of " Thomas Gummerer
@ 2012-07-11  9:22           ` Thomas Gummerer
  2012-07-11 11:34             ` Nguyen Thai Ngoc Duy
  2012-07-11 16:33             ` Junio C Hamano
  2012-07-11  9:22           ` [PATCH v3 2/3] Strip namelen out of ce_flags into a ce_namelen field Thomas Gummerer
  2012-07-11  9:22           ` [PATCH v3 3/3] Replace strlen() with ce_namelen() Thomas Gummerer
  2 siblings, 2 replies; 18+ messages in thread
From: Thomas Gummerer @ 2012-07-11  9:22 UTC (permalink / raw)
  To: git; +Cc: gitster, trast, mhagger, pclouds, t.gummerer

Make git handle long file/path names (> 4096 characters) correctly.

There is a bug in the current version, which causes very long
file/pathnames to be handled incorrectly, or not even added to
the index, if they share the first 4096 characters.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 read-cache.c              |    9 +++++--
 t/t0007-long-filenames.sh |   62 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 2 deletions(-)
 create mode 100755 t/t0007-long-filenames.sh

diff --git a/read-cache.c b/read-cache.c
index ef355cc..4c6bf5f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -399,8 +399,13 @@ int cache_name_compare(const char *name1, int flags1, const char *name2, int fla
 {
 	int len1 = flags1 & CE_NAMEMASK;
 	int len2 = flags2 & CE_NAMEMASK;
-	int len = len1 < len2 ? len1 : len2;
-	int cmp;
+	int len, cmp;
+
+	if (len1 >= CE_NAMEMASK)
+		len1 = strlen(name1 + CE_NAMEMASK) + CE_NAMEMASK;
+	if (len2 >= CE_NAMEMASK)
+		len2 = strlen(name2 + CE_NAMEMASK) + CE_NAMEMASK;
+	len = len1 < len2 ? len1 : len2;
 
 	cmp = memcmp(name1, name2, len);
 	if (cmp)
diff --git a/t/t0007-long-filenames.sh b/t/t0007-long-filenames.sh
new file mode 100755
index 0000000..2cf6035
--- /dev/null
+++ b/t/t0007-long-filenames.sh
@@ -0,0 +1,62 @@
+
+#!/bin/sh
+
+test_description='very long file names in the index handled correctly'
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+	git init &&
+
+	a=a && # 1
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 16
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 256
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 4096
+	a=${a}q &&
+
+	>path1 &&
+	git update-index --add path1 &&
+	(
+		git ls-files -s path1 |
+		sed -e "s/	.*/	/" |
+		tr -d "\012"
+		echo "$a"
+	) | git update-index --index-info &&
+
+	a=a && # 1
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 16
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 256
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 4096
+	a=${a}e &&
+
+	>path2 &&
+	git update-index --add path2 &&
+	(
+		git ls-files -s path2 |
+		sed -e "s/	.*/	/" |
+		tr -d "\012"
+		echo "$a"
+	) | git update-index --index-info &&
+	len=$(git ls-files "a*" | wc -c) &&
+	test $len = 8196
+'
+
+test_expect_success 'validate git ls-files output for a known tree' '
+	git ls-files "a*" >current &&
+	a=a && # 1
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 16
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 256
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 4096
+	a=${a}e &&
+	echo $a >expected &&
+
+	a=a && # 1
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 16
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 256
+	a=$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a$a && # 4096
+	a=${a}q &&
+	echo $a >>expected &&
+
+	test_cmp expected current
+'
+
+test_done
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 2/3] Strip namelen out of ce_flags into a ce_namelen field
  2012-07-11  9:22         ` [PATCH v3 0/3] Introduction of " Thomas Gummerer
  2012-07-11  9:22           ` [PATCH v3 1/3] read-cache.c: Handle long filenames correctly Thomas Gummerer
@ 2012-07-11  9:22           ` Thomas Gummerer
  2012-07-11  9:22           ` [PATCH v3 3/3] Replace strlen() with ce_namelen() Thomas Gummerer
  2 siblings, 0 replies; 18+ messages in thread
From: Thomas Gummerer @ 2012-07-11  9:22 UTC (permalink / raw)
  To: git; +Cc: gitster, trast, mhagger, pclouds, t.gummerer

Strip the name length from the ce_flags field and move it
into its own ce_namelen field in struct cache_entry. This
will both give us a tiny bit of a performance enhancement
when working with long pathnames and is a refactoring for
more readability of the code.

It enhances readability, by making it more clear what
is a flag, and where the length is stored and make it clear
which functions use stages in comparisions and which only
use the length.

It also makes CE_NAMEMASK private, so that users don't
mistakenly write the name length in the flags.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 builtin/apply.c        |    3 ++-
 builtin/blame.c        |    3 ++-
 builtin/checkout.c     |    3 ++-
 builtin/update-index.c |    9 ++++---
 cache.h                |   19 +++++---------
 read-cache.c           |   67 ++++++++++++++++++++++++++++--------------------
 tree.c                 |    7 ++---
 unpack-trees.c         |    3 ++-
 8 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index dda9ea0..347633c 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -3502,7 +3502,8 @@ static void add_index_file(const char *path, unsigned mode, void *buf, unsigned
 	ce = xcalloc(1, ce_size);
 	memcpy(ce->name, path, namelen);
 	ce->ce_mode = create_ce_mode(mode);
-	ce->ce_flags = namelen;
+	ce->ce_flags = create_ce_flags(0);
+	ce->ce_namelen = namelen;
 	if (S_ISGITLINK(mode)) {
 		const char *s = buf;
 
diff --git a/builtin/blame.c b/builtin/blame.c
index 24d3dd5..a28004a 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2153,7 +2153,8 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
 	ce = xcalloc(1, size);
 	hashcpy(ce->sha1, origin->blob_sha1);
 	memcpy(ce->name, path, len);
-	ce->ce_flags = create_ce_flags(len, 0);
+	ce->ce_flags = create_ce_flags(0);
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 	add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 3ddda34..2dfa45b 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -73,7 +73,8 @@ static int update_some(const unsigned char *sha1, const char *base, int baselen,
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, base, baselen);
 	memcpy(ce->name + baselen, pathname, len - baselen);
-	ce->ce_flags = create_ce_flags(len, 0) | CE_UPDATE;
+	ce->ce_flags = create_ce_flags(0) | CE_UPDATE;
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 	add_cache_entry(ce, ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
 	return 0;
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 5f038d6..e747def 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -95,7 +95,8 @@ static int add_one_path(struct cache_entry *old, const char *path, int len, stru
 	size = cache_entry_size(len);
 	ce = xcalloc(1, size);
 	memcpy(ce->name, path, len);
-	ce->ce_flags = len;
+	ce->ce_flags = create_ce_flags(0);
+	ce->ce_namelen = len;
 	fill_stat_cache_info(ce, st);
 	ce->ce_mode = ce_mode_from_stat(old, st->st_mode);
 
@@ -235,7 +236,8 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1,
 
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, path, len);
-	ce->ce_flags = create_ce_flags(len, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 	if (assume_unchanged)
 		ce->ce_flags |= CE_VALID;
@@ -433,7 +435,8 @@ static struct cache_entry *read_one_ent(const char *which,
 
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, path, namelen);
-	ce->ce_flags = create_ce_flags(namelen, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = namelen;
 	ce->ce_mode = create_ce_mode(mode);
 	return ce;
 }
diff --git a/cache.h b/cache.h
index cc5048c..22d9484 100644
--- a/cache.h
+++ b/cache.h
@@ -128,13 +128,13 @@ struct cache_entry {
 	unsigned int ce_gid;
 	unsigned int ce_size;
 	unsigned int ce_flags;
+	unsigned int ce_namelen;
 	unsigned char sha1[20];
 	struct cache_entry *next;
 	struct cache_entry *dir_next;
 	char name[FLEX_ARRAY]; /* more */
 };
 
-#define CE_NAMEMASK  (0x0fff)
 #define CE_STAGEMASK (0x3000)
 #define CE_EXTENDED  (0x4000)
 #define CE_VALID     (0x8000)
@@ -198,21 +198,12 @@ static inline void copy_cache_entry(struct cache_entry *dst, struct cache_entry
 	dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state;
 }
 
-static inline unsigned create_ce_flags(size_t len, unsigned stage)
+static inline unsigned create_ce_flags(unsigned stage)
 {
-	if (len >= CE_NAMEMASK)
-		len = CE_NAMEMASK;
-	return (len | (stage << CE_STAGESHIFT));
-}
-
-static inline size_t ce_namelen(const struct cache_entry *ce)
-{
-	size_t len = ce->ce_flags & CE_NAMEMASK;
-	if (len < CE_NAMEMASK)
-		return len;
-	return strlen(ce->name + CE_NAMEMASK) + CE_NAMEMASK;
+	return (stage << CE_STAGESHIFT);
 }
 
+#define ce_namelen(ce) ((ce)->ce_namelen)
 #define ce_size(ce) cache_entry_size(ce_namelen(ce))
 #define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT)
 #define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE)
@@ -448,6 +439,7 @@ extern int discard_index(struct index_state *);
 extern int unmerged_index(const struct index_state *);
 extern int verify_path(const char *path);
 extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
+extern int index_name_stage_pos(const struct index_state *, const char *name, int namelen, int stage);
 extern int index_name_pos(const struct index_state *, const char *name, int namelen);
 #define ADD_CACHE_OK_TO_ADD 1		/* Ok to add */
 #define ADD_CACHE_OK_TO_REPLACE 2	/* Ok to replace file/directory */
@@ -857,6 +849,7 @@ extern int validate_headref(const char *ref);
 extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
 extern int df_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
 extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2);
+extern int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2);
 
 extern void *read_object_with_reference(const unsigned char *sha1,
 					const char *required_type,
diff --git a/read-cache.c b/read-cache.c
index 4c6bf5f..ac13bca 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -17,6 +17,10 @@
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
 
+/* Mask for the name length in ce_flags in the on-disk index */
+
+#define CE_NAMEMASK  (0x0fff)
+
 /* Index extensions.
  *
  * The first letter should be 'A'..'Z' for extensions that are not
@@ -54,8 +58,8 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 
 	new = xmalloc(cache_entry_size(namelen));
 	copy_cache_entry(new, old);
-	new->ce_flags &= ~(CE_STATE_MASK | CE_NAMEMASK);
-	new->ce_flags |= (namelen >= CE_NAMEMASK ? CE_NAMEMASK : namelen);
+	new->ce_flags &= ~CE_STATE_MASK;
+	new->ce_namelen = namelen;
 	memcpy(new->name, new_name, namelen + 1);
 
 	cache_tree_invalidate_path(istate->cache_tree, old->name);
@@ -395,17 +399,10 @@ int df_name_compare(const char *name1, int len1, int mode1,
 	return c1 - c2;
 }
 
-int cache_name_compare(const char *name1, int flags1, const char *name2, int flags2)
+int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2)
 {
-	int len1 = flags1 & CE_NAMEMASK;
-	int len2 = flags2 & CE_NAMEMASK;
-	int len, cmp;
-
-	if (len1 >= CE_NAMEMASK)
-		len1 = strlen(name1 + CE_NAMEMASK) + CE_NAMEMASK;
-	if (len2 >= CE_NAMEMASK)
-		len2 = strlen(name2 + CE_NAMEMASK) + CE_NAMEMASK;
-	len = len1 < len2 ? len1 : len2;
+	int len = len1 < len2 ? len1 : len2;
+	int cmp;
 
 	cmp = memcmp(name1, name2, len);
 	if (cmp)
@@ -415,18 +412,19 @@ int cache_name_compare(const char *name1, int flags1, const char *name2, int fla
 	if (len1 > len2)
 		return 1;
 
-	/* Compare stages  */
-	flags1 &= CE_STAGEMASK;
-	flags2 &= CE_STAGEMASK;
-
-	if (flags1 < flags2)
+	if (stage1 < stage2)
 		return -1;
-	if (flags1 > flags2)
+	if (stage1 > stage2)
 		return 1;
 	return 0;
 }
 
-int index_name_pos(const struct index_state *istate, const char *name, int namelen)
+int cache_name_compare(const char *name1, int len1, const char *name2, int len2)
+{
+	return cache_name_stage_compare(name1, len1, 0, name2, len2, 0);
+}
+
+int index_name_stage_pos(const struct index_state *istate, const char *name, int namelen, int stage)
 {
 	int first, last;
 
@@ -435,7 +433,7 @@ int index_name_pos(const struct index_state *istate, const char *name, int namel
 	while (last > first) {
 		int next = (last + first) >> 1;
 		struct cache_entry *ce = istate->cache[next];
-		int cmp = cache_name_compare(name, namelen, ce->name, ce->ce_flags);
+		int cmp = cache_name_stage_compare(name, namelen, stage, ce->name, ce_namelen(ce), ce_stage(ce));
 		if (!cmp)
 			return next;
 		if (cmp < 0) {
@@ -447,6 +445,11 @@ int index_name_pos(const struct index_state *istate, const char *name, int namel
 	return -first-1;
 }
 
+int index_name_pos(const struct index_state *istate, const char *name, int namelen)
+{
+	return index_name_stage_pos(istate, name, namelen, 0);
+}
+
 /* Remove entry, return true if there are more entries to go.. */
 int remove_index_entry_at(struct index_state *istate, int pos)
 {
@@ -586,7 +589,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st,
 	size = cache_entry_size(namelen);
 	ce = xcalloc(1, size);
 	memcpy(ce->name, path, namelen);
-	ce->ce_flags = namelen;
+	ce->ce_namelen = namelen;
 	if (!intent_only)
 		fill_stat_cache_info(ce, st);
 	else
@@ -688,7 +691,8 @@ struct cache_entry *make_cache_entry(unsigned int mode,
 
 	hashcpy(ce->sha1, sha1);
 	memcpy(ce->name, path, len);
-	ce->ce_flags = create_ce_flags(len, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = len;
 	ce->ce_mode = create_ce_mode(mode);
 
 	if (refresh)
@@ -825,7 +829,7 @@ static int has_dir_name(struct index_state *istate,
 		}
 		len = slash - name;
 
-		pos = index_name_pos(istate, name, create_ce_flags(len, stage));
+		pos = index_name_stage_pos(istate, name, len, stage);
 		if (pos >= 0) {
 			/*
 			 * Found one, but not so fast.  This could
@@ -915,7 +919,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 	int new_only = option & ADD_CACHE_NEW_ONLY;
 
 	cache_tree_invalidate_path(istate->cache_tree, ce->name);
-	pos = index_name_pos(istate, ce->name, ce->ce_flags);
+	pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
 
 	/* existing match? Just replace it. */
 	if (pos >= 0) {
@@ -947,7 +951,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 		if (!ok_to_replace)
 			return error("'%s' appears as both a file and as a directory",
 				     ce->name);
-		pos = index_name_pos(istate, ce->name, ce->ce_flags);
+		pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
 		pos = -pos-1;
 	}
 	return pos + 1;
@@ -1324,7 +1328,8 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
 	ce->ce_uid   = ntoh_l(ondisk->uid);
 	ce->ce_gid   = ntoh_l(ondisk->gid);
 	ce->ce_size  = ntoh_l(ondisk->size);
-	ce->ce_flags = flags;
+	ce->ce_flags = flags & ~CE_NAMEMASK;
+	ce->ce_namelen = len;
 	hashcpy(ce->sha1, ondisk->sha1);
 	memcpy(ce->name, name, len);
 	ce->name[len] = '\0';
@@ -1651,6 +1656,8 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
 static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 				       struct cache_entry *ce)
 {
+	short flags;
+
 	ondisk->ctime.sec = htonl(ce->ce_ctime.sec);
 	ondisk->mtime.sec = htonl(ce->ce_mtime.sec);
 	ondisk->ctime.nsec = htonl(ce->ce_ctime.nsec);
@@ -1662,7 +1669,10 @@ static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 	ondisk->gid  = htonl(ce->ce_gid);
 	ondisk->size = htonl(ce->ce_size);
 	hashcpy(ondisk->sha1, ce->sha1);
-	ondisk->flags = htons(ce->ce_flags);
+
+	flags = ce->ce_flags;
+	flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce));
+	ondisk->flags = htons(flags);
 	if (ce->ce_flags & CE_EXTENDED) {
 		struct ondisk_cache_entry_extended *ondisk2;
 		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
@@ -1850,7 +1860,8 @@ int read_index_unmerged(struct index_state *istate)
 		size = cache_entry_size(len);
 		new_ce = xcalloc(1, size);
 		memcpy(new_ce->name, ce->name, len);
-		new_ce->ce_flags = create_ce_flags(len, 0) | CE_CONFLICTED;
+		new_ce->ce_flags = create_ce_flags(0) | CE_CONFLICTED;
+		new_ce->ce_namelen = len;
 		new_ce->ce_mode = ce->ce_mode;
 		if (add_index_entry(istate, new_ce, 0))
 			return error("%s: cannot drop to stage #0",
diff --git a/tree.c b/tree.c
index 676e9f7..62fed63 100644
--- a/tree.c
+++ b/tree.c
@@ -22,7 +22,8 @@ static int read_one_entry_opt(const unsigned char *sha1, const char *base, int b
 	ce = xcalloc(1, size);
 
 	ce->ce_mode = create_ce_mode(mode);
-	ce->ce_flags = create_ce_flags(baselen + len, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = baselen + len;
 	memcpy(ce->name, base, baselen);
 	memcpy(ce->name + baselen, pathname, len+1);
 	hashcpy(ce->sha1, sha1);
@@ -133,8 +134,8 @@ static int cmp_cache_name_compare(const void *a_, const void *b_)
 
 	ce1 = *((const struct cache_entry **)a_);
 	ce2 = *((const struct cache_entry **)b_);
-	return cache_name_compare(ce1->name, ce1->ce_flags,
-				  ce2->name, ce2->ce_flags);
+	return cache_name_stage_compare(ce1->name, ce1->ce_namelen, ce_stage(ce1),
+				  ce2->name, ce2->ce_namelen, ce_stage(ce2));
 }
 
 int read_tree(struct tree *tree, int stage, struct pathspec *match)
diff --git a/unpack-trees.c b/unpack-trees.c
index ad40109..9981dd3 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -539,7 +539,8 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, con
 	struct cache_entry *ce = xcalloc(1, cache_entry_size(len));
 
 	ce->ce_mode = create_ce_mode(n->mode);
-	ce->ce_flags = create_ce_flags(len, stage);
+	ce->ce_flags = create_ce_flags(stage);
+	ce->ce_namelen = len;
 	hashcpy(ce->sha1, n->sha1);
 	make_traverse_path(ce->name, info, n);
 
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 3/3] Replace strlen() with ce_namelen()
  2012-07-11  9:22         ` [PATCH v3 0/3] Introduction of " Thomas Gummerer
  2012-07-11  9:22           ` [PATCH v3 1/3] read-cache.c: Handle long filenames correctly Thomas Gummerer
  2012-07-11  9:22           ` [PATCH v3 2/3] Strip namelen out of ce_flags into a ce_namelen field Thomas Gummerer
@ 2012-07-11  9:22           ` Thomas Gummerer
  2012-07-11 16:35             ` Junio C Hamano
  2 siblings, 1 reply; 18+ messages in thread
From: Thomas Gummerer @ 2012-07-11  9:22 UTC (permalink / raw)
  To: git; +Cc: gitster, trast, mhagger, pclouds, t.gummerer

Replace strlen(ce->name) with ce_namelen() in a couple
of places which gives us some additional bits of
performance.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 read-cache.c   |    4 ++--
 unpack-trees.c |    2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index ac13bca..2f8159f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1128,7 +1128,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char **p
 			continue;
 
 		if (pathspec &&
-		    !match_pathspec(pathspec, ce->name, strlen(ce->name), 0, seen))
+		    !match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, seen))
 			filtered = 1;
 
 		if (ce_stage(ce)) {
@@ -1856,7 +1856,7 @@ int read_index_unmerged(struct index_state *istate)
 		if (!ce_stage(ce))
 			continue;
 		unmerged = 1;
-		len = strlen(ce->name);
+		len = ce_namelen(ce);
 		size = cache_entry_size(len);
 		new_ce = xcalloc(1, size);
 		memcpy(new_ce->name, ce->name, len);
diff --git a/unpack-trees.c b/unpack-trees.c
index 9981dd3..abd0988 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1289,7 +1289,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce,
 	 * First let's make sure we do not have a local modification
 	 * in that directory.
 	 */
-	namelen = strlen(ce->name);
+	namelen = ce_namelen(ce);
 	for (i = locate_in_src_index(ce, o);
 	     i < o->src_index->cache_nr;
 	     i++) {
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 1/3] read-cache.c: Handle long filenames correctly
  2012-07-11  9:22           ` [PATCH v3 1/3] read-cache.c: Handle long filenames correctly Thomas Gummerer
@ 2012-07-11 11:34             ` Nguyen Thai Ngoc Duy
  2012-07-11 16:33             ` Junio C Hamano
  1 sibling, 0 replies; 18+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-07-11 11:34 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, gitster, trast, mhagger

On Wed, Jul 11, 2012 at 4:22 PM, Thomas Gummerer <t.gummerer@gmail.com> wrote:
> Make git handle long file/path names (> 4096 characters) correctly.
>
> There is a bug in the current version, which causes very long
> file/pathnames to be handled incorrectly, or not even added to
> the index, if they share the first 4096 characters.

The patch looks correct to me though we're stepping on the border
here. Linux's PATH_MAX is 4k and Git already has hard time dealing
with >=4k paths (even when a single path component is less than 4k).

> +       >path1 &&
> +       git update-index --add path1 &&
> +       (
> +               git ls-files -s path1 |
> +               sed -e "s/      .*/     /" |
> +               tr -d "\012"
> +               echo "$a"
> +       ) | git update-index --index-info &&
> +

or

BLOB=$(git hash-object -w -t blob --stdin </dev/null)
git update-index --cacheinfo 100644 $BLOB $a

I don't think git cares much in these tests and using empty tree sha-1
may even work (git recognizes that sha-1 automatically), but it may
hurt the reader..
-- 
Duy

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into a ce_namelen field
  2012-07-09  4:59       ` Junio C Hamano
  2012-07-11  9:22         ` [PATCH v3 0/3] Introduction of " Thomas Gummerer
@ 2012-07-11 16:29         ` Junio C Hamano
  1 sibling, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2012-07-11 16:29 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pclouds

Junio C Hamano <gitster@pobox.com> writes:

>> @@ -395,10 +395,8 @@ int df_name_compare(const char *name1, int len1, int mode1,
>>  	return c1 - c2;
>>  }
>>  
>> -int cache_name_compare(const char *name1, int flags1, const char *name2, int flags2)
>> +int cache_name_stage_compare(const char *name1, int stage1, int len1, const char *name2, int stage2, int len2)
>>  {
>> -	int len1 = flags1 & CE_NAMEMASK;
>> -	int len2 = flags2 & CE_NAMEMASK;
>>  	int len = len1 < len2 ? len1 : len2;
>>  	int cmp;
>
> Isn't this a _BUGFIX_?  It appears to me that the original code
> would only compare the first 4k bytes and ignore the rest, if two
> cache entries, both with overlong names, are compared.  Care to come
> up with a test case to demonstrate the breakage and a bugfix without
> the remainder of this patch, to be applied to 'master' and older
> maintenance releases?

Perhaps something like this (based on v1.7.0.9 as this may deserve
to go to older maintenance releases).

-- >8 --
Subject: [PATCH] cache_name_compare(): do not truncate while comparing paths

We failed to use ce_namelen() equivalent and instead only compared
up to the CE_NAMEMASK bytes by mistake.  Adding an overlong path
that shares the same common prefix as an existing entry in the index
did not add a new entry, but instead replaced the existing one, as
the result.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 read-cache.c             | 13 +++++++++----
 t/t3006-ls-files-long.sh | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100755 t/t3006-ls-files-long.sh

diff --git a/read-cache.c b/read-cache.c
index f1f789b..0cd13aa 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -405,10 +405,15 @@ int df_name_compare(const char *name1, int len1, int mode1,
 
 int cache_name_compare(const char *name1, int flags1, const char *name2, int flags2)
 {
-	int len1 = flags1 & CE_NAMEMASK;
-	int len2 = flags2 & CE_NAMEMASK;
-	int len = len1 < len2 ? len1 : len2;
-	int cmp;
+	int len1, len2, len, cmp;
+
+	len1 = flags1 & CE_NAMEMASK;
+	if (CE_NAMEMASK <= len1)
+		len1 = strlen(name1 + CE_NAMEMASK) + CE_NAMEMASK;
+	len2 = flags2 & CE_NAMEMASK;
+	if (CE_NAMEMASK <= len2)
+		len2 = strlen(name2 + CE_NAMEMASK) + CE_NAMEMASK;
+	len = len1 < len2 ? len1 : len2;
 
 	cmp = memcmp(name1, name2, len);
 	if (cmp)
diff --git a/t/t3006-ls-files-long.sh b/t/t3006-ls-files-long.sh
new file mode 100755
index 0000000..202ad65
--- /dev/null
+++ b/t/t3006-ls-files-long.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+test_description='overly long paths'
+. ./test-lib.sh
+
+test_expect_success setup '
+	p=filefilefilefilefilefilefilefile &&
+	p=$p$p$p$p$p$p$p$p$p$p$p$p$p$p$p$p &&
+	p=$p$p$p$p$p$p$p$p$p$p$p$p$p$p$p$p &&
+
+	path_a=${p}_a &&
+	path_z=${p}_z &&
+
+	blob_a=$(echo frotz | git hash-object -w --stdin) &&
+	blob_z=$(echo nitfol | git hash-object -w --stdin) &&
+
+	pat="100644 %s 0\t%s\n"
+'
+
+test_expect_success 'overly-long path by itself is not a problem' '
+	printf "$pat" "$blob_a" "$path_a" |
+	git update-index --add --index-info &&
+	echo "$path_a" >expect &&
+	git ls-files >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'overly-long path does not replace another by mistake' '
+	printf "$pat" "$blob_a" "$path_a" "$blob_z" "$path_z" |
+	git update-index --add --index-info &&
+	(
+		echo "$path_a"
+		echo "$path_z"
+	) >expect &&
+	git ls-files >actual &&
+	test_cmp expect actual
+'
+
+test_done
-- 
1.7.11

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 1/3] read-cache.c: Handle long filenames correctly
  2012-07-11  9:22           ` [PATCH v3 1/3] read-cache.c: Handle long filenames correctly Thomas Gummerer
  2012-07-11 11:34             ` Nguyen Thai Ngoc Duy
@ 2012-07-11 16:33             ` Junio C Hamano
  1 sibling, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2012-07-11 16:33 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pclouds

Thomas Gummerer <t.gummerer@gmail.com> writes:

> Make git handle long file/path names (> 4096 characters) correctly.
>
> There is a bug in the current version, which causes very long
> file/pathnames to be handled incorrectly, or not even added to
> the index, if they share the first 4096 characters.
>
> Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>

Ahh, I guess I should have opened my mailbox before starting to look
into this myself ;-)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 3/3] Replace strlen() with ce_namelen()
  2012-07-11  9:22           ` [PATCH v3 3/3] Replace strlen() with ce_namelen() Thomas Gummerer
@ 2012-07-11 16:35             ` Junio C Hamano
  0 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2012-07-11 16:35 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pclouds

Thanks for resending; I've merged this independently to 'next' and
will merge it to 'master' soonish.  This one is trivially correct.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2012-07-11 16:35 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-04  9:18 [PATCH/RFC] Replace ce_namelen() with a ce_namelen field Thomas Gummerer
2012-07-04 10:00 ` Thomas Rast
2012-07-04 10:01 ` Nguyen Thai Ngoc Duy
2012-07-04 10:07 ` Nguyen Thai Ngoc Duy
2012-07-04 19:26 ` Junio C Hamano
2012-07-06 16:07   ` Introduction of " Thomas Gummerer
2012-07-06 16:07     ` [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into " Thomas Gummerer
2012-07-09  4:59       ` Junio C Hamano
2012-07-11  9:22         ` [PATCH v3 0/3] Introduction of " Thomas Gummerer
2012-07-11  9:22           ` [PATCH v3 1/3] read-cache.c: Handle long filenames correctly Thomas Gummerer
2012-07-11 11:34             ` Nguyen Thai Ngoc Duy
2012-07-11 16:33             ` Junio C Hamano
2012-07-11  9:22           ` [PATCH v3 2/3] Strip namelen out of ce_flags into a ce_namelen field Thomas Gummerer
2012-07-11  9:22           ` [PATCH v3 3/3] Replace strlen() with ce_namelen() Thomas Gummerer
2012-07-11 16:35             ` Junio C Hamano
2012-07-11 16:29         ` [PATCH/RFC v2 1/2] Strip namelen out of ce_flags into a ce_namelen field Junio C Hamano
2012-07-06 16:07     ` [PATCH/RFC v2 2/2] Replace strlen() with ce_namelen() Thomas Gummerer
2012-07-09  2:49       ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.