All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] merge-recursive: change current file dir string_lists to hashmap
@ 2017-09-06 22:28 Kevin Willford
  2017-09-06 23:44 ` Junio C Hamano
  0 siblings, 1 reply; 2+ messages in thread
From: Kevin Willford @ 2017-09-06 22:28 UTC (permalink / raw)
  To: git; +Cc: gitster, peff, Kevin Willford

The code was using two string_lists, one for the directories and
one for the files.  The code never checks the lists independently
so we should be able to only use one list.  The string_list also
is a O(log n) for lookup and insertion.  Switching this to use a
hashmap will give O(1) which will save some time when there are
millions of paths that will be checked.

Also cleaned up a memory leak and method where the return was not
being used.

Signed-off-by: Kevin Willford <kewillf@microsoft.com>
---
 merge-recursive.c | 76 ++++++++++++++++++++++++++++++++++++++++---------------
 merge-recursive.h |  3 +--
 2 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 1494ffdb82..ebfe01017f 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -24,6 +24,31 @@
 #include "dir.h"
 #include "submodule.h"
 
+struct path_hashmap_entry {
+	struct hashmap_entry;
+	char path[FLEX_ARRAY];
+};
+
+static int path_hashmap_cmp(const void *cmp_data,
+			    const void *entry,
+			    const void *entry_or_key,
+			    const void *keydata)
+{
+	const struct path_hashmap_entry *a = entry;
+	const struct path_hashmap_entry *b = entry_or_key;
+	const char *key = keydata;
+
+	if (ignore_case)
+		return strcasecmp(a->path, key ? key : b->path);
+	else
+		return strcmp(a->path, key ? key : b->path);
+}
+
+static unsigned int path_hash(const char *path)
+{
+	return ignore_case ? strihash(path) : strhash(path);
+}
+
 static void flush_output(struct merge_options *o)
 {
 	if (o->buffer_output < 2 && o->obuf.len) {
@@ -314,29 +339,25 @@ static int save_files_dirs(const unsigned char *sha1,
 		struct strbuf *base, const char *path,
 		unsigned int mode, int stage, void *context)
 {
+	struct path_hashmap_entry *entry;
 	int baselen = base->len;
 	struct merge_options *o = context;
 
 	strbuf_addstr(base, path);
 
-	if (S_ISDIR(mode))
-		string_list_insert(&o->current_directory_set, base->buf);
-	else
-		string_list_insert(&o->current_file_set, base->buf);
+	FLEX_ALLOC_MEM(entry, path, base->buf, base->len);
+	hashmap_entry_init(entry, path_hash(entry->path));
+	hashmap_add(&o->current_file_dir_set, entry);
 
 	strbuf_setlen(base, baselen);
 	return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
 }
 
-static int get_files_dirs(struct merge_options *o, struct tree *tree)
+static void get_files_dirs(struct merge_options *o, struct tree *tree)
 {
-	int n;
 	struct pathspec match_all;
 	memset(&match_all, 0, sizeof(match_all));
-	if (read_tree_recursive(tree, "", 0, 0, &match_all, save_files_dirs, o))
-		return 0;
-	n = o->current_file_set.nr + o->current_directory_set.nr;
-	return n;
+	read_tree_recursive(tree, "", 0, 0, &match_all, save_files_dirs, o);
 }
 
 /*
@@ -646,6 +667,7 @@ static void add_flattened_path(struct strbuf *out, const char *s)
 
 static char *unique_path(struct merge_options *o, const char *path, const char *branch)
 {
+	struct path_hashmap_entry *entry;
 	struct strbuf newpath = STRBUF_INIT;
 	int suffix = 0;
 	size_t base_len;
@@ -654,14 +676,16 @@ static char *unique_path(struct merge_options *o, const char *path, const char *
 	add_flattened_path(&newpath, branch);
 
 	base_len = newpath.len;
-	while (string_list_has_string(&o->current_file_set, newpath.buf) ||
-	       string_list_has_string(&o->current_directory_set, newpath.buf) ||
+	while (hashmap_get_from_hash(&o->current_file_dir_set,
+				     path_hash(newpath.buf), newpath.buf) ||
 	       (!o->call_depth && file_exists(newpath.buf))) {
 		strbuf_setlen(&newpath, base_len);
 		strbuf_addf(&newpath, "_%d", suffix++);
 	}
 
-	string_list_insert(&o->current_file_set, newpath.buf);
+	FLEX_ALLOC_MEM(entry, path, newpath.buf, newpath.len);
+	hashmap_entry_init(entry, path_hash(entry->path));
+	hashmap_add(&o->current_file_dir_set, entry);
 	return strbuf_detach(&newpath, NULL);
 }
 
@@ -1945,8 +1969,14 @@ int merge_trees(struct merge_options *o,
 	if (unmerged_cache()) {
 		struct string_list *entries, *re_head, *re_merge;
 		int i;
-		string_list_clear(&o->current_file_set, 1);
-		string_list_clear(&o->current_directory_set, 1);
+		/*
+		 * Only need the hashmap while processing entries, so
+		 * initialize it here and free it when we are done running
+		 * through the entries. Keeping it in the merge_options as
+		 * opposed to decaring a local hashmap is for convenience
+		 * so that we don't have to pass it to around.
+		 */
+		hashmap_init(&o->current_file_dir_set, path_hashmap_cmp, NULL, 512);
 		get_files_dirs(o, head);
 		get_files_dirs(o, merge);
 
@@ -1956,7 +1986,7 @@ int merge_trees(struct merge_options *o,
 		re_merge = get_renames(o, merge, common, head, merge, entries);
 		clean = process_renames(o, re_head, re_merge);
 		if (clean < 0)
-			return clean;
+			goto cleanup;
 		for (i = entries->nr-1; 0 <= i; i--) {
 			const char *path = entries->items[i].string;
 			struct stage_data *e = entries->items[i].util;
@@ -1964,8 +1994,10 @@ int merge_trees(struct merge_options *o,
 				int ret = process_entry(o, path, e);
 				if (!ret)
 					clean = 0;
-				else if (ret < 0)
-					return ret;
+				else if (ret < 0) {
+					clean = ret;
+					goto cleanup;
+				}
 			}
 		}
 		for (i = 0; i < entries->nr; i++) {
@@ -1975,13 +2007,19 @@ int merge_trees(struct merge_options *o,
 				    entries->items[i].string);
 		}
 
+cleanup:
 		string_list_clear(re_merge, 0);
 		string_list_clear(re_head, 0);
 		string_list_clear(entries, 1);
 
+		hashmap_free(&o->current_file_dir_set, 1);
+
 		free(re_merge);
 		free(re_head);
 		free(entries);
+
+		if (clean < 0)
+			return clean;
 	}
 	else
 		clean = 1;
@@ -2177,8 +2215,6 @@ void init_merge_options(struct merge_options *o)
 	if (o->verbosity >= 5)
 		o->buffer_output = 0;
 	strbuf_init(&o->obuf, 0);
-	string_list_init(&o->current_file_set, 1);
-	string_list_init(&o->current_directory_set, 1);
 	string_list_init(&o->df_conflict_file_set, 1);
 }
 
diff --git a/merge-recursive.h b/merge-recursive.h
index 735343b413..80d69d1401 100644
--- a/merge-recursive.h
+++ b/merge-recursive.h
@@ -25,8 +25,7 @@ struct merge_options {
 	int show_rename_progress;
 	int call_depth;
 	struct strbuf obuf;
-	struct string_list current_file_set;
-	struct string_list current_directory_set;
+	struct hashmap current_file_dir_set;
 	struct string_list df_conflict_file_set;
 };
 
-- 
2.14.1.327.g2b87382360.dirty


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] merge-recursive: change current file dir string_lists to hashmap
  2017-09-06 22:28 [PATCH v2] merge-recursive: change current file dir string_lists to hashmap Kevin Willford
@ 2017-09-06 23:44 ` Junio C Hamano
  0 siblings, 0 replies; 2+ messages in thread
From: Junio C Hamano @ 2017-09-06 23:44 UTC (permalink / raw)
  To: Kevin Willford; +Cc: git, peff

Kevin Willford <kewillf@microsoft.com> writes:

> The code was using two string_lists, one for the directories and
> one for the files.  The code never checks the lists independently
> so we should be able to only use one list.  The string_list also
> is a O(log n) for lookup and insertion.  Switching this to use a
> hashmap will give O(1) which will save some time when there are
> millions of paths that will be checked.
>
> Also cleaned up a memory leak and method where the return was not
> being used.
>
> Signed-off-by: Kevin Willford <kewillf@microsoft.com>
> ---
>  merge-recursive.c | 76 ++++++++++++++++++++++++++++++++++++++++---------------
>  merge-recursive.h |  3 +--
>  2 files changed, 57 insertions(+), 22 deletions(-)
>
> diff --git a/merge-recursive.c b/merge-recursive.c
> index 1494ffdb82..ebfe01017f 100644
> --- a/merge-recursive.c
> +++ b/merge-recursive.c
> @@ -24,6 +24,31 @@
>  #include "dir.h"
>  #include "submodule.h"
>  
> +struct path_hashmap_entry {
> +	struct hashmap_entry;

You seem to have lost the squash you privately agreed to that is
needed in order to make it compile?

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-09-06 23:45 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-06 22:28 [PATCH v2] merge-recursive: change current file dir string_lists to hashmap Kevin Willford
2017-09-06 23:44 ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.