All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] Threaded grep
@ 2010-01-25 22:51 Fredrik Kuivinen
  2010-01-25 23:59 ` Linus Torvalds
  2010-01-26  1:20 ` [PATCH v4] Threaded grep Junio C Hamano
  0 siblings, 2 replies; 12+ messages in thread
From: Fredrik Kuivinen @ 2010-01-25 22:51 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Linus Torvalds, Johannes Sixt

Make git grep use threads when it is available.

The results below are best of five runs in the Linux repository (on a
box with two cores).

With the patch:

git grep qwerty
1.58user 0.55system 0:01.16elapsed 183%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+5774minor)pagefaults 0swaps

Without:

git grep qwerty
1.59user 0.43system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+3716minor)pagefaults 0swaps


And with a pattern with quite a few matches:

With the patch:

$ /usr/bin/time git grep void
5.61user 0.56system 0:03.44elapsed 179%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+5587minor)pagefaults 0swaps

Without:

$ /usr/bin/time git grep void
5.36user 0.51system 0:05.87elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+3693minor)pagefaults 0swaps

In either case we gain about 40% by the threading.

Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
---

The patch has been rebased on top of next.

Additional changes since v3:

* Fix some issues with Git's native pthreads implementation on
  Windows (pthread_cond_broadcast is still used though).
* Fix style issues.
* When greping in a tree, allocate memory for the data buffer as
  late as possible.
* Return void from grep_sha1_async and grep_file_async instead of
  always returning 0.

 builtin-grep.c |  394 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 grep.c         |  106 +++++++++++++--
 grep.h         |    6 +
 3 files changed, 457 insertions(+), 49 deletions(-)

diff --git a/builtin-grep.c b/builtin-grep.c
index da854fa..252cb0b 100644
--- a/builtin-grep.c
+++ b/builtin-grep.c
@@ -16,11 +16,270 @@
 #include "quote.h"
 #include "dir.h"
 
+#ifndef NO_PTHREADS
+#include "thread-utils.h"
+#include <pthread.h>
+#endif
+
 static char const * const grep_usage[] = {
 	"git grep [options] [-e] <pattern> [<rev>...] [[--] path...]",
 	NULL
 };
 
+static int use_threads = 1;
+
+#ifndef NO_PTHREADS
+#define THREADS 8
+static pthread_t threads[THREADS];
+
+static void *load_sha1(const unsigned char *sha1, unsigned long *size,
+		       const char *name);
+static void *load_file(const char *filename, size_t *sz);
+
+enum work_type {WORK_SHA1, WORK_FILE};
+
+/* We use one producer thread and THREADS consumer
+ * threads. The producer adds struct work_items to 'todo' and the
+ * consumers pick work items from the same array.
+ */
+struct work_item
+{
+	enum work_type type;
+	char *name;
+
+	/* if type == WORK_SHA1, then 'identifier' is a SHA1,
+	 * otherwise type == WORK_FILE, and 'identifier' is a NUL
+	 * terminated filename.
+	 */
+	void *identifier;
+	char done;
+	struct strbuf out;
+};
+
+/* In the range [todo_done, todo_start) in 'todo' we have work_items
+ * that have been or are processed by a consumer thread. We haven't
+ * written the result for these to stdout yet.
+ *
+ * The work_items in [todo_start, todo_end) are waiting to be picked
+ * up by a consumer thread.
+ *
+ * The ranges are modulo TODO_SIZE.
+ */
+#define TODO_SIZE 128
+static struct work_item todo[TODO_SIZE];
+static int todo_start;
+static int todo_end;
+static int todo_done;
+
+/* Has all work items been added? */
+static int all_work_added;
+
+/* This lock protects all the variables above. */
+static pthread_mutex_t grep_lock;
+
+/* Used to serialize calls to read_sha1_file. */
+static pthread_mutex_t read_sha1_lock;
+
+/* Signalled when a new work_item is added to todo. */
+static pthread_cond_t cond_add;
+
+/* Signalled when the result from one work_item is written to
+ * stdout.
+ */
+static pthread_cond_t cond_write;
+
+/* Signalled when we are finished with everything. */
+static pthread_cond_t cond_result;
+
+static void add_work(enum work_type type, char *name, void *id)
+{
+	pthread_mutex_lock(&grep_lock);
+
+	while ((todo_end+1) % ARRAY_SIZE(todo) == todo_done) {
+		pthread_cond_wait(&cond_write, &grep_lock);
+	}
+
+	todo[todo_end].type = type;
+	todo[todo_end].name = name;
+	todo[todo_end].identifier = id;
+	todo[todo_end].done = 0;
+	strbuf_reset(&todo[todo_end].out);
+	todo_end = (todo_end + 1) % ARRAY_SIZE(todo);
+
+	pthread_cond_signal(&cond_add);
+	pthread_mutex_unlock(&grep_lock);
+}
+
+static struct work_item *get_work()
+{
+	struct work_item *ret;
+
+	pthread_mutex_lock(&grep_lock);
+	while (todo_start == todo_end && !all_work_added) {
+		pthread_cond_wait(&cond_add, &grep_lock);
+	}
+
+	if (todo_start == todo_end && all_work_added) {
+		ret = NULL;
+	} else {
+		ret = &todo[todo_start];
+		todo_start = (todo_start + 1) % ARRAY_SIZE(todo);
+	}
+	pthread_mutex_unlock(&grep_lock);
+	return ret;
+}
+
+static void grep_sha1_async(struct grep_opt *opt, char *name,
+			    const unsigned char *sha1)
+{
+	unsigned char *s;
+	s = xmalloc(20);
+	memcpy(s, sha1, 20);
+	add_work(WORK_SHA1, name, s);
+}
+
+static void grep_file_async(struct grep_opt *opt, char *name,
+			    const char *filename)
+{
+	add_work(WORK_FILE, name, xstrdup(filename));
+}
+
+static void work_done(struct work_item *w)
+{
+	int old_done;
+
+	pthread_mutex_lock(&grep_lock);
+	w->done = 1;
+	old_done = todo_done;
+	for(; todo[todo_done].done && todo_done != todo_start;
+	    todo_done = (todo_done+1) % ARRAY_SIZE(todo)) {
+		w = &todo[todo_done];
+		write_or_die(1, w->out.buf, w->out.len);
+		free(w->name);
+		free(w->identifier);
+	}
+
+	if (old_done != todo_done)
+		pthread_cond_signal(&cond_write);
+
+	if (all_work_added && todo_done == todo_end)
+		pthread_cond_signal(&cond_result);
+
+	pthread_mutex_unlock(&grep_lock);
+}
+
+static void *run(void *arg)
+{
+	int hit = 0;
+	struct grep_opt *opt = arg;
+
+	while (1) {
+		struct work_item *w = get_work();
+		if (!w)
+			break;
+
+		opt->output_priv = w;
+		if (w->type == WORK_SHA1) {
+			unsigned long sz;
+			void* data;
+
+			pthread_mutex_lock(&read_sha1_lock);
+			data = load_sha1(w->identifier, &sz, w->name);
+			pthread_mutex_unlock(&read_sha1_lock);
+
+			if (data) {
+				hit |= grep_buffer(opt, w->name, data, sz);
+				free(data);
+			}
+		} else if (w->type == WORK_FILE) {
+			size_t sz;
+			void* data = load_file(w->identifier, &sz);
+			if (data) {
+				hit |= grep_buffer(opt, w->name, data, sz);
+				free(data);
+			}
+		} else {
+			assert(0);
+		}
+
+		work_done(w);
+	}
+
+	return (void*) (intptr_t) hit;
+}
+
+static void strbuf_out(struct grep_opt *opt, const void *buf, size_t size)
+{
+	struct work_item *w = opt->output_priv;
+	strbuf_add(&w->out, buf, size);
+}
+
+static void start_threads(struct grep_opt *opt)
+{
+	int i;
+
+	pthread_mutex_init(&grep_lock, NULL);
+	pthread_mutex_init(&read_sha1_lock, NULL);
+	pthread_cond_init(&cond_add, NULL);
+	pthread_cond_init(&cond_write, NULL);
+	pthread_cond_init(&cond_result, NULL);
+
+	for (i = 0; i < ARRAY_SIZE(todo); i++) {
+		strbuf_init(&todo[i].out, 0);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++) {
+		int err;
+		struct grep_opt *o = grep_opt_dup(opt);
+		o->output = strbuf_out;
+		compile_grep_patterns(o);
+		err = pthread_create(&threads[i], NULL, run, o);
+
+		if (err)
+			die("grep: failed to create thread: %s",
+			    strerror(err));
+	}
+}
+
+static int wait_all()
+{
+	int hit = 0;
+	int i;
+
+	pthread_mutex_lock(&grep_lock);
+	all_work_added = 1;
+
+	/* Wait until all work is done. */
+	while (todo_done != todo_end)
+		pthread_cond_wait(&cond_result, &grep_lock);
+
+	/* Wake up all the consumer threads so they can see that there
+	 * is no more work to do.
+	 */
+	pthread_cond_broadcast(&cond_add);
+	pthread_mutex_unlock(&grep_lock);
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++) {
+		void *h;
+		pthread_join(threads[i], &h);
+		hit |= (int) (intptr_t) h;
+	}
+
+	pthread_mutex_destroy(&grep_lock);
+	pthread_mutex_destroy(&read_sha1_lock);
+	pthread_cond_destroy(&cond_add);
+	pthread_cond_destroy(&cond_write);
+	pthread_cond_destroy(&cond_result);
+
+	return hit;
+}
+#else /* !NO_PTHREADS */
+static int wait_all()
+{
+	return 0;
+}
+#endif
+
 static int grep_config(const char *var, const char *value, void *cb)
 {
 	struct grep_opt *opt = cb;
@@ -144,37 +403,60 @@ static int pathspec_matches(const char **paths, const char *name, int max_depth)
 	return 0;
 }
 
-static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name, int tree_name_len)
+static void *load_sha1(const unsigned char *sha1, unsigned long *size,
+		       const char *name)
 {
-	unsigned long size;
-	char *data;
 	enum object_type type;
-	int hit;
-	struct strbuf pathbuf = STRBUF_INIT;
+	char *data = read_sha1_file(sha1, &type, size);
 
-	data = read_sha1_file(sha1, &type, &size);
-	if (!data) {
+	if (!data)
 		error("'%s': unable to read %s", name, sha1_to_hex(sha1));
-		return 0;
-	}
+
+	return data;
+}
+
+static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1,
+		     const char *filename, int tree_name_len)
+{
+	struct strbuf pathbuf = STRBUF_INIT;
+	char *name;
+
 	if (opt->relative && opt->prefix_length) {
-		quote_path_relative(name + tree_name_len, -1, &pathbuf, opt->prefix);
-		strbuf_insert(&pathbuf, 0, name, tree_name_len);
-		name = pathbuf.buf;
+		quote_path_relative(filename + tree_name_len, -1, &pathbuf,
+				    opt->prefix);
+		strbuf_insert(&pathbuf, 0, filename, tree_name_len);
+	} else {
+		strbuf_addstr(&pathbuf, filename);
+	}
+
+	name = strbuf_detach(&pathbuf, NULL);
+
+#ifndef NO_PTHREADS
+	if (use_threads) {
+		grep_sha1_async(opt, name, sha1);
+		return 0;
+	} else
+#endif
+	{
+		int hit;
+		unsigned long sz;
+		void *data = load_sha1(sha1, &sz, name);
+		if (!data)
+			hit = 0;
+		else
+			hit = grep_buffer(opt, name, data, sz);
+
+		free(data);
+		free(name);
+		return hit;
 	}
-	hit = grep_buffer(opt, name, data, size);
-	strbuf_release(&pathbuf);
-	free(data);
-	return hit;
 }
 
-static int grep_file(struct grep_opt *opt, const char *filename)
+static void *load_file(const char *filename, size_t *sz)
 {
 	struct stat st;
-	int i;
 	char *data;
-	size_t sz;
-	struct strbuf buf = STRBUF_INIT;
+	int i;
 
 	if (lstat(filename, &st) < 0) {
 	err_ret:
@@ -184,25 +466,52 @@ static int grep_file(struct grep_opt *opt, const char *filename)
 	}
 	if (!S_ISREG(st.st_mode))
 		return 0;
-	sz = xsize_t(st.st_size);
+	*sz = xsize_t(st.st_size);
 	i = open(filename, O_RDONLY);
 	if (i < 0)
 		goto err_ret;
-	data = xmalloc(sz + 1);
-	if (st.st_size != read_in_full(i, data, sz)) {
+	data = xmalloc(*sz + 1);
+	if (st.st_size != read_in_full(i, data, *sz)) {
 		error("'%s': short read %s", filename, strerror(errno));
 		close(i);
 		free(data);
 		return 0;
 	}
 	close(i);
-	data[sz] = 0;
+	data[*sz] = 0;
+	return data;
+}
+
+static int grep_file(struct grep_opt *opt, const char *filename)
+{
+	struct strbuf buf = STRBUF_INIT;
+	char *name;
+
 	if (opt->relative && opt->prefix_length)
-		filename = quote_path_relative(filename, -1, &buf, opt->prefix);
-	i = grep_buffer(opt, filename, data, sz);
-	strbuf_release(&buf);
-	free(data);
-	return i;
+		quote_path_relative(filename, -1, &buf, opt->prefix);
+	else
+		strbuf_addstr(&buf, filename);
+	name = strbuf_detach(&buf, NULL);
+
+#ifndef NO_PTHREADS
+	if (use_threads) {
+		grep_file_async(opt, name, filename);
+		return 0;
+	} else
+#endif
+	{
+		int hit;
+		size_t sz;
+		void *data = load_file(filename, &sz);
+		if (!data)
+			hit = 0;
+		else
+			hit = grep_buffer(opt, name, data, sz);
+
+		free(data);
+		free(name);
+		return hit;
+	}
 }
 
 static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
@@ -572,6 +881,17 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		opt.regflags |= REG_ICASE;
 	if ((opt.regflags != REG_NEWLINE) && opt.fixed)
 		die("cannot mix --fixed-strings and regexp");
+
+#ifndef NO_PTHREADS
+	if (online_cpus() == 1 || !grep_threads_ok(&opt))
+		use_threads = 0;
+
+	if (use_threads)
+		start_threads(&opt);
+#else
+	use_threads = 0;
+#endif
+
 	compile_grep_patterns(&opt);
 
 	/* Check revs and then paths */
@@ -609,17 +929,26 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 	}
 
 	if (!use_index) {
+		int hit;
 		if (cached)
 			die("--cached cannot be used with --no-index.");
 		if (list.nr)
 			die("--no-index cannot be used with revs.");
-		return !grep_directory(&opt, paths);
+		hit = grep_directory(&opt, paths);
+		if (use_threads)
+			hit |= wait_all();
+		return !hit;
 	}
 
 	if (!list.nr) {
+		int hit;
 		if (!cached)
 			setup_work_tree();
-		return !grep_cache(&opt, paths, cached);
+
+		hit = grep_cache(&opt, paths, cached);
+		if (use_threads)
+			hit |= wait_all();
+		return !hit;
 	}
 
 	if (cached)
@@ -631,6 +960,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		if (grep_object(&opt, paths, real_obj, list.objects[i].name))
 			hit = 1;
 	}
+
+	if (use_threads)
+		hit |= wait_all();
 	free_grep_patterns(&opt);
 	return !hit;
 }
diff --git a/grep.c b/grep.c
index 8e1f7de..d281a02 100644
--- a/grep.c
+++ b/grep.c
@@ -29,6 +29,28 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat,
 	p->next = NULL;
 }
 
+struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
+{
+	struct grep_pat *pat;
+	struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
+	*ret = *opt;
+
+	ret->pattern_list = NULL;
+	ret->pattern_tail = &ret->pattern_list;
+
+	for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
+	{
+		if(pat->token == GREP_PATTERN_HEAD)
+			append_header_grep_pattern(ret, pat->field,
+						   pat->pattern);
+		else
+			append_grep_pattern(ret, pat->pattern, pat->origin,
+					    pat->no, pat->token);
+	}
+
+	return ret;
+}
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
 	int err;
@@ -253,7 +275,8 @@ static int word_char(char ch)
 
 static void show_name(struct grep_opt *opt, const char *name)
 {
-	printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
+	opt->output(opt, name, strlen(name));
+	opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
 }
 
 
@@ -490,24 +513,32 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
 		      const char *name, unsigned lno, char sign)
 {
 	int rest = eol - bol;
+	char sign_str[1];
 
+	sign_str[0] = sign;
 	if (opt->pre_context || opt->post_context) {
 		if (opt->last_shown == 0) {
 			if (opt->show_hunk_mark)
-				fputs("--\n", stdout);
+				opt->output(opt, "--\n", 3);
 			else
 				opt->show_hunk_mark = 1;
 		} else if (lno > opt->last_shown + 1)
-			fputs("--\n", stdout);
+			opt->output(opt, "--\n", 3);
 	}
 	opt->last_shown = lno;
 
 	if (opt->null_following_name)
-		sign = '\0';
-	if (opt->pathname)
-		printf("%s%c", name, sign);
-	if (opt->linenum)
-		printf("%d%c", lno, sign);
+		sign_str[0] = '\0';
+	if (opt->pathname) {
+		opt->output(opt, name, strlen(name));
+		opt->output(opt, sign_str, 1);
+	}
+	if (opt->linenum) {
+		char buf[32];
+		snprintf(buf, sizeof(buf), "%d", lno);
+		opt->output(opt, buf, strlen(buf));
+		opt->output(opt, sign_str, 1);
+	}
 	if (opt->color) {
 		regmatch_t match;
 		enum grep_context ctx = GREP_CONTEXT_BODY;
@@ -518,18 +549,22 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
 		while (next_match(opt, bol, eol, ctx, &match, eflags)) {
 			if (match.rm_so == match.rm_eo)
 				break;
-			printf("%.*s%s%.*s%s",
-			       (int)match.rm_so, bol,
-			       opt->color_match,
-			       (int)(match.rm_eo - match.rm_so), bol + match.rm_so,
-			       GIT_COLOR_RESET);
+
+			opt->output(opt, bol, match.rm_so);
+			opt->output(opt, opt->color_match,
+				    strlen(opt->color_match));
+			opt->output(opt, bol + match.rm_so,
+				    (int)(match.rm_eo - match.rm_so));
+			opt->output(opt, GIT_COLOR_RESET,
+				    strlen(GIT_COLOR_RESET));
 			bol += match.rm_eo;
 			rest -= match.rm_eo;
 			eflags = REG_NOTBOL;
 		}
 		*eol = ch;
 	}
-	printf("%.*s\n", rest, bol);
+	opt->output(opt, bol, rest);
+	opt->output(opt, "\n", 1);
 }
 
 static int match_funcname(struct grep_opt *opt, char *bol, char *eol)
@@ -667,6 +702,32 @@ static int look_ahead(struct grep_opt *opt,
 	return 0;
 }
 
+int grep_threads_ok(const struct grep_opt *opt)
+{
+	/* If this condition is true, then we may use the attribute
+	 * machinery in grep_buffer_1. The attribute code is not
+	 * thread safe, so we disable the use of threads.
+	 */
+	if (opt->funcname && !opt->unmatch_name_only && !opt->status_only &&
+	    !opt->name_only)
+		return 0;
+
+	/* If we are showing hunk marks, we should not do it for the
+	 * first match. The synchronization problem we get for this
+	 * constraint is not yet solved, so we disable threading in
+	 * this case.
+	 */
+	if (opt->pre_context || opt->post_context)
+		return 0;
+
+	return 1;
+}
+
+static void std_output(struct grep_opt *opt, const void *buf, size_t size)
+{
+	fwrite(buf, size, 1, stdout);
+}
+
 static int grep_buffer_1(struct grep_opt *opt, const char *name,
 			 char *buf, unsigned long size, int collect_hits)
 {
@@ -682,6 +743,9 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
 
 	opt->last_shown = 0;
 
+	if (!opt->output)
+		opt->output = std_output;
+
 	if (buffer_is_binary(buf, size)) {
 		switch (opt->binary) {
 		case GREP_BINARY_DEFAULT:
@@ -754,7 +818,9 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
 			if (opt->status_only)
 				return 1;
 			if (binary_match_only) {
-				printf("Binary file %s matches\n", name);
+				opt->output(opt, "Binary file ", 12);
+				opt->output(opt, name, strlen(name));
+				opt->output(opt, " matches\n", 9);
 				return 1;
 			}
 			if (opt->name_only) {
@@ -810,9 +876,13 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
 	 * which feels mostly useless but sometimes useful.  Maybe
 	 * make it another option?  For now suppress them.
 	 */
-	if (opt->count && count)
-		printf("%s%c%u\n", name,
-		       opt->null_following_name ? '\0' : ':', count);
+	if (opt->count && count) {
+		char buf[32];
+		opt->output(opt, name, strlen(name));
+		snprintf(buf, sizeof(buf), "%c%u\n",
+			 opt->null_following_name ? '\0' : ':', count);
+		opt->output(opt, buf, strlen(buf));
+	}
 	return !!last_hit;
 }
 
diff --git a/grep.h b/grep.h
index 0c61b00..9703087 100644
--- a/grep.h
+++ b/grep.h
@@ -91,6 +91,9 @@ struct grep_opt {
 	unsigned last_shown;
 	int show_hunk_mark;
 	void *priv;
+
+	void (*output)(struct grep_opt *opt, const void *data, size_t size);
+	void *output_priv;
 };
 
 extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
@@ -99,4 +102,7 @@ extern void compile_grep_patterns(struct grep_opt *opt);
 extern void free_grep_patterns(struct grep_opt *opt);
 extern int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size);
 
+extern struct grep_opt *grep_opt_dup(const struct grep_opt *opt);
+extern int grep_threads_ok(const struct grep_opt *opt);
+
 #endif

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-25 22:51 [PATCH v4] Threaded grep Fredrik Kuivinen
@ 2010-01-25 23:59 ` Linus Torvalds
  2010-01-26 12:10   ` Fredrik Kuivinen
  2010-01-26  1:20 ` [PATCH v4] Threaded grep Junio C Hamano
  1 sibling, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2010-01-25 23:59 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: Git Mailing List, Junio C Hamano, Johannes Sixt



On Mon, 25 Jan 2010, Fredrik Kuivinen wrote:
> 
> The results below are best of five runs in the Linux repository (on a
> box with two cores).
> 
> git grep qwerty

Before:

	real	0m0.531s
	user	0m0.412s
	sys	0m0.112s

After:

	real	0m0.151s
	user	0m0.720s
	sys	0m0.272s


> $ /usr/bin/time git grep void

Before:

	real	0m1.144s
	user	0m0.988s
	sys	0m0.148s

After:
	real	0m0.290s
	user	0m1.732s
	sys	0m0.232s

So it's helping a lot (~3.5x and ~3.9x) on this 4-core HT setup. 

I don't seem to ever get more than a 4x speedup, so my guess is that HT 
simply isn't able to do much of anything with this load. 

The profile for the threaded case says:

    51.73%      git  libc-2.11.1.so                 [.] re_search_internal
    11.47%      git  [kernel]                       [k] copy_user_generic_string
     2.90%      git  libc-2.11.1.so                 [.] __strlen_sse2
     2.66%      git  [kernel]                       [k] link_path_walk
     2.55%      git  [kernel]                       [k] intel_pmu_enable_all
     2.40%      git  [kernel]                       [k] __d_lookup
     1.71%      git  libc-2.11.1.so                 [.] __GI___libc_malloc
     1.55%      git  [kernel]                       [k] _raw_spin_lock
     1.43%      git  [kernel]                       [k] sys_futex
     1.30%      git  libc-2.11.1.so                 [.] __cfree
     1.28%      git  [kernel]                       [k] intel_pmu_disable_all
     1.25%      git  libc-2.11.1.so                 [.] __GI_memchr
     1.14%      git  libc-2.11.1.so                 [.] _int_malloc
     1.02%      git  [kernel]                       [k] effective_load

and the only thing that makes me go "eh?" there is the strlen(). Why is 
that so hot?  But locking doesn't seem to be the biggest issue, and in 
general I think this is all pretty good. The 'effective_load' thing is the 
scheduler, so there's certainly some context switching going on, probably 
still due to excessive synchronization, but it's equally clear that that 
is certainly not a dominant factor.

One potentially interesting data point is that if I make NR_THREADS be 16, 
performance goes down, and I get more locking overhead. So NR_THREADS of 8 
works well on this machine.

So ack from me. The patch looks reasonably clean too, at least for 
something as complex as a multi-threaded grep.

One worry is, of course, whether all regex() implementations are 
thread-safe. Maybe there are broken libraries that have hidden global 
state in them?

			Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-25 22:51 [PATCH v4] Threaded grep Fredrik Kuivinen
  2010-01-25 23:59 ` Linus Torvalds
@ 2010-01-26  1:20 ` Junio C Hamano
  2010-01-26 11:43   ` Fredrik Kuivinen
  1 sibling, 1 reply; 12+ messages in thread
From: Junio C Hamano @ 2010-01-26  1:20 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: git, Linus Torvalds, Johannes Sixt

Fredrik Kuivinen <frekui@gmail.com> writes:

> The patch has been rebased on top of next.
>
> Additional changes since v3:
>
> * Fix some issues with Git's native pthreads implementation on
>   Windows (pthread_cond_broadcast is still used though).
> * Fix style issues.
> * When greping in a tree, allocate memory for the data buffer as
>   late as possible.
> * Return void from grep_sha1_async and grep_file_async instead of
>   always returning 0.

Thanks; I've fixed up a few old-style declaration header and queued the
result to 'pu'.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-26  1:20 ` [PATCH v4] Threaded grep Junio C Hamano
@ 2010-01-26 11:43   ` Fredrik Kuivinen
  2010-01-26 17:21     ` Junio C Hamano
  0 siblings, 1 reply; 12+ messages in thread
From: Fredrik Kuivinen @ 2010-01-26 11:43 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Linus Torvalds, Johannes Sixt

On Mon, Jan 25, 2010 at 05:20:39PM -0800, Junio C Hamano wrote:
> Fredrik Kuivinen <frekui@gmail.com> writes:
> 
> > The patch has been rebased on top of next.
> >
> > Additional changes since v3:
> >
> > * Fix some issues with Git's native pthreads implementation on
> >   Windows (pthread_cond_broadcast is still used though).
> > * Fix style issues.
> > * When greping in a tree, allocate memory for the data buffer as
> >   late as possible.
> > * Return void from grep_sha1_async and grep_file_async instead of
> >   always returning 0.
> 
> Thanks; I've fixed up a few old-style declaration header and queued the
> result to 'pu'.

I just noticed that I forgot to take the read_sha1 lock in
grep_tree. The result is a race condition in read_sha1_file.

Here is a patch to fix this. It applies on top of ae35c68 (Threaded
grep). Could you please squash it in? Let me know if you want me to
resend the entire patch instead.

(The only important thing is that we now take the read_sha1_mutex in
grep_tree. The rest is there just to make it self-consistent.)


- Fredrik

--- 8< ---

diff --git a/builtin-grep.c b/builtin-grep.c
index 7ecf222..6cc743d 100644
--- a/builtin-grep.c
+++ b/builtin-grep.c
@@ -75,10 +75,15 @@ static int todo_done;
 static int all_work_added;
 
 /* This lock protects all the variables above. */
-static pthread_mutex_t grep_lock;
+static pthread_mutex_t grep_mutex;
 
 /* Used to serialize calls to read_sha1_file. */
-static pthread_mutex_t read_sha1_lock;
+static pthread_mutex_t read_sha1_mutex;
+
+#define grep_lock() pthread_mutex_lock(&grep_mutex)
+#define grep_unlock() pthread_mutex_unlock(&grep_mutex)
+#define read_sha1_lock() pthread_mutex_lock(&read_sha1_mutex)
+#define read_sha1_unlock() pthread_mutex_unlock(&read_sha1_mutex)
 
 /* Signalled when a new work_item is added to todo. */
 static pthread_cond_t cond_add;
@@ -93,10 +98,10 @@ static pthread_cond_t cond_result;
 
 static void add_work(enum work_type type, char *name, void *id)
 {
-	pthread_mutex_lock(&grep_lock);
+	grep_lock();
 
 	while ((todo_end+1) % ARRAY_SIZE(todo) == todo_done) {
-		pthread_cond_wait(&cond_write, &grep_lock);
+		pthread_cond_wait(&cond_write, &grep_mutex);
 	}
 
 	todo[todo_end].type = type;
@@ -107,16 +112,16 @@ static void add_work(enum work_type type, char *name, void *id)
 	todo_end = (todo_end + 1) % ARRAY_SIZE(todo);
 
 	pthread_cond_signal(&cond_add);
-	pthread_mutex_unlock(&grep_lock);
+	grep_unlock();
 }
 
 static struct work_item *get_work(void)
 {
 	struct work_item *ret;
 
-	pthread_mutex_lock(&grep_lock);
+	grep_lock();
 	while (todo_start == todo_end && !all_work_added) {
-		pthread_cond_wait(&cond_add, &grep_lock);
+		pthread_cond_wait(&cond_add, &grep_mutex);
 	}
 
 	if (todo_start == todo_end && all_work_added) {
@@ -125,7 +130,7 @@ static struct work_item *get_work(void)
 		ret = &todo[todo_start];
 		todo_start = (todo_start + 1) % ARRAY_SIZE(todo);
 	}
-	pthread_mutex_unlock(&grep_lock);
+	grep_unlock();
 	return ret;
 }
 
@@ -148,7 +153,7 @@ static void work_done(struct work_item *w)
 {
 	int old_done;
 
-	pthread_mutex_lock(&grep_lock);
+	grep_lock();
 	w->done = 1;
 	old_done = todo_done;
 	for(; todo[todo_done].done && todo_done != todo_start;
@@ -165,7 +170,7 @@ static void work_done(struct work_item *w)
 	if (all_work_added && todo_done == todo_end)
 		pthread_cond_signal(&cond_result);
 
-	pthread_mutex_unlock(&grep_lock);
+	grep_unlock();
 }
 
 static void *run(void *arg)
@@ -181,11 +186,7 @@ static void *run(void *arg)
 		opt->output_priv = w;
 		if (w->type == WORK_SHA1) {
 			unsigned long sz;
-			void* data;
-
-			pthread_mutex_lock(&read_sha1_lock);
-			data = load_sha1(w->identifier, &sz, w->name);
-			pthread_mutex_unlock(&read_sha1_lock);
+			void* data = load_sha1(w->identifier, &sz, w->name);
 
 			if (data) {
 				hit |= grep_buffer(opt, w->name, data, sz);
@@ -218,8 +219,8 @@ static void start_threads(struct grep_opt *opt)
 {
 	int i;
 
-	pthread_mutex_init(&grep_lock, NULL);
-	pthread_mutex_init(&read_sha1_lock, NULL);
+	pthread_mutex_init(&grep_mutex, NULL);
+	pthread_mutex_init(&read_sha1_mutex, NULL);
 	pthread_cond_init(&cond_add, NULL);
 	pthread_cond_init(&cond_write, NULL);
 	pthread_cond_init(&cond_result, NULL);
@@ -246,18 +247,18 @@ static int wait_all(void)
 	int hit = 0;
 	int i;
 
-	pthread_mutex_lock(&grep_lock);
+	grep_lock();
 	all_work_added = 1;
 
 	/* Wait until all work is done. */
 	while (todo_done != todo_end)
-		pthread_cond_wait(&cond_result, &grep_lock);
+		pthread_cond_wait(&cond_result, &grep_mutex);
 
 	/* Wake up all the consumer threads so they can see that there
 	 * is no more work to do.
 	 */
 	pthread_cond_broadcast(&cond_add);
-	pthread_mutex_unlock(&grep_lock);
+	grep_unlock();
 
 	for (i = 0; i < ARRAY_SIZE(threads); i++) {
 		void *h;
@@ -265,8 +266,8 @@ static int wait_all(void)
 		hit |= (int) (intptr_t) h;
 	}
 
-	pthread_mutex_destroy(&grep_lock);
-	pthread_mutex_destroy(&read_sha1_lock);
+	pthread_mutex_destroy(&grep_mutex);
+	pthread_mutex_destroy(&read_sha1_mutex);
 	pthread_cond_destroy(&cond_add);
 	pthread_cond_destroy(&cond_write);
 	pthread_cond_destroy(&cond_result);
@@ -274,6 +275,9 @@ static int wait_all(void)
 	return hit;
 }
 #else /* !NO_PTHREADS */
+#define read_sha1_lock()
+#define read_sha1_unlock()
+
 static int wait_all(void)
 {
 	return 0;
@@ -407,7 +411,11 @@ static void *load_sha1(const unsigned char *sha1, unsigned long *size,
 		       const char *name)
 {
 	enum object_type type;
-	char *data = read_sha1_file(sha1, &type, size);
+	char *data;
+
+	read_sha1_lock();
+	data = read_sha1_file(sha1, &type, size);
+	read_sha1_unlock();
 
 	if (!data)
 		error("'%s': unable to read %s", name, sha1_to_hex(sha1));
@@ -596,7 +604,10 @@ static int grep_tree(struct grep_opt *opt, const char **paths,
 			void *data;
 			unsigned long size;
 
+			read_sha1_lock();
 			data = read_sha1_file(entry.sha1, &type, &size);
+			read_sha1_unlock();
+
 			if (!data)
 				die("unable to read tree (%s)",
 				    sha1_to_hex(entry.sha1));

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-25 23:59 ` Linus Torvalds
@ 2010-01-26 12:10   ` Fredrik Kuivinen
  2010-01-26 15:28     ` Linus Torvalds
  0 siblings, 1 reply; 12+ messages in thread
From: Fredrik Kuivinen @ 2010-01-26 12:10 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List, Junio C Hamano, Johannes Sixt

On Tue, Jan 26, 2010 at 00:59, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> The profile for the threaded case says:
>
>    51.73%      git  libc-2.11.1.so                 [.] re_search_internal
>    11.47%      git  [kernel]                       [k] copy_user_generic_string
>     2.90%      git  libc-2.11.1.so                 [.] __strlen_sse2
>     2.66%      git  [kernel]                       [k] link_path_walk
>     2.55%      git  [kernel]                       [k] intel_pmu_enable_all
>     2.40%      git  [kernel]                       [k] __d_lookup
>     1.71%      git  libc-2.11.1.so                 [.] __GI___libc_malloc
>     1.55%      git  [kernel]                       [k] _raw_spin_lock
>     1.43%      git  [kernel]                       [k] sys_futex
>     1.30%      git  libc-2.11.1.so                 [.] __cfree
>     1.28%      git  [kernel]                       [k] intel_pmu_disable_all
>     1.25%      git  libc-2.11.1.so                 [.] __GI_memchr
>     1.14%      git  libc-2.11.1.so                 [.] _int_malloc
>     1.02%      git  [kernel]                       [k] effective_load
>
> and the only thing that makes me go "eh?" there is the strlen(). Why is
> that so hot?  But locking doesn't seem to be the biggest issue, and in
> general I think this is all pretty good. The 'effective_load' thing is the
> scheduler, so there's certainly some context switching going on, probably
> still due to excessive synchronization, but it's equally clear that that
> is certainly not a dominant factor.

I see the strlen in my profiles as well, but I haven't figured out
where it comes from. I get the following:

    51.16%  git-grep  /lib/tls/i686/cmov/libc-2.10.1.so
[.] 0x000000000b14c6
    10.12%  git-grep  /lib/tls/i686/cmov/libc-2.10.1.so
[.] __GI_strlen
     9.27%  git-grep  [kernel]
[k] __copy_to_user_ll
     4.68%  git-grep  /lib/tls/i686/cmov/libc-2.10.1.so
[.] __memchr
     1.72%  git-grep  [kernel]
[k] __d_lookup
     1.18%  git-grep  /lib/i686/cmov/libcrypto.so.0.9.8
[.] sha1_block_asm_data_order
     1.11%  git-grep  [kernel]
[k] __ticket_spin_lock
     0.84%  git-grep  [vdso]
[.] 0x00000000b6c422

If I use perf record -g I get

    10.39%  git-grep  /lib/tls/i686/cmov/libc-2.10.1.so
[.] __GI_strlen
                |
                |--99.05%-- look_ahead
                |          grep_buffer_1
                |          grep_buffer
                |          run
                |          start_thread
                |          __clone
                |
                |--0.64%-- grep_file
                |          grep_cache
                |          cmd_grep
                |          run_builtin
                |          handle_internal_command
                |          main
                |          __libc_start_main
                |          0x804ae81
                 --0.32%-- [...]

This doesn't make much sense to me as look_ahead doesn't call strlen
(I compiled git with -O0 to avoid any issues with inlined functions).
But I haven't used perf so much, so maybe I'm reading the output the
wrong way.

> One potentially interesting data point is that if I make NR_THREADS be 16,
> performance goes down, and I get more locking overhead. So NR_THREADS of 8
> works well on this machine.

Interesting. I get the best results with 8 threads as well, but I only
have two cores.

> One worry is, of course, whether all regex() implementations are
> thread-safe. Maybe there are broken libraries that have hidden global
> state in them?

That would certainly be a problem. A quick google search didn't show
any known bugs. Of course, this doesn't tell us anything about the
unknown ones.

- Fredrik

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-26 12:10   ` Fredrik Kuivinen
@ 2010-01-26 15:28     ` Linus Torvalds
  2010-01-26 16:30       ` Benjamin Kramer
  0 siblings, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2010-01-26 15:28 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: Git Mailing List, Junio C Hamano, Johannes Sixt


On Tue, 26 Jan 2010, Fredrik Kuivinen wrote:
> 
> I see the strlen in my profiles as well, but I haven't figured out
> where it comes from.

Looks like this in gdb:

#0  0x0000003e3687f2e0 in __strlen_sse2 () from /lib64/libc.so.6
#1  0x0000003e368c04c5 in regexec@@GLIBC_2.3.4 () from /lib64/libc.so.6
#2  0x000000000047677a in look_ahead (opt=<value optimized out>, 
    name=<value optimized out>, buf=<value optimized out>, 
    size=<value optimized out>, collect_hits=<value optimized out>)
    at grep.c:679
#3  grep_buffer_1 (opt=<value optimized out>, name=<value optimized out>, 
    buf=<value optimized out>, size=<value optimized out>, 
    collect_hits=<value optimized out>) at grep.c:790

so it's sadly internal to regex. It would be nice if there was a 
non-string interface to regexec (ie a "buffer + length" instead of a 
NUL-terminated string).

> If I use perf record -g I get

I suspect that libc isn't compiled with frame pointers, so call chains end 
up being unreliable.

		Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-26 15:28     ` Linus Torvalds
@ 2010-01-26 16:30       ` Benjamin Kramer
  2010-01-26 16:44         ` Linus Torvalds
  0 siblings, 1 reply; 12+ messages in thread
From: Benjamin Kramer @ 2010-01-26 16:30 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Fredrik Kuivinen, Git Mailing List, Junio C Hamano, Johannes Sixt

BSD and glibc have an extension to regexec which takes a buffer + length pair
instead of a NUL-terminated string. Since we already have the length
computed this can save us a strlen call.
---

On 26.01.10 16:28, Linus Torvalds wrote:
> so it's sadly internal to regex. It would be nice if there was a 
> non-string interface to regexec (ie a "buffer + length" instead of a 
> NUL-terminated string).

BSD and glibc have an "REG_STARTEND" flag to do that. I made a small
PoC patch to use it if it's available but it didn't give any significant
speedup on my system.



 grep.c |    9 ++++++++-
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/grep.c b/grep.c
index d281a02..60cce46 100644
--- a/grep.c
+++ b/grep.c
@@ -675,8 +675,15 @@ static int look_ahead(struct grep_opt *opt,
 
 		if (p->fixed)
 			hit = !fixmatch(p->pattern, bol, p->ignore_case, &m);
-		else
+		else {
+#ifdef REG_STARTEND
+			m.rm_so = 0;
+			m.rm_eo = *left_p;
+			hit = !regexec(&p->regexp, bol, 1, &m, REG_STARTEND);
+#else
 			hit = !regexec(&p->regexp, bol, 1, &m, 0);
+#endif
+		}
 		if (!hit || m.rm_so < 0 || m.rm_eo < 0)
 			continue;
 		if (earliest < 0 || m.rm_so < earliest)
--
1.7.0.rc0.12.gc33c3

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-26 16:30       ` Benjamin Kramer
@ 2010-01-26 16:44         ` Linus Torvalds
  2010-01-26 16:56           ` Linus Torvalds
  0 siblings, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2010-01-26 16:44 UTC (permalink / raw)
  To: Benjamin Kramer
  Cc: Fredrik Kuivinen, Git Mailing List, Junio C Hamano, Johannes Sixt



On Tue, 26 Jan 2010, Benjamin Kramer wrote:
> 
> BSD and glibc have an "REG_STARTEND" flag to do that. I made a small
> PoC patch to use it if it's available but it didn't give any significant
> speedup on my system.

Goodie.  It's noticeable for me. This is what I reported earlier:

> > $ /usr/bin/time git grep void
> 
> Before:
> 
>         real    0m1.144s
>         user    0m0.988s
>         sys     0m0.148s
> 
> After:
>         real    0m0.290s
>         user    0m1.732s
>         sys     0m0.232s

and with your patch I get

	real	0m0.239s
	user	0m1.392s
	sys	0m0.276s

and the profile shows no strlen in it:

    57.12%      git  libc-2.11.1.so                 [.] re_search_internal
     5.59%      git  [kernel]                       [k] copy_user_generic_string
     4.09%      git  [kernel]                       [k] _raw_spin_lock
     2.57%      git  [kernel]                       [k] intel_pmu_enable_all
     2.46%      git  [kernel]                       [k] __d_lookup
     1.94%      git  libc-2.11.1.so                 [.] re_string_reconstruct
     1.87%      git  [kernel]                       [k] kmem_cache_alloc
     1.68%      git  libc-2.11.1.so                 [.] _int_free
     1.53%      git  [kernel]                       [k] find_get_page
     1.43%      git  [kernel]                       [k] update_curr
     1.27%      git  libc-2.11.1.so                 [.] __GI___libc_malloc
     1.17%      git  [kernel]                       [k] _atomic_dec_and_lock
     1.00%      git  libc-2.11.1.so                 [.] __GI_memcpy

Side note: the tailing end of the profiles aren't very stable, probably 
because the grep executes so quickly and in so many threads, so the 
functions in the one-percent range will move up and down the list 
depending on just exactly where we happened to get profile hits. 
Similarly, the raw_spin_lock numbers vary.

But the big picture is stable, and that 57% number (and the nonlock 
copy_user_generic_string) is consistent. And your patch definitely helped 
both actual performance and is visible in the profile: re_search_internal 
went from ~52% to ~57%.

So ack on that patch. Looks like a good thing to do, and with the #ifdef, 
it looks like it should just automatically DTRT based on regexec 
implementation.

		Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-26 16:44         ` Linus Torvalds
@ 2010-01-26 16:56           ` Linus Torvalds
  2010-01-26 17:19             ` Mike Hommey
  2010-01-26 17:48             ` [PATCH] grep: use REG_STARTEND (if available) to speed up regexec Benjamin Kramer
  0 siblings, 2 replies; 12+ messages in thread
From: Linus Torvalds @ 2010-01-26 16:56 UTC (permalink / raw)
  To: Benjamin Kramer
  Cc: Fredrik Kuivinen, Git Mailing List, Junio C Hamano, Johannes Sixt



On Tue, 26 Jan 2010, Linus Torvalds wrote:
>
> Goodie.  It's noticeable for me. This is what I reported earlier:
> 
> > > $ /usr/bin/time git grep void
> > 
> > Before:
> > 
> >         real    0m1.144s
> > 
> > After:
> >         real    0m0.290s
> 
> and with your patch I get
> 
> 	real	0m0.239s

Btw, I have to also say that this whole performance reduction _feels_ 
good. It's very noticeable in normal use. "git grep" was always fast (it's 
been getting a bit slower as the kernel has grown, though), but it used to 
be still a noticeable pause.

Now it just -feels- very immediate. That quarter second is short enough 
that I can see the pause, but I don't feel it. It's like the results just 
"are there" rather than get searched for.

But perhaps even more importantly, it's also noticeable for me in the 
cold-cache case. IOW, after I do

	echo 3 > /proc/sys/vm/drop_caches

the threaded grep is able to do much better at reading the disk:

Before threading:

	[torvalds@nehalem linux]$ time git grep void > /dev/null 

	real	0m11.745s
	user	0m2.380s
	sys	0m1.200s

After:

	[torvalds@nehalem linux]$ time ~/git/git grep void > /dev/null 

	real	0m3.710s
	user	0m2.564s
	sys	0m2.076s

although it is worth noting that that machine has an Intel SSD, which is 
why it gets sped up so much by parallel IO (there's no seek penalty, and 
it is able to read multiple channels in parallel, so this gives much 
better IO patterns for it - with rotational media the numbers might be 
very different).

IOW, the whole threaded grep thing is a 4x performance improvement in 
hot-cache, and a 3x improvement in cold-cache.

Major good mojo.

		Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-26 16:56           ` Linus Torvalds
@ 2010-01-26 17:19             ` Mike Hommey
  2010-01-26 17:48             ` [PATCH] grep: use REG_STARTEND (if available) to speed up regexec Benjamin Kramer
  1 sibling, 0 replies; 12+ messages in thread
From: Mike Hommey @ 2010-01-26 17:19 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Benjamin Kramer, Fredrik Kuivinen, Git Mailing List,
	Junio C Hamano, Johannes Sixt

On Tue, Jan 26, 2010 at 08:56:50AM -0800, Linus Torvalds wrote:
<snip>
> although it is worth noting that that machine has an Intel SSD, which is 
> why it gets sped up so much by parallel IO (there's no seek penalty, and 
> it is able to read multiple channels in parallel, so this gives much 
> better IO patterns for it - with rotational media the numbers might be 
> very different).

For rotational disks, using FIEMAP to get the position of the files on
disk to reorder how we read them could help.

Mike

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] Threaded grep
  2010-01-26 11:43   ` Fredrik Kuivinen
@ 2010-01-26 17:21     ` Junio C Hamano
  0 siblings, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2010-01-26 17:21 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: git, Linus Torvalds, Johannes Sixt

Fredrik Kuivinen <frekui@gmail.com> writes:

> I just noticed that I forgot to take the read_sha1 lock in
> grep_tree. The result is a race condition in read_sha1_file.
>
> Here is a patch to fix this. It applies on top of ae35c68 (Threaded
> grep). Could you please squash it in?

Thanks; will do.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] grep: use REG_STARTEND (if available) to speed up regexec
  2010-01-26 16:56           ` Linus Torvalds
  2010-01-26 17:19             ` Mike Hommey
@ 2010-01-26 17:48             ` Benjamin Kramer
  1 sibling, 0 replies; 12+ messages in thread
From: Benjamin Kramer @ 2010-01-26 17:48 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Fredrik Kuivinen, Git Mailing List, Linus Torvalds, Johannes Sixt

BSD and glibc have an extension to regexec which takes a buffer + length pair
instead of a NUL-terminated string. Since we already have the length computed
this can save us a strlen call inside regexec.

Signed-off-by: Benjamin Kramer <benny.kra@googlemail.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
---

Resend of my previous patch with SOB and correct title.

 grep.c |    9 ++++++++-
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/grep.c b/grep.c
index 8e1f7de..452c2cb 100644
--- a/grep.c
+++ b/grep.c
@@ -640,8 +640,15 @@ static int look_ahead(struct grep_opt *opt,
 
 		if (p->fixed)
 			hit = !fixmatch(p->pattern, bol, p->ignore_case, &m);
-		else
+		else {
+#ifdef REG_STARTEND
+			m.rm_so = 0;
+			m.rm_eo = *left_p;
+			hit = !regexec(&p->regexp, bol, 1, &m, REG_STARTEND);
+#else
 			hit = !regexec(&p->regexp, bol, 1, &m, 0);
+#endif
+		}
 		if (!hit || m.rm_so < 0 || m.rm_eo < 0)
 			continue;
 		if (earliest < 0 || m.rm_so < earliest)
-- 
1.7.0.rc0.12.gc33c3

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2010-01-26 17:48 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-25 22:51 [PATCH v4] Threaded grep Fredrik Kuivinen
2010-01-25 23:59 ` Linus Torvalds
2010-01-26 12:10   ` Fredrik Kuivinen
2010-01-26 15:28     ` Linus Torvalds
2010-01-26 16:30       ` Benjamin Kramer
2010-01-26 16:44         ` Linus Torvalds
2010-01-26 16:56           ` Linus Torvalds
2010-01-26 17:19             ` Mike Hommey
2010-01-26 17:48             ` [PATCH] grep: use REG_STARTEND (if available) to speed up regexec Benjamin Kramer
2010-01-26  1:20 ` [PATCH v4] Threaded grep Junio C Hamano
2010-01-26 11:43   ` Fredrik Kuivinen
2010-01-26 17:21     ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.