All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/5] builtin/grep.c: fix a tiny logic flaw
@ 2019-02-12 22:26 Rasmus Villemoes
  2019-02-12 22:26 ` [RFC PATCH 1/5] builtin/grep.c: change todo_* variables to unsigned Rasmus Villemoes
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Rasmus Villemoes @ 2019-02-12 22:26 UTC (permalink / raw)
  To: git; +Cc: Rasmus Villemoes

Background: I noticed that the condition in add_work() for when the
producer could add a new item was oddly different from the other
conditions on the todo_* bookkeeping variables - namely, in the other
cases we want todo_a != todo_b, whereas in add_work the condition is
todo_a+1!=todo_b. Another hint that something is slightly off is that
the code would break down if TODO_SIZE was set to 1.

The practical effect is negligible, and fixing it seems to be a bit
involved, hence probably not worth the churn - and if that's the
verdict, I suggest adding a comment in add_work() for future readers
and/or people who copy the producer/consumer logic to their own code.

Rasmus Villemoes (5):
  builtin/grep.c: change todo_* variables to unsigned
  builtin/grep.c: refactor loop in work_done() slightly
  builtin/grep.c: add shorthand for &todo[todo_end] in add_work()
  builtin/grep.c: add todo_item helper
  builtin/grep.c: fix fence-post error in add_work()

 builtin/grep.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC PATCH 1/5] builtin/grep.c: change todo_* variables to unsigned
  2019-02-12 22:26 [RFC PATCH 0/5] builtin/grep.c: fix a tiny logic flaw Rasmus Villemoes
@ 2019-02-12 22:26 ` Rasmus Villemoes
  2019-02-12 22:26 ` [RFC PATCH 2/5] builtin/grep.c: refactor loop in work_done() slightly Rasmus Villemoes
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Rasmus Villemoes @ 2019-02-12 22:26 UTC (permalink / raw)
  To: git; +Cc: Rasmus Villemoes

In preparation for subsequent patches that make todo_* free-running
instead of reducing them mod TODO_SIZE, change their type to unsigned
to avoid undefined behaviour in case anybody ever greps more than 2
billion files.

Signed-off-by: Rasmus Villemoes <rv@rasmusvillemoes.dk>
---
 builtin/grep.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 580fd38f41..6c1e90d43b 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -58,9 +58,9 @@ struct work_item {
  */
 #define TODO_SIZE 128
 static struct work_item todo[TODO_SIZE];
-static int todo_start;
-static int todo_end;
-static int todo_done;
+static unsigned int todo_start;
+static unsigned int todo_end;
+static unsigned int todo_done;
 
 /* Has all work items been added? */
 static int all_work_added;
@@ -132,7 +132,7 @@ static struct work_item *get_work(void)
 
 static void work_done(struct work_item *w)
 {
-	int old_done;
+	unsigned int old_done;
 
 	grep_lock();
 	w->done = 1;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [RFC PATCH 2/5] builtin/grep.c: refactor loop in work_done() slightly
  2019-02-12 22:26 [RFC PATCH 0/5] builtin/grep.c: fix a tiny logic flaw Rasmus Villemoes
  2019-02-12 22:26 ` [RFC PATCH 1/5] builtin/grep.c: change todo_* variables to unsigned Rasmus Villemoes
@ 2019-02-12 22:26 ` Rasmus Villemoes
  2019-02-12 22:26 ` [RFC PATCH 3/5] builtin/grep.c: add shorthand for &todo[todo_end] in add_work() Rasmus Villemoes
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Rasmus Villemoes @ 2019-02-12 22:26 UTC (permalink / raw)
  To: git; +Cc: Rasmus Villemoes

As preparation for changing all accesses to the todo array to use a
helper function, do the .done check in the loop body.

Signed-off-by: Rasmus Villemoes <rv@rasmusvillemoes.dk>
---
 builtin/grep.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 6c1e90d43b..211ae54222 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -137,9 +137,11 @@ static void work_done(struct work_item *w)
 	grep_lock();
 	w->done = 1;
 	old_done = todo_done;
-	for(; todo[todo_done].done && todo_done != todo_start;
+	for(; todo_done != todo_start;
 	    todo_done = (todo_done+1) % ARRAY_SIZE(todo)) {
 		w = &todo[todo_done];
+		if (!w->done)
+			break;
 		if (w->out.len) {
 			const char *p = w->out.buf;
 			size_t len = w->out.len;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [RFC PATCH 3/5] builtin/grep.c: add shorthand for &todo[todo_end] in add_work()
  2019-02-12 22:26 [RFC PATCH 0/5] builtin/grep.c: fix a tiny logic flaw Rasmus Villemoes
  2019-02-12 22:26 ` [RFC PATCH 1/5] builtin/grep.c: change todo_* variables to unsigned Rasmus Villemoes
  2019-02-12 22:26 ` [RFC PATCH 2/5] builtin/grep.c: refactor loop in work_done() slightly Rasmus Villemoes
@ 2019-02-12 22:26 ` Rasmus Villemoes
  2019-02-12 22:26 ` [RFC PATCH 4/5] builtin/grep.c: add todo_item helper Rasmus Villemoes
  2019-02-12 22:26 ` [RFC PATCH 5/5] builtin/grep.c: fix fence-post error in add_work() Rasmus Villemoes
  4 siblings, 0 replies; 6+ messages in thread
From: Rasmus Villemoes @ 2019-02-12 22:26 UTC (permalink / raw)
  To: git; +Cc: Rasmus Villemoes

Signed-off-by: Rasmus Villemoes <rv@rasmusvillemoes.dk>
---
 builtin/grep.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 211ae54222..92b9e6198d 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -93,18 +93,20 @@ static int skip_first_line;
 
 static void add_work(struct grep_opt *opt, const struct grep_source *gs)
 {
+	struct work_item *w;
+
 	grep_lock();
 
 	while ((todo_end+1) % ARRAY_SIZE(todo) == todo_done) {
 		pthread_cond_wait(&cond_write, &grep_mutex);
 	}
 
-	todo[todo_end].source = *gs;
+	w = &todo[todo_end];
+	w->source = *gs;
 	if (opt->binary != GREP_BINARY_TEXT)
-		grep_source_load_driver(&todo[todo_end].source,
-					opt->repo->index);
-	todo[todo_end].done = 0;
-	strbuf_reset(&todo[todo_end].out);
+		grep_source_load_driver(&w->source, opt->repo->index);
+	w->done = 0;
+	strbuf_reset(&w->out);
 	todo_end = (todo_end + 1) % ARRAY_SIZE(todo);
 
 	pthread_cond_signal(&cond_add);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [RFC PATCH 4/5] builtin/grep.c: add todo_item helper
  2019-02-12 22:26 [RFC PATCH 0/5] builtin/grep.c: fix a tiny logic flaw Rasmus Villemoes
                   ` (2 preceding siblings ...)
  2019-02-12 22:26 ` [RFC PATCH 3/5] builtin/grep.c: add shorthand for &todo[todo_end] in add_work() Rasmus Villemoes
@ 2019-02-12 22:26 ` Rasmus Villemoes
  2019-02-12 22:26 ` [RFC PATCH 5/5] builtin/grep.c: fix fence-post error in add_work() Rasmus Villemoes
  4 siblings, 0 replies; 6+ messages in thread
From: Rasmus Villemoes @ 2019-02-12 22:26 UTC (permalink / raw)
  To: git; +Cc: Rasmus Villemoes

Use a helper for indexing into the todo array with any of the todo_*
variables, in preparation for not keeping those reduced mod TODO_SIZE.

Signed-off-by: Rasmus Villemoes <rv@rasmusvillemoes.dk>
---
 builtin/grep.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 92b9e6198d..35ed79b0dd 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -62,6 +62,11 @@ static unsigned int todo_start;
 static unsigned int todo_end;
 static unsigned int todo_done;
 
+static inline struct work_item *todo_item(unsigned int idx)
+{
+	return &todo[idx % ARRAY_SIZE(todo)];
+}
+
 /* Has all work items been added? */
 static int all_work_added;
 
@@ -101,7 +106,7 @@ static void add_work(struct grep_opt *opt, const struct grep_source *gs)
 		pthread_cond_wait(&cond_write, &grep_mutex);
 	}
 
-	w = &todo[todo_end];
+	w = todo_item(todo_end);
 	w->source = *gs;
 	if (opt->binary != GREP_BINARY_TEXT)
 		grep_source_load_driver(&w->source, opt->repo->index);
@@ -125,7 +130,7 @@ static struct work_item *get_work(void)
 	if (todo_start == todo_end && all_work_added) {
 		ret = NULL;
 	} else {
-		ret = &todo[todo_start];
+		ret = todo_item(todo_start);
 		todo_start = (todo_start + 1) % ARRAY_SIZE(todo);
 	}
 	grep_unlock();
@@ -141,7 +146,7 @@ static void work_done(struct work_item *w)
 	old_done = todo_done;
 	for(; todo_done != todo_start;
 	    todo_done = (todo_done+1) % ARRAY_SIZE(todo)) {
-		w = &todo[todo_done];
+		w = todo_item(todo_done);
 		if (!w->done)
 			break;
 		if (w->out.len) {
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [RFC PATCH 5/5] builtin/grep.c: fix fence-post error in add_work()
  2019-02-12 22:26 [RFC PATCH 0/5] builtin/grep.c: fix a tiny logic flaw Rasmus Villemoes
                   ` (3 preceding siblings ...)
  2019-02-12 22:26 ` [RFC PATCH 4/5] builtin/grep.c: add todo_item helper Rasmus Villemoes
@ 2019-02-12 22:26 ` Rasmus Villemoes
  4 siblings, 0 replies; 6+ messages in thread
From: Rasmus Villemoes @ 2019-02-12 22:26 UTC (permalink / raw)
  To: git; +Cc: Rasmus Villemoes

We're only using 127 of the slots in todo[], which can easily be seen
by adding this hack

--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -93,6 +93,8 @@ static int skip_first_line;

 static void add_work(struct grep_opt *opt, const struct grep_source *gs)
 {
+	static int count;
+
 	grep_lock();

 	while ((todo_end+1) % ARRAY_SIZE(todo) == todo_done) {
@@ -108,6 +110,7 @@ static void add_work(struct grep_opt *opt, const struct grep_source *gs)
 	todo_end = (todo_end + 1) % ARRAY_SIZE(todo);

 	pthread_cond_signal(&cond_add);
+	fprintf(stderr, "added work item %3d\n", ++count);
 	grep_unlock();
 }

@@ -173,6 +176,7 @@ static void *run(void *arg)
 	int hit = 0;
 	struct grep_opt *opt = arg;

+	sleep(2);
 	while (1) {
 		struct work_item *w = get_work();
 		if (!w)

Of course, just removing the +1 after todo_end would be instant
deadlock, since nothing would ever change todo_end or todo_done from
0.

The problem boils down to the fact that arithmetic mod 128 cannot
capture the 129 possible values of end-done (which
is (end-start)+(start-done), i.e. the total number of items waiting to
be picked up or that have been picked up by a worker).

To fix this, don't keep the todo_* variables reduced mod 128, and only
do that when using them as indices into todo[]. Then we can rewrite
the condition in add_work() to the proper one: Wait until todo_end is
not a full round ahead of todo_done.

Signed-off-by: Rasmus Villemoes <rv@rasmusvillemoes.dk>
---
 builtin/grep.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 35ed79b0dd..ce158cabbb 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -102,7 +102,7 @@ static void add_work(struct grep_opt *opt, const struct grep_source *gs)
 
 	grep_lock();
 
-	while ((todo_end+1) % ARRAY_SIZE(todo) == todo_done) {
+	while (todo_end - todo_done == ARRAY_SIZE(todo)) {
 		pthread_cond_wait(&cond_write, &grep_mutex);
 	}
 
@@ -112,7 +112,7 @@ static void add_work(struct grep_opt *opt, const struct grep_source *gs)
 		grep_source_load_driver(&w->source, opt->repo->index);
 	w->done = 0;
 	strbuf_reset(&w->out);
-	todo_end = (todo_end + 1) % ARRAY_SIZE(todo);
+	todo_end += 1;
 
 	pthread_cond_signal(&cond_add);
 	grep_unlock();
@@ -131,7 +131,7 @@ static struct work_item *get_work(void)
 		ret = NULL;
 	} else {
 		ret = todo_item(todo_start);
-		todo_start = (todo_start + 1) % ARRAY_SIZE(todo);
+		todo_start += 1;
 	}
 	grep_unlock();
 	return ret;
@@ -144,8 +144,7 @@ static void work_done(struct work_item *w)
 	grep_lock();
 	w->done = 1;
 	old_done = todo_done;
-	for(; todo_done != todo_start;
-	    todo_done = (todo_done+1) % ARRAY_SIZE(todo)) {
+	for(; todo_done != todo_start; todo_done += 1) {
 		w = todo_item(todo_done);
 		if (!w->done)
 			break;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-02-12 22:27 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-12 22:26 [RFC PATCH 0/5] builtin/grep.c: fix a tiny logic flaw Rasmus Villemoes
2019-02-12 22:26 ` [RFC PATCH 1/5] builtin/grep.c: change todo_* variables to unsigned Rasmus Villemoes
2019-02-12 22:26 ` [RFC PATCH 2/5] builtin/grep.c: refactor loop in work_done() slightly Rasmus Villemoes
2019-02-12 22:26 ` [RFC PATCH 3/5] builtin/grep.c: add shorthand for &todo[todo_end] in add_work() Rasmus Villemoes
2019-02-12 22:26 ` [RFC PATCH 4/5] builtin/grep.c: add todo_item helper Rasmus Villemoes
2019-02-12 22:26 ` [RFC PATCH 5/5] builtin/grep.c: fix fence-post error in add_work() Rasmus Villemoes

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.