All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] introduce SWAP macro
@ 2017-01-28 21:13 René Scharfe
  2017-01-28 21:38 ` [PATCH 1/5] add " René Scharfe
                   ` (5 more replies)
  0 siblings, 6 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-28 21:13 UTC (permalink / raw)
  To: Git List; +Cc: Junio C Hamano, Johannes Schindelin

Exchanging the value of two variables requires declaring a temporary
variable and repeating their names.  The swap macro in apply.c
simplifies this for its callers without changing the compiled binary.
Polish this macro and export it, then use it throughout the code to
reduce repetition and hide the declaration of temporary variables.

  add SWAP macro
  apply: use SWAP macro
  use SWAP macro
  diff: use SWAP macro
  graph: use SWAP macro

 apply.c                       | 23 +++++++----------------
 builtin/diff-tree.c           |  4 +---
 builtin/diff.c                |  9 +++------
 contrib/coccinelle/swap.cocci | 28 ++++++++++++++++++++++++++++
 diff-no-index.c               |  6 ++----
 diff.c                        | 12 ++++--------
 git-compat-util.h             | 10 ++++++++++
 graph.c                       | 11 ++---------
 line-range.c                  |  3 +--
 merge-recursive.c             |  5 +----
 object.c                      |  4 +---
 pack-revindex.c               |  5 +----
 prio-queue.c                  |  4 +---
 strbuf.h                      |  4 +---
 14 files changed, 63 insertions(+), 65 deletions(-)
 create mode 100644 contrib/coccinelle/swap.cocci

-- 
2.11.0


^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH 1/5] add SWAP macro
  2017-01-28 21:13 [PATCH 0/5] introduce SWAP macro René Scharfe
@ 2017-01-28 21:38 ` René Scharfe
  2017-01-30 15:39   ` Johannes Schindelin
                     ` (2 more replies)
  2017-01-28 21:40 ` [PATCH 2/5] apply: use " René Scharfe
                   ` (4 subsequent siblings)
  5 siblings, 3 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-28 21:38 UTC (permalink / raw)
  To: Git List; +Cc: Junio C Hamano, Johannes Schindelin

Add a macro for exchanging the values of variables.  It allows users
to avoid repetition and takes care of the temporary variable for them.
It also makes sure that the storage sizes of its two parameters are the
same.  Its memcpy(1) calls are optimized away by current compilers.

Also add a conservative semantic patch for transforming only swaps of
variables of the same type.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 contrib/coccinelle/swap.cocci | 28 ++++++++++++++++++++++++++++
 git-compat-util.h             | 10 ++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 contrib/coccinelle/swap.cocci

diff --git a/contrib/coccinelle/swap.cocci b/contrib/coccinelle/swap.cocci
new file mode 100644
index 0000000000..a0934d1fda
--- /dev/null
+++ b/contrib/coccinelle/swap.cocci
@@ -0,0 +1,28 @@
+@ swap_with_declaration @
+type T;
+identifier tmp;
+T a, b;
+@@
+- T tmp = a;
++ T tmp;
++ tmp = a;
+  a = b;
+  b = tmp;
+
+@ swap @
+type T;
+T tmp, a, b;
+@@
+- tmp = a;
+- a = b;
+- b = tmp;
++ SWAP(a, b);
+
+@ extends swap @
+identifier unused;
+@@
+  {
+  ...
+- T unused;
+  ... when != unused
+  }
diff --git a/git-compat-util.h b/git-compat-util.h
index 87237b092b..66cd466eea 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char *suffix)
 	return strip_suffix(str, suffix, &len);
 }
 
+#define SWAP(a, b) do {						\
+	void *_swap_a_ptr = &(a);				\
+	void *_swap_b_ptr = &(b);				\
+	unsigned char _swap_buffer[sizeof(a)];			\
+	memcpy(_swap_buffer, _swap_a_ptr, sizeof(a));		\
+	memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +		\
+	       BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));	\
+	memcpy(_swap_b_ptr, _swap_buffer, sizeof(a));		\
+} while (0)
+
 #if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
 
 #ifndef PROT_READ
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 50+ messages in thread

* [PATCH 2/5] apply: use SWAP macro
  2017-01-28 21:13 [PATCH 0/5] introduce SWAP macro René Scharfe
  2017-01-28 21:38 ` [PATCH 1/5] add " René Scharfe
@ 2017-01-28 21:40 ` René Scharfe
  2017-01-28 21:40 ` [PATCH 3/5] " René Scharfe
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-28 21:40 UTC (permalink / raw)
  To: Git List; +Cc: Junio C Hamano, Johannes Schindelin

Use the exported macro SWAP instead of the file-scoped macro swap and
remove the latter's definition.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 apply.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/apply.c b/apply.c
index 2ed808d429..0e2caeab9c 100644
--- a/apply.c
+++ b/apply.c
@@ -2187,29 +2187,20 @@ static int parse_chunk(struct apply_state *state, char *buffer, unsigned long si
 	return offset + hdrsize + patchsize;
 }
 
-#define swap(a,b) myswap((a),(b),sizeof(a))
-
-#define myswap(a, b, size) do {		\
-	unsigned char mytmp[size];	\
-	memcpy(mytmp, &a, size);		\
-	memcpy(&a, &b, size);		\
-	memcpy(&b, mytmp, size);		\
-} while (0)
-
 static void reverse_patches(struct patch *p)
 {
 	for (; p; p = p->next) {
 		struct fragment *frag = p->fragments;
 
-		swap(p->new_name, p->old_name);
-		swap(p->new_mode, p->old_mode);
-		swap(p->is_new, p->is_delete);
-		swap(p->lines_added, p->lines_deleted);
-		swap(p->old_sha1_prefix, p->new_sha1_prefix);
+		SWAP(p->new_name, p->old_name);
+		SWAP(p->new_mode, p->old_mode);
+		SWAP(p->is_new, p->is_delete);
+		SWAP(p->lines_added, p->lines_deleted);
+		SWAP(p->old_sha1_prefix, p->new_sha1_prefix);
 
 		for (; frag; frag = frag->next) {
-			swap(frag->newpos, frag->oldpos);
-			swap(frag->newlines, frag->oldlines);
+			SWAP(frag->newpos, frag->oldpos);
+			SWAP(frag->newlines, frag->oldlines);
 		}
 	}
 }
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 50+ messages in thread

* [PATCH 3/5] use SWAP macro
  2017-01-28 21:13 [PATCH 0/5] introduce SWAP macro René Scharfe
  2017-01-28 21:38 ` [PATCH 1/5] add " René Scharfe
  2017-01-28 21:40 ` [PATCH 2/5] apply: use " René Scharfe
@ 2017-01-28 21:40 ` René Scharfe
  2017-01-30 16:03   ` Johannes Schindelin
  2017-01-30 22:22   ` Junio C Hamano
  2017-01-28 21:41 ` [PATCH 4/5] diff: " René Scharfe
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-28 21:40 UTC (permalink / raw)
  To: Git List; +Cc: Junio C Hamano, Johannes Schindelin

Apply the semantic patch swap.cocci to convert hand-rolled swaps to use
the macro SWAP.  The resulting code is shorter and easier to read, the
object code is effectively unchanged.

The patch for object.c had to be hand-edited in order to preserve the
comment before the change; Coccinelle tried to eat it for some reason.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 builtin/diff-tree.c | 4 +---
 builtin/diff.c      | 9 +++------
 diff-no-index.c     | 3 +--
 diff.c              | 8 +++-----
 graph.c             | 5 +----
 line-range.c        | 3 +--
 merge-recursive.c   | 5 +----
 object.c            | 4 +---
 pack-revindex.c     | 5 +----
 prio-queue.c        | 4 +---
 strbuf.h            | 4 +---
 11 files changed, 15 insertions(+), 39 deletions(-)

diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c
index 806dd7a885..8ce00480cd 100644
--- a/builtin/diff-tree.c
+++ b/builtin/diff-tree.c
@@ -147,9 +147,7 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix)
 		tree1 = opt->pending.objects[0].item;
 		tree2 = opt->pending.objects[1].item;
 		if (tree2->flags & UNINTERESTING) {
-			struct object *tmp = tree2;
-			tree2 = tree1;
-			tree1 = tmp;
+			SWAP(tree2, tree1);
 		}
 		diff_tree_sha1(tree1->oid.hash,
 			       tree2->oid.hash,
diff --git a/builtin/diff.c b/builtin/diff.c
index 7f91f6d226..3d64b85337 100644
--- a/builtin/diff.c
+++ b/builtin/diff.c
@@ -45,12 +45,9 @@ static void stuff_change(struct diff_options *opt,
 		return;
 
 	if (DIFF_OPT_TST(opt, REVERSE_DIFF)) {
-		unsigned tmp;
-		const unsigned char *tmp_u;
-		const char *tmp_c;
-		tmp = old_mode; old_mode = new_mode; new_mode = tmp;
-		tmp_u = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_u;
-		tmp_c = old_name; old_name = new_name; new_name = tmp_c;
+		SWAP(old_mode, new_mode);
+		SWAP(old_sha1, new_sha1);
+		SWAP(old_name, new_name);
 	}
 
 	if (opt->prefix &&
diff --git a/diff-no-index.c b/diff-no-index.c
index f420786039..1ae09894d7 100644
--- a/diff-no-index.c
+++ b/diff-no-index.c
@@ -186,9 +186,8 @@ static int queue_diff(struct diff_options *o,
 
 		if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
 			unsigned tmp;
-			const char *tmp_c;
 			tmp = mode1; mode1 = mode2; mode2 = tmp;
-			tmp_c = name1; name1 = name2; name2 = tmp_c;
+			SWAP(name1, name2);
 		}
 
 		d1 = noindex_filespec(name1, mode1);
diff --git a/diff.c b/diff.c
index f08cd8e033..9de1ba264f 100644
--- a/diff.c
+++ b/diff.c
@@ -5118,13 +5118,11 @@ void diff_change(struct diff_options *options,
 
 	if (DIFF_OPT_TST(options, REVERSE_DIFF)) {
 		unsigned tmp;
-		const unsigned char *tmp_c;
-		tmp = old_mode; old_mode = new_mode; new_mode = tmp;
-		tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
+		SWAP(old_mode, new_mode);
+		SWAP(old_sha1, new_sha1);
 		tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid;
 			new_sha1_valid = tmp;
-		tmp = old_dirty_submodule; old_dirty_submodule = new_dirty_submodule;
-			new_dirty_submodule = tmp;
+		SWAP(old_dirty_submodule, new_dirty_submodule);
 	}
 
 	if (options->prefix &&
diff --git a/graph.c b/graph.c
index d4e8519c90..4c722303d2 100644
--- a/graph.c
+++ b/graph.c
@@ -997,7 +997,6 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct strbuf
 static void graph_output_collapsing_line(struct git_graph *graph, struct strbuf *sb)
 {
 	int i;
-	int *tmp_mapping;
 	short used_horizontal = 0;
 	int horizontal_edge = -1;
 	int horizontal_edge_target = -1;
@@ -1132,9 +1131,7 @@ static void graph_output_collapsing_line(struct git_graph *graph, struct strbuf
 	/*
 	 * Swap mapping and new_mapping
 	 */
-	tmp_mapping = graph->mapping;
-	graph->mapping = graph->new_mapping;
-	graph->new_mapping = tmp_mapping;
+	SWAP(graph->mapping, graph->new_mapping);
 
 	/*
 	 * If graph->mapping indicates that all of the branch lines
diff --git a/line-range.c b/line-range.c
index de4e32f942..323399d16c 100644
--- a/line-range.c
+++ b/line-range.c
@@ -269,8 +269,7 @@ int parse_range_arg(const char *arg, nth_line_fn_t nth_line_cb,
 		return -1;
 
 	if (*begin && *end && *end < *begin) {
-		long tmp;
-		tmp = *end; *end = *begin; *begin = tmp;
+		SWAP(*end, *begin);
 	}
 
 	return 0;
diff --git a/merge-recursive.c b/merge-recursive.c
index d327209443..b7ff1ada3c 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -1390,14 +1390,11 @@ static int process_renames(struct merge_options *o,
 			branch1 = o->branch1;
 			branch2 = o->branch2;
 		} else {
-			struct rename *tmp;
 			renames1 = b_renames;
 			renames2Dst = &a_by_dst;
 			branch1 = o->branch2;
 			branch2 = o->branch1;
-			tmp = ren2;
-			ren2 = ren1;
-			ren1 = tmp;
+			SWAP(ren2, ren1);
 		}
 
 		if (ren1->processed)
diff --git a/object.c b/object.c
index 67d9a9e221..e680d881a4 100644
--- a/object.c
+++ b/object.c
@@ -104,9 +104,7 @@ struct object *lookup_object(const unsigned char *sha1)
 		 * that we do not need to walk the hash table the next
 		 * time we look for it.
 		 */
-		struct object *tmp = obj_hash[i];
-		obj_hash[i] = obj_hash[first];
-		obj_hash[first] = tmp;
+		SWAP(obj_hash[i], obj_hash[first]);
 	}
 	return obj;
 }
diff --git a/pack-revindex.c b/pack-revindex.c
index 6bc7c94033..1b7ebd8d7e 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -59,7 +59,6 @@ static void sort_revindex(struct revindex_entry *entries, unsigned n, off_t max)
 	 * be a no-op, as everybody lands in the same zero-th bucket.
 	 */
 	for (bits = 0; max >> bits; bits += DIGIT_SIZE) {
-		struct revindex_entry *swap;
 		unsigned i;
 
 		memset(pos, 0, BUCKETS * sizeof(*pos));
@@ -97,9 +96,7 @@ static void sort_revindex(struct revindex_entry *entries, unsigned n, off_t max)
 		 * Now "to" contains the most sorted list, so we swap "from" and
 		 * "to" for the next iteration.
 		 */
-		swap = from;
-		from = to;
-		to = swap;
+		SWAP(from, to);
 	}
 
 	/*
diff --git a/prio-queue.c b/prio-queue.c
index e4365b00d6..17252d231b 100644
--- a/prio-queue.c
+++ b/prio-queue.c
@@ -12,9 +12,7 @@ static inline int compare(struct prio_queue *queue, int i, int j)
 
 static inline void swap(struct prio_queue *queue, int i, int j)
 {
-	struct prio_queue_entry tmp = queue->array[i];
-	queue->array[i] = queue->array[j];
-	queue->array[j] = tmp;
+	SWAP(queue->array[i], queue->array[j]);
 }
 
 void prio_queue_reverse(struct prio_queue *queue)
diff --git a/strbuf.h b/strbuf.h
index 2262b12683..cf1b5409e7 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -109,9 +109,7 @@ extern void strbuf_attach(struct strbuf *, void *, size_t, size_t);
  */
 static inline void strbuf_swap(struct strbuf *a, struct strbuf *b)
 {
-	struct strbuf tmp = *a;
-	*a = *b;
-	*b = tmp;
+	SWAP(*a, *b);
 }
 
 
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 50+ messages in thread

* [PATCH 4/5] diff: use SWAP macro
  2017-01-28 21:13 [PATCH 0/5] introduce SWAP macro René Scharfe
                   ` (2 preceding siblings ...)
  2017-01-28 21:40 ` [PATCH 3/5] " René Scharfe
@ 2017-01-28 21:41 ` René Scharfe
  2017-01-30 16:04   ` Johannes Schindelin
  2017-01-30 22:22   ` Junio C Hamano
  2017-01-28 21:42 ` [PATCH 5/5] graph: " René Scharfe
  2017-01-30 23:20 ` [PATCH 0/5] introduce " Junio C Hamano
  5 siblings, 2 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-28 21:41 UTC (permalink / raw)
  To: Git List; +Cc: Junio C Hamano, Johannes Schindelin

Use the macro SWAP to exchange the value of pairs of variables instead
of swapping them manually with the help of a temporary variable.  The
resulting code is shorter and easier to read.

The two cases were not transformed by the semantic patch swap.cocci
because it's extra careful and handles only cases where the types of all
variables are the same -- and here we swap two ints and use an unsigned
temporary variable for that.  Nevertheless the conversion is safe, as
the value range is preserved with and without the patch.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 diff-no-index.c | 3 +--
 diff.c          | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/diff-no-index.c b/diff-no-index.c
index 1ae09894d7..df762fd0f7 100644
--- a/diff-no-index.c
+++ b/diff-no-index.c
@@ -185,8 +185,7 @@ static int queue_diff(struct diff_options *o,
 		struct diff_filespec *d1, *d2;
 
 		if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
-			unsigned tmp;
-			tmp = mode1; mode1 = mode2; mode2 = tmp;
+			SWAP(mode1, mode2);
 			SWAP(name1, name2);
 		}
 
diff --git a/diff.c b/diff.c
index 9de1ba264f..6c4f3f6b72 100644
--- a/diff.c
+++ b/diff.c
@@ -5117,11 +5117,9 @@ void diff_change(struct diff_options *options,
 		return;
 
 	if (DIFF_OPT_TST(options, REVERSE_DIFF)) {
-		unsigned tmp;
 		SWAP(old_mode, new_mode);
 		SWAP(old_sha1, new_sha1);
-		tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid;
-			new_sha1_valid = tmp;
+		SWAP(old_sha1_valid, new_sha1_valid);
 		SWAP(old_dirty_submodule, new_dirty_submodule);
 	}
 
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 50+ messages in thread

* [PATCH 5/5] graph: use SWAP macro
  2017-01-28 21:13 [PATCH 0/5] introduce SWAP macro René Scharfe
                   ` (3 preceding siblings ...)
  2017-01-28 21:41 ` [PATCH 4/5] diff: " René Scharfe
@ 2017-01-28 21:42 ` René Scharfe
  2017-01-30 16:16   ` Johannes Schindelin
  2017-01-30 23:20 ` [PATCH 0/5] introduce " Junio C Hamano
  5 siblings, 1 reply; 50+ messages in thread
From: René Scharfe @ 2017-01-28 21:42 UTC (permalink / raw)
  To: Git List; +Cc: Junio C Hamano, Johannes Schindelin

Exchange the values of graph->columns and graph->new_columns using the
macro SWAP instead of hand-rolled code.  The result is shorter and
easier to read.

This transformation was not done by the semantic patch swap.cocci
because there's an unrelated statement between the second and the last
step of the exchange, so it didn't match the expected pattern.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 graph.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/graph.c b/graph.c
index 4c722303d2..29b0f51dc5 100644
--- a/graph.c
+++ b/graph.c
@@ -463,7 +463,6 @@ static void graph_update_width(struct git_graph *graph,
 static void graph_update_columns(struct git_graph *graph)
 {
 	struct commit_list *parent;
-	struct column *tmp_columns;
 	int max_new_columns;
 	int mapping_idx;
 	int i, seen_this, is_commit_in_columns;
@@ -476,11 +475,8 @@ static void graph_update_columns(struct git_graph *graph)
 	 * We'll re-use the old columns array as storage to compute the new
 	 * columns list for the commit after this one.
 	 */
-	tmp_columns = graph->columns;
-	graph->columns = graph->new_columns;
+	SWAP(graph->columns, graph->new_columns);
 	graph->num_columns = graph->num_new_columns;
-
-	graph->new_columns = tmp_columns;
 	graph->num_new_columns = 0;
 
 	/*
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-28 21:38 ` [PATCH 1/5] add " René Scharfe
@ 2017-01-30 15:39   ` Johannes Schindelin
  2017-01-30 16:48     ` René Scharfe
  2017-01-30 16:01   ` Johannes Schindelin
  2017-04-24 11:29   ` Jeff King
  2 siblings, 1 reply; 50+ messages in thread
From: Johannes Schindelin @ 2017-01-30 15:39 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 543 bytes --]

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:

> Add a macro for exchanging the values of variables.  It allows users to
> avoid repetition and takes care of the temporary variable for them.  It
> also makes sure that the storage sizes of its two parameters are the
> same.  Its memcpy(1) calls are optimized away by current compilers.

How certain are we that "current compilers" optimize that away? And about
which "current compilers" are we talking? GCC? GCC 6? Clang? Clang 3?
Clang 3.8.*? Visual C 2005+?

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-28 21:38 ` [PATCH 1/5] add " René Scharfe
  2017-01-30 15:39   ` Johannes Schindelin
@ 2017-01-30 16:01   ` Johannes Schindelin
  2017-01-30 16:59     ` René Scharfe
  2017-01-30 18:41     ` Johannes Sixt
  2017-04-24 11:29   ` Jeff King
  2 siblings, 2 replies; 50+ messages in thread
From: Johannes Schindelin @ 2017-01-30 16:01 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 1634 bytes --]

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:

> diff --git a/git-compat-util.h b/git-compat-util.h
> index 87237b092b..66cd466eea 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char *suffix)
>  	return strip_suffix(str, suffix, &len);
>  }
>  
> +#define SWAP(a, b) do {						\
> +	void *_swap_a_ptr = &(a);				\
> +	void *_swap_b_ptr = &(b);				\
> +	unsigned char _swap_buffer[sizeof(a)];			\
> +	memcpy(_swap_buffer, _swap_a_ptr, sizeof(a));		\
> +	memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +		\
> +	       BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));	\
> +	memcpy(_swap_b_ptr, _swap_buffer, sizeof(a));		\
> +} while (0)
> +
>  #if defined(NO_MMAP) || defined(USE_WIN32_MMAP)

It may seem as a matter of taste, or maybe not: I prefer this without the
_swap_a_ptr (and I would also prefer not to use identifiers starting with
an underscore, as section 7.1.3 Reserved Identifiers of the C99 standard
says they are reserved):

+#define SWAP(a, b) do {						\
+	unsigned char swap_buffer_[sizeof(a)];			\
+	memcpy(swap_buffer_, &(a), sizeof(a));			\
+	memcpy(&(a), &(b), sizeof(a) +				\
+	       BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));	\
+	memcpy(&(b), swap_buffer_, sizeof(a));			\
+} while (0)

One idea to address the concern that not all C compilers people use to
build Git may optimize away those memcpy()s: we could also introduce a
SWAP_PRIMITIVE_TYPE (or SWAP2 or SIMPLE_SWAP or whatever) that accepts
only primitive types. But since __typeof__() is not portable...

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 3/5] use SWAP macro
  2017-01-28 21:40 ` [PATCH 3/5] " René Scharfe
@ 2017-01-30 16:03   ` Johannes Schindelin
  2017-01-30 17:18     ` René Scharfe
  2017-01-30 22:22   ` Junio C Hamano
  1 sibling, 1 reply; 50+ messages in thread
From: Johannes Schindelin @ 2017-01-30 16:03 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 661 bytes --]

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:

> diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c
> index 806dd7a885..8ce00480cd 100644
> --- a/builtin/diff-tree.c
> +++ b/builtin/diff-tree.c
> @@ -147,9 +147,7 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix)
>  		tree1 = opt->pending.objects[0].item;
>  		tree2 = opt->pending.objects[1].item;
>  		if (tree2->flags & UNINTERESTING) {
> -			struct object *tmp = tree2;
> -			tree2 = tree1;
> -			tree1 = tmp;
> +			SWAP(tree2, tree1);
>  		}

Is there a way to transform away the curly braces for blocks that become
single-line blocks, too?

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 4/5] diff: use SWAP macro
  2017-01-28 21:41 ` [PATCH 4/5] diff: " René Scharfe
@ 2017-01-30 16:04   ` Johannes Schindelin
  2017-01-30 17:26     ` René Scharfe
  2017-01-30 22:22   ` Junio C Hamano
  1 sibling, 1 reply; 50+ messages in thread
From: Johannes Schindelin @ 2017-01-30 16:04 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 780 bytes --]

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:

> Use the macro SWAP to exchange the value of pairs of variables instead
> of swapping them manually with the help of a temporary variable.  The
> resulting code is shorter and easier to read.
> 
> The two cases were not transformed by the semantic patch swap.cocci
> because it's extra careful and handles only cases where the types of all
> variables are the same -- and here we swap two ints and use an unsigned
> temporary variable for that.  Nevertheless the conversion is safe, as
> the value range is preserved with and without the patch.

One way to make this more obvious would be to change the type to signed
first, and then transform (which then would catch these cases too,
right?).

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 5/5] graph: use SWAP macro
  2017-01-28 21:42 ` [PATCH 5/5] graph: " René Scharfe
@ 2017-01-30 16:16   ` Johannes Schindelin
  2017-01-30 17:41     ` René Scharfe
  0 siblings, 1 reply; 50+ messages in thread
From: Johannes Schindelin @ 2017-01-30 16:16 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 709 bytes --]

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:

> Exchange the values of graph->columns and graph->new_columns using the
> macro SWAP instead of hand-rolled code.  The result is shorter and
> easier to read.
> 
> This transformation was not done by the semantic patch swap.cocci
> because there's an unrelated statement between the second and the last
> step of the exchange, so it didn't match the expected pattern.

Is it really true that Coccinelle cannot be told to look for a code block
that declares a variable that is then used *only* in the lines we want to
match and replace?

I never used the tool, and a quick web search did not clarify the picture,
either...

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 15:39   ` Johannes Schindelin
@ 2017-01-30 16:48     ` René Scharfe
  2017-01-30 20:48       ` Johannes Schindelin
  0 siblings, 1 reply; 50+ messages in thread
From: René Scharfe @ 2017-01-30 16:48 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Git List, Junio C Hamano

Am 30.01.2017 um 16:39 schrieb Johannes Schindelin:
> Hi René,
>
> On Sat, 28 Jan 2017, René Scharfe wrote:
>
>> Add a macro for exchanging the values of variables.  It allows users to
>> avoid repetition and takes care of the temporary variable for them.  It
>> also makes sure that the storage sizes of its two parameters are the
>> same.  Its memcpy(1) calls are optimized away by current compilers.
>
> How certain are we that "current compilers" optimize that away? And about
> which "current compilers" are we talking? GCC? GCC 6? Clang? Clang 3?
> Clang 3.8.*? Visual C 2005+?

GCC 4.4.7 and clang 3.2 on x86-64 compile the macro to the same object 
code as a manual swap ; see https://godbolt.org/g/F4b9M9.  Both were 
released 2012.  That website doesn't offer Visual C(++), but since you 
added the original macro in e5a94313c0 ("Teach git-apply about '-R'", 
2006) I guess we have that part covered. ;)

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 16:01   ` Johannes Schindelin
@ 2017-01-30 16:59     ` René Scharfe
  2017-01-30 18:41     ` Johannes Sixt
  1 sibling, 0 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-30 16:59 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Git List, Junio C Hamano

Am 30.01.2017 um 17:01 schrieb Johannes Schindelin:
> Hi René,
>
> On Sat, 28 Jan 2017, René Scharfe wrote:
>
>> diff --git a/git-compat-util.h b/git-compat-util.h
>> index 87237b092b..66cd466eea 100644
>> --- a/git-compat-util.h
>> +++ b/git-compat-util.h
>> @@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char *suffix)
>>  	return strip_suffix(str, suffix, &len);
>>  }
>>
>> +#define SWAP(a, b) do {						\
>> +	void *_swap_a_ptr = &(a);				\
>> +	void *_swap_b_ptr = &(b);				\
>> +	unsigned char _swap_buffer[sizeof(a)];			\
>> +	memcpy(_swap_buffer, _swap_a_ptr, sizeof(a));		\
>> +	memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +		\
>> +	       BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));	\
>> +	memcpy(_swap_b_ptr, _swap_buffer, sizeof(a));		\
>> +} while (0)
>> +
>>  #if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
>
> It may seem as a matter of taste, or maybe not: I prefer this without the
> _swap_a_ptr (and I would also prefer not to use identifiers starting with
> an underscore, as section 7.1.3 Reserved Identifiers of the C99 standard
> says they are reserved):
>
> +#define SWAP(a, b) do {						\
> +	unsigned char swap_buffer_[sizeof(a)];			\
> +	memcpy(swap_buffer_, &(a), sizeof(a));			\
> +	memcpy(&(a), &(b), sizeof(a) +				\
> +	       BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));	\
> +	memcpy(&(b), swap_buffer_, sizeof(a));			\
> +} while (0)

We can move the underscore to the end, but using a and b directly will 
give surprising results if the parameters have side effects.  E.g. if 
you want to swap the first two elements of two arrays you might want to 
do this:

	SWAP(*x++, *y++);
	SWAP(*x++, *y++);

And that would increment twice as much as one would guess and access 
unexpected elements.

> One idea to address the concern that not all C compilers people use to
> build Git may optimize away those memcpy()s: we could also introduce a
> SWAP_PRIMITIVE_TYPE (or SWAP2 or SIMPLE_SWAP or whatever) that accepts
> only primitive types. But since __typeof__() is not portable...

I wouldn't worry too much about such a solution before seeing that SWAP 
(even with memcpy(3) -- this function is probably optimized quite 
heavily on most platforms) causes an actual performance problem.

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 3/5] use SWAP macro
  2017-01-30 16:03   ` Johannes Schindelin
@ 2017-01-30 17:18     ` René Scharfe
  0 siblings, 0 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-30 17:18 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Git List, Junio C Hamano

Am 30.01.2017 um 17:03 schrieb Johannes Schindelin:
> Hi René,
>
> On Sat, 28 Jan 2017, René Scharfe wrote:
>
>> diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c
>> index 806dd7a885..8ce00480cd 100644
>> --- a/builtin/diff-tree.c
>> +++ b/builtin/diff-tree.c
>> @@ -147,9 +147,7 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix)
>>  		tree1 = opt->pending.objects[0].item;
>>  		tree2 = opt->pending.objects[1].item;
>>  		if (tree2->flags & UNINTERESTING) {
>> -			struct object *tmp = tree2;
>> -			tree2 = tree1;
>> -			tree1 = tmp;
>> +			SWAP(tree2, tree1);
>>  		}
>
> Is there a way to transform away the curly braces for blocks that become
> single-line blocks, too?

Interesting question.  I guess this can be done by using a Python script 
(see contrib/coccinelle/strbuf.cocci for an example).  I'll leave this 
as homework for readers interested in Coccinelle, at least for a while. :)

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 4/5] diff: use SWAP macro
  2017-01-30 16:04   ` Johannes Schindelin
@ 2017-01-30 17:26     ` René Scharfe
  0 siblings, 0 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-30 17:26 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Git List, Junio C Hamano

Am 30.01.2017 um 17:04 schrieb Johannes Schindelin:
> Hi René,
>
> On Sat, 28 Jan 2017, René Scharfe wrote:
>
>> Use the macro SWAP to exchange the value of pairs of variables instead
>> of swapping them manually with the help of a temporary variable.  The
>> resulting code is shorter and easier to read.
>>
>> The two cases were not transformed by the semantic patch swap.cocci
>> because it's extra careful and handles only cases where the types of all
>> variables are the same -- and here we swap two ints and use an unsigned
>> temporary variable for that.  Nevertheless the conversion is safe, as
>> the value range is preserved with and without the patch.
>
> One way to make this more obvious would be to change the type to signed
> first, and then transform (which then would catch these cases too,
> right?).

I'm not sure it would be more obvious, but it would certainly make the 
type change more explicit.  In diff-index.c we might even want to change 
the type of the swapped values from int to unsigned, which is more 
fitting for file modes.  In diff.c we'd need to add a separate variable, 
as tmp is shared with other (unsigned) swaps.

René


^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 5/5] graph: use SWAP macro
  2017-01-30 16:16   ` Johannes Schindelin
@ 2017-01-30 17:41     ` René Scharfe
  0 siblings, 0 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-30 17:41 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Git List, Junio C Hamano

Am 30.01.2017 um 17:16 schrieb Johannes Schindelin:
> Hi René,
>
> On Sat, 28 Jan 2017, René Scharfe wrote:
>
>> Exchange the values of graph->columns and graph->new_columns using the
>> macro SWAP instead of hand-rolled code.  The result is shorter and
>> easier to read.
>>
>> This transformation was not done by the semantic patch swap.cocci
>> because there's an unrelated statement between the second and the last
>> step of the exchange, so it didn't match the expected pattern.
>
> Is it really true that Coccinelle cannot be told to look for a code block
> that declares a variable that is then used *only* in the lines we want to
> match and replace?

Hope I parsed your question correctly; my answer would be that it can't 
be true because that's basically what the proposed semantic patch does:

	@ swap @
	type T;
	T tmp, a, b;
	@@
	- tmp = a;
	- a = b;
	- b = tmp;
	+ SWAP(a, b);

	@ extends swap @
	identifier unused;
	@@
	  {
	  ...
	- T unused;
	  ... when != unused
	  }

The first part (up to the "+") looks for a opportunities to use SWAP, 
and the second part looks for blocks where that transformation was done 
and we declare identifiers that are/became unused.

It did not match the code in graph.c because the pattern was basically:

	tmp = a;
	a = b;
	something = totally_different;
	b = tmp;

Coccinelle can be told to ignore such unrelated code by adding "... when 
!= tmp" etc. (which matches context lines that don't reference tmp), but 
that's slooow.  (Perhaps I just did it wrong, though.)

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 16:01   ` Johannes Schindelin
  2017-01-30 16:59     ` René Scharfe
@ 2017-01-30 18:41     ` Johannes Sixt
  2017-01-30 21:03       ` Johannes Schindelin
  1 sibling, 1 reply; 50+ messages in thread
From: Johannes Sixt @ 2017-01-30 18:41 UTC (permalink / raw)
  To: Johannes Schindelin, René Scharfe; +Cc: Git List, Junio C Hamano

Am 30.01.2017 um 17:01 schrieb Johannes Schindelin:
> On Sat, 28 Jan 2017, René Scharfe wrote:
>> diff --git a/git-compat-util.h b/git-compat-util.h
>> index 87237b092b..66cd466eea 100644
>> --- a/git-compat-util.h
>> +++ b/git-compat-util.h
>> @@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char *suffix)
>>  	return strip_suffix(str, suffix, &len);
>>  }
>>
>> +#define SWAP(a, b) do {						\
>> +	void *_swap_a_ptr = &(a);				\
>> +	void *_swap_b_ptr = &(b);				\
>> +	unsigned char _swap_buffer[sizeof(a)];			\
>> +	memcpy(_swap_buffer, _swap_a_ptr, sizeof(a));		\
>> +	memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +		\
>> +	       BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));	\
>> +	memcpy(_swap_b_ptr, _swap_buffer, sizeof(a));		\
>> +} while (0)
>> +
>>  #if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
>
> It may seem as a matter of taste, or maybe not: I prefer this without the
> _swap_a_ptr

The purpose of these pointers is certainly to avoid that the macro 
arguments are evaluated more than once.

-- Hannes


^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 16:48     ` René Scharfe
@ 2017-01-30 20:48       ` Johannes Schindelin
  2017-01-30 21:46         ` René Scharfe
  0 siblings, 1 reply; 50+ messages in thread
From: Johannes Schindelin @ 2017-01-30 20:48 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 2696 bytes --]

Hi René,

On Mon, 30 Jan 2017, René Scharfe wrote:

> Am 30.01.2017 um 16:39 schrieb Johannes Schindelin:
>
> > On Sat, 28 Jan 2017, René Scharfe wrote:
> >
> > > Add a macro for exchanging the values of variables.  It allows users
> > > to avoid repetition and takes care of the temporary variable for
> > > them.  It also makes sure that the storage sizes of its two
> > > parameters are the same.  Its memcpy(1) calls are optimized away by
> > > current compilers.
> >
> > How certain are we that "current compilers" optimize that away? And
> > about which "current compilers" are we talking? GCC? GCC 6? Clang?
> > Clang 3?  Clang 3.8.*? Visual C 2005+?
> 
> GCC 4.4.7 and clang 3.2 on x86-64 compile the macro to the same object
> code as a manual swap ; see https://godbolt.org/g/F4b9M9.  Both were
> released 2012.

Good. Thank you.

> That website doesn't offer Visual C(++), but since you added the
> original macro in e5a94313c0 ("Teach git-apply about '-R'", 2006) I
> guess we have that part covered. ;)

Back then, I was not able to build Git using Visual C *at all*. It
required a Herculean effort to make that happen, and it did not happen
before the Git for Windows project was started in 2007.

So I tried to verify that Visual C optimizes this well, and oh my deity,
this was not easy. In Debug mode, it does not optimize, i.e. the memcpy()
will be called, even for simple 32-bit integers. In Release mode, Visual
Studio's defaults turn on "whole-program optimization" which means that
the only swapping that is going on in the end is that the meaning of two
registers is swapped, i.e. the SWAP() macro is expanded to... no assembler
code at all.

After trying this and that and something else, I finally ended up with the
file-scope optimized SWAP() resulting in this assembly code:

00007FF791311000  mov         eax,dword ptr [rcx]  
00007FF791311002  mov         r8d,dword ptr [rdx]  
00007FF791311005  mov         dword ptr [rcx],r8d  
00007FF791311008  mov         dword ptr [rdx],eax  

However, recent events (including some discussions on this mailing list
which had unfortunate consequences) made me trust my instincts more. And
my instincts tell me that it would be unwise to replace code that swaps
primitive types in the straight-forward way by code that relies on
advanced compiler optimization to generate efficient code, otherwise
producing very suboptimal code.

The commit you quoted embarrasses me, and I have no excuse for it. I would
love to see that myswap() ugliness fixed by replacing it with a construct
that is simpler, and generates good code even without any smart compiler.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 18:41     ` Johannes Sixt
@ 2017-01-30 21:03       ` Johannes Schindelin
  2017-01-30 22:09         ` René Scharfe
  0 siblings, 1 reply; 50+ messages in thread
From: Johannes Schindelin @ 2017-01-30 21:03 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: René Scharfe, Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 1585 bytes --]

Hi Hannes,

On Mon, 30 Jan 2017, Johannes Sixt wrote:

> Am 30.01.2017 um 17:01 schrieb Johannes
> Schindelin:
> > On Sat, 28 Jan 2017, René Scharfe wrote:
> > > diff --git a/git-compat-util.h
> > > b/git-compat-util.h
> > > index 87237b092b..66cd466eea 100644
> > > --- a/git-compat-util.h
> > > +++ b/git-compat-util.h
> > > @@ -527,6 +527,16 @@ static inline int
> > > @@ ends_with(const char *str, const char
> > > @@ *suffix)
> > >  	return strip_suffix(str, suffix, &len);
> > >  }
> > >
> > > +#define SWAP(a, b) do {
> > > \
> > > +	void *_swap_a_ptr = &(a);
> > > \
> > > +	void *_swap_b_ptr = &(b);
> > > \
> > > +	unsigned char _swap_buffer[sizeof(a)];
> > > \
> > > +	memcpy(_swap_buffer, _swap_a_ptr,
> > > sizeof(a));		\
> > > +	memcpy(_swap_a_ptr, _swap_b_ptr,
> > > sizeof(a) +		\
> > > +	       BUILD_ASSERT_OR_ZERO(sizeof(a)
> > > == sizeof(b)));	\
> > > +	memcpy(_swap_b_ptr, _swap_buffer,
> > > sizeof(a));		\
> > > +} while (0)
> > > +
> > >  #if defined(NO_MMAP) ||
> > >  defined(USE_WIN32_MMAP)
> >
> > It may seem as a matter of taste, or maybe
> > not: I prefer this without the
> > _swap_a_ptr
> 
> The purpose of these pointers is certainly to
> avoid that the macro arguments are evaluated
> more than once.

I mistook "a" being used in sizeof(a) for breaking that assumption, but of
course a is *not* evaluated in that case. It is curious, though, that an
expression like "sizeof(a++)" would not be rejected.

Further, what would SWAP(a++, b) do? Swap a and b, and *then* increment a?

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 20:48       ` Johannes Schindelin
@ 2017-01-30 21:46         ` René Scharfe
  2017-01-31 12:13           ` Johannes Schindelin
  0 siblings, 1 reply; 50+ messages in thread
From: René Scharfe @ 2017-01-30 21:46 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Git List, Junio C Hamano

Am 30.01.2017 um 21:48 schrieb Johannes Schindelin:
> So I tried to verify that Visual C optimizes this well, and oh my deity,
> this was not easy. In Debug mode, it does not optimize, i.e. the memcpy()
> will be called, even for simple 32-bit integers. In Release mode, Visual
> Studio's defaults turn on "whole-program optimization" which means that
> the only swapping that is going on in the end is that the meaning of two
> registers is swapped, i.e. the SWAP() macro is expanded to... no assembler
> code at all.
>
> After trying this and that and something else, I finally ended up with the
> file-scope optimized SWAP() resulting in this assembly code:
>
> 00007FF791311000  mov         eax,dword ptr [rcx]
> 00007FF791311002  mov         r8d,dword ptr [rdx]
> 00007FF791311005  mov         dword ptr [rcx],r8d
> 00007FF791311008  mov         dword ptr [rdx],eax

This looks good.

> However, recent events (including some discussions on this mailing list
> which had unfortunate consequences) made me trust my instincts more. And
> my instincts tell me that it would be unwise to replace code that swaps
> primitive types in the straight-forward way by code that relies on
> advanced compiler optimization to generate efficient code, otherwise
> producing very suboptimal code.

I don't know how difficult it was to arrive at the result above, but I 
wouldn't call inlining memcpy(3) an advanced optimization (anymore?), 
since the major compilers seem to be doing that.

The SWAP in prio-queue.c seems to be the one with the biggest 
performance impact.  Or perhaps it's the one in lookup_object()?  The 
former is easier to measure, though.

Here's what I get with CFLAGS="-builtin -O2" (best of five):

$ time ./t/helper/test-prio-queue $(seq 100000 -1 1) dump >/dev/null

real    0m0.142s
user    0m0.120s
sys     0m0.020s

And this is with CFLAGS="-no-builtin -O2":

$ time ./t/helper/test-prio-queue $(seq 100000 -1 1) dump >/dev/null

real    0m0.170s
user    0m0.156s
sys     0m0.012s

Hmm.  Not nice, but also not prohibitively slow.

> The commit you quoted embarrasses me, and I have no excuse for it. I would
> love to see that myswap() ugliness fixed by replacing it with a construct
> that is simpler, and generates good code even without any smart compiler.

I don't see a way to do that without adding a type parameter.

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 21:03       ` Johannes Schindelin
@ 2017-01-30 22:09         ` René Scharfe
  2017-01-30 22:21           ` Brandon Williams
  2017-01-31 12:03           ` Johannes Schindelin
  0 siblings, 2 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-30 22:09 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Johannes Sixt, Git List, Junio C Hamano

Am 30.01.2017 um 22:03 schrieb Johannes Schindelin:
> It is curious, though, that an
> expression like "sizeof(a++)" would not be rejected.

Clang normally warns about something like this ("warning: expression 
with side effects has no effect in an unevaluated context 
[-Wunevaluated-expression]"), but not if the code is part of a macro.  I 
don't know if that's intended, but it sure is helpful in the case of SWAP.

> Further, what would SWAP(a++, b) do? Swap a and b, and *then* increment a?

That might be a valid expectation, but GCC says "error: lvalue required 
as unary '&' operand" and clang puts it "error: cannot take the address 
of an rvalue of type".

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 22:09         ` René Scharfe
@ 2017-01-30 22:21           ` Brandon Williams
  2017-01-31 21:03             ` René Scharfe
  2017-01-31 12:03           ` Johannes Schindelin
  1 sibling, 1 reply; 50+ messages in thread
From: Brandon Williams @ 2017-01-30 22:21 UTC (permalink / raw)
  To: René Scharfe
  Cc: Johannes Schindelin, Johannes Sixt, Git List, Junio C Hamano

On 01/30, René Scharfe wrote:
> Am 30.01.2017 um 22:03 schrieb Johannes Schindelin:
> >It is curious, though, that an
> >expression like "sizeof(a++)" would not be rejected.
> 
> Clang normally warns about something like this ("warning: expression
> with side effects has no effect in an unevaluated context
> [-Wunevaluated-expression]"), but not if the code is part of a
> macro.  I don't know if that's intended, but it sure is helpful in
> the case of SWAP.
> 
> >Further, what would SWAP(a++, b) do? Swap a and b, and *then* increment a?
> 
> That might be a valid expectation, but GCC says "error: lvalue
> required as unary '&' operand" and clang puts it "error: cannot take
> the address of an rvalue of type".
> 
> René

Perhaps we could disallow a side-effect operator in the macro.  By
disallow I mean place a comment at the definition to the macro and
hopefully catch something like that in code-review.  We have the same
issue with the `ALLOC_GROW()` macro.

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 3/5] use SWAP macro
  2017-01-28 21:40 ` [PATCH 3/5] " René Scharfe
  2017-01-30 16:03   ` Johannes Schindelin
@ 2017-01-30 22:22   ` Junio C Hamano
  2017-01-31 21:02     ` René Scharfe
  1 sibling, 1 reply; 50+ messages in thread
From: Junio C Hamano @ 2017-01-30 22:22 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Johannes Schindelin

René Scharfe <l.s.r@web.de> writes:

>  		if (tree2->flags & UNINTERESTING) {
> -			struct object *tmp = tree2;
> -			tree2 = tree1;
> -			tree1 = tmp;
> +			SWAP(tree2, tree1);

A human would have written this SWAP(tree1, tree2).

Not that I think such a manual fix-up should be made in _this_
patch, which may end up mixing mechanical conversion (which we may
want to keep reproducible) and hand tweaks.  But this swapped swap
reads somewhat silly.

> diff --git a/diff-no-index.c b/diff-no-index.c
> index f420786039..1ae09894d7 100644
> --- a/diff-no-index.c
> +++ b/diff-no-index.c
> @@ -186,9 +186,8 @@ static int queue_diff(struct diff_options *o,
>  
>  		if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
>  			unsigned tmp;
> -			const char *tmp_c;
>  			tmp = mode1; mode1 = mode2; mode2 = tmp;
> -			tmp_c = name1; name1 = name2; name2 = tmp_c;
> +			SWAP(name1, name2);

Curious that mode swapping is left for a later iteration.

> diff --git a/diff.c b/diff.c
> index f08cd8e033..9de1ba264f 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -5118,13 +5118,11 @@ void diff_change(struct diff_options *options,
>  
>  	if (DIFF_OPT_TST(options, REVERSE_DIFF)) {
>  		unsigned tmp;
> -		const unsigned char *tmp_c;
> -		tmp = old_mode; old_mode = new_mode; new_mode = tmp;
> -		tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
> +		SWAP(old_mode, new_mode);
> +		SWAP(old_sha1, new_sha1);
>  		tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid;
>  			new_sha1_valid = tmp;

So is this one.

> diff --git a/merge-recursive.c b/merge-recursive.c
> ...
> -			tmp = ren2;
> -			ren2 = ren1;
> -			ren1 = tmp;
> +			SWAP(ren2, ren1);

A human would have written this SWAP(ren1, ren2).


^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 4/5] diff: use SWAP macro
  2017-01-28 21:41 ` [PATCH 4/5] diff: " René Scharfe
  2017-01-30 16:04   ` Johannes Schindelin
@ 2017-01-30 22:22   ` Junio C Hamano
  1 sibling, 0 replies; 50+ messages in thread
From: Junio C Hamano @ 2017-01-30 22:22 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Johannes Schindelin

René Scharfe <l.s.r@web.de> writes:

> Use the macro SWAP to exchange the value of pairs of variables instead
> of swapping them manually with the help of a temporary variable.  The
> resulting code is shorter and easier to read.
>
> The two cases were not transformed by the semantic patch swap.cocci
> because it's extra careful and handles only cases where the types of all
> variables are the same

Ah, that explains my "huh?" in 3/5; thanks.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 0/5] introduce SWAP macro
  2017-01-28 21:13 [PATCH 0/5] introduce SWAP macro René Scharfe
                   ` (4 preceding siblings ...)
  2017-01-28 21:42 ` [PATCH 5/5] graph: " René Scharfe
@ 2017-01-30 23:20 ` Junio C Hamano
  5 siblings, 0 replies; 50+ messages in thread
From: Junio C Hamano @ 2017-01-30 23:20 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Johannes Schindelin

René Scharfe <l.s.r@web.de> writes:

> Exchanging the value of two variables requires declaring a temporary
> variable and repeating their names.  The swap macro in apply.c
> simplifies this for its callers without changing the compiled binary.
> Polish this macro and export it, then use it throughout the code to
> reduce repetition and hide the declaration of temporary variables.

All looked reasonable (modulo "swap tree2 and tree1???" ordering).
Also the object-id ones looked good.

Thanks.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 22:09         ` René Scharfe
  2017-01-30 22:21           ` Brandon Williams
@ 2017-01-31 12:03           ` Johannes Schindelin
  1 sibling, 0 replies; 50+ messages in thread
From: Johannes Schindelin @ 2017-01-31 12:03 UTC (permalink / raw)
  To: René Scharfe; +Cc: Johannes Sixt, Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 985 bytes --]

Hi René,

On Mon, 30 Jan 2017, René Scharfe wrote:

> Am 30.01.2017 um 22:03 schrieb Johannes Schindelin:
> > It is curious, though, that an expression like "sizeof(a++)" would not
> > be rejected.
> 
> Clang normally warns about something like this ("warning: expression
> with side effects has no effect in an unevaluated context
> [-Wunevaluated-expression]"), but not if the code is part of a macro.  I
> don't know if that's intended, but it sure is helpful in the case of
> SWAP.

Thank you for clarifying.

> > Further, what would SWAP(a++, b) do? Swap a and b, and *then*
> > increment a?
> 
> That might be a valid expectation, but GCC says "error: lvalue required
> as unary '&' operand" and clang puts it "error: cannot take the address
> of an rvalue of type".

Okay, now we know what the tool says.

I would like to take a step back and state that it does not make much
sense to support SWAP(a++, b), as it is confusing at best.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 21:46         ` René Scharfe
@ 2017-01-31 12:13           ` Johannes Schindelin
  2017-01-31 21:02             ` René Scharfe
  0 siblings, 1 reply; 50+ messages in thread
From: Johannes Schindelin @ 2017-01-31 12:13 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 1294 bytes --]

Hi René,

On Mon, 30 Jan 2017, René Scharfe wrote:

> Am 30.01.2017 um 21:48 schrieb Johannes Schindelin:
>
> > The commit you quoted embarrasses me, and I have no excuse for it. I
> > would love to see that myswap() ugliness fixed by replacing it with a
> > construct that is simpler, and generates good code even without any
> > smart compiler.
> 
> I don't see a way to do that without adding a type parameter.

Exactly. And you know what? I would be very okay with that type parameter.

Coccinelle [*1*] should be able to cope with that, too, mehtinks.

It would be trivially "optimized" out of the box, even when compiling with
Tiny C or in debug mode.

And it would even allow things like this:

#define SIMPLE_SWAP(T, a, b) do { T tmp_ = a; a = b; b = tmp_; } while (0)
...
	uint32_t large;
	char nybble;

	...

	if (!(large & ~0xf)) {
		SIMPLE_SWAP(char, nybble, large);
		...
	}

i.e. mixing types, when possible.

And while I do not necessarily expect that we need anything like this
anytime soon, merely the fact that it allows for this flexibility, while
being very readable at the same time, would make it a pretty good design
in my book.

Ciao,
Dscho

P.S.: I am sure glad they use the French name and do not translate it to
English.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 3/5] use SWAP macro
  2017-01-30 22:22   ` Junio C Hamano
@ 2017-01-31 21:02     ` René Scharfe
  0 siblings, 0 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-31 21:02 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git List, Johannes Schindelin

Am 30.01.2017 um 23:22 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
>
>>  		if (tree2->flags & UNINTERESTING) {
>> -			struct object *tmp = tree2;
>> -			tree2 = tree1;
>> -			tree1 = tmp;
>> +			SWAP(tree2, tree1);
>
> A human would have written this SWAP(tree1, tree2).
>
> Not that I think such a manual fix-up should be made in _this_
> patch, which may end up mixing mechanical conversion (which we may
> want to keep reproducible) and hand tweaks.  But this swapped swap
> reads somewhat silly.

Well, a human wrote "tmp = tree2" -- sometimes one likes to count down 
instead of up, I guess.

We can make Coccinelle order the parameters alphabetically, but I don't 
know how to do so without duplicating most of the semantic patch.  And 
thats would result in e.g. SWAP(new_tree, old_tree), which might be odd 
as well.

I'd just leave them in whatever order they were, but that's probably 
because I'm lazy.

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-31 12:13           ` Johannes Schindelin
@ 2017-01-31 21:02             ` René Scharfe
  2017-02-01  0:44               ` Ramsay Jones
  2017-02-01 11:39               ` Johannes Schindelin
  0 siblings, 2 replies; 50+ messages in thread
From: René Scharfe @ 2017-01-31 21:02 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Git List, Junio C Hamano

Am 31.01.2017 um 13:13 schrieb Johannes Schindelin:
> Hi René,
>
> On Mon, 30 Jan 2017, René Scharfe wrote:
>
>> Am 30.01.2017 um 21:48 schrieb Johannes Schindelin:
>>
>>> The commit you quoted embarrasses me, and I have no excuse for it. I
>>> would love to see that myswap() ugliness fixed by replacing it with a
>>> construct that is simpler, and generates good code even without any
>>> smart compiler.
>>
>> I don't see a way to do that without adding a type parameter.
>
> Exactly. And you know what? I would be very okay with that type parameter.
>
> Coccinelle [*1*] should be able to cope with that, too, mehtinks.

Yes, a semantic patch can turn the type of the temporary variable into a 
macro parameter.  Programmers would have to type the type, though, 
making the macro only half as good.

> It would be trivially "optimized" out of the box, even when compiling with
> Tiny C or in debug mode.

Such a compiler is already slowed down by memset(3) calls for 
initializing objects and lack of other optimizations.  I doubt a few 
more memcpy(3) calls would make that much of a difference.

NB: git as compiled with TCC fails several tests, alas.  Builds wickedly 
fast, though.

> And it would even allow things like this:
>
> #define SIMPLE_SWAP(T, a, b) do { T tmp_ = a; a = b; b = tmp_; } while (0)
> ...
> 	uint32_t large;
> 	char nybble;
>
> 	...
>
> 	if (!(large & ~0xf)) {
> 		SIMPLE_SWAP(char, nybble, large);
> 		...
> 	}
>
> i.e. mixing types, when possible.
>
> And while I do not necessarily expect that we need anything like this
> anytime soon, merely the fact that it allows for this flexibility, while
> being very readable at the same time, would make it a pretty good design
> in my book.

Such a skinny macro which only hides repetition is kind of attractive 
due to its simplicity; I can't say the same about the mixed type example 
above, though.

The fat version isn't that bad either even without inlining, includes a 
few safety checks and doesn't require us to tell the compiler something 
it already knows very well.  I'd rather let the machine do the work.

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-30 22:21           ` Brandon Williams
@ 2017-01-31 21:03             ` René Scharfe
  2017-01-31 21:35               ` Jeff King
  0 siblings, 1 reply; 50+ messages in thread
From: René Scharfe @ 2017-01-31 21:03 UTC (permalink / raw)
  To: Brandon Williams
  Cc: Johannes Schindelin, Johannes Sixt, Git List, Junio C Hamano

Am 30.01.2017 um 23:21 schrieb Brandon Williams:
> On 01/30, René Scharfe wrote:
>> Am 30.01.2017 um 22:03 schrieb Johannes Schindelin:
>>> It is curious, though, that an
>>> expression like "sizeof(a++)" would not be rejected.
>>
>> Clang normally warns about something like this ("warning: expression
>> with side effects has no effect in an unevaluated context
>> [-Wunevaluated-expression]"), but not if the code is part of a
>> macro.  I don't know if that's intended, but it sure is helpful in
>> the case of SWAP.
>>
>>> Further, what would SWAP(a++, b) do? Swap a and b, and *then* increment a?
>>
>> That might be a valid expectation, but GCC says "error: lvalue
>> required as unary '&' operand" and clang puts it "error: cannot take
>> the address of an rvalue of type".
>>
>> René
>
> Perhaps we could disallow a side-effect operator in the macro.  By
> disallow I mean place a comment at the definition to the macro and
> hopefully catch something like that in code-review.  We have the same
> issue with the `ALLOC_GROW()` macro.

SWAP(a++, ...) is caught by the compiler, SWAP(*a++, ...) works fine. 
Technically that should be enough. :)  A comment wouldn't hurt, of course.

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-31 21:03             ` René Scharfe
@ 2017-01-31 21:35               ` Jeff King
  2017-01-31 22:29                 ` Junio C Hamano
  2017-02-01 11:28                 ` Johannes Schindelin
  0 siblings, 2 replies; 50+ messages in thread
From: Jeff King @ 2017-01-31 21:35 UTC (permalink / raw)
  To: René Scharfe
  Cc: Brandon Williams, Johannes Schindelin, Johannes Sixt, Git List,
	Junio C Hamano

On Tue, Jan 31, 2017 at 10:03:01PM +0100, René Scharfe wrote:

> > Perhaps we could disallow a side-effect operator in the macro.  By
> > disallow I mean place a comment at the definition to the macro and
> > hopefully catch something like that in code-review.  We have the same
> > issue with the `ALLOC_GROW()` macro.
> 
> SWAP(a++, ...) is caught by the compiler, SWAP(*a++, ...) works fine.
> Technically that should be enough. :)  A comment wouldn't hurt, of course.

One funny thing is that your macro takes address-of itself, behind the
scenes. I wonder if it would be more natural for it to take
pointers-to-objects, making it look more like a real function (i.e.,
SWAP(&a, &b) instead of SWAP(a, b)". And then these funny corner cases
become quite obvious in the caller, because the caller is the one who
has to type "&".

-Peff

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-31 21:35               ` Jeff King
@ 2017-01-31 22:29                 ` Junio C Hamano
  2017-01-31 22:36                   ` Jeff King
  2017-02-01 11:28                 ` Johannes Schindelin
  1 sibling, 1 reply; 50+ messages in thread
From: Junio C Hamano @ 2017-01-31 22:29 UTC (permalink / raw)
  To: Jeff King
  Cc: René Scharfe, Brandon Williams, Johannes Schindelin,
	Johannes Sixt, Git List

Jeff King <peff@peff.net> writes:

> ... I wonder if it would be more natural for it to take
> pointers-to-objects, making it look more like a real function (i.e.,
> SWAP(&a, &b) instead of SWAP(a, b)". And then these funny corner cases
> become quite obvious in the caller, because the caller is the one who
> has to type "&".

Hmmmm.  

While this looks very attractive in theory by forcing 'a' and 'b' to
be lvalues, it probably invites mistakes go unnoticed during the
review when the code wants to swap two pointer variables.  

For example,

apply.c:            SWAP(p->new_name, p->old_name);

is now a bug and will swap only the first byte of these names, and
the correct way to spell it would become:

apply.c:            SWAP(&p->new_name, &p->old_name);

The latter clearly looks like swapping the new and old names, which
is good, but I do not have any confidence that I will immediately
spot a bug when presented the former under the new world order.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-31 22:29                 ` Junio C Hamano
@ 2017-01-31 22:36                   ` Jeff King
  0 siblings, 0 replies; 50+ messages in thread
From: Jeff King @ 2017-01-31 22:36 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: René Scharfe, Brandon Williams, Johannes Schindelin,
	Johannes Sixt, Git List

On Tue, Jan 31, 2017 at 02:29:51PM -0800, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > ... I wonder if it would be more natural for it to take
> > pointers-to-objects, making it look more like a real function (i.e.,
> > SWAP(&a, &b) instead of SWAP(a, b)". And then these funny corner cases
> > become quite obvious in the caller, because the caller is the one who
> > has to type "&".
> 
> Hmmmm.  
> 
> While this looks very attractive in theory by forcing 'a' and 'b' to
> be lvalues, it probably invites mistakes go unnoticed during the
> review when the code wants to swap two pointer variables.  
> 
> For example,
> 
> apply.c:            SWAP(p->new_name, p->old_name);
> 
> is now a bug and will swap only the first byte of these names, and
> the correct way to spell it would become:
> 
> apply.c:            SWAP(&p->new_name, &p->old_name);
> 
> The latter clearly looks like swapping the new and old names, which
> is good, but I do not have any confidence that I will immediately
> spot a bug when presented the former under the new world order.

Yes, it's a problem with any function (or function-like interface) that
takes an untyped pointer. You don't know if the caller meant the pointer
or the pointer-to-pointer. In that sense it's no different than:

  memcpy(p->new_name, p->old_name, len);

Is that right, or did we mean to copy the pointers themselves?

It does also help the reverse case:

  int a, b;
  SWAP(a, b);

which would give an error (you forgot the "&"). I don't know which is
more likely to be productive. But at least requiring pointer values
makes it consistent with other functions like memcpy, etc.

-Peff

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-31 21:02             ` René Scharfe
@ 2017-02-01  0:44               ` Ramsay Jones
  2017-02-01 11:39               ` Johannes Schindelin
  1 sibling, 0 replies; 50+ messages in thread
From: Ramsay Jones @ 2017-02-01  0:44 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List



On 31/01/17 21:02, René Scharfe wrote:
[snip]
>> It would be trivially "optimized" out of the box, even when compiling with
>> Tiny C or in debug mode.
> 
> Such a compiler is already slowed down by memset(3) calls for initializing objects and lack of other optimizations.  I doubt a few more memcpy(3) calls would make that much of a difference.
> 
> NB: git as compiled with TCC fails several tests, alas.  Builds wickedly fast, though.

Hmm, a couple of years ago, I used tcc to build git and it worked
just fine (and it passed all tests). Prompted by this note, I just
built tcc from the current mob branch (@5420bb8) and built git OK,
but, yes, lots of tests now fail. :(

ATB,
Ramsay Jones



^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-31 21:35               ` Jeff King
  2017-01-31 22:29                 ` Junio C Hamano
@ 2017-02-01 11:28                 ` Johannes Schindelin
  2017-02-01 11:47                   ` Jeff King
  1 sibling, 1 reply; 50+ messages in thread
From: Johannes Schindelin @ 2017-02-01 11:28 UTC (permalink / raw)
  To: Jeff King
  Cc: René Scharfe, Brandon Williams, Johannes Sixt, Git List,
	Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 1390 bytes --]

Hi Peff,

On Tue, 31 Jan 2017, Jeff King wrote:

> On Tue, Jan 31, 2017 at 10:03:01PM +0100, René Scharfe wrote:
> 
> > > Perhaps we could disallow a side-effect operator in the macro.  By
> > > disallow I mean place a comment at the definition to the macro and
> > > hopefully catch something like that in code-review.  We have the
> > > same issue with the `ALLOC_GROW()` macro.
> > 
> > SWAP(a++, ...) is caught by the compiler, SWAP(*a++, ...) works fine.
> > Technically that should be enough. :)  A comment wouldn't hurt, of
> > course.
> 
> One funny thing is that your macro takes address-of itself, behind the
> scenes. I wonder if it would be more natural for it to take
> pointers-to-objects, making it look more like a real function (i.e.,
> SWAP(&a, &b) instead of SWAP(a, b)". And then these funny corner cases
> become quite obvious in the caller, because the caller is the one who
> has to type "&".

But forcing SWAP(&a, &b) would make it even more cumbersome to use, and it
would also make it harder to optimize, say, by using registers instead of
addressable memory (think swapping two 32-bit integers: there is *no* need
to write them into memory just to swap them).

And I think I should repeat my point that this discussion veers towards
making simple swaps *more* complicated, rather than less complicated. Bad
direction.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-31 21:02             ` René Scharfe
  2017-02-01  0:44               ` Ramsay Jones
@ 2017-02-01 11:39               ` Johannes Schindelin
  1 sibling, 0 replies; 50+ messages in thread
From: Johannes Schindelin @ 2017-02-01 11:39 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 2247 bytes --]

Hi René,

On Tue, 31 Jan 2017, René Scharfe wrote:

> Am 31.01.2017 um 13:13 schrieb Johannes Schindelin:
>
> > #define SIMPLE_SWAP(T, a, b) do { T tmp_ = a; a = b; b = tmp_; } while (0)
> > ...
> >  uint32_t large;
> >  char nybble;
> >
> >  ...
> >
> >  if (!(large & ~0xf)) {
> >   SIMPLE_SWAP(char, nybble, large);
> >   ...
> >  }
> >
> > i.e. mixing types, when possible.
> >
> > And while I do not necessarily expect that we need anything like this
> > anytime soon, merely the fact that it allows for this flexibility, while
> > being very readable at the same time, would make it a pretty good design
> > in my book.
> 
> Such a skinny macro which only hides repetition is kind of attractive due to
> its simplicity; I can't say the same about the mixed type example above,
> though.
> 
> The fat version isn't that bad either even without inlining, includes a few
> safety checks and doesn't require us to tell the compiler something it
> already knows very well.  I'd rather let the machine do the work.

I am a big fan of letting the machine do the work. But I am not a big fan
of *creating* work for the machine.

So if you asked me what I would think of a script that, given a patch "in
mbox format", automatically fixes all formatting issues that typically
take up a sizable chunk of review time, I would say: yeah, let's get this
done! It would probably take away some fun because then reviewers could
bike-shed less, but I'd think that is a good thing.

If you asked me what my opinion is about a program you wrote that gathers
all the threads and sub threads of code^Wpatch reviews on the Git mailing
list, and cross-references them with public Git branches, and with Junio's
What's Cooking mail, and with the SHA-1s in `pu`: Great! That would
relieve me of a ton of really boring and grueling work, if the machine can
do it, all the better.

And if you ask me about adding a complex macro that adds a bunch of work
for the C compiler just to produce the same assembler code as a one-liner
macro would have produced much easier, I will reply that we could maybe
instead spend that time on letting the machine perform tasks that already
need to be done... :-)

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-02-01 11:28                 ` Johannes Schindelin
@ 2017-02-01 11:47                   ` Jeff King
  2017-02-01 18:06                     ` René Scharfe
  0 siblings, 1 reply; 50+ messages in thread
From: Jeff King @ 2017-02-01 11:47 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: René Scharfe, Brandon Williams, Johannes Sixt, Git List,
	Junio C Hamano

On Wed, Feb 01, 2017 at 12:28:13PM +0100, Johannes Schindelin wrote:

> > One funny thing is that your macro takes address-of itself, behind the
> > scenes. I wonder if it would be more natural for it to take
> > pointers-to-objects, making it look more like a real function (i.e.,
> > SWAP(&a, &b) instead of SWAP(a, b)". And then these funny corner cases
> > become quite obvious in the caller, because the caller is the one who
> > has to type "&".
> 
> But forcing SWAP(&a, &b) would make it even more cumbersome to use, and it
> would also make it harder to optimize, say, by using registers instead of
> addressable memory (think swapping two 32-bit integers: there is *no* need
> to write them into memory just to swap them).

I don't find the register thing compelling at all. I'd expect modern
compilers to optimize *(&a) into just "a" and keep using a register. I'm
sure there are compilers that don't, but I'm also not sure how much we
_care_. If your compiler doesn't do basic micro-optimizations, then you
live with it or get a better compiler.

I'm much more interested in what's readable and maintainable, versus
trying to micro-optimize something that hasn't even been shown to be
measurably interesting.

That said...

> And I think I should repeat my point that this discussion veers towards
> making simple swaps *more* complicated, rather than less complicated. Bad
> direction.

I'm not altogether convinced that SWAP() is an improvement in
readability. I really like that it's shorter than the code it replaces,
but there is a danger with introducing opaque constructs. It's one more
thing for somebody familiar with C but new to the project to learn or
get wrong.

IMHO the main maintenance gain from René's patch is that you don't have
to specify the type, which means you can never have a memory-overflow
bug due to incorrect types. If we lose that, then I don't see all that
much value in the whole thing.

-Peff

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-02-01 11:47                   ` Jeff King
@ 2017-02-01 18:06                     ` René Scharfe
  2017-02-01 18:33                       ` Junio C Hamano
  0 siblings, 1 reply; 50+ messages in thread
From: René Scharfe @ 2017-02-01 18:06 UTC (permalink / raw)
  To: Jeff King, Johannes Schindelin
  Cc: Brandon Williams, Johannes Sixt, Git List, Junio C Hamano

Am 01.02.2017 um 12:47 schrieb Jeff King:
> I'm not altogether convinced that SWAP() is an improvement in
> readability. I really like that it's shorter than the code it replaces,
> but there is a danger with introducing opaque constructs. It's one more
> thing for somebody familiar with C but new to the project to learn or
> get wrong.

I'm biased, of course, but SIMPLE_SWAP and SWAP exchange the values of 
two variables -- how could someone get that wrong?  Would it help to 
name the macro SWAP_VALUES?  Or XCHG? ;)

> IMHO the main maintenance gain from René's patch is that you don't have
> to specify the type, which means you can never have a memory-overflow
> bug due to incorrect types. If we lose that, then I don't see all that
> much value in the whole thing.

Size checks could be added to SIMPLE_SWAP as well.

The main benefit of a swap macro is reduced repetition IMHO: Users 
specify the variables to swap once instead of twice in a special order, 
and with SWAP they don't need to declare their type again.  Squeezing 
out redundancy makes the code easier to read and modify.

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-02-01 18:06                     ` René Scharfe
@ 2017-02-01 18:33                       ` Junio C Hamano
  2017-02-07 22:04                         ` René Scharfe
  0 siblings, 1 reply; 50+ messages in thread
From: Junio C Hamano @ 2017-02-01 18:33 UTC (permalink / raw)
  To: René Scharfe
  Cc: Jeff King, Johannes Schindelin, Brandon Williams, Johannes Sixt,
	Git List

René Scharfe <l.s.r@web.de> writes:

> Size checks could be added to SIMPLE_SWAP as well.

Between SWAP() and SIMPLE_SWAP() I am undecided.

If the compiler can infer the type and the size of the two
"locations" given to the macro, there is no technical reason to
require the caller to specify the type as an extra argument, so
SIMPLE_SWAP() may not necessarily an improvement over SWAP() from
that point of view.  If the redundancy is used as a sanity check,
I'd be in favor of SIMPLE_SWAP(), though.

If the goal of SIMPLE_SWAP(), on the other hand, were to support the
"only swap char with int for small value" example earlier in the
thread, it means you cannot sanity check the type of things being
swapped in the macro, and the readers of the code need to know about
the details of what variables are being swapped.  It looks to me
that it defeats the main benefit of using a macro.

> The main benefit of a swap macro is reduced repetition IMHO: Users
> specify the variables to swap once instead of twice in a special
> order, and with SWAP they don't need to declare their type again.
> Squeezing out redundancy makes the code easier to read and modify.

Yes.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-02-01 18:33                       ` Junio C Hamano
@ 2017-02-07 22:04                         ` René Scharfe
  2017-02-07 22:30                           ` Junio C Hamano
  0 siblings, 1 reply; 50+ messages in thread
From: René Scharfe @ 2017-02-07 22:04 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jeff King, Johannes Schindelin, Brandon Williams, Johannes Sixt,
	Git List

Am 01.02.2017 um 19:33 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
> 
>> Size checks could be added to SIMPLE_SWAP as well.
> 
> Between SWAP() and SIMPLE_SWAP() I am undecided.
> 
> If the compiler can infer the type and the size of the two
> "locations" given to the macro, there is no technical reason to
> require the caller to specify the type as an extra argument, so
> SIMPLE_SWAP() may not necessarily an improvement over SWAP() from
> that point of view.  If the redundancy is used as a sanity check,
> I'd be in favor of SIMPLE_SWAP(), though.
> 
> If the goal of SIMPLE_SWAP(), on the other hand, were to support the
> "only swap char with int for small value" example earlier in the
> thread, it means you cannot sanity check the type of things being
> swapped in the macro, and the readers of the code need to know about
> the details of what variables are being swapped.  It looks to me
> that it defeats the main benefit of using a macro.

Full type inference could be done with C11's _Generic for basic types,
while typeof would be needed for complex ones, I guess.  Checking that
sizes match is better than nothing and portable to ancient platforms,
though.  Having an explicit type given is portable and easy to use for
checks, of course, e.g. like this:

#define SIMPLE_SWAP(T, a, b) do { \
	T swap_tmp_ = a + BUILD_ASSERT_OR_ZERO(sizeof(T) == sizeof(a)); \
	a = b + BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)); \
	b = swap_tmp_; \
} while(0)

It doesn't support expressions with side effects, but that's probably
not much of a concern.

Swapping between different types would then still have to be done
manually, but I wonder how common that is -- couldn't find such a case
in our tree.

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-02-07 22:04                         ` René Scharfe
@ 2017-02-07 22:30                           ` Junio C Hamano
  2017-02-08 15:14                             ` Johannes Schindelin
  0 siblings, 1 reply; 50+ messages in thread
From: Junio C Hamano @ 2017-02-07 22:30 UTC (permalink / raw)
  To: René Scharfe
  Cc: Jeff King, Johannes Schindelin, Brandon Williams, Johannes Sixt,
	Git List

René Scharfe <l.s.r@web.de> writes:

> Swapping between different types would then still have to be done
> manually, but I wonder how common that is -- couldn't find such a case
> in our tree.

I do not think it is a common thing to do, and more importantly, I
doubt we want to hide such a swap inside a macro.  And that is why
I said the seemingly extra "type" thing may be an improvement over
your original SWAP() thing if it gives us more type safety.

It seems that the thread has been quite for a while. Perhaps people
are happy enough with your patches?  If so, let's move it forward,
but I'll wait for a while in case follow-up discussion appears
soonish.  The changes are fairly well isolated and I do not think we
are in a hurry.



^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-02-07 22:30                           ` Junio C Hamano
@ 2017-02-08 15:14                             ` Johannes Schindelin
  0 siblings, 0 replies; 50+ messages in thread
From: Johannes Schindelin @ 2017-02-08 15:14 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: René Scharfe, Jeff King, Brandon Williams, Johannes Sixt, Git List

[-- Attachment #1: Type: text/plain, Size: 1189 bytes --]

Hi Junio & René,

On Tue, 7 Feb 2017, Junio C Hamano wrote:

> René Scharfe <l.s.r@web.de> writes:
> 
> > Swapping between different types would then still have to be done
> > manually, but I wonder how common that is -- couldn't find such a case
> > in our tree.
> 
> I do not think it is a common thing to do, and more importantly, I doubt
> we want to hide such a swap inside a macro.  And that is why I said the
> seemingly extra "type" thing may be an improvement over your original
> SWAP() thing if it gives us more type safety.
> 
> It seems that the thread has been quite for a while. Perhaps people are
> happy enough with your patches?  If so, let's move it forward, but I'll
> wait for a while in case follow-up discussion appears soonish.  The
> changes are fairly well isolated and I do not think we are in a hurry.

I am still unhappy about choosing to complicate things and lean heavily on
the compiler to make things right again.

But it appears that I am the only one with this concern, so go ahead. I
promise not to say "I told you so" in case that it breaks things or that
certain platforms are experiencing a disadvantage.

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-01-28 21:38 ` [PATCH 1/5] add " René Scharfe
  2017-01-30 15:39   ` Johannes Schindelin
  2017-01-30 16:01   ` Johannes Schindelin
@ 2017-04-24 11:29   ` Jeff King
  2017-04-24 11:49     ` Jeff King
  2017-04-28 17:04     ` René Scharfe
  2 siblings, 2 replies; 50+ messages in thread
From: Jeff King @ 2017-04-24 11:29 UTC (permalink / raw)
  To: René Scharfe
  Cc: Duy Nguyen, Git List, Junio C Hamano, Johannes Schindelin

On Sat, Jan 28, 2017 at 10:38:21PM +0100, René Scharfe wrote:

> diff --git a/git-compat-util.h b/git-compat-util.h
> index 87237b092b..66cd466eea 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char *suffix)
>  	return strip_suffix(str, suffix, &len);
>  }
>  
> +#define SWAP(a, b) do {						\
> +	void *_swap_a_ptr = &(a);				\
> +	void *_swap_b_ptr = &(b);				\
> +	unsigned char _swap_buffer[sizeof(a)];			\
> +	memcpy(_swap_buffer, _swap_a_ptr, sizeof(a));		\
> +	memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +		\
> +	       BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));	\
> +	memcpy(_swap_b_ptr, _swap_buffer, sizeof(a));		\
> +} while (0)

What should:

  SWAP(foo[i], foo[j]);

do when i == j? With this code, it ends up calling

  memcpy(&foo[i], &foo[j], ...);

which can cause valgrind to complain about overlapping memory. I suspect
in practice that noop copies are better off than partial overlaps, but I
think it does still violate the standard.

Is it worth comparing the pointers and bailing early?

A related question is whether the caller should ever be asking to swap
something with itself. This particular case[1] comes from
prio_queue_reverse(). I suspect its "<=" could become a "<", but I
haven't thought it through carefully.

-Peff

[1] http://public-inbox.org/git/CACsJy8AAtV5KJHBqWvnYb3Mw9CVzEdG3M-UJA+jd5MR5e-UMsA@mail.gmail.com/

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-04-24 11:29   ` Jeff King
@ 2017-04-24 11:49     ` Jeff King
  2017-04-24 13:13       ` Duy Nguyen
  2017-04-28 17:04     ` René Scharfe
  1 sibling, 1 reply; 50+ messages in thread
From: Jeff King @ 2017-04-24 11:49 UTC (permalink / raw)
  To: René Scharfe
  Cc: Duy Nguyen, Git List, Junio C Hamano, Johannes Schindelin

On Mon, Apr 24, 2017 at 07:29:28AM -0400, Jeff King wrote:

> A related question is whether the caller should ever be asking to swap
> something with itself. This particular case[1] comes from
> prio_queue_reverse(). I suspect its "<=" could become a "<", but I
> haven't thought it through carefully.

OK, I actually looked closer and we definitely should drop the "<=".  I
was thrown off by the assignment of "j" inside the loop condition. IMHO
it might be more obvious to write as:

  for (i = 0, j = queue->nr - 1; i < j; i++, j--)
	swap(queue, i, j);

but I left it as-is.

So I think this patch is an obvious improvement whatever we do with
SWAP().  There's still an open question of whether SWAP() should be more
careful about its inputs (normally I'd say yes, but I know this is a
potentially performance critical call).

-- >8 --
Subject: [PATCH] prio_queue_reverse: don't swap elements with themselves

Our array-reverse algorithm does the usual "walk from both
ends, swapping elements". We can quit when the two indices
are equal, since:

  1. Swapping an element with itself is a noop.

  2. If i and j are equal, then in the next iteration i is
     guaranteed to be bigge than j, and we will exit the
     loop.

So exiting the loop on equality is slightly more efficient.
And more importantly, the new SWAP() macro does not expect
to handle noop swaps; it will call memcpy() with the same src
and dst pointers in this case. It's unclear whether that
causes a problem on any platforms by violating the
"overlapping memory" constraint of memcpy, but it does cause
valgrind to complain.

Signed-off-by: Jeff King <peff@peff.net>
---
 prio-queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prio-queue.c b/prio-queue.c
index 17252d231..fc3860fdc 100644
--- a/prio-queue.c
+++ b/prio-queue.c
@@ -21,7 +21,7 @@ void prio_queue_reverse(struct prio_queue *queue)
 
 	if (queue->compare != NULL)
 		die("BUG: prio_queue_reverse() on non-LIFO queue");
-	for (i = 0; i <= (j = (queue->nr - 1) - i); i++)
+	for (i = 0; i < (j = (queue->nr - 1) - i); i++)
 		swap(queue, i, j);
 }
 
-- 
2.13.0.rc0.459.g06dc2b676


^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-04-24 11:49     ` Jeff King
@ 2017-04-24 13:13       ` Duy Nguyen
  0 siblings, 0 replies; 50+ messages in thread
From: Duy Nguyen @ 2017-04-24 13:13 UTC (permalink / raw)
  To: Jeff King
  Cc: René Scharfe, Git List, Junio C Hamano, Johannes Schindelin

On Mon, Apr 24, 2017 at 6:49 PM, Jeff King <peff@peff.net> wrote:
> diff --git a/prio-queue.c b/prio-queue.c
> index 17252d231..fc3860fdc 100644
> --- a/prio-queue.c
> +++ b/prio-queue.c
> @@ -21,7 +21,7 @@ void prio_queue_reverse(struct prio_queue *queue)
>
>         if (queue->compare != NULL)
>                 die("BUG: prio_queue_reverse() on non-LIFO queue");
> -       for (i = 0; i <= (j = (queue->nr - 1) - i); i++)
> +       for (i = 0; i < (j = (queue->nr - 1) - i); i++)
>                 swap(queue, i, j);
>  }

FWIW This shuts valgrind-3.10.1 up.
-- 
Duy

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-04-24 11:29   ` Jeff King
  2017-04-24 11:49     ` Jeff King
@ 2017-04-28 17:04     ` René Scharfe
  2017-04-28 21:49       ` Jeff King
  1 sibling, 1 reply; 50+ messages in thread
From: René Scharfe @ 2017-04-28 17:04 UTC (permalink / raw)
  To: Jeff King; +Cc: Duy Nguyen, Git List, Junio C Hamano, Johannes Schindelin

Am 24.04.2017 um 13:29 schrieb Jeff King:
> On Sat, Jan 28, 2017 at 10:38:21PM +0100, René Scharfe wrote:
> 
>> diff --git a/git-compat-util.h b/git-compat-util.h
>> index 87237b092b..66cd466eea 100644
>> --- a/git-compat-util.h
>> +++ b/git-compat-util.h
>> @@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char *suffix)
>>   	return strip_suffix(str, suffix, &len);
>>   }
>>   
>> +#define SWAP(a, b) do {						\
>> +	void *_swap_a_ptr = &(a);				\
>> +	void *_swap_b_ptr = &(b);				\
>> +	unsigned char _swap_buffer[sizeof(a)];			\
>> +	memcpy(_swap_buffer, _swap_a_ptr, sizeof(a));		\
>> +	memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +		\
>> +	       BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));	\
>> +	memcpy(_swap_b_ptr, _swap_buffer, sizeof(a));		\
>> +} while (0)
> 
> What should:
> 
>    SWAP(foo[i], foo[j]);
> 
> do when i == j? With this code, it ends up calling
> 
>    memcpy(&foo[i], &foo[j], ...);
> 
> which can cause valgrind to complain about overlapping memory. I suspect
> in practice that noop copies are better off than partial overlaps, but I
> think it does still violate the standard.
> 
> Is it worth comparing the pointers and bailing early?

Hmm, so swapping a value with itself can be a useful thing to do?
Otherwise an assert would be more appropriate.

Swapping with *partial* overlap sounds tricky, or even evil.  If
we want to support that for some reason we'd have to use memmove
in the middle.  But that would still corrupt at least one of the
objects, wouldn't it?

> A related question is whether the caller should ever be asking to swap
> something with itself. This particular case[1] comes from
> prio_queue_reverse(). I suspect its "<=" could become a "<", but I
> haven't thought it through carefully.

The line in question is this one:

	for (i = 0; i <= (j = (queue->nr - 1) - i); i++)

Assignment in the middle?  Hmm.  Why not do it like this?

	for (i = 0, j = queue->nr - 1; i < j; i++, j--)

Looks less complicated to me.

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-04-28 17:04     ` René Scharfe
@ 2017-04-28 21:49       ` Jeff King
  2017-04-29 18:16         ` René Scharfe
  0 siblings, 1 reply; 50+ messages in thread
From: Jeff King @ 2017-04-28 21:49 UTC (permalink / raw)
  To: René Scharfe
  Cc: Duy Nguyen, Git List, Junio C Hamano, Johannes Schindelin

On Fri, Apr 28, 2017 at 07:04:51PM +0200, René Scharfe wrote:

> > What should:
> > 
> >    SWAP(foo[i], foo[j]);
> > 
> > do when i == j? With this code, it ends up calling
> > 
> >    memcpy(&foo[i], &foo[j], ...);
> > 
> > which can cause valgrind to complain about overlapping memory. I suspect
> > in practice that noop copies are better off than partial overlaps, but I
> > think it does still violate the standard.
> > 
> > Is it worth comparing the pointers and bailing early?
> 
> Hmm, so swapping a value with itself can be a useful thing to do?
> Otherwise an assert would be more appropriate.

No, I doubt that it's useful, and it's probably a sign of a bug
elsewhere. But it's likely a _harmless_ bug, so it may be irritating to
die when we hit it rather than continuing.

I dunno. I could go either way. Or we could leave it as-is, and let
valgrind find the problem. That has zero run-time cost, but of course
nobody bothers to run valgrind outside of the test suite, so the inputs
are not usually very exotic.

> Swapping with *partial* overlap sounds tricky, or even evil.  If
> we want to support that for some reason we'd have to use memmove
> in the middle.  But that would still corrupt at least one of the
> objects, wouldn't it?

Yes, the overlap case is probably an actual bug. Detecting it is a bit
harder, but definitely possible. I hate to pay the run-time cost for it,
but I wonder if a compiler could optimize it out.

> The line in question is this one:
> 
> 	for (i = 0; i <= (j = (queue->nr - 1) - i); i++)
> 
> Assignment in the middle?  Hmm.  Why not do it like this?
> 
> 	for (i = 0, j = queue->nr - 1; i < j; i++, j--)
> 
> Looks less complicated to me.

Yes, see my other reply. :)

-Peff

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-04-28 21:49       ` Jeff King
@ 2017-04-29 18:16         ` René Scharfe
  2017-04-30  3:11           ` Jeff King
  0 siblings, 1 reply; 50+ messages in thread
From: René Scharfe @ 2017-04-29 18:16 UTC (permalink / raw)
  To: Jeff King; +Cc: Duy Nguyen, Git List, Junio C Hamano, Johannes Schindelin

Am 28.04.2017 um 23:49 schrieb Jeff King:
> On Fri, Apr 28, 2017 at 07:04:51PM +0200, René Scharfe wrote:
> 
>>> What should:
>>>
>>>     SWAP(foo[i], foo[j]);
>>>
>>> do when i == j? With this code, it ends up calling
>>>
>>>     memcpy(&foo[i], &foo[j], ...);
>>>
>>> which can cause valgrind to complain about overlapping memory. I suspect
>>> in practice that noop copies are better off than partial overlaps, but I
>>> think it does still violate the standard.
>>>
>>> Is it worth comparing the pointers and bailing early?
>>
>> Hmm, so swapping a value with itself can be a useful thing to do?
>> Otherwise an assert would be more appropriate.
> 
> No, I doubt that it's useful, and it's probably a sign of a bug
> elsewhere. But it's likely a _harmless_ bug, so it may be irritating to
> die when we hit it rather than continuing.
> 
> I dunno. I could go either way. Or we could leave it as-is, and let
> valgrind find the problem. That has zero run-time cost, but of course
> nobody bothers to run valgrind outside of the test suite, so the inputs
> are not usually very exotic.

It would be  problematic on platforms where memcpy has to erase the
destination before writing new values (I don't know any example).

We could use two temporary buffers.  The object code is the same with
GCC around 5 and Clang 3.2 or higher -- at least for prio-queue.c.

With GCC 4.9.2 (that's what Debian stable currently has) the result is
actually slightly better with two buffers because a 128-bit move starts
to get used (https://godbolt.org/g/18HQDQ).

>> Swapping with *partial* overlap sounds tricky, or even evil.  If
>> we want to support that for some reason we'd have to use memmove
>> in the middle.  But that would still corrupt at least one of the
>> objects, wouldn't it?
> 
> Yes, the overlap case is probably an actual bug. Detecting it is a bit
> harder, but definitely possible. I hate to pay the run-time cost for it,
> but I wonder if a compiler could optimize it out.

How is it possible to arrive at such a situation?  We'd need two objects
of the same size (we check that in SWAP) and one of them would start
inside of the other one, i.e. the pointer difference between them would
be a fraction of 1.  So the type system would have to be tricked into
it, right?

How *would* we detect overlaps?  The obvious checks (a+len<=b||b+len<=a)
are undefined if applied to objects that don't belong to the same array.
And members of the same array would not overlap to begin with..

It may be my laziness speaking, but do we really need such a check?  If
someone constructs interleaving objects then they'd need to implement
the necessary checks themselves IMHO.

>> The line in question is this one:
>>
>> 	for (i = 0; i <= (j = (queue->nr - 1) - i); i++)
>>
>> Assignment in the middle?  Hmm.  Why not do it like this?
>>
>> 	for (i = 0, j = queue->nr - 1; i < j; i++, j--)
>>
>> Looks less complicated to me.
> 
> Yes, see my other reply. :)

Ah, so that's where I stole it from. ;)  Perhaps my source amnesia was
in part caused by confusion about your reasoning there: The code does A,
B would be better, so let's do C.  Wait, what? :)

René

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-04-29 18:16         ` René Scharfe
@ 2017-04-30  3:11           ` Jeff King
  2017-05-02  5:29             ` René Scharfe
  0 siblings, 1 reply; 50+ messages in thread
From: Jeff King @ 2017-04-30  3:11 UTC (permalink / raw)
  To: René Scharfe
  Cc: Duy Nguyen, Git List, Junio C Hamano, Johannes Schindelin

On Sat, Apr 29, 2017 at 08:16:17PM +0200, René Scharfe wrote:

> > I dunno. I could go either way. Or we could leave it as-is, and let
> > valgrind find the problem. That has zero run-time cost, but of course
> > nobody bothers to run valgrind outside of the test suite, so the inputs
> > are not usually very exotic.
> 
> It would be  problematic on platforms where memcpy has to erase the
> destination before writing new values (I don't know any example).
> 
> We could use two temporary buffers.  The object code is the same with
> GCC around 5 and Clang 3.2 or higher -- at least for prio-queue.c.

Hmm, yeah, that's an easy solution that covers the overlap case, too. If
the generated code is the same, that seems like it might not be bad (I
am a little sad how complex this simple swap operation is getting,
though).

> > Yes, the overlap case is probably an actual bug. Detecting it is a bit
> > harder, but definitely possible. I hate to pay the run-time cost for it,
> > but I wonder if a compiler could optimize it out.
> 
> How is it possible to arrive at such a situation?  We'd need two objects
> of the same size (we check that in SWAP) and one of them would start
> inside of the other one, i.e. the pointer difference between them would
> be a fraction of 1.  So the type system would have to be tricked into
> it, right?

Yeah, I guess it would be pretty odd. I was thinking of swapping a
struct and one of its components, but that fail the size equality check.
And anyway that would be a silly thing to do in the first place, so it's
probably not worth thinking too much about.

> It may be my laziness speaking, but do we really need such a check?  If
> someone constructs interleaving objects then they'd need to implement
> the necessary checks themselves IMHO.

Yeah, I think we can live without it. I was mostly trying to think
through whether there were worse cases than the one we saw. But you've
convinced me that there probably aren't.

> > > The line in question is this one:
> > > 
> > > 	for (i = 0; i <= (j = (queue->nr - 1) - i); i++)
> > > 
> > > Assignment in the middle?  Hmm.  Why not do it like this?
> > > 
> > > 	for (i = 0, j = queue->nr - 1; i < j; i++, j--)
> > > 
> > > Looks less complicated to me.
> > 
> > Yes, see my other reply. :)
> 
> Ah, so that's where I stole it from. ;)  Perhaps my source amnesia was
> in part caused by confusion about your reasoning there: The code does A,
> B would be better, so let's do C.  Wait, what? :)

My reasoning was that there's a bug, and the patch does the minimal fix.
I don't mind making a readability improvement on top, but the two are
orthogonal. I.e., you could still write:

  for (i = 0, j = queue->nr - 1; i <= j; i++, j--)

which is more readable but still buggy. :)

-Peff

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/5] add SWAP macro
  2017-04-30  3:11           ` Jeff King
@ 2017-05-02  5:29             ` René Scharfe
  0 siblings, 0 replies; 50+ messages in thread
From: René Scharfe @ 2017-05-02  5:29 UTC (permalink / raw)
  To: Jeff King; +Cc: Duy Nguyen, Git List, Junio C Hamano, Johannes Schindelin

Am 30.04.2017 um 05:11 schrieb Jeff King:
> On Sat, Apr 29, 2017 at 08:16:17PM +0200, René Scharfe wrote:
> 
>>> I dunno. I could go either way. Or we could leave it as-is, and let
>>> valgrind find the problem. That has zero run-time cost, but of course
>>> nobody bothers to run valgrind outside of the test suite, so the inputs
>>> are not usually very exotic.
>>
>> It would be  problematic on platforms where memcpy has to erase the
>> destination before writing new values (I don't know any example).
>>
>> We could use two temporary buffers.  The object code is the same with
>> GCC around 5 and Clang 3.2 or higher -- at least for prio-queue.c.
> 
> Hmm, yeah, that's an easy solution that covers the overlap case, too. If
> the generated code is the same, that seems like it might not be bad (I
> am a little sad how complex this simple swap operation is getting,
> though).

Patch below.

All is well if the compiler can (and is allowed to) see through the
memcpy calls, otherwise we get a performance hit and this change makes
that slow path slower.  FWIW, I didn't see any slowdown in our perf
tests, though.

It's not too late to switch to a macro that takes a type parameter, as
Dscho suggested earlier:

  #define swap_t(T, a, b) do {T t_ = (a); (a) = (b); (b) = t_;} while (0)

Much simpler and with full type check, but harder to use (needs correct
type parameter and its other parameters must not have side effects).

-- >8 --
Subject: [PATCH] avoid calling memcpy on overlapping objects in SWAP

Copy both objects into their own buffers before swapping to make sure we
don't call memcpy with overlapping memory ranges, as that's undefined.
Compilers that inline the memcpy calls optimize the extra operations
away.  That allows calls like SWAP(x, x) without ill effect and without
extra checks.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 git-compat-util.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/git-compat-util.h b/git-compat-util.h
index bd04564..a843f04 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -528,13 +528,15 @@ static inline int ends_with(const char *str, const char *suffix)
 }
 
 #define SWAP(a, b) do {						\
-	void *_swap_a_ptr = &(a);				\
-	void *_swap_b_ptr = &(b);				\
-	unsigned char _swap_buffer[sizeof(a)];			\
-	memcpy(_swap_buffer, _swap_a_ptr, sizeof(a));		\
-	memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +		\
-	       BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));	\
-	memcpy(_swap_b_ptr, _swap_buffer, sizeof(a));		\
+	void *swap_a_ptr_ = &(a);				\
+	void *swap_b_ptr_ = &(b);				\
+	unsigned char swap_a_buffer_[sizeof(a)];		\
+	unsigned char swap_b_buffer_[sizeof(a) +		\
+		BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b))];	\
+	memcpy(swap_a_buffer_, swap_a_ptr_, sizeof(a));		\
+	memcpy(swap_b_buffer_, swap_b_ptr_, sizeof(a));		\
+	memcpy(swap_a_ptr_, swap_b_buffer_, sizeof(a));		\
+	memcpy(swap_b_ptr_, swap_a_buffer_, sizeof(a));		\
 } while (0)
 
 #if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 50+ messages in thread

end of thread, other threads:[~2017-05-02  5:29 UTC | newest]

Thread overview: 50+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-28 21:13 [PATCH 0/5] introduce SWAP macro René Scharfe
2017-01-28 21:38 ` [PATCH 1/5] add " René Scharfe
2017-01-30 15:39   ` Johannes Schindelin
2017-01-30 16:48     ` René Scharfe
2017-01-30 20:48       ` Johannes Schindelin
2017-01-30 21:46         ` René Scharfe
2017-01-31 12:13           ` Johannes Schindelin
2017-01-31 21:02             ` René Scharfe
2017-02-01  0:44               ` Ramsay Jones
2017-02-01 11:39               ` Johannes Schindelin
2017-01-30 16:01   ` Johannes Schindelin
2017-01-30 16:59     ` René Scharfe
2017-01-30 18:41     ` Johannes Sixt
2017-01-30 21:03       ` Johannes Schindelin
2017-01-30 22:09         ` René Scharfe
2017-01-30 22:21           ` Brandon Williams
2017-01-31 21:03             ` René Scharfe
2017-01-31 21:35               ` Jeff King
2017-01-31 22:29                 ` Junio C Hamano
2017-01-31 22:36                   ` Jeff King
2017-02-01 11:28                 ` Johannes Schindelin
2017-02-01 11:47                   ` Jeff King
2017-02-01 18:06                     ` René Scharfe
2017-02-01 18:33                       ` Junio C Hamano
2017-02-07 22:04                         ` René Scharfe
2017-02-07 22:30                           ` Junio C Hamano
2017-02-08 15:14                             ` Johannes Schindelin
2017-01-31 12:03           ` Johannes Schindelin
2017-04-24 11:29   ` Jeff King
2017-04-24 11:49     ` Jeff King
2017-04-24 13:13       ` Duy Nguyen
2017-04-28 17:04     ` René Scharfe
2017-04-28 21:49       ` Jeff King
2017-04-29 18:16         ` René Scharfe
2017-04-30  3:11           ` Jeff King
2017-05-02  5:29             ` René Scharfe
2017-01-28 21:40 ` [PATCH 2/5] apply: use " René Scharfe
2017-01-28 21:40 ` [PATCH 3/5] " René Scharfe
2017-01-30 16:03   ` Johannes Schindelin
2017-01-30 17:18     ` René Scharfe
2017-01-30 22:22   ` Junio C Hamano
2017-01-31 21:02     ` René Scharfe
2017-01-28 21:41 ` [PATCH 4/5] diff: " René Scharfe
2017-01-30 16:04   ` Johannes Schindelin
2017-01-30 17:26     ` René Scharfe
2017-01-30 22:22   ` Junio C Hamano
2017-01-28 21:42 ` [PATCH 5/5] graph: " René Scharfe
2017-01-30 16:16   ` Johannes Schindelin
2017-01-30 17:41     ` René Scharfe
2017-01-30 23:20 ` [PATCH 0/5] introduce " Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.