All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
@ 2006-09-27  2:40 Junio C Hamano
  2006-09-27  3:11 ` David Rientjes
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Junio C Hamano @ 2006-09-27  2:40 UTC (permalink / raw)
  To: git; +Cc: Jan Engelhardt, Linus Torvalds, Adrian Bunk

When some files have big changes and others are touched only
slightly, diffstat graph did not show differences among smaller
changes that well.  This changes the graph scaling to non-linear
algorithm in such a case.

Without this, "git show --stat fd88d9c" gives:

 .gitignore                       |    1
 Documentation/git-tar-tree.txt   |    3 +
 Documentation/git-upload-tar.txt |   39 -----------
 Documentation/git.txt            |    4 -
 Makefile                         |    1
 builtin-tar-tree.c               |  130 +++++++++++++++-----------------------
 builtin-upload-tar.c             |   74 ----------------------
 git.c                            |    1
 8 files changed, 53 insertions(+), 200 deletions(-)

while with this, it shows:

 .gitignore                       |    1
 Documentation/git-tar-tree.txt   |    3 +++++++++
 Documentation/git-upload-tar.txt |   39 -----------------------------
 Documentation/git.txt            |    4 -----------
 Makefile                         |    1
 builtin-tar-tree.c               |  130 +++++++++++++++-----------------------
 builtin-upload-tar.c             |   74 ----------------------------------
 git.c                            |    1
 8 files changed, 53 insertions(+), 200 deletions(-)

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

 * Jan Engelhardt wondered about doing non-linear scaling on the
   kernel list and this is an experimental patch to do so.  I do
   not seriously consider this for inclusion but it is more of a
   "see if people like it" patch.

 Makefile |    2 +-
 diff.c   |   29 +++++++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 28091d6..0fc59c4 100644
--- a/Makefile
+++ b/Makefile
@@ -304,7 +304,7 @@ BUILTIN_OBJS = \
 	builtin-write-tree.o
 
 GITLIBS = $(LIB_FILE) $(XDIFF_LIB)
-LIBS = $(GITLIBS) -lz
+LIBS = $(GITLIBS) -lz -lm
 
 #
 # Platform specific tweaks
diff --git a/diff.c b/diff.c
index 13aac2d..163ef48 100644
--- a/diff.c
+++ b/diff.c
@@ -4,6 +4,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <signal.h>
+#include <math.h>
 #include "cache.h"
 #include "quote.h"
 #include "diff.h"
@@ -555,6 +556,16 @@ static int scale_linear(int it, int widt
 	return (it * width * 2 + max_change) / (max_change * 2);
 }
 
+static int scale_non_linear(int it, int width, int max_change)
+{
+	/*
+	 * round(width * log(it)/log(max_change))
+	 */
+	if (!it || !max_change)
+		return 0;
+	return (int)(0.5 + width * log(it) / log(max_change));
+}
+
 static void show_name(const char *prefix, const char *name, int len,
 		      const char *reset, const char *set)
 {
@@ -574,10 +585,11 @@ static void show_graph(char ch, int cnt,
 static void show_stats(struct diffstat_t* data, struct diff_options *options)
 {
 	int i, len, add, del, total, adds = 0, dels = 0;
-	int max_change = 0, max_len = 0;
+	int max_change = 0, max_len = 0, min_change = 0;
 	int total_files = data->nr;
 	int width, name_width;
 	const char *reset, *set, *add_c, *del_c;
+	int non_linear_scale = 0;
 
 	if (data->nr == 0)
 		return;
@@ -595,12 +607,12 @@ static void show_stats(struct diffstat_t
 			width = name_width + 15;
 	}
 
-	/* Find the longest filename and max number of changes */
 	reset = diff_get_color(options->color_diff, DIFF_RESET);
 	set = diff_get_color(options->color_diff, DIFF_PLAIN);
 	add_c = diff_get_color(options->color_diff, DIFF_FILE_NEW);
 	del_c = diff_get_color(options->color_diff, DIFF_FILE_OLD);
 
+	/* Find the longest filename and max/min number of changes */
 	for (i = 0; i < data->nr; i++) {
 		struct diffstat_file *file = data->files[i];
 		int change = file->added + file->deleted;
@@ -620,6 +632,8 @@ static void show_stats(struct diffstat_t
 			continue;
 		if (max_change < change)
 			max_change = change;
+		if (0 < change && (!min_change || change < min_change))
+			min_change = change;
 	}
 
 	/* Compute the width of the graph part;
@@ -635,6 +649,12 @@ static void show_stats(struct diffstat_t
 	else
 		width = max_change;
 
+	/* See if the minimum change is shown with the normal scale
+	 * and if not switch to non-linear scale
+	 */
+	if (min_change && !scale_linear(min_change, width, max_change))
+		non_linear_scale = 1;
+
 	for (i = 0; i < data->nr; i++) {
 		const char *prefix = "";
 		char *name = data->files[i]->name;
@@ -684,6 +704,11 @@ static void show_stats(struct diffstat_t
 
 		if (max_change < width)
 			;
+		else if (non_linear_scale) {
+			total = scale_non_linear(total, width, max_change);
+			add = scale_linear(add, total, add + del);
+			del = total - add;
+		}
 		else {
 			total = scale_linear(total, width, max_change);
 			add = scale_linear(add, width, max_change);
-- 
1.4.2.1.gf80a

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  2:40 [PATCH 3/3] diff --stat: sometimes use non-linear scaling Junio C Hamano
@ 2006-09-27  3:11 ` David Rientjes
  2006-09-27  5:09   ` Junio C Hamano
  2006-09-27  7:36 ` Johannes Schindelin
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: David Rientjes @ 2006-09-27  3:11 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jan Engelhardt, Linus Torvalds, Adrian Bunk

On Tue, 26 Sep 2006, Junio C Hamano wrote:

> @@ -574,10 +585,11 @@ static void show_graph(char ch, int cnt,
>  static void show_stats(struct diffstat_t* data, struct diff_options *options)
>  {
>  	int i, len, add, del, total, adds = 0, dels = 0;
> -	int max_change = 0, max_len = 0;
> +	int max_change = 0, max_len = 0, min_change = 0;
>  	int total_files = data->nr;
>  	int width, name_width;
>  	const char *reset, *set, *add_c, *del_c;
> +	int non_linear_scale = 0;
>  
>  	if (data->nr == 0)
>  		return;
> @@ -620,6 +632,8 @@ static void show_stats(struct diffstat_t
>  			continue;
>  		if (max_change < change)
>  			max_change = change;
> +		if (0 < change && (!min_change || change < min_change))
> +			min_change = change;
>  	}

Again with the constant placement in a comparison expression.

> @@ -684,6 +704,11 @@ static void show_stats(struct diffstat_t
>  
>  		if (max_change < width)
>  			;
> +		else if (non_linear_scale) {
> +			total = scale_non_linear(total, width, max_change);
> +			add = scale_linear(add, total, add + del);
> +			del = total - add;
> +		}
>  		else {
>  			total = scale_linear(total, width, max_change);
>  			add = scale_linear(add, width, max_change);
> 

if (...)
	;
else if {
	...
}

is _never_ necessary.

		David

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  3:11 ` David Rientjes
@ 2006-09-27  5:09   ` Junio C Hamano
  2006-09-27  5:32     ` David Rientjes
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2006-09-27  5:09 UTC (permalink / raw)
  To: David Rientjes; +Cc: git

David Rientjes <rientjes@cs.washington.edu> writes:

> Again with the constant placement in a comparison expression.

I won't comment on this one.  See list archives ;-).

>>  		if (max_change < width)
>>  			;
>> +		else if (non_linear_scale) {
>> +			total = scale_non_linear(total, width, max_change);
>> +			add = scale_linear(add, total, add + del);
>> +			del = total - add;
>> +		}
>>  		else {
>>  			total = scale_linear(total, width, max_change);
>>  			add = scale_linear(add, width, max_change);
>> 
>
> if (...)
> 	;
> else if {
> 	...
> }
>
> is _never_ necessary.

What's happening here in this particular case is:

	if the changes fits within the alloted width
		; /* we do not have to do anything */
	else if we are using non-linear scale {
               	scale it like this
	}
	else {
               	scale it like that
	}

so the code actually matches the flow of thought perfectly well.

I first tried to write it without "if () ;/*empty*/ else" chain
like this:

	if given width is narrower than changes we have {
        	if we are doing non-linear scale {
                	scale it like this
                }
                else {
                	scale it like that
		}
	}


It made the indentation unnecessarily deep.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  5:09   ` Junio C Hamano
@ 2006-09-27  5:32     ` David Rientjes
  2006-09-27  6:19       ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: David Rientjes @ 2006-09-27  5:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Tue, 26 Sep 2006, Junio C Hamano wrote:

> David Rientjes <rientjes@cs.washington.edu> writes:
> 
> > Again with the constant placement in a comparison expression.
> 
> I won't comment on this one.  See list archives ;-).
> 

I'm very familiar with the list archives and your support of writing 
relationals like 0 < x.  It's a matter of taste.  And since the large 
majority of programmers in any language write x > 0 instead, I think it's 
preferrable to write code that is in the style and taste of the majority.

Large software projects require a conformity in the style in which the 
code is written.  Granted the git developer community is small, there is 
still a need for this confomity so that developers don't have to put up 
with the subtleties in the style of which individuals decide to code.

When I read "x > 0", my mind parses that very easily.  When I read "0 < 
x", it takes me a few cycles longer.  I think the goal of any software 
project is to not only emit efficient and quality code, but also code that 
can be read and deciphered with ease unless it's impossible otherwise.

> What's happening here in this particular case is:
> 
> 	if the changes fits within the alloted width
> 		; /* we do not have to do anything */
> 	else if we are using non-linear scale {
>                	scale it like this
> 	}
> 	else {
>                	scale it like that
> 	}
> 
> so the code actually matches the flow of thought perfectly well.
> 
> I first tried to write it without "if () ;/*empty*/ else" chain
> like this:
> 
> 	if given width is narrower than changes we have {
>         	if we are doing non-linear scale {
>                 	scale it like this
>                 }
>                 else {
>                 	scale it like that
> 		}
> 	}
> 
> 
> It made the indentation unnecessarily deep.
> 

To change the code itself because of a hard 80-column limit or because 
you're tired of hitting the tab key is poor style.  The idents are there 
for a purpose: it tells the reader that the code is inside a block.  So 
when this conditional becomes a screen wide, I can understand it on the 
second screen and remember that I'm inside a conditional and not rely on 
the previous 'else' to jog my memory.  C is not a whitespace-dependent 
language like Python, but since when did idents (which are there _solely_ 
for the purpose of helping the reader) become deprecated?

		David

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  5:32     ` David Rientjes
@ 2006-09-27  6:19       ` Junio C Hamano
  2006-09-27  6:49         ` David Rientjes
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2006-09-27  6:19 UTC (permalink / raw)
  To: David Rientjes; +Cc: git

David Rientjes <rientjes@cs.washington.edu> writes:

> When I read "x > 0", my mind parses that very easily.  When I read "0 < 
> x", it takes me a few cycles longer.  I think the goal of any software 
> project is to not only emit efficient and quality code, but also code that 
> can be read and deciphered with ease unless it's impossible otherwise.

Well, the thing is, I end up being the guy who needs to stare at
git code longer than you do ;-).

Before --stat-width was introduced there was code like this:

	if (max + len > 70)
		max = 70 - len;

Here "len" is the width of the filename part, and "max" is the
number of changes we need to express.  The code is saying "if we
use one column for each changed line, does graph and name exceed
70 columns -- if so use the remainder of the line after we write
name for the graph".  Your "constant at right" rule makes this
kosher.

If we make that to a variable, say line_width, we can still
write:

	if (max + len > line_width)
        	...

I however tend to think "if line_width cannot fit (max + len)
then we do this", which would be more naturally expressed with:

	if (line_width < max + len)
        	...

Now, at this point, it is really the matter of taste and there
is no real reason to prefer one over the other.  Textual
ordering lets my eyes coast while reading the code without
taxing the brain.  I can see that the expression compares two
quantities, "line_width" and "max + len", and the boolean holds
true if line_width _comes_ _before_ "max + len" on the number
line (having number line in your head helps visualizing what is
compared with what).  If you write the comparison the wrong way,
it forces me to stop and think -- because on my number line
smaller numbers appear left, and cannot help me reading the
comparison written in "a > b" order.

I could try writing constants on the right hand side when
constants are involved, but I do not think it makes much sense.
It means that I would end up doing:

-	if (max + len > 70)
-		max = 70 - len;
+	if (line_width < max + len)
+		max = line_width - len;

Consistency counts not only while reading the finished code, but
also it helps reviewing the diff between the earlier version
that used constant (hence forced to have it on the right hand
side by your rule) and the version that made it into a variable.

> To change the code itself because of a hard 80-column limit or because 
> you're tired of hitting the tab key is poor style.

Well, the program _firstly_ matches the logic flow better, and
_in_ _addition_ if you write it another way it becomes
unnecessarily too deeply indented.  So while I agree with you as a
general principle that indentation depth should not dictate how
we code it does not apply to this particular example.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  6:19       ` Junio C Hamano
@ 2006-09-27  6:49         ` David Rientjes
  2006-09-27  7:05           ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: David Rientjes @ 2006-09-27  6:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Tue, 26 Sep 2006, Junio C Hamano wrote:

> Well, the thing is, I end up being the guy who needs to stare at
> git code longer than you do ;-).
> 

Really?  This is the only community that hacks git?  There _are_ people 
out there that make their own changes specifically tailored to their 
purposes or that of their organization.

> Before --stat-width was introduced there was code like this:
> 
> 	if (max + len > 70)
> 		max = 70 - len;
> 
> Here "len" is the width of the filename part, and "max" is the
> number of changes we need to express.  The code is saying "if we
> use one column for each changed line, does graph and name exceed
> 70 columns -- if so use the remainder of the line after we write
> name for the graph".  Your "constant at right" rule makes this
> kosher.
> 
> If we make that to a variable, say line_width, we can still
> write:
> 
> 	if (max + len > line_width)
>         	...
> 
> I however tend to think "if line_width cannot fit (max + len)
> then we do this", which would be more naturally expressed with:
> 
> 	if (line_width < max + len)
>         	...
> 

First of all, it's not my "constant at right" rule, it's a preference that 
the _majority_ of computer programmers have used in virtually every 
language that you see source code for.

The grammar for C is

  relational-expression:
	shift-expression
	relational-expression < shift-expression

in this case.  Now while this supports both your variations above, it 
_suggests_ that the higher degree of computation is associated on the left 
side because the less-than operator associates that way.

What happens here:
	a < b < c

it turns out that this is equivalent to:
	(a < b) < c

so if you want your entire code base to conform to a particular style, 
it's _preferable_ to place the constant on the right.  And that's what the 
majority of programmers do.  Your taste is in the minority and out of 
respect to the code base you should make your code conform to what is most 
popular in the surrounding code.

Your argument of saying to yourself "if line_width cannot fit max + len 
then we do this" has no relevance at all.  I can say "if max + len is too 
big for line_width we do this" just the same.

If we're going by what sounds better in your head, then I expect _no_ 
argument when I write a function called conseguir_la_linea_longitud 
instead of get_line_length because Spanish is my first language.

Please respect what the majority of computer programmers write and unify 
the code base so that it's a similar style everywhere.

> > To change the code itself because of a hard 80-column limit or because 
> > you're tired of hitting the tab key is poor style.
> 
> Well, the program _firstly_ matches the logic flow better, and
> _in_ _addition_ if you write it another way it becomes
> unnecessarily too deeply indented.  So while I agree with you as a
> general principle that indentation depth should not dictate how
> we code it does not apply to this particular example.
> 

This is a ridiculous argument.  The C code will emit the exact same 
assembly regardless of how you write it.  You say that you wrote it that 
way to avoid idents which is an absolutely horrible way to dictate the 
code you use.  There are tons of opportunities where you can write cryptic 
source code that functions great with the least number of tokens and least 
number of lines to get the job done in every large project.  But, given 
that there are no assembler or performance tradeoffs, it should be written 
as clearly and nicely as possible for the reader.  I assert again what I 
did previously: if that if clause runs the length of my screen the indents 
will help me later to remember we're still in a conditional.  That's the 
SOLE purpose of indents: to make it easy for the reader to tell you're 
inside a block.  

And in one of your patches you had:
	if (...)
		;
	else {
		...
	}

without any other if statements.  If you're supporting that type of code, 
I'll simply consider this entire thread a lost cause.

		David

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  6:49         ` David Rientjes
@ 2006-09-27  7:05           ` Junio C Hamano
  2006-09-27  7:19             ` David Rientjes
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2006-09-27  7:05 UTC (permalink / raw)
  To: David Rientjes; +Cc: git

David Rientjes <rientjes@cs.washington.edu> writes:

> Your argument of saying to yourself "if line_width cannot fit max + len 
> then we do this" has no relevance at all.  I can say "if max + len is too 
> big for line_width we do this" just the same.

Actually that is exactly my point.  "Just the same".  There is
no reason to choose one way or the other from purely logical or
mathematical point of view.

Comparisons written always in textual order, when one gets used
to, takes less thinking to parse and understand, and that is
what I'm used to.  Have number line handy in your head and you
will hopefully like it too ;-).

>> Well, the program _firstly_ matches the logic flow better, and
>> _in_ _addition_ if you write it another way it becomes
>> unnecessarily too deeply indented.  So while I agree with you as a
>> general principle that indentation depth should not dictate how
>> we code it does not apply to this particular example.
>
> This is a ridiculous argument.  The C code will emit the exact same 
> assembly regardless of how you write it.  You say that you wrote it that 
> way to avoid idents which is an absolutely horrible way to dictate the 
> code you use.

I guess probably I was unclear (I did not talk anything about
code generation -- where did it come from?).  I say I wrote it
that way _firstly_ because the flow of the program matches
exactly what I saw the code needed to do -- if A I do not have
to do anything else if B I do this else I do that.  In addition
not having that "do nothing" made the code indent unnecessarily
deep but that is "in addition" and not the primary cause.  It
was an added bonus.

> And in one of your patches you had:
> 	if (...)
> 		;
> 	else {
> 		...
> 	}
>
> without any other if statements.

Yes, indeed that was very funny looking.

It was refactored from the final one that had "else if" in the
middle (else if was to add the non-linear scaling).  I agree
that any sane would not have done that if that was the real
first version.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  7:05           ` Junio C Hamano
@ 2006-09-27  7:19             ` David Rientjes
  2006-09-27  7:50               ` Johannes Schindelin
  0 siblings, 1 reply; 19+ messages in thread
From: David Rientjes @ 2006-09-27  7:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Wed, 27 Sep 2006, Junio C Hamano wrote:

> David Rientjes <rientjes@cs.washington.edu> writes:
> 
> > Your argument of saying to yourself "if line_width cannot fit max + len 
> > then we do this" has no relevance at all.  I can say "if max + len is too 
> > big for line_width we do this" just the same.
> 
> Actually that is exactly my point.  "Just the same".  There is
> no reason to choose one way or the other from purely logical or
> mathematical point of view.
> 

Nothing about this is "mathematical" at all and I never claimed it was.  
But there _is_ a reason to choose one way over the other and that is 
because the MAJORITY of programmers do it one way and YOU do it another 
way.  Why is it so hard to write all the code in the same style so that 
there is as little variation in the code as possible?

> Comparisons written always in textual order, when one gets used
> to, takes less thinking to parse and understand, and that is
> what I'm used to.  Have number line handy in your head and you
> will hopefully like it too ;-).
> 

Doing what you prefer and not what the majority of your developers do is 
the first step to a stagnant source tree.

> >> Well, the program _firstly_ matches the logic flow better, and
> >> _in_ _addition_ if you write it another way it becomes
> >> unnecessarily too deeply indented.  So while I agree with you as a
> >> general principle that indentation depth should not dictate how
> >> we code it does not apply to this particular example.
> >
> > This is a ridiculous argument.  The C code will emit the exact same 
> > assembly regardless of how you write it.  You say that you wrote it that 
> > way to avoid idents which is an absolutely horrible way to dictate the 
> > code you use.
> 
> I guess probably I was unclear (I did not talk anything about
> code generation -- where did it come from?).  I say I wrote it
> that way _firstly_ because the flow of the program matches
> exactly what I saw the code needed to do -- if A I do not have
> to do anything else if B I do this else I do that.  In addition
> not having that "do nothing" made the code indent unnecessarily
> deep but that is "in addition" and not the primary cause.  It
> was an added bonus.
> 

The concept of code generation is the whole point.  Both of our styles 
emits the same assembly code so by definition there is no difference since 
it achieves the exact same goal.  But there's a reason git is written in C 
and not in assembly and that reason is not just for portability.  A 
.c source file is the bridge between most programmers and assembly and 
since our coding styles differ but emit the same assembly, then it 
inherently comes with a freedom in how it's written.  And on a 
collaborative project such as git, that freedom should be confined to 
resembling the surrounding source code.

		David

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  2:40 [PATCH 3/3] diff --stat: sometimes use non-linear scaling Junio C Hamano
  2006-09-27  3:11 ` David Rientjes
@ 2006-09-27  7:36 ` Johannes Schindelin
  2006-09-27  9:16 ` Martin Waitz
  2006-09-27 15:12 ` Linus Torvalds
  3 siblings, 0 replies; 19+ messages in thread
From: Johannes Schindelin @ 2006-09-27  7:36 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jan Engelhardt, Linus Torvalds, Adrian Bunk

Hi,

On Tue, 26 Sep 2006, Junio C Hamano wrote:

> When some files have big changes and others are touched only
> slightly, diffstat graph did not show differences among smaller
> changes that well.  This changes the graph scaling to non-linear
> algorithm in such a case.

I want to say something about the purpose of the patch, not some totally 
unimportant superficialities.

In your example, a three line change has more than three plusses, and I 
find that wrong.

But I would actually find another change very useful: still linear, but 
such that if lines were added, at least one plus should be shown, and 
likewise with minus. (Often I ask myself, was this file removed, or just 
dramatically reduced, when I only see minusses).

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  7:19             ` David Rientjes
@ 2006-09-27  7:50               ` Johannes Schindelin
       [not found]                 ` <BAYC1-PASMTP024D1DA4730F9DF93F857FAE1A0@CEZ.ICE>
  2006-10-06 15:53                 ` Petr Baudis
  0 siblings, 2 replies; 19+ messages in thread
From: Johannes Schindelin @ 2006-09-27  7:50 UTC (permalink / raw)
  To: David Rientjes; +Cc: Junio C Hamano, git

Hi,

On Wed, 27 Sep 2006, David Rientjes wrote:

> On Wed, 27 Sep 2006, Junio C Hamano wrote:
> 
> > David Rientjes <rientjes@cs.washington.edu> writes:
> > 
> > > Your argument of saying to yourself "if line_width cannot fit max + len 
> > > then we do this" has no relevance at all.  I can say "if max + len is too 
> > > big for line_width we do this" just the same.
> > 
> > Actually that is exactly my point.  "Just the same".  There is
> > no reason to choose one way or the other from purely logical or
> > mathematical point of view.
> > 
> 
> Nothing about this is "mathematical" at all and I never claimed it was.  
> But there _is_ a reason to choose one way over the other and that is 
> because the MAJORITY of programmers do it one way and YOU do it another 
> way.  Why is it so hard to write all the code in the same style so that 
> there is as little variation in the code as possible?

Could you stop it already?

Git's source code is very clean and readable, even if there are inversions 
you might not be used to.

Besides, always doing it the same way is boring. _Boring_. Or do you make 
love to your girl-friend the same way over and over again?

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
       [not found]                 ` <BAYC1-PASMTP024D1DA4730F9DF93F857FAE1A0@CEZ.ICE>
@ 2006-09-27  8:35                   ` Johannes Schindelin
  2006-09-27  8:41                       ` Sean
  0 siblings, 1 reply; 19+ messages in thread
From: Johannes Schindelin @ 2006-09-27  8:35 UTC (permalink / raw)
  To: Sean; +Cc: David Rientjes, Junio C Hamano, git

Hi,

On Wed, 27 Sep 2006, Sean wrote:

> On Wed, 27 Sep 2006 09:50:11 +0200 (CEST)
> Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> 
> > Could you stop it already?
> 
> Well i'd like to offer some support for David.
> 
> In English you'd never say "if 10 is less than the number of girls"
> you'd always say "if the number of girls is greater than 10".
> 
> Why on earth would you ever write C code different than the way you'd
> express the same question in natural language?   Maybe this is only common
> in English and other languages are different; that would explain why this
> seems more natural to some.

In this case, though, "English" is utterly, totally irrelevant. The 
question is a mathematical one, and thus, the solution is a mathematical 
one.

So, in essence, if you do not understand a conditional with a constant on 
the left side, just because it happens to honour the mathematical view of 
"left is small, right is large", you do not stand a chance of 
understanding the formula, right?

> > Git's source code is very clean and readable, even if there are inversions 
> > you might not be used to.
> 
> Not to me.  I find it very annoying to have to figure out what
> "if ( 10 < x ) ..." is really trying to do.

Oh, come on! You cannot possibly spend even _seconds_ on this particular 
construct!

'nough said.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
@ 2006-09-27  8:41                       ` Sean
  0 siblings, 0 replies; 19+ messages in thread
From: Sean @ 2006-09-27  8:41 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: David Rientjes, Junio C Hamano, git

On Wed, 27 Sep 2006 10:35:16 +0200 (CEST)
Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:

> In this case, though, "English" is utterly, totally irrelevant. The 
> question is a mathematical one, and thus, the solution is a mathematical 
> one.

Well, no..  At least for me, I "think" in english, not mathematics.  And
thus I have to understand each condition in my native language.  And i'm
being honest with you when I tell you that my parser hiccups every time
I see such a construct.

> So, in essence, if you do not understand a conditional with a constant on 
> the left side, just because it happens to honour the mathematical view of 
> "left is small, right is large", you do not stand a chance of 
> understanding the formula, right?

It's not a matter of being able to understand, it's being able to digest
at a glance, almost without a thought as opposed to consciously having
to rearrange the arguments into something that "feels" right.

> Oh, come on! You cannot possibly spend even _seconds_ on this particular 
> construct!
> 
> 'nough said.

I'm telling you that it is disconcerting and annoying to have to rejig such
a construct.  Whereas when expressed in the opposite format it makes reading
simple and natural.  Making the code easier and more pleasurable to read.

And if you find it so easy to read either way, then why not bend for those
of us who have trouble reading it your way instead of just telling us to get
stuffed?

Sean

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  2:40 [PATCH 3/3] diff --stat: sometimes use non-linear scaling Junio C Hamano
  2006-09-27  3:11 ` David Rientjes
  2006-09-27  7:36 ` Johannes Schindelin
@ 2006-09-27  9:16 ` Martin Waitz
  2006-09-27 15:12 ` Linus Torvalds
  3 siblings, 0 replies; 19+ messages in thread
From: Martin Waitz @ 2006-09-27  9:16 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jan Engelhardt, Linus Torvalds, Adrian Bunk

[-- Attachment #1: Type: text/plain, Size: 789 bytes --]

hoi :)

On Tue, Sep 26, 2006 at 07:40:53PM -0700, Junio C Hamano wrote:
>  .gitignore                       |    1
>  Documentation/git-tar-tree.txt   |    3 +++++++++
>  Documentation/git-upload-tar.txt |   39 -----------------------------
>  Documentation/git.txt            |    4 -----------
>  Makefile                         |    1
>  builtin-tar-tree.c               |  130 +++++++++++++++-----------------------
>  builtin-upload-tar.c             |   74 ----------------------------------
>  git.c                            |    1
>  8 files changed, 53 insertions(+), 200 deletions(-)

hmm, the small changes (1 line) are still not shown :-(.
I like the idea of non-linear display, but we have to fine-tune the
algorithm a little bit more.

-- 
Martin Waitz

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  2:40 [PATCH 3/3] diff --stat: sometimes use non-linear scaling Junio C Hamano
                   ` (2 preceding siblings ...)
  2006-09-27  9:16 ` Martin Waitz
@ 2006-09-27 15:12 ` Linus Torvalds
  2006-09-28  8:17   ` Martin Waitz
  3 siblings, 1 reply; 19+ messages in thread
From: Linus Torvalds @ 2006-09-27 15:12 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jan Engelhardt, Adrian Bunk



On Tue, 26 Sep 2006, Junio C Hamano wrote:
>
> When some files have big changes and others are touched only
> slightly, diffstat graph did not show differences among smaller
> changes that well.  This changes the graph scaling to non-linear
> algorithm in such a case.

Ok, this is just _strange_.

> while with this, it shows:
> 
>  .gitignore                       |    1
>  Documentation/git-tar-tree.txt   |    3 +++++++++

No _way_ is it correct to show more than three characters if there were 
three lines of changes.

I think "nonlinear" is fine, but this is something that is "superlinear" 
in small changes, and then sublinear in bigger ones (and then apparently 
totally wrong for one-line changes).

It should at least never be superlinear, I believe.

		Linus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27 15:12 ` Linus Torvalds
@ 2006-09-28  8:17   ` Martin Waitz
  2006-09-28  9:20     ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: Martin Waitz @ 2006-09-28  8:17 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Junio C Hamano, git, Jan Engelhardt, Adrian Bunk

[-- Attachment #1: Type: text/plain, Size: 890 bytes --]

hoi :)

On Wed, Sep 27, 2006 at 08:12:49AM -0700, Linus Torvalds wrote:
> No _way_ is it correct to show more than three characters if there were 
> three lines of changes.
> 
> I think "nonlinear" is fine, but this is something that is "superlinear" 
> in small changes, and then sublinear in bigger ones (and then apparently 
> totally wrong for one-line changes).
> 
> It should at least never be superlinear, I believe.

So if we want to keep the logarithmic scale we can do some maths:

Assume we use a formula ala

	length = a log(change + b) + c

with three invariants a, b, and c.

We want to scale linearly at first, but want to reach width at
max_change:

	0 = a log(b) + c
	1 = a log(b + 1) + c
	width = a log(max_change + b) + c

But only I have not succeeded in solving these equations, I always stop
at the last invariant :-(

-- 
Martin Waitz

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-28  8:17   ` Martin Waitz
@ 2006-09-28  9:20     ` Junio C Hamano
  2006-09-29 10:56       ` Andreas Ericsson
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2006-09-28  9:20 UTC (permalink / raw)
  To: Martin Waitz; +Cc: git

Martin Waitz <tali@admingilde.org> writes:

>> It should at least never be superlinear, I believe.
>
> So if we want to keep the logarithmic scale we can do some maths:
>...
> But only I have not succeeded in solving these equations, I always stop
> at the last invariant :-(

There is another constraint you did not mention.  Here is the
output from my another failed experiment:

 .gitignore                       |    1 -
 Documentation/git-tar-tree.txt   |    3 +++
 Documentation/git-upload-tar.txt |   39 -----------------------------
 Documentation/git.txt            |    4 ----
 Makefile                         |    1 -
 builtin-tar-tree.c               |  130 +++++++++++++++-----------------------
 builtin-upload-tar.c             |   74 ----------------------------------
 git.c                            |    1 -
 8 files changed, 53 insertions(+), 200 deletions(-)

The deletion from Documentation/git-upload-tar.txt looks much
larger than addition to builtin-tar-tree.c in the above, but
there are 50 lines added to builtin-tar-tree.c (which is why
this experiment is a failure).

Because we are dealing with non-linear scaling, the total of
scaled adds and scaled deletes does not equal to scaled total.
We can deal with this in two ways.  Scale the total and
distribute it, or scale adds and deletes individually and make
sure the sum of scaled adds and deletes never exceed the width.
Obviously the former is easier to implement but it was _wrong_.

The fitting algorithm in the posted patch scales the total to
fit the alloted width and then distributes the result to adds
and deletes.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-28  9:20     ` Junio C Hamano
@ 2006-09-29 10:56       ` Andreas Ericsson
  0 siblings, 0 replies; 19+ messages in thread
From: Andreas Ericsson @ 2006-09-29 10:56 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Martin Waitz, git

Junio C Hamano wrote:
> Martin Waitz <tali@admingilde.org> writes:
> 
>>> It should at least never be superlinear, I believe.
>> So if we want to keep the logarithmic scale we can do some maths:
>> ...
>> But only I have not succeeded in solving these equations, I always stop
>> at the last invariant :-(
> 
> There is another constraint you did not mention.  Here is the
> output from my another failed experiment:
> 
>  .gitignore                       |    1 -
>  Documentation/git-tar-tree.txt   |    3 +++
>  Documentation/git-upload-tar.txt |   39 -----------------------------
>  Documentation/git.txt            |    4 ----
>  Makefile                         |    1 -
>  builtin-tar-tree.c               |  130 +++++++++++++++-----------------------
>  builtin-upload-tar.c             |   74 ----------------------------------
>  git.c                            |    1 -
>  8 files changed, 53 insertions(+), 200 deletions(-)
> 
> The deletion from Documentation/git-upload-tar.txt looks much
> larger than addition to builtin-tar-tree.c in the above, but
> there are 50 lines added to builtin-tar-tree.c (which is why
> this experiment is a failure).
> 
> Because we are dealing with non-linear scaling, the total of
> scaled adds and scaled deletes does not equal to scaled total.
> We can deal with this in two ways.  Scale the total and
> distribute it, or scale adds and deletes individually and make
> sure the sum of scaled adds and deletes never exceed the width.
> Obviously the former is easier to implement but it was _wrong_.
> 
> The fitting algorithm in the posted patch scales the total to
> fit the alloted width and then distributes the result to adds
> and deletes.
> 

Why not just take the stupid and simple solution and make it:

file1   | +31,-19    +++
file2   | +19,-106   ---
file3   | +10,-10    ###

That is, show the number of lines that actually changed, and print a 
fixed number of plusses or minuses after the numbers to make it easy to, 
at a glance, check if more lines were added than deleted or vice versa.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
  2006-09-27  7:50               ` Johannes Schindelin
       [not found]                 ` <BAYC1-PASMTP024D1DA4730F9DF93F857FAE1A0@CEZ.ICE>
@ 2006-10-06 15:53                 ` Petr Baudis
  1 sibling, 0 replies; 19+ messages in thread
From: Petr Baudis @ 2006-10-06 15:53 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: David Rientjes, Junio C Hamano, git

Dear diary, on Wed, Sep 27, 2006 at 09:50:11AM CEST, I got a letter
where Johannes Schindelin <Johannes.Schindelin@gmx.de> said that...
> Git's source code is very clean and readable, even if there are inversions 
> you might not be used to.

I think it's a good sign that _this_ is what we argue about. ;-)

-- 
				Petr "Pasky the 10 > x Loather" Baudis
Stuff: http://pasky.or.cz/
#!/bin/perl -sp0777i<X+d*lMLa^*lN%0]dsXx++lMlN/dsM0<j]dsj
$/=unpack('H*',$_);$_=`echo 16dio\U$k"SK$/SM$n\EsN0p[lN*1
lK[d2%Sa2/d0$^Ixp"|dc`;s/\W//g;$_=pack('H*',/((..)*)$/)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] diff --stat: sometimes use non-linear scaling.
@ 2006-10-12 15:04 apodtele
  0 siblings, 0 replies; 19+ messages in thread
From: apodtele @ 2006-10-12 15:04 UTC (permalink / raw)
  To: git

Sublinear solution without patch is below.

On Sept 28, 2006 Martin Waitz wrote:
> On Wed, Sep 27, 2006 at 08:12:49AM -0700, Linus Torvalds wrote:
>> No _way_ is it correct to show more than three characters if there were
>> three lines of changes.
>>
>> I think "nonlinear" is fine, but this is something that is "superlinear"
>> in small changes, and then sublinear in bigger ones (and then apparently
>> totally wrong for one-line changes).
>>
>> It should at least never be superlinear, I believe.
>
> So if we want to keep the logarithmic scale we can do some maths:
> Assume we use a formula ala
>         length = a log(change + b) + c

You are probably looking for much simpler, log-less, and pure integer:

        length = width * change / (width + change) + 1

Assuming target witdth of 40, for example, it will produce

Change    Length
1         1
2         2
3         3
4         4
10        9
20        14
30        18
50        23
100       29
1000      39
10000     40
1000000   40

--
Alexei

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2006-10-12 15:04 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-27  2:40 [PATCH 3/3] diff --stat: sometimes use non-linear scaling Junio C Hamano
2006-09-27  3:11 ` David Rientjes
2006-09-27  5:09   ` Junio C Hamano
2006-09-27  5:32     ` David Rientjes
2006-09-27  6:19       ` Junio C Hamano
2006-09-27  6:49         ` David Rientjes
2006-09-27  7:05           ` Junio C Hamano
2006-09-27  7:19             ` David Rientjes
2006-09-27  7:50               ` Johannes Schindelin
     [not found]                 ` <BAYC1-PASMTP024D1DA4730F9DF93F857FAE1A0@CEZ.ICE>
2006-09-27  8:35                   ` Johannes Schindelin
2006-09-27  8:41                     ` Sean
2006-09-27  8:41                       ` Sean
2006-10-06 15:53                 ` Petr Baudis
2006-09-27  7:36 ` Johannes Schindelin
2006-09-27  9:16 ` Martin Waitz
2006-09-27 15:12 ` Linus Torvalds
2006-09-28  8:17   ` Martin Waitz
2006-09-28  9:20     ` Junio C Hamano
2006-09-29 10:56       ` Andreas Ericsson
2006-10-12 15:04 apodtele

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.