All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines
@ 2011-04-29 14:57 Michael J Gruber
  2011-04-29 14:57 ` [PATCH 2/2] diff-options.txt: describe --stat-{width,name-width,lines} Michael J Gruber
                   ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Michael J Gruber @ 2011-04-29 14:57 UTC (permalink / raw)
  To: git

Often one is interested in the full --stat output only for commits which
change a few files, but not others, because larger restructuring gives a
--stat which fills a few screens.

Introduce a new option --stat-lines=<lines> which limits the --stat output
to the first and last <lines> lines, separated by a "..." line. It can
also be given as the third parameter in
--stat=<width>,<name-width>,<lines>.

Also, the unstuck form is supported analogous to the other two stat
parameters.

Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---
I would even consider a default of 10 (i.e. show a 20 line stat in full,
abbreviate larger ones) to be sensible but have refrained from such a
behaviour change.

We have hardcoded defaults for width (80) and name-width (50), so having
one for lines should be okay also. Can I has tis wiz default? ;)
---
 Documentation/diff-options.txt |    5 ++++-
 diff.c                         |   25 +++++++++++++++++++++++--
 diff.h                         |    1 +
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 34f0145..e0429b3 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -48,11 +48,14 @@ endif::git-format-patch[]
 --patience::
 	Generate a diff using the "patience diff" algorithm.
 
---stat[=<width>[,<name-width>]]::
+--stat[=<width>[,<name-width>[,<lines>]]]::
 	Generate a diffstat.  You can override the default
 	output width for 80-column terminal by `--stat=<width>`.
 	The width of the filename part can be controlled by
 	giving another width to it separated by a comma.
+	By giving a third parameter `<lines>`, you can limit the
+	output to the first and last `<lines>` lines, separated by
+	`...`.
 
 --numstat::
 	Similar to `\--stat`, but shows number of added and
diff --git a/diff.c b/diff.c
index feced34..9ccba1e 100644
--- a/diff.c
+++ b/diff.c
@@ -1244,7 +1244,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	int i, len, add, del, adds = 0, dels = 0;
 	uintmax_t max_change = 0, max_len = 0;
 	int total_files = data->nr;
-	int width, name_width;
+	int width, name_width, lines;
 	const char *reset, *add_c, *del_c;
 	const char *line_prefix = "";
 	struct strbuf *msg = NULL;
@@ -1259,6 +1259,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 
 	width = options->stat_width ? options->stat_width : 80;
 	name_width = options->stat_name_width ? options->stat_name_width : 50;
+	lines = options->stat_lines;
 
 	/* Sanity: give at least 5 columns to the graph,
 	 * but leave at least 10 columns for the name.
@@ -1303,6 +1304,12 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		width = max_change;
 
 	for (i = 0; i < data->nr; i++) {
+		if (lines && i >= lines && i < data->nr-lines) {
+			fprintf(options->file, "%s ...\n", line_prefix);
+			i = data->nr-lines-1;
+			lines = 0; /* no need to recheck */
+			continue;
+		}
 		const char *prefix = "";
 		char *name = data->files[i]->print_name;
 		uintmax_t added = data->files[i]->added;
@@ -3105,6 +3112,7 @@ static int stat_opt(struct diff_options *options, const char **av)
 	char *end;
 	int width = options->stat_width;
 	int name_width = options->stat_name_width;
+	int lines = options->stat_lines;
 	int argcount = 1;
 
 	arg += strlen("--stat");
@@ -3132,12 +3140,24 @@ static int stat_opt(struct diff_options *options, const char **av)
 				name_width = strtoul(av[1], &end, 10);
 				argcount = 2;
 			}
+		} else if (!prefixcmp(arg, "-lines")) {
+			arg += strlen("-lines");
+			if (*arg == '=')
+				lines = strtoul(arg + 1, &end, 10);
+			else if (!*arg && !av[1])
+				die("Option '--stat-lines' requires a value");
+			else if (!*arg) {
+				lines = strtoul(av[1], &end, 10);
+				argcount = 2;
+			}
 		}
 		break;
 	case '=':
 		width = strtoul(arg+1, &end, 10);
 		if (*end == ',')
 			name_width = strtoul(end+1, &end, 10);
+		if (*end == ',')
+			lines = strtoul(end+1, &end, 10);
 	}
 
 	/* Important! This checks all the error cases! */
@@ -3146,6 +3166,7 @@ static int stat_opt(struct diff_options *options, const char **av)
 	options->output_format |= DIFF_FORMAT_DIFFSTAT;
 	options->stat_name_width = name_width;
 	options->stat_width = width;
+	options->stat_lines = lines;
 	return argcount;
 }
 
@@ -3191,7 +3212,7 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 	else if (!strcmp(arg, "-s"))
 		options->output_format |= DIFF_FORMAT_NO_OUTPUT;
 	else if (!prefixcmp(arg, "--stat"))
-		/* --stat, --stat-width, or --stat-name-width */
+		/* --stat, --stat-width, --stat-name-width, or --stat-lines */
 		return stat_opt(options, av);
 
 	/* renames options */
diff --git a/diff.h b/diff.h
index d83e53e..98e9aed 100644
--- a/diff.h
+++ b/diff.h
@@ -124,6 +124,7 @@ struct diff_options {
 
 	int stat_width;
 	int stat_name_width;
+	int stat_lines;
 	const char *word_regex;
 	enum diff_words_type word_diff;
 
-- 
1.7.5.250.g4493b

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 2/2] diff-options.txt: describe --stat-{width,name-width,lines}
  2011-04-29 14:57 [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
@ 2011-04-29 14:57 ` Michael J Gruber
  2011-04-29 15:12 ` [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
  2011-04-29 16:15 ` Junio C Hamano
  2 siblings, 0 replies; 21+ messages in thread
From: Michael J Gruber @ 2011-04-29 14:57 UTC (permalink / raw)
  To: git

Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---
 Documentation/diff-options.txt |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index e0429b3..ed8dbef 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -56,6 +56,9 @@ endif::git-format-patch[]
 	By giving a third parameter `<lines>`, you can limit the
 	output to the first and last `<lines>` lines, separated by
 	`...`.
++
+These parameters can also be set individually with `--stat-width=<width>`,
+`--stat-name-width=<name-width>` and `--stat-lines=<lines>`.
 
 --numstat::
 	Similar to `\--stat`, but shows number of added and
-- 
1.7.5.250.g4493b

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines
  2011-04-29 14:57 [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
  2011-04-29 14:57 ` [PATCH 2/2] diff-options.txt: describe --stat-{width,name-width,lines} Michael J Gruber
@ 2011-04-29 15:12 ` Michael J Gruber
  2011-04-29 16:15 ` Junio C Hamano
  2 siblings, 0 replies; 21+ messages in thread
From: Michael J Gruber @ 2011-04-29 15:12 UTC (permalink / raw)
  Cc: git

Michael J Gruber venit, vidit, dixit 29.04.2011 16:57:
> Often one is interested in the full --stat output only for commits which
> change a few files, but not others, because larger restructuring gives a
> --stat which fills a few screens.
> 
> Introduce a new option --stat-lines=<lines> which limits the --stat output
> to the first and last <lines> lines, separated by a "..." line. It can
> also be given as the third parameter in
> --stat=<width>,<name-width>,<lines>.
> 
> Also, the unstuck form is supported analogous to the other two stat
> parameters.
> 
> Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
> ---
> I would even consider a default of 10 (i.e. show a 20 line stat in full,
> abbreviate larger ones) to be sensible but have refrained from such a
> behaviour change.
> 
> We have hardcoded defaults for width (80) and name-width (50), so having
> one for lines should be okay also. Can I has tis wiz default? ;)
> ---
>  Documentation/diff-options.txt |    5 ++++-
>  diff.c                         |   25 +++++++++++++++++++++++--
>  diff.h                         |    1 +
>  3 files changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
> index 34f0145..e0429b3 100644
> --- a/Documentation/diff-options.txt
> +++ b/Documentation/diff-options.txt
> @@ -48,11 +48,14 @@ endif::git-format-patch[]
>  --patience::
>  	Generate a diff using the "patience diff" algorithm.
>  
> ---stat[=<width>[,<name-width>]]::
> +--stat[=<width>[,<name-width>[,<lines>]]]::
>  	Generate a diffstat.  You can override the default
>  	output width for 80-column terminal by `--stat=<width>`.
>  	The width of the filename part can be controlled by
>  	giving another width to it separated by a comma.
> +	By giving a third parameter `<lines>`, you can limit the
> +	output to the first and last `<lines>` lines, separated by
> +	`...`.
>  
>  --numstat::
>  	Similar to `\--stat`, but shows number of added and
> diff --git a/diff.c b/diff.c
> index feced34..9ccba1e 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -1244,7 +1244,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  	int i, len, add, del, adds = 0, dels = 0;
>  	uintmax_t max_change = 0, max_len = 0;
>  	int total_files = data->nr;
> -	int width, name_width;
> +	int width, name_width, lines;
>  	const char *reset, *add_c, *del_c;
>  	const char *line_prefix = "";
>  	struct strbuf *msg = NULL;
> @@ -1259,6 +1259,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  
>  	width = options->stat_width ? options->stat_width : 80;
>  	name_width = options->stat_name_width ? options->stat_name_width : 50;
> +	lines = options->stat_lines;
>  
>  	/* Sanity: give at least 5 columns to the graph,
>  	 * but leave at least 10 columns for the name.
> @@ -1303,6 +1304,12 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  		width = max_change;
>  
>  	for (i = 0; i < data->nr; i++) {
> +		if (lines && i >= lines && i < data->nr-lines) {
> +			fprintf(options->file, "%s ...\n", line_prefix);
> +			i = data->nr-lines-1;
> +			lines = 0; /* no need to recheck */
> +			continue;
> +		}

One could/should do the same limitting for the earlier loop which
computes the maximal line width (since the largest file names may be
skipped now). I'll leave this for a v2 which I'm sure will be necessary :)

Michael

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines
  2011-04-29 14:57 [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
  2011-04-29 14:57 ` [PATCH 2/2] diff-options.txt: describe --stat-{width,name-width,lines} Michael J Gruber
  2011-04-29 15:12 ` [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
@ 2011-04-29 16:15 ` Junio C Hamano
  2011-05-01  8:27   ` Michael J Gruber
  2 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2011-04-29 16:15 UTC (permalink / raw)
  To: Michael J Gruber; +Cc: git

Michael J Gruber <git@drmicha.warpmail.net> writes:

> I would even consider a default of 10 (i.e. show a 20 line stat in full,
> abbreviate larger ones) to be sensible but have refrained from such a
> behaviour change.
>
> We have hardcoded defaults for width (80) and name-width (50), so having
> one for lines should be okay also. Can I has tis wiz default? ;)

The terminal has fixed width to be apportioned but has an infinite height
with scrolling, so that comparison is somewhat bogus.

This should be called "count", not "lines".  Anticipate a future where we
introduce a variant of --stat (in a way similar to how --numstat is a
variant of --stat) that displays its result with two lines per entry.

Also getting (N+1) "lines" output for specifying N feels very unnatural.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines
  2011-04-29 16:15 ` Junio C Hamano
@ 2011-05-01  8:27   ` Michael J Gruber
  2011-05-01 18:33     ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Michael J Gruber @ 2011-05-01  8:27 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Fri, 29 Apr 2011 09:15 -0700, "Junio C Hamano" <gitster@pobox.com>
wrote:
> Michael J Gruber <git@drmicha.warpmail.net> writes:
> 
> > I would even consider a default of 10 (i.e. show a 20 line stat in full,
> > abbreviate larger ones) to be sensible but have refrained from such a
> > behaviour change.
> >
> > We have hardcoded defaults for width (80) and name-width (50), so having
> > one for lines should be okay also. Can I has tis wiz default? ;)
> 
> The terminal has fixed width to be apportioned but has an infinite height
> with scrolling, so that comparison is somewhat bogus.
> 
> This should be called "count", not "lines".  Anticipate a future where we
> introduce a variant of --stat (in a way similar to how --numstat is a
> variant of --stat) that displays its result with two lines per entry.
> 
> Also getting (N+1) "lines" output for specifying N feels very unnatural.
> 
It's actually 2*(N+1): The first and last N, plus the "..." line.

With "count" I would expect to see the first N lines/items. That's even
simpler to do. Do you think that makes more sense then head+tail?

Michael

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines
  2011-05-01  8:27   ` Michael J Gruber
@ 2011-05-01 18:33     ` Junio C Hamano
  2011-05-03 10:46       ` [PATCHv2 " Michael J Gruber
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2011-05-01 18:33 UTC (permalink / raw)
  To: Michael J Gruber; +Cc: git

"Michael J Gruber" <git@drmicha.warpmail.net> writes:

> On Fri, 29 Apr 2011 09:15 -0700, "Junio C Hamano" <gitster@pobox.com>
> wrote:
>> Also getting (N+1) "lines" output for specifying N feels very unnatural.
>> 
> It's actually 2*(N+1): The first and last N, plus the "..." line.

Yeah, that was what I meant, but I think my mistake itself demonstrates
how unintuitive "N lines each from the top and the bottom end" is.

It shouldn't be too difficult to divide the number given by the user by 2
and adjust only one end when the given number is even, but I am not sure
if that is worth doing, or if it is even a good idea to have "..." in the
middle to begin with; it would make it harder to notice that the list is
elided than having the marker at the end.

But what problem are you trying to solve?

If it is just you do not want to see a huge list, shouldn't that option
also be controlling how --summary is produced?

> With "count" I would expect to see the first N lines/items. That's even
> simpler to do. Do you think that makes more sense then head+tail?

"Hide noise and show only a few from the top and from the bottom" makes
sense when the items near the top and near the bottom are somehow more
important than the others.  Otherwise, "keep only a few from the top"
should work just as well.

Usually, no single path is more important than the others, and even if
there were a path that is more important than the others, people tend to
arrange so that these more important paths come very early in the natural
sort order (think "00_README"), so in that sense, items near the top could
be more important than items elsewhere, while items near the bottom are
unlikely to have higher nor lower importance than the remainder.

The bottom of a sorted list could become a bit more important than others
in a narrow corner case that we would likely not care: when your change is
only for paths whose names begin with 'a' through 'm', "show only a few
from the top and from the bottom" would let you see that nothing between
'n' and 'z' was touched.  I do not think it is an important piece of
information to convey, though.

I designed merge.log (back then it was merge.summary) to show commits near
the top because the person reading the log would be better served by
seeing up to what change the merge integrated the topic branch.

A typical clean history of a topic branch tends to have necessary but
uninteresting clean-up commits earier and the commits to complete the
feature near the top.  In the context of reading the merge itself, you
care much more about why the merge was done (what the integration tree
wanted out of the topic) than about how the topic progressed.  Seeing the
bottom (i.e. one commit above the fork point) may only be interesting for
the person who is making a merge of his own topic branch, but it is not
very useful for the integrator (the fork point itself may be a useful
thing to know, though) nor for the reader of the history.

But the same "bottom could be a bit more important than others" argument
can apply here, and I wouldn't oppose to a patch to change "merge.log" to
show N-2 top entries, "...", and then a single bottom entry, when merging
more than N+1 commits to your branch.

That approach would probably apply equally well to your --stat-count and
covers the "the bottom is slightly more important than others" issue.  It
would go like this:

 - Show the first N-2 items unconditionally;

 - If you have shown N-2 items already, keep counting without
   showing, and remember the last two items you saw;

 - At the end of the loop,

   - if you didn't discard any and the last ones you remember comes
     immediately after the first N-2 items, just show them;

   - if you skipped any, show "... (%d items omitted)" and then
     show the last item.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines
  2011-05-01 18:33     ` Junio C Hamano
@ 2011-05-03 10:46       ` Michael J Gruber
  2011-05-03 10:46         ` [PATCHv2 2/2] diff-options.txt: describe --stat-{width,name-width,count} Michael J Gruber
                           ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Michael J Gruber @ 2011-05-03 10:46 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Often one is interested in the full --stat output only for commits which
change a few files, but not others, because larger restructuring gives a
--stat which fills a few screens.

Introduce a new option --stat-count=<count> which limits the --stat output
to the first <count> lines, followed by a "..." line. It can
also be given as the third parameter in
--stat=<width>,<name-width>,<count>.

Also, the unstuck form is supported analogous to the other two stat
parameters.

Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---
I would even consider a default of 20 to be sensible but have refrained from
such a behaviour change.

We have hardcoded defaults for width (80) and name-width (50), but - as Junio
points out - typical terminals are horizontally limited, not vertically.

v2 uses the name "count" as suggested by Junio and limits to the first <count>
lines/items.

In the case with <count>+1 items one may argue whether it makes more sense to
ignore the user wish and output all <count>+1 lines, or <count> lines (as
requested) plus the "..." line.

(I saw the suggestion about N-2...2 just now. Would work also, but I guess
we would do this in more cases then, as Junio indicated.)
---
 Documentation/diff-options.txt |    5 ++++-
 diff.c                         |   37 +++++++++++++++++++++++++++++++++----
 diff.h                         |    1 +
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 34f0145..000eae0 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -48,11 +48,14 @@ endif::git-format-patch[]
 --patience::
 	Generate a diff using the "patience diff" algorithm.
 
---stat[=<width>[,<name-width>]]::
+--stat[=<width>[,<name-width>[,<count>]]]::
 	Generate a diffstat.  You can override the default
 	output width for 80-column terminal by `--stat=<width>`.
 	The width of the filename part can be controlled by
 	giving another width to it separated by a comma.
+	By giving a third parameter `<count>`, you can limit the
+	output to the first `<count>` lines, followed by
+	`...` if there are more.
 
 --numstat::
 	Similar to `\--stat`, but shows number of added and
diff --git a/diff.c b/diff.c
index feced34..a6b0f0f 100644
--- a/diff.c
+++ b/diff.c
@@ -1244,7 +1244,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	int i, len, add, del, adds = 0, dels = 0;
 	uintmax_t max_change = 0, max_len = 0;
 	int total_files = data->nr;
-	int width, name_width;
+	int width, name_width, count;
 	const char *reset, *add_c, *del_c;
 	const char *line_prefix = "";
 	struct strbuf *msg = NULL;
@@ -1259,6 +1259,8 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 
 	width = options->stat_width ? options->stat_width : 80;
 	name_width = options->stat_name_width ? options->stat_name_width : 50;
+	count = (options->stat_count && options->stat_count < data->nr)
+					? options->stat_count : data->nr;
 
 	/* Sanity: give at least 5 columns to the graph,
 	 * but leave at least 10 columns for the name.
@@ -1275,7 +1277,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	add_c = diff_get_color_opt(options, DIFF_FILE_NEW);
 	del_c = diff_get_color_opt(options, DIFF_FILE_OLD);
 
-	for (i = 0; i < data->nr; i++) {
+	for (i = 0; i < count; i++) {
 		struct diffstat_file *file = data->files[i];
 		uintmax_t change = file->added + file->deleted;
 		fill_print_name(file);
@@ -1302,7 +1304,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	else
 		width = max_change;
 
-	for (i = 0; i < data->nr; i++) {
+	for (i = 0; i < count; i++) {
 		const char *prefix = "";
 		char *name = data->files[i]->print_name;
 		uintmax_t added = data->files[i]->added;
@@ -1369,6 +1371,19 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		show_graph(options->file, '-', del, del_c, reset);
 		fprintf(options->file, "\n");
 	}
+	if (count < data->nr)
+		fprintf(options->file, "%s ...\n", line_prefix);
+	for (i = count; i < data->nr; i++) {
+		uintmax_t added = data->files[i]->added;
+		uintmax_t deleted = data->files[i]->deleted;
+		if (!data->files[i]->is_renamed &&
+			 (added + deleted == 0)) {
+			total_files--;
+			continue;
+		}
+		adds += added;
+		dels += deleted;
+	}
 	fprintf(options->file, "%s", line_prefix);
 	fprintf(options->file,
 	       " %d files changed, %d insertions(+), %d deletions(-)\n",
@@ -3105,6 +3120,7 @@ static int stat_opt(struct diff_options *options, const char **av)
 	char *end;
 	int width = options->stat_width;
 	int name_width = options->stat_name_width;
+	int count = options->stat_count;
 	int argcount = 1;
 
 	arg += strlen("--stat");
@@ -3132,12 +3148,24 @@ static int stat_opt(struct diff_options *options, const char **av)
 				name_width = strtoul(av[1], &end, 10);
 				argcount = 2;
 			}
+		} else if (!prefixcmp(arg, "-count")) {
+			arg += strlen("-count");
+			if (*arg == '=')
+				count = strtoul(arg + 1, &end, 10);
+			else if (!*arg && !av[1])
+				die("Option '--stat-count' requires a value");
+			else if (!*arg) {
+				count = strtoul(av[1], &end, 10);
+				argcount = 2;
+			}
 		}
 		break;
 	case '=':
 		width = strtoul(arg+1, &end, 10);
 		if (*end == ',')
 			name_width = strtoul(end+1, &end, 10);
+		if (*end == ',')
+			count = strtoul(end+1, &end, 10);
 	}
 
 	/* Important! This checks all the error cases! */
@@ -3146,6 +3174,7 @@ static int stat_opt(struct diff_options *options, const char **av)
 	options->output_format |= DIFF_FORMAT_DIFFSTAT;
 	options->stat_name_width = name_width;
 	options->stat_width = width;
+	options->stat_count = count;
 	return argcount;
 }
 
@@ -3191,7 +3220,7 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 	else if (!strcmp(arg, "-s"))
 		options->output_format |= DIFF_FORMAT_NO_OUTPUT;
 	else if (!prefixcmp(arg, "--stat"))
-		/* --stat, --stat-width, or --stat-name-width */
+		/* --stat, --stat-width, --stat-name-width, or --stat-count */
 		return stat_opt(options, av);
 
 	/* renames options */
diff --git a/diff.h b/diff.h
index d83e53e..30ce9d8 100644
--- a/diff.h
+++ b/diff.h
@@ -124,6 +124,7 @@ struct diff_options {
 
 	int stat_width;
 	int stat_name_width;
+	int stat_count;
 	const char *word_regex;
 	enum diff_words_type word_diff;
 
-- 
1.7.5.250.g4493b

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCHv2 2/2] diff-options.txt: describe --stat-{width,name-width,count}
  2011-05-03 10:46       ` [PATCHv2 " Michael J Gruber
@ 2011-05-03 10:46         ` Michael J Gruber
  2011-05-03 18:47         ` [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines Junio C Hamano
  2011-05-04  4:37         ` [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines Junio C Hamano
  2 siblings, 0 replies; 21+ messages in thread
From: Michael J Gruber @ 2011-05-03 10:46 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---
 Documentation/diff-options.txt |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 000eae0..f6c046a 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -56,6 +56,9 @@ endif::git-format-patch[]
 	By giving a third parameter `<count>`, you can limit the
 	output to the first `<count>` lines, followed by
 	`...` if there are more.
++
+These parameters can also be set individually with `--stat-width=<width>`,
+`--stat-name-width=<name-width>` and `--stat-count=<count>`.
 
 --numstat::
 	Similar to `\--stat`, but shows number of added and
-- 
1.7.5.250.g4493b

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines
  2011-05-03 10:46       ` [PATCHv2 " Michael J Gruber
  2011-05-03 10:46         ` [PATCHv2 2/2] diff-options.txt: describe --stat-{width,name-width,count} Michael J Gruber
@ 2011-05-03 18:47         ` Junio C Hamano
  2011-05-04  7:16           ` Michael J Gruber
  2011-05-04  4:37         ` [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines Junio C Hamano
  2 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2011-05-03 18:47 UTC (permalink / raw)
  To: Michael J Gruber; +Cc: git

Michael J Gruber <git@drmicha.warpmail.net> writes:

> In the case with <count>+1 items one may argue whether it makes more sense to
> ignore the user wish and output all <count>+1 lines, or <count> lines (as
> requested) plus the "..." line.

I think that is a must if we care about consistency. fmt-merge-msg should
already do this (I remember being careful about this particular case when
I wrote its first version, but I do not know it has regressed as I do not
remember writing tests for this boundary case---my bad ;-).

> (I saw the suggestion about N-2...2 just now. Would work also, but I guess
> we would do this in more cases then, as Junio indicated.)

It is not clear what you mean by N-2...2 but if you are referring to my
"first N-1 entries, dots and the last one, to make the total N+1 lines
that show N entries", then yes I think it would make sense to do that also
in fmt-merge-msg.c::shortlog() as well as here.  But that would be a
separate topic.

> diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
> index 34f0145..000eae0 100644
> --- a/Documentation/diff-options.txt
> +++ b/Documentation/diff-options.txt
> @@ -48,11 +48,14 @@ endif::git-format-patch[]
>  --patience::
>  	Generate a diff using the "patience diff" algorithm.
>  
> ---stat[=<width>[,<name-width>]]::
> +--stat[=<width>[,<name-width>[,<count>]]]::
>  	Generate a diffstat.  You can override the default
>  	output width for 80-column terminal by `--stat=<width>`.
>  	The width of the filename part can be controlled by
>  	giving another width to it separated by a comma.
> +	By giving a third parameter `<count>`, you can limit the
> +	output to the first `<count>` lines, followed by
> +	`...` if there are more.

Does an empty-string <count> mean "use default" (currently "no limit")?
This matters when we teach a new parameter to --stat and make the above:

	--stat=[=<width>[,<name-width>[,<count>[,<nitfol>]]]]

> +	for (i = count; i < data->nr; i++) {
> +		uintmax_t added = data->files[i]->added;
> +		uintmax_t deleted = data->files[i]->deleted;
> +		if (!data->files[i]->is_renamed &&
> +			 (added + deleted == 0)) {
> +			total_files--;
> +			continue;
> +		}
> +		adds += added;
> +		dels += deleted;
> +	}
>  	fprintf(options->file, "%s", line_prefix);
>  	fprintf(options->file,
>  	       " %d files changed, %d insertions(+), %d deletions(-)\n",

This is culling the output of what is in struct diffstat that we have
already spent cycles to possibly fill thousands of entries.  I first
thought it may make sense to also tweak the loop in diff_flush() that runs
diff_flush_stat() to all filepairs to run it only on the first <count>
(and later the first <count-1> and the last) filepairs, but we have to
show the short-stat summary at the end, so this cannot be avoided.

What happens when I say "diff --numstat --stat-count=4"?

Should it error out upon seeing a limit that is not infinite, or should it
also elide the lines in its output?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines
  2011-05-03 10:46       ` [PATCHv2 " Michael J Gruber
  2011-05-03 10:46         ` [PATCHv2 2/2] diff-options.txt: describe --stat-{width,name-width,count} Michael J Gruber
  2011-05-03 18:47         ` [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines Junio C Hamano
@ 2011-05-04  4:37         ` Junio C Hamano
  2 siblings, 0 replies; 21+ messages in thread
From: Junio C Hamano @ 2011-05-04  4:37 UTC (permalink / raw)
  To: Michael J Gruber; +Cc: git

Michael J Gruber <git@drmicha.warpmail.net> writes:

> @@ -1302,7 +1304,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  	else
>  		width = max_change;
>  
> -	for (i = 0; i < data->nr; i++) {
> +	for (i = 0; i < count; i++) {
>  		const char *prefix = "";
>  		char *name = data->files[i]->print_name;
>  		uintmax_t added = data->files[i]->added;

This first loop can omit a "struct diffstat_file" that is not a rename and
does not add nor delete any lines (look for "total_files--"), but you do
not seem to compensate for it. If you have such a record in the earlier
part of the result for whatever reason, you would end up showing fewer
entries than what "count" indicates in this loop.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines
  2011-05-03 18:47         ` [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines Junio C Hamano
@ 2011-05-04  7:16           ` Michael J Gruber
  2011-05-27 12:36             ` [PATCHv3 0/3] diff.c: --stat-count=<n> Michael J Gruber
  0 siblings, 1 reply; 21+ messages in thread
From: Michael J Gruber @ 2011-05-04  7:16 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Junio C Hamano venit, vidit, dixit 03.05.2011 20:47:
> Michael J Gruber <git@drmicha.warpmail.net> writes:
> 
>> In the case with <count>+1 items one may argue whether it makes more sense to
>> ignore the user wish and output all <count>+1 lines, or <count> lines (as
>> requested) plus the "..." line.
> 
> I think that is a must if we care about consistency. fmt-merge-msg should

I assume you mean the part before the "or" by "this".

> already do this (I remember being careful about this particular case when
> I wrote its first version, but I do not know it has regressed as I do not
> remember writing tests for this boundary case---my bad ;-).

I'll recheck with fmt-merge-msg. Seems this series needs more work than
I expected... and may take some time.

>> (I saw the suggestion about N-2...2 just now. Would work also, but I guess
>> we would do this in more cases then, as Junio indicated.)
> 
> It is not clear what you mean by N-2...2 but if you are referring to my
> "first N-1 entries, dots and the last one, to make the total N+1 lines
> that show N entries", then yes I think it would make sense to do that also
> in fmt-merge-msg.c::shortlog() as well as here.  But that would be a
> separate topic.

Yes, N-2, then the remaining 2 or "..." plus the last one. Sorry for the
confusion.

>> diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
>> index 34f0145..000eae0 100644
>> --- a/Documentation/diff-options.txt
>> +++ b/Documentation/diff-options.txt
>> @@ -48,11 +48,14 @@ endif::git-format-patch[]
>>  --patience::
>>  	Generate a diff using the "patience diff" algorithm.
>>  
>> ---stat[=<width>[,<name-width>]]::
>> +--stat[=<width>[,<name-width>[,<count>]]]::
>>  	Generate a diffstat.  You can override the default
>>  	output width for 80-column terminal by `--stat=<width>`.
>>  	The width of the filename part can be controlled by
>>  	giving another width to it separated by a comma.
>> +	By giving a third parameter `<count>`, you can limit the
>> +	output to the first `<count>` lines, followed by
>> +	`...` if there are more.
> 
> Does an empty-string <count> mean "use default" (currently "no limit")?
> This matters when we teach a new parameter to --stat and make the above:
> 
> 	--stat=[=<width>[,<name-width>[,<count>[,<nitfol>]]]]
>

Yes, strtoul("") is 0, and 0 denotes default, which is unlimited. By
design, but maybe I should mention it somewhere, probably as part of
2/2, because that same effect is true and undocumented for the 1st 2
slots, i.e. --stat=",,5" sets the count argument to 5 and the others to
default.

>> +	for (i = count; i < data->nr; i++) {
>> +		uintmax_t added = data->files[i]->added;
>> +		uintmax_t deleted = data->files[i]->deleted;
>> +		if (!data->files[i]->is_renamed &&
>> +			 (added + deleted == 0)) {
>> +			total_files--;
>> +			continue;
>> +		}
>> +		adds += added;
>> +		dels += deleted;
>> +	}
>>  	fprintf(options->file, "%s", line_prefix);
>>  	fprintf(options->file,
>>  	       " %d files changed, %d insertions(+), %d deletions(-)\n",
> 
> This is culling the output of what is in struct diffstat that we have
> already spent cycles to possibly fill thousands of entries.  I first
> thought it may make sense to also tweak the loop in diff_flush() that runs
> diff_flush_stat() to all filepairs to run it only on the first <count>
> (and later the first <count-1> and the last) filepairs, but we have to
> show the short-stat summary at the end, so this cannot be avoided.

Yeah, in my 0th version that stat was for the shown lines only...

> What happens when I say "diff --numstat --stat-count=4"?
> 
> Should it error out upon seeing a limit that is not infinite, or should it
> also elide the lines in its output?

None of the --stat-* options affects --numstat, again by design. All of
width, name-width and count would make sense for --numstat (and
--dirstat) also, but that would require some restructuring.

> Michael J Gruber <git@drmicha.warpmail.net> writes:
> 
>> > @@ -1302,7 +1304,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>> >  	else
>> >  		width = max_change;
>> >  
>> > -	for (i = 0; i < data->nr; i++) {
>> > +	for (i = 0; i < count; i++) {
>> >  		const char *prefix = "";
>> >  		char *name = data->files[i]->print_name;
>> >  		uintmax_t added = data->files[i]->added;
> This first loop can omit a "struct diffstat_file" that is not a rename and
> does not add nor delete any lines (look for "total_files--"), but you do
> not seem to compensate for it. If you have such a record in the earlier
> part of the result for whatever reason, you would end up showing fewer
> entries than what "count" indicates in this loop.

Uh, sorry and thanks for noticing. I wasn't aware that we may skip a
pair completely.

I wanted to make the design optimal in the sense that I introduce as few
additional conditionals within the two loops there. I guess I'll have to
sacrifice that in the first loop.

Michael

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCHv3 0/3] diff.c: --stat-count=<n>
  2011-05-04  7:16           ` Michael J Gruber
@ 2011-05-27 12:36             ` Michael J Gruber
  2011-05-27 12:36               ` [PATCHv3 1/3] diff.c: omit hidden entries from namelen calculation with --stat Michael J Gruber
                                 ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Michael J Gruber @ 2011-05-27 12:36 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

So, this grew into a little series because of the issue in 1/3. The fact
that we looked up the name for a diff item  even when we don't display it
was never great but started being bad when we started not to fill in
unnecessary names... With that segv out of the way, here's v3 of the series.

Compared to v2, I hope I have learned to count.

Michael J Gruber (3):
  diff.c: omit hidden entries from namelen calculation with --stat
  diff: introduce --stat-lines to limit the stat lines
  diff-options.txt: describe --stat-{width,name-width,count}

 Documentation/diff-options.txt |    8 +++++-
 diff.c                         |   52 +++++++++++++++++++++++++++++++++-------
 diff.h                         |    1 +
 3 files changed, 51 insertions(+), 10 deletions(-)

-- 
1.7.5.2.657.g62c2

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCHv3 1/3] diff.c: omit hidden entries from namelen calculation with --stat
  2011-05-27 12:36             ` [PATCHv3 0/3] diff.c: --stat-count=<n> Michael J Gruber
@ 2011-05-27 12:36               ` Michael J Gruber
  2011-05-27 17:43                 ` Junio C Hamano
  2011-05-27 12:36               ` [PATCHv3 2/3] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
  2011-05-27 12:36               ` [PATCHv3 3/3] diff-options.txt: describe --stat-{width,name-width,count} Michael J Gruber
  2 siblings, 1 reply; 21+ messages in thread
From: Michael J Gruber @ 2011-05-27 12:36 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Currently, --stat calculates the longest name from all items but then
drops some (mode changes) from the output later on.

Instead, drop them from the namelen generation and calculation.

Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---
This optimizes (tightens) the display potentially, but we never had tests
which are sensitive to that.
---
 diff.c |   14 +++++++++-----
 1 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/diff.c b/diff.c
index feced34..4541939 100644
--- a/diff.c
+++ b/diff.c
@@ -1278,6 +1278,10 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	for (i = 0; i < data->nr; i++) {
 		struct diffstat_file *file = data->files[i];
 		uintmax_t change = file->added + file->deleted;
+		if (!data->files[i]->is_renamed &&
+			 (change == 0)) {
+			continue;
+		}
 		fill_print_name(file);
 		len = strlen(file->print_name);
 		if (max_len < len)
@@ -1309,6 +1313,11 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		uintmax_t deleted = data->files[i]->deleted;
 		int name_len;
 
+		if (!data->files[i]->is_renamed &&
+			 (added + deleted == 0)) {
+			total_files--;
+			continue;
+		}
 		/*
 		 * "scale" the filename
 		 */
@@ -1343,11 +1352,6 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 			fprintf(options->file, "  Unmerged\n");
 			continue;
 		}
-		else if (!data->files[i]->is_renamed &&
-			 (added + deleted == 0)) {
-			total_files--;
-			continue;
-		}
 
 		/*
 		 * scale the add/delete
-- 
1.7.5.2.657.g62c2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCHv3 2/3] diff: introduce --stat-lines to limit the stat lines
  2011-05-27 12:36             ` [PATCHv3 0/3] diff.c: --stat-count=<n> Michael J Gruber
  2011-05-27 12:36               ` [PATCHv3 1/3] diff.c: omit hidden entries from namelen calculation with --stat Michael J Gruber
@ 2011-05-27 12:36               ` Michael J Gruber
  2011-05-28  4:46                 ` Junio C Hamano
  2011-05-27 12:36               ` [PATCHv3 3/3] diff-options.txt: describe --stat-{width,name-width,count} Michael J Gruber
  2 siblings, 1 reply; 21+ messages in thread
From: Michael J Gruber @ 2011-05-27 12:36 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Often one is interested in the full --stat output only for commits which
change a few files, but not others, because larger restructuring gives a
--stat which fills a few screens.

Introduce a new option --stat-count=<count> which limits the --stat output
to the first <count> lines, followed by a "..." line. It can
also be given as the third parameter in
--stat=<width>,<name-width>,<count>.

Also, the unstuck form is supported analogous to the other two stat
parameters.

Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---
 Documentation/diff-options.txt |    5 ++++-
 diff.c                         |   38 ++++++++++++++++++++++++++++++++++----
 diff.h                         |    1 +
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 34f0145..000eae0 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -48,11 +48,14 @@ endif::git-format-patch[]
 --patience::
 	Generate a diff using the "patience diff" algorithm.
 
---stat[=<width>[,<name-width>]]::
+--stat[=<width>[,<name-width>[,<count>]]]::
 	Generate a diffstat.  You can override the default
 	output width for 80-column terminal by `--stat=<width>`.
 	The width of the filename part can be controlled by
 	giving another width to it separated by a comma.
+	By giving a third parameter `<count>`, you can limit the
+	output to the first `<count>` lines, followed by
+	`...` if there are more.
 
 --numstat::
 	Similar to `\--stat`, but shows number of added and
diff --git a/diff.c b/diff.c
index 4541939..6a6cbca 100644
--- a/diff.c
+++ b/diff.c
@@ -1244,7 +1244,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	int i, len, add, del, adds = 0, dels = 0;
 	uintmax_t max_change = 0, max_len = 0;
 	int total_files = data->nr;
-	int width, name_width;
+	int width, name_width, count;
 	const char *reset, *add_c, *del_c;
 	const char *line_prefix = "";
 	struct strbuf *msg = NULL;
@@ -1259,6 +1259,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 
 	width = options->stat_width ? options->stat_width : 80;
 	name_width = options->stat_name_width ? options->stat_name_width : 50;
+	count = options->stat_count ? options->stat_count : data->nr;
 
 	/* Sanity: give at least 5 columns to the graph,
 	 * but leave at least 10 columns for the name.
@@ -1275,11 +1276,12 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	add_c = diff_get_color_opt(options, DIFF_FILE_NEW);
 	del_c = diff_get_color_opt(options, DIFF_FILE_OLD);
 
-	for (i = 0; i < data->nr; i++) {
+	for (i = 0; (i < count) && (i < data->nr); i++) {
 		struct diffstat_file *file = data->files[i];
 		uintmax_t change = file->added + file->deleted;
 		if (!data->files[i]->is_renamed &&
 			 (change == 0)) {
+			count++; /* not shown == room for one more */
 			continue;
 		}
 		fill_print_name(file);
@@ -1292,6 +1294,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		if (max_change < change)
 			max_change = change;
 	}
+	count = i; /* min(count, data->nr) */
 
 	/* Compute the width of the graph part;
 	 * 10 is for one blank at the beginning of the line plus
@@ -1306,7 +1309,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	else
 		width = max_change;
 
-	for (i = 0; i < data->nr; i++) {
+	for (i = 0; i < count; i++) {
 		const char *prefix = "";
 		char *name = data->files[i]->print_name;
 		uintmax_t added = data->files[i]->added;
@@ -1373,6 +1376,19 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		show_graph(options->file, '-', del, del_c, reset);
 		fprintf(options->file, "\n");
 	}
+	if (count < data->nr)
+		fprintf(options->file, "%s ...\n", line_prefix);
+	for (i = count; i < data->nr; i++) {
+		uintmax_t added = data->files[i]->added;
+		uintmax_t deleted = data->files[i]->deleted;
+		if (!data->files[i]->is_renamed &&
+			 (added + deleted == 0)) {
+			total_files--;
+			continue;
+		}
+		adds += added;
+		dels += deleted;
+	}
 	fprintf(options->file, "%s", line_prefix);
 	fprintf(options->file,
 	       " %d files changed, %d insertions(+), %d deletions(-)\n",
@@ -3109,6 +3125,7 @@ static int stat_opt(struct diff_options *options, const char **av)
 	char *end;
 	int width = options->stat_width;
 	int name_width = options->stat_name_width;
+	int count = options->stat_count;
 	int argcount = 1;
 
 	arg += strlen("--stat");
@@ -3136,12 +3153,24 @@ static int stat_opt(struct diff_options *options, const char **av)
 				name_width = strtoul(av[1], &end, 10);
 				argcount = 2;
 			}
+		} else if (!prefixcmp(arg, "-count")) {
+			arg += strlen("-count");
+			if (*arg == '=')
+				count = strtoul(arg + 1, &end, 10);
+			else if (!*arg && !av[1])
+				die("Option '--stat-count' requires a value");
+			else if (!*arg) {
+				count = strtoul(av[1], &end, 10);
+				argcount = 2;
+			}
 		}
 		break;
 	case '=':
 		width = strtoul(arg+1, &end, 10);
 		if (*end == ',')
 			name_width = strtoul(end+1, &end, 10);
+		if (*end == ',')
+			count = strtoul(end+1, &end, 10);
 	}
 
 	/* Important! This checks all the error cases! */
@@ -3150,6 +3179,7 @@ static int stat_opt(struct diff_options *options, const char **av)
 	options->output_format |= DIFF_FORMAT_DIFFSTAT;
 	options->stat_name_width = name_width;
 	options->stat_width = width;
+	options->stat_count = count;
 	return argcount;
 }
 
@@ -3195,7 +3225,7 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 	else if (!strcmp(arg, "-s"))
 		options->output_format |= DIFF_FORMAT_NO_OUTPUT;
 	else if (!prefixcmp(arg, "--stat"))
-		/* --stat, --stat-width, or --stat-name-width */
+		/* --stat, --stat-width, --stat-name-width, or --stat-count */
 		return stat_opt(options, av);
 
 	/* renames options */
diff --git a/diff.h b/diff.h
index d83e53e..30ce9d8 100644
--- a/diff.h
+++ b/diff.h
@@ -124,6 +124,7 @@ struct diff_options {
 
 	int stat_width;
 	int stat_name_width;
+	int stat_count;
 	const char *word_regex;
 	enum diff_words_type word_diff;
 
-- 
1.7.5.2.657.g62c2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCHv3 3/3] diff-options.txt: describe --stat-{width,name-width,count}
  2011-05-27 12:36             ` [PATCHv3 0/3] diff.c: --stat-count=<n> Michael J Gruber
  2011-05-27 12:36               ` [PATCHv3 1/3] diff.c: omit hidden entries from namelen calculation with --stat Michael J Gruber
  2011-05-27 12:36               ` [PATCHv3 2/3] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
@ 2011-05-27 12:36               ` Michael J Gruber
  2 siblings, 0 replies; 21+ messages in thread
From: Michael J Gruber @ 2011-05-27 12:36 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---
 Documentation/diff-options.txt |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 000eae0..f6c046a 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -56,6 +56,9 @@ endif::git-format-patch[]
 	By giving a third parameter `<count>`, you can limit the
 	output to the first `<count>` lines, followed by
 	`...` if there are more.
++
+These parameters can also be set individually with `--stat-width=<width>`,
+`--stat-name-width=<name-width>` and `--stat-count=<count>`.
 
 --numstat::
 	Similar to `\--stat`, but shows number of added and
-- 
1.7.5.2.657.g62c2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCHv3 1/3] diff.c: omit hidden entries from namelen calculation with --stat
  2011-05-27 12:36               ` [PATCHv3 1/3] diff.c: omit hidden entries from namelen calculation with --stat Michael J Gruber
@ 2011-05-27 17:43                 ` Junio C Hamano
  2011-05-27 20:19                   ` Michael J Gruber
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2011-05-27 17:43 UTC (permalink / raw)
  To: Michael J Gruber; +Cc: git

Michael J Gruber <git@drmicha.warpmail.net> writes:

> Currently, --stat calculates the longest name from all items but then
> drops some (mode changes) from the output later on.
>
> Instead, drop them from the namelen generation and calculation.
>
> Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
> ---
> This optimizes (tightens) the display potentially, but we never had tests
> which are sensitive to that.

More importantly I think this is a good first step in the right direction
to reduce the duplicated code that need to implement the same logic.

However, you missed another instance in show_shortstats() no?

I am wondering if it would make it easier to both read and maintain if you
add a boolean field "changed" to "struct diffstat_file", and set it at the
end of builtin_diffstat(), and have these places all check that field.

A possible alternative is not to put such an unchanged entry in the struct
diffstat_t in the first place so that nobody has to worry about it. Right
now, show_numstat() does show 0/0 entries (i.e. only mode change), while
shortstat and normal stat do not, but I somehow have a feeling that this
difference is not by design but by accident.  Besides, --numstat that only
says 0/0 is not useful in practice without --summary.

    $ edit diff.c
    $ chmod +x Makefile
    $ git diff --stat
     diff.c |   26 ++++++++++++++------------
    $ git diff --numstat
    0       0       Makefile
    14      12      diff.c
    $ git diff --numstat --summary
    0       0       Makefile
    14      12      diff.c
     mode change 100644 => 100755 Makefile

Getting rid of an unchanged entry from the diffstat array upfront will
change the behaviour of numstat, but I suspect that it change things in a
better way, making the result consistent with the textual version.

The patch below, diff.c shows that approach, while ndiff.c shows the extra
boolean "changed" approach.

What do you think?

 diff.c            |    7 +++++++
 diff.c => ndiff.c |   26 ++++++++++++++------------
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/diff.c b/diff.c
index 8f4815b..e2ee70a 100644
--- a/diff.c
+++ b/diff.c
@@ -2267,6 +2267,13 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
 			      &xpp, &xecfg);
 	}
 
+	if (!data->is_renamed && !data->added && !data->deleted) {
+		diffstat->nr--;
+		free(data->name);
+		free(data->from_name);
+		free(data);
+	}
+
 	diff_free_filespec_data(one);
 	diff_free_filespec_data(two);
 }
diff --git a/diff.c b/ndiff.c
similarity index 99%
copy from diff.c
copy to ndiff.c
index 8f4815b..1620053 100644
--- a/diff.c
+++ b/ndiff.c
@@ -1222,6 +1222,7 @@ struct diffstat_t {
 		unsigned is_unmerged:1;
 		unsigned is_binary:1;
 		unsigned is_renamed:1;
+		unsigned changed:1;
 		uintmax_t added, deleted;
 	} **files;
 };
@@ -1350,6 +1351,9 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	for (i = 0; i < data->nr; i++) {
 		struct diffstat_file *file = data->files[i];
 		uintmax_t change = file->added + file->deleted;
+
+		if (!file->changed)
+			continue;
 		fill_print_name(file);
 		len = strlen(file->print_name);
 		if (max_len < len)
@@ -1381,6 +1385,11 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		uintmax_t deleted = data->files[i]->deleted;
 		int name_len;
 
+		if (!data->files[i]->changed) {
+			total_files--;
+			continue;
+		}
+
 		/*
 		 * "scale" the filename
 		 */
@@ -1415,11 +1424,6 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 			fprintf(options->file, "  Unmerged\n");
 			continue;
 		}
-		else if (!data->files[i]->is_renamed &&
-			 (added + deleted == 0)) {
-			total_files--;
-			continue;
-		}
 
 		/*
 		 * scale the add/delete
@@ -1457,15 +1461,12 @@ static void show_shortstats(struct diffstat_t *data, struct diff_options *option
 	for (i = 0; i < data->nr; i++) {
 		if (!data->files[i]->is_binary &&
 		    !data->files[i]->is_unmerged) {
-			int added = data->files[i]->added;
-			int deleted= data->files[i]->deleted;
-			if (!data->files[i]->is_renamed &&
-			    (added + deleted == 0)) {
+			if (!data->files[i]->changed) {
 				total_files--;
-			} else {
-				adds += added;
-				dels += deleted;
+				continue;
 			}
+			adds += data->files[i]->added;
+			dels += data->files[i]->deleted;
 		}
 	}
 	if (options->output_prefix) {
@@ -2266,6 +2267,7 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
 		xdi_diff_outf(&mf1, &mf2, diffstat_consume, diffstat,
 			      &xpp, &xecfg);
 	}
+	data->changed = data->is_renamed || data->added || data->deleted;
 
 	diff_free_filespec_data(one);
 	diff_free_filespec_data(two);

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCHv3 1/3] diff.c: omit hidden entries from namelen calculation with --stat
  2011-05-27 17:43                 ` Junio C Hamano
@ 2011-05-27 20:19                   ` Michael J Gruber
  2011-05-27 22:45                     ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Michael J Gruber @ 2011-05-27 20:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Junio C Hamano venit, vidit, dixit 27.05.2011 19:43:
> Michael J Gruber <git@drmicha.warpmail.net> writes:
> 
>> Currently, --stat calculates the longest name from all items but then
>> drops some (mode changes) from the output later on.
>>
>> Instead, drop them from the namelen generation and calculation.
>>
>> Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
>> ---
>> This optimizes (tightens) the display potentially, but we never had tests
>> which are sensitive to that.
> 
> More importantly I think this is a good first step in the right direction
> to reduce the duplicated code that need to implement the same logic.
> 
> However, you missed another instance in show_shortstats() no?

I'm sorry I don't understand this remark. For all different formats, the
respective functions show_stat(), show_shortstats() etc. do their own
formatting, and I did not change this at all. So, what did I miss?

Also, show_shortstats() does not need the names, so what similar change
should one have made there?

> I am wondering if it would make it easier to both read and maintain if you
> add a boolean field "changed" to "struct diffstat_file", and set it at the
> end of builtin_diffstat(), and have these places all check that field.

Sounds fine.

> A possible alternative is not to put such an unchanged entry in the struct
> diffstat_t in the first place so that nobody has to worry about it. Right
> now, show_numstat() does show 0/0 entries (i.e. only mode change), while
> shortstat and normal stat do not, but I somehow have a feeling that this
> difference is not by design but by accident.  Besides, --numstat that only
> says 0/0 is not useful in practice without --summary.
> 
>     $ edit diff.c
>     $ chmod +x Makefile
>     $ git diff --stat
>      diff.c |   26 ++++++++++++++------------
>     $ git diff --numstat
>     0       0       Makefile
>     14      12      diff.c
>     $ git diff --numstat --summary
>     0       0       Makefile
>     14      12      diff.c
>      mode change 100644 => 100755 Makefile
> 
> Getting rid of an unchanged entry from the diffstat array upfront will
> change the behaviour of numstat, but I suspect that it change things in a
> better way, making the result consistent with the textual version.

That sounds good to me.

> The patch below, diff.c shows that approach, while ndiff.c shows the extra
> boolean "changed" approach.
> 
> What do you think?
> 
>  diff.c            |    7 +++++++
>  diff.c => ndiff.c |   26 ++++++++++++++------------
>  2 files changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/diff.c b/diff.c
> index 8f4815b..e2ee70a 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -2267,6 +2267,13 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
>  			      &xpp, &xecfg);
>  	}
>  
> +	if (!data->is_renamed && !data->added && !data->deleted) {
> +		diffstat->nr--;
> +		free(data->name);
> +		free(data->from_name);
> +		free(data);
> +	}
> +

I don't know that codepath very well, but hasn't that diffstat been
added earlier in builtin_diffstat() already? But maybe the
diffstat->nr-- takes care of that.

Also, I have no idea how builtin_diffstat() influences show_stat() which
my patch was about, sorry.

>  	diff_free_filespec_data(one);
>  	diff_free_filespec_data(two);
>  }
> diff --git a/diff.c b/ndiff.c
> similarity index 99%
> copy from diff.c
> copy to ndiff.c
> index 8f4815b..1620053 100644
> --- a/diff.c
> +++ b/ndiff.c
> @@ -1222,6 +1222,7 @@ struct diffstat_t {
>  		unsigned is_unmerged:1;
>  		unsigned is_binary:1;
>  		unsigned is_renamed:1;
> +		unsigned changed:1;
>  		uintmax_t added, deleted;
>  	} **files;
>  };
> @@ -1350,6 +1351,9 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  	for (i = 0; i < data->nr; i++) {
>  		struct diffstat_file *file = data->files[i];
>  		uintmax_t change = file->added + file->deleted;
> +
> +		if (!file->changed)
> +			continue;
>  		fill_print_name(file);
>  		len = strlen(file->print_name);
>  		if (max_len < len)
> @@ -1381,6 +1385,11 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  		uintmax_t deleted = data->files[i]->deleted;
>  		int name_len;
>  
> +		if (!data->files[i]->changed) {
> +			total_files--;
> +			continue;
> +		}
> +
>  		/*
>  		 * "scale" the filename
>  		 */
> @@ -1415,11 +1424,6 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  			fprintf(options->file, "  Unmerged\n");
>  			continue;
>  		}
> -		else if (!data->files[i]->is_renamed &&
> -			 (added + deleted == 0)) {
> -			total_files--;
> -			continue;
> -		}

or do this dance only here (with ...->changed) instead of in the first
loop, because in 2/3 (when limited by stat_count), the second loop would
be done all the way up to nr (as 2 separate loops), but the first one
only up to count.

>  
>  		/*
>  		 * scale the add/delete
> @@ -1457,15 +1461,12 @@ static void show_shortstats(struct diffstat_t *data, struct diff_options *option
>  	for (i = 0; i < data->nr; i++) {
>  		if (!data->files[i]->is_binary &&
>  		    !data->files[i]->is_unmerged) {
> -			int added = data->files[i]->added;
> -			int deleted= data->files[i]->deleted;
> -			if (!data->files[i]->is_renamed &&
> -			    (added + deleted == 0)) {
> +			if (!data->files[i]->changed) {
>  				total_files--;
> -			} else {
> -				adds += added;
> -				dels += deleted;
> +				continue;
>  			}
> +			adds += data->files[i]->added;
> +			dels += data->files[i]->deleted;
>  		}
>  	}
>  	if (options->output_prefix) {
> @@ -2266,6 +2267,7 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
>  		xdi_diff_outf(&mf1, &mf2, diffstat_consume, diffstat,
>  			      &xpp, &xecfg);
>  	}
> +	data->changed = data->is_renamed || data->added || data->deleted;
>  
>  	diff_free_filespec_data(one);
>  	diff_free_filespec_data(two);

Does show_stats() get the data as modified by builtin_diffstat()? Then
this is fine.

Again, I'm sorry I'm not seeing through the call chain here.

Michael

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCHv3 1/3] diff.c: omit hidden entries from namelen calculation with --stat
  2011-05-27 20:19                   ` Michael J Gruber
@ 2011-05-27 22:45                     ` Junio C Hamano
  0 siblings, 0 replies; 21+ messages in thread
From: Junio C Hamano @ 2011-05-27 22:45 UTC (permalink / raw)
  To: Michael J Gruber; +Cc: git

Michael J Gruber <git@drmicha.warpmail.net> writes:

> Again, I'm sorry I'm not seeing through the call chain here.

diff_flush() notices it needs to produce textual patch and count added or
deleted lines here:

	if (output_format & (DIFF_FORMAT_DIFFSTAT|DIFF_FORMAT...
		struct diffstat_t diffstat;

		memset(&diffstat, 0, sizeof(struct diffstat_t));
		for (i = 0; i < q->nr; i++) {
			struct diff_filepair *p = q->queue[i];
			if (check_pair_status(p))
				diff_flush_stat(p, options, &diffstat);
		}
		if (output_format & DIFF_FORMAT_NUMSTAT)
			show_numstat(&diffstat, options);
		if (output_format & DIFF_FORMAT_DIFFSTAT)
			show_stats(&diffstat, options);
		if (output_format & DIFF_FORMAT_SHORTSTAT)
			show_shortstats(&diffstat, options);
		free_diffstat_info(&diffstat);
		separator++;
	}

In the loop, it calls diff_flush_stat() for each and every filepair in the
queue.  The result is collected in &diffstat, and that is what your patch
works on in show_stats() function.  Before you look at it in show_stats(),
the same &diffstat may be examined by show_numstat(), and after you look
at it, the same &diffstat may be examined by show_shortstats().

diff_flush_stat() calls run_diffstat() to get textual patch for one
filepair. It ends up calling builtin_diffstat().

builtin_diffstat() looks at the preimage and postimage, and feeds the low
level xdiff machinery with it, and uses diffstat_consume() callbac to
count the +/- lines in the patch between the two.

Even though three show_*stat* function look at the same &diffstat, two
have duplicated logic to ignore (and uncount) a path whose executable
bit was changed without any other change, with:

	if (!it->is_renamed && (added + deleted) == 0)
        	ignore it!

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCHv3 2/3] diff: introduce --stat-lines to limit the stat lines
  2011-05-27 12:36               ` [PATCHv3 2/3] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
@ 2011-05-28  4:46                 ` Junio C Hamano
  2011-05-30  6:40                   ` Michael J Gruber
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2011-05-28  4:46 UTC (permalink / raw)
  To: Michael J Gruber; +Cc: git

Michael J Gruber <git@drmicha.warpmail.net> writes:

> diff --git a/diff.c b/diff.c
> index 4541939..6a6cbca 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -1259,6 +1259,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  
>  	width = options->stat_width ? options->stat_width : 80;
>  	name_width = options->stat_name_width ? options->stat_name_width : 50;
> +	count = options->stat_count ? options->stat_count : data->nr;

It is a tad hard to follow the logic of "count" here. It could be the
ceiling the user gave you, or we may not be constrained at all if the full
diff is smaller.  But then

> @@ -1275,11 +1276,12 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  	add_c = diff_get_color_opt(options, DIFF_FILE_NEW);
>  	del_c = diff_get_color_opt(options, DIFF_FILE_OLD);
>  
> -	for (i = 0; i < data->nr; i++) {
> +	for (i = 0; (i < count) && (i < data->nr); i++) {

Here we compare "i" which does not have much to do with how many we
actually have to show (as we may be skipping in the loop) with "count" and
also with data->nr.  In the loop, you compensate for the skipping
(i.e. increment of "i" that should not affect the outcome) by incrementing
"count", so (i < count) part will correctly count towards the limit the
user gave. It may be a correct math, but it is tricky and hard to read.
Especially if we started with the options->stat_count that is larger than
the data->nr, of course "i < count" will always hold true with or without
skipping, which also is correct but even trickier.

>  		struct diffstat_file *file = data->files[i];
>  		uintmax_t change = file->added + file->deleted;
>  		if (!data->files[i]->is_renamed &&
>  			 (change == 0)) {
> +			count++; /* not shown == room for one more */
>  			continue;
>  		}
>  		fill_print_name(file);
> @@ -1292,6 +1294,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  		if (max_change < change)
>  			max_change = change;
>  	}
> +	count = i; /* min(count, data->nr) */

This value is mechanically min(count, data->nr) but it is confusing to
explain what it really means.  It is the number of elements you have to
look at in data->files[] array, still skipping irrelevant entries in the
loop, until you show options->stat_count number of entries.

> @@ -1306,7 +1309,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  	else
>  		width = max_change;
>  
> -	for (i = 0; i < data->nr; i++) {
> +	for (i = 0; i < count; i++) {

Hence this loop is correct. You ignore everything beyond count in the
array, as their lengths are immaterial to the outcome.

> @@ -1373,6 +1376,19 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  		show_graph(options->file, '-', del, del_c, reset);
>  		fprintf(options->file, "\n");
>  	}
> +	if (count < data->nr)
> +		fprintf(options->file, "%s ...\n", line_prefix);

At this point, count is the sum of the number of items you are going to
show and the number of items you are going to skip. If that is smaller
than the total in the original array, there are items that you are not
showing nor skipping. But you terminated the first loop early before
reaching the end of data->file[] array, so you do not know enough to say
"there _are_ more to show but I am not showing", I think.

If

 - data->nr is 3;
 - the data->file[0] is to be shown but other entries in data->file[] are
   not to be shown; and
 - opt->stat_count is 1

then your first loop fills data->file[0]->name, increments i to 1, the
condition (i < count) no longer holds, and stops.  At this point count is
1, data->nr is 3, but data->file[1] and [2] are not to be shown anyway, so
this "there are more but I am not showing them" is not quite right, is it?

> +	for (i = count; i < data->nr; i++) {
> +		uintmax_t added = data->files[i]->added;
> +		uintmax_t deleted = data->files[i]->deleted;
> +		if (!data->files[i]->is_renamed &&
> +			 (added + deleted == 0)) {
> +			total_files--;
> +			continue;
> +		}
> +		adds += added;
> +		dels += deleted;
> +	}

If this loop runs "adds += added" even once, then you have "there are more
but I am not showing" situation, and if all the entries beyond count are
skipped, then you are not limiting the output.

By the way, in this round, you are trying to show only the first N items,
and if there are more to show, then showing "..." after them. I kind of
liked the idea that came up during the discussion in earlier rounds to
show N-1 then "..."  then the last one item, but I do not feel too
strongly about it.

 diff.c                     |    6 ++++--
 t/t4049-diff-stat-count.sh |   25 +++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 82789e3..542ff42 100644
--- a/diff.c
+++ b/diff.c
@@ -1247,6 +1247,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	int width, name_width, count;
 	const char *reset, *add_c, *del_c;
 	const char *line_prefix = "";
+	int extra_shown = 0;
 	struct strbuf *msg = NULL;
 
 	if (data->nr == 0)
@@ -1376,8 +1377,6 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		show_graph(options->file, '-', del, del_c, reset);
 		fprintf(options->file, "\n");
 	}
-	if (count < data->nr)
-		fprintf(options->file, "%s ...\n", line_prefix);
 	for (i = count; i < data->nr; i++) {
 		uintmax_t added = data->files[i]->added;
 		uintmax_t deleted = data->files[i]->deleted;
@@ -1388,6 +1387,9 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		}
 		adds += added;
 		dels += deleted;
+		if (!extra_shown)
+			fprintf(options->file, "%s ...\n", line_prefix);
+		extra_shown = 1;
 	}
 	fprintf(options->file, "%s", line_prefix);
 	fprintf(options->file,
diff --git a/t/t4049-diff-stat-count.sh b/t/t4049-diff-stat-count.sh
new file mode 100755
index 0000000..641e70d
--- /dev/null
+++ b/t/t4049-diff-stat-count.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+# Copyright (c) 2011, Google Inc.
+
+test_description='diff --stat-count'
+. ./test-lib.sh
+
+test_expect_success setup '
+	>a &&
+	>b &&
+	>c &&
+	>d &&
+	git add a b c d &&
+	chmod +x c d &&
+	echo a >a &&
+	echo b >b &&
+	cat >expect <<-\EOF
+	 a |    1 +
+	 b |    1 +
+	 2 files changed, 2 insertions(+), 0 deletions(-)
+	EOF
+	git diff --stat --stat-count=2 >actual &&
+	test_cmp expect actual
+'
+
+test_done

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCHv3 2/3] diff: introduce --stat-lines to limit the stat lines
  2011-05-28  4:46                 ` Junio C Hamano
@ 2011-05-30  6:40                   ` Michael J Gruber
  2011-05-30  7:36                     ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Michael J Gruber @ 2011-05-30  6:40 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Junio C Hamano venit, vidit, dixit 28.05.2011 06:46:
> Michael J Gruber <git@drmicha.warpmail.net> writes:
> 
>> diff --git a/diff.c b/diff.c
>> index 4541939..6a6cbca 100644
>> --- a/diff.c
>> +++ b/diff.c
>> @@ -1259,6 +1259,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>>  
>>  	width = options->stat_width ? options->stat_width : 80;
>>  	name_width = options->stat_name_width ? options->stat_name_width : 50;
>> +	count = options->stat_count ? options->stat_count : data->nr;
> 
> It is a tad hard to follow the logic of "count" here. It could be the
> ceiling the user gave you, or we may not be constrained at all if the full
> diff is smaller.  But then
> 
>> @@ -1275,11 +1276,12 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>>  	add_c = diff_get_color_opt(options, DIFF_FILE_NEW);
>>  	del_c = diff_get_color_opt(options, DIFF_FILE_OLD);
>>  
>> -	for (i = 0; i < data->nr; i++) {
>> +	for (i = 0; (i < count) && (i < data->nr); i++) {
> 
> Here we compare "i" which does not have much to do with how many we
> actually have to show (as we may be skipping in the loop) with "count" and
> also with data->nr.  In the loop, you compensate for the skipping
> (i.e. increment of "i" that should not affect the outcome) by incrementing
> "count", so (i < count) part will correctly count towards the limit the
> user gave. It may be a correct math, but it is tricky and hard to read.
> Especially if we started with the options->stat_count that is larger than
> the data->nr, of course "i < count" will always hold true with or without
> skipping, which also is correct but even trickier.

Maybe I was trying too hard to be efficient. We could have count be
stat_count if non-zero or data->nr and limit all loops by the double
conditional (i < count) && (i < data->nr), resp. use an additional
counter like shown or skipped and compare with that.

Given the short discussion for the initial patch, I really did not
expect how much it would still take... (My bad, of course.)
> 
>>  		struct diffstat_file *file = data->files[i];
>>  		uintmax_t change = file->added + file->deleted;
>>  		if (!data->files[i]->is_renamed &&
>>  			 (change == 0)) {
>> +			count++; /* not shown == room for one more */
>>  			continue;
>>  		}
>>  		fill_print_name(file);
>> @@ -1292,6 +1294,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>>  		if (max_change < change)
>>  			max_change = change;
>>  	}
>> +	count = i; /* min(count, data->nr) */
> 
> This value is mechanically min(count, data->nr) but it is confusing to
> explain what it really means.  It is the number of elements you have to
> look at in data->files[] array, still skipping irrelevant entries in the
> loop, until you show options->stat_count number of entries.

As you explain further down, we may have to look at even more in order
to be able to say "..." or not.

> 
>> @@ -1306,7 +1309,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>>  	else
>>  		width = max_change;
>>  
>> -	for (i = 0; i < data->nr; i++) {
>> +	for (i = 0; i < count; i++) {
> 
> Hence this loop is correct. You ignore everything beyond count in the
> array, as their lengths are immaterial to the outcome.
> 

I think I really should not worry too much about efficiency here and
more about simple code.

>> @@ -1373,6 +1376,19 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>>  		show_graph(options->file, '-', del, del_c, reset);
>>  		fprintf(options->file, "\n");
>>  	}
>> +	if (count < data->nr)
>> +		fprintf(options->file, "%s ...\n", line_prefix);
> 
> At this point, count is the sum of the number of items you are going to
> show and the number of items you are going to skip. If that is smaller
> than the total in the original array, there are items that you are not
> showing nor skipping. But you terminated the first loop early before
> reaching the end of data->file[] array, so you do not know enough to say
> "there _are_ more to show but I am not showing", I think.

Sigh, you're right. We would have to look until we find the first
non-show non-skipped in order to say that.

> 
> If
> 
>  - data->nr is 3;
>  - the data->file[0] is to be shown but other entries in data->file[] are
>    not to be shown; and
>  - opt->stat_count is 1
> 
> then your first loop fills data->file[0]->name, increments i to 1, the
> condition (i < count) no longer holds, and stops.  At this point count is
> 1, data->nr is 3, but data->file[1] and [2] are not to be shown anyway, so
> this "there are more but I am not showing them" is not quite right, is it?
> 
>> +	for (i = count; i < data->nr; i++) {
>> +		uintmax_t added = data->files[i]->added;
>> +		uintmax_t deleted = data->files[i]->deleted;
>> +		if (!data->files[i]->is_renamed &&
>> +			 (added + deleted == 0)) {
>> +			total_files--;
>> +			continue;
>> +		}
>> +		adds += added;
>> +		dels += deleted;
>> +	}
> 
> If this loop runs "adds += added" even once, then you have "there are more
> but I am not showing" situation, and if all the entries beyond count are
> skipped, then you are not limiting the output.
> 
> By the way, in this round, you are trying to show only the first N items,
> and if there are more to show, then showing "..." after them. I kind of
> liked the idea that came up during the discussion in earlier rounds to
> show N-1 then "..."  then the last one item, but I do not feel too
> strongly about it.

I thought fmt-merge-msg was the example to follow. It does exactly what
I'm trying here, i.e. show count items (if there are count or more), and
show "..." if there are more.

>  diff.c                     |    6 ++++--
>  t/t4049-diff-stat-count.sh |   25 +++++++++++++++++++++++++
>  2 files changed, 29 insertions(+), 2 deletions(-)
> 
> diff --git a/diff.c b/diff.c
> index 82789e3..542ff42 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -1247,6 +1247,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  	int width, name_width, count;
>  	const char *reset, *add_c, *del_c;
>  	const char *line_prefix = "";
> +	int extra_shown = 0;
>  	struct strbuf *msg = NULL;
>  
>  	if (data->nr == 0)
> @@ -1376,8 +1377,6 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  		show_graph(options->file, '-', del, del_c, reset);
>  		fprintf(options->file, "\n");
>  	}
> -	if (count < data->nr)
> -		fprintf(options->file, "%s ...\n", line_prefix);
>  	for (i = count; i < data->nr; i++) {
>  		uintmax_t added = data->files[i]->added;
>  		uintmax_t deleted = data->files[i]->deleted;
> @@ -1388,6 +1387,9 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
>  		}
>  		adds += added;
>  		dels += deleted;
> +		if (!extra_shown)
> +			fprintf(options->file, "%s ...\n", line_prefix);
> +		extra_shown = 1;
>  	}
>  	fprintf(options->file, "%s", line_prefix);
>  	fprintf(options->file,
> diff --git a/t/t4049-diff-stat-count.sh b/t/t4049-diff-stat-count.sh
> new file mode 100755
> index 0000000..641e70d
> --- /dev/null
> +++ b/t/t4049-diff-stat-count.sh
> @@ -0,0 +1,25 @@
> +#!/bin/sh
> +# Copyright (c) 2011, Google Inc.
> +
> +test_description='diff --stat-count'
> +. ./test-lib.sh
> +
> +test_expect_success setup '
> +	>a &&
> +	>b &&
> +	>c &&
> +	>d &&
> +	git add a b c d &&
> +	chmod +x c d &&
> +	echo a >a &&
> +	echo b >b &&
> +	cat >expect <<-\EOF
> +	 a |    1 +
> +	 b |    1 +
> +	 2 files changed, 2 insertions(+), 0 deletions(-)
> +	EOF
> +	git diff --stat --stat-count=2 >actual &&
> +	test_cmp expect actual
> +'
> +
> +test_done

That works, thanks also for the test.

Should I squash this in or go for a clearer use of "count"? (Also, I may
have to take into account your notes about the workflow.)

Michael

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCHv3 2/3] diff: introduce --stat-lines to limit the stat lines
  2011-05-30  6:40                   ` Michael J Gruber
@ 2011-05-30  7:36                     ` Junio C Hamano
  0 siblings, 0 replies; 21+ messages in thread
From: Junio C Hamano @ 2011-05-30  7:36 UTC (permalink / raw)
  To: Michael J Gruber; +Cc: git

Michael J Gruber <git@drmicha.warpmail.net> writes:

> That works, thanks also for the test.
>
> Should I squash this in or go for a clearer use of "count"? (Also, I may
> have to take into account your notes about the workflow.)

I do not recall any "notes about the workflow" but if you feel it is worth
keeping, please go ahead. I've queued the fix-up as a "finishing touches"
separate commit, but the series is still in 'pu' so it is a fair game for
you to do whatever you think is the most appropriate for it.

Thanks.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2011-05-30  7:36 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-29 14:57 [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
2011-04-29 14:57 ` [PATCH 2/2] diff-options.txt: describe --stat-{width,name-width,lines} Michael J Gruber
2011-04-29 15:12 ` [PATCH 1/2] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
2011-04-29 16:15 ` Junio C Hamano
2011-05-01  8:27   ` Michael J Gruber
2011-05-01 18:33     ` Junio C Hamano
2011-05-03 10:46       ` [PATCHv2 " Michael J Gruber
2011-05-03 10:46         ` [PATCHv2 2/2] diff-options.txt: describe --stat-{width,name-width,count} Michael J Gruber
2011-05-03 18:47         ` [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines Junio C Hamano
2011-05-04  7:16           ` Michael J Gruber
2011-05-27 12:36             ` [PATCHv3 0/3] diff.c: --stat-count=<n> Michael J Gruber
2011-05-27 12:36               ` [PATCHv3 1/3] diff.c: omit hidden entries from namelen calculation with --stat Michael J Gruber
2011-05-27 17:43                 ` Junio C Hamano
2011-05-27 20:19                   ` Michael J Gruber
2011-05-27 22:45                     ` Junio C Hamano
2011-05-27 12:36               ` [PATCHv3 2/3] diff: introduce --stat-lines to limit the stat lines Michael J Gruber
2011-05-28  4:46                 ` Junio C Hamano
2011-05-30  6:40                   ` Michael J Gruber
2011-05-30  7:36                     ` Junio C Hamano
2011-05-27 12:36               ` [PATCHv3 3/3] diff-options.txt: describe --stat-{width,name-width,count} Michael J Gruber
2011-05-04  4:37         ` [PATCHv2 1/2] diff: introduce --stat-lines to limit the stat lines Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.