All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH rfc] scripts/get_maintainer.pl: add interactive mode
@ 2010-09-15 12:13 florian
  2010-09-15 13:43 ` [v2 PATCH " florian
  0 siblings, 1 reply; 18+ messages in thread
From: florian @ 2010-09-15 12:13 UTC (permalink / raw)
  To: linux-kernel; +Cc: ebiederm, Florian Mickler, Andrew Morton, Joe Perches

This is a first version of an interactive mode for
scripts/get_maintainer.pl .

It allows the user to interact with the script. Each cc candidate can be
selected and deselected and a shortlog of authored commits can be
displayed for each candidate.
The menu is displayed via STDERR, the end result is outputted to STDOUT. 
This unconventional mechanism allows it to be used via

git send-email --cc-cmd=scripts/get_maintainer.pl 

To use it, add --interactive to the commandline or put it into .get_maintainer.conf.
To have the usual signed-off-by percentage visible in the menu, add --rolestats.

---



TODO:
 - fixup multiple file case
 - fixup chief_penguin handling
 - make output prettier
 - usability?

scripts/get_maintainer.pl |  118 +++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 114 insertions(+), 4 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index b228198..45eb560 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -32,6 +32,7 @@ my $email_git_max_maintainers = 5;
 my $email_git_min_percent = 5;
 my $email_git_since = "1-year-ago";
 my $email_hg_since = "-365";
+my $interactive = 0;
 my $email_remove_duplicates = 1;
 my $output_multiline = 1;
 my $output_separator = ", ";
@@ -51,6 +52,8 @@ my $help = 0;
 
 my $exit = 0;
 
+my %shortlog_buffer;
+
 my @penguin_chief = ();
 push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
 #Andrew wants in on most everything - 2009/01/14
@@ -91,7 +94,8 @@ my %VCS_cmds_git = (
     "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
     "blame_file_cmd" => "git blame -l \$file",
     "commit_pattern" => "^commit [0-9a-f]{40,40}",
-    "blame_commit_pattern" => "^([0-9a-f]+) "
+    "blame_commit_pattern" => "^([0-9a-f]+) ",
+    "shortlog_cmd" => "git log --no-color --oneline --since=\$email_git_since --author=\"\$email\" -- \$file"
 );
 
 my %VCS_cmds_hg = (
@@ -104,7 +108,8 @@ my %VCS_cmds_hg = (
     "blame_range_cmd" => "",		# not supported
     "blame_file_cmd" => "hg blame -c \$file",
     "commit_pattern" => "^commit [0-9a-f]{40,40}",
-    "blame_commit_pattern" => "^([0-9a-f]+):"
+    "blame_commit_pattern" => "^([0-9a-f]+):",
+    "shortlog_cmd" => "ht log --date=\$email_hg_since"
 );
 
 if (-f "${lk_path}.get_maintainer.conf") {
@@ -142,6 +147,7 @@ if (!GetOptions(
 		'git-min-percent=i' => \$email_git_min_percent,
 		'git-since=s' => \$email_git_since,
 		'hg-since=s' => \$email_hg_since,
+		'i|interactive!' => \$interactive,
 		'remove-duplicates!' => \$email_remove_duplicates,
 		'm!' => \$email_maintainer,
 		'n!' => \$email_usename,
@@ -219,6 +225,8 @@ if ($email_git_all_signature_types) {
     $signaturePattern = "(.+?)[Bb][Yy]:";
 }
 
+
+
 ## Read MAINTAINERS for type/value pairs
 
 my @typevalue = ();
@@ -440,10 +448,14 @@ foreach my $file (@files) {
     if ($email && $email_git) {
 	vcs_file_signoffs($file);
     }
-
     if ($email && $email_git_blame) {
 	vcs_file_blame($file);
     }
+    if ($email && $interactive){
+	#FIXME: multiple file case is broken at the moment
+	#FIXME: penguin_chief handlich is broken at the moment
+	vcs_interactive_menu($file);
+    }
 }
 
 if ($keywords) {
@@ -476,6 +488,7 @@ if ($email) {
     }
 }
 
+
 if ($email || $email_list) {
     my @to = ();
     if ($email) {
@@ -545,6 +558,7 @@ MAINTAINER field selection options:
     --git-blame => use git blame to find modified commits for patch or file
     --git-since => git history to use (default: $email_git_since)
     --hg-since => hg history to use (default: $email_hg_since)
+    --interactive => display a menu (mostly useful if used with the --git option)
     --m => include maintainer(s) if any
     --n => include name 'Full Name <addr\@domain.tld>'
     --l => include list(s) if any
@@ -1104,6 +1118,102 @@ sub vcs_exists {
     return 0;
 }
 
+sub vcs_interactive_menu {
+    my ($file) = @_;
+
+    return if (!vcs_exists());
+
+    my %selected;
+    my %shortlog;
+    my $input;
+    my $count = 0;
+
+    #select maintainers by default
+    foreach my $entry (@email_to){
+	    my $role = $entry->[1];
+	    $selected{$count} = ($role =~ /maintainer:/);
+	    $count++;
+    }
+
+    #menu loop
+    do {
+	my $count = 0;
+	foreach my $entry (@email_to){
+	    my $email = $entry->[0];
+	    my $role = $entry->[1];
+	    if ($selected{$count}){
+		print STDERR "* ";
+	    } else {
+		print STDERR "  ";
+	    }
+	    print STDERR "$count: $email,\t\t $role";
+	    print STDERR "\n";
+	    if ($shortlog{$count}){
+		my @lines = @{vcs_get_shortlog($email, $file)};
+		print STDERR "\tauthored commits:" . @lines . ".\n";
+		foreach my $commit (@lines){
+		    print STDERR "\t$commit\n";
+		}
+		print STDERR "\n";
+	    }
+	    $count++;
+	}
+	print STDERR "\n";
+	print STDERR "Choose whom to cc by entering a commaseperated list of numbers and hitting enter.\n";
+	print STDERR "To show a short list of commits, precede the number by a '?',\n";
+	print STDERR "A blank line indicates that you are satisfied with your choice.\n";
+	$input = <STDIN>;
+	chomp($input);
+
+	my @wish = split(/[, ]+/,$input);
+	foreach my $nr (@wish){
+		my $logtoggle = 0;
+		if ($nr =~ /\?/){
+			$nr =~ s/\?//;
+			$logtoggle = 1;
+		}
+
+		#skip out of bounds numbers
+		next unless ($nr <= $count && $nr >= 0);
+
+		if ($logtoggle){
+			$shortlog{$nr} = !$shortlog{$nr};
+		} else {
+			$selected{$nr} = !$selected{$nr};
+		}
+	};
+    } while(length($input) > 0);
+
+    #drop not selected entries
+    $count = 0;
+    my @new_emailto;
+    foreach my $entry (@email_to){
+	next unless $selected{$count};
+	push(@new_emailto,$email_to[$count]);
+	print STDERR "$count: ";
+	print STDERR $email_to[$count]->[0];
+	print STDERR ",\t\t ";
+	print STDERR $email_to[$count]->[1];
+	print STDERR "\n";
+	$count++;
+    }
+    @email_to = @new_emailto;
+}
+
+sub vcs_get_shortlog {
+	my $email = shift;
+	my ($file) = @_;
+
+	if (!$shortlog_buffer{$email}){
+		my $cmd = $VCS_cmds{"shortlog_cmd"};
+		$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
+
+		my @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
+		$shortlog_buffer{$email}=\@lines;
+	}
+	return $shortlog_buffer{$email};
+}
+
 sub vcs_assign {
     my ($role, $divisor, @lines) = @_;
 
@@ -1179,7 +1289,7 @@ sub vcs_file_blame {
 	my @commit_signers = ();
 
 	my $cmd = $VCS_cmds{"find_commit_signers_cmd"};
-	$cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
+	$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
 
 	($commit_count, @commit_signers) = vcs_find_signers($cmd);
 	push(@signers, @commit_signers);
-- 
1.7.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [v2 PATCH rfc] scripts/get_maintainer.pl: add interactive mode
  2010-09-15 12:13 [PATCH rfc] scripts/get_maintainer.pl: add interactive mode florian
@ 2010-09-15 13:43 ` florian
  2010-09-15 15:16   ` [PATCH RFC v3] " florian
  2010-09-15 15:27   ` [v2 PATCH rfc] scripts/get_maintainer.pl: add interactive mode Joe Perches
  0 siblings, 2 replies; 18+ messages in thread
From: florian @ 2010-09-15 13:43 UTC (permalink / raw)
  To: linux-kernel; +Cc: ebiederm, Florian Mickler, Andrew Morton, Joe Perches

This is a first version of an interactive mode for
scripts/get_maintainer.pl .

It allows the user to interact with the script. Each cc candidate can be
selected and deselected and a shortlog of authored commits can be
displayed for each candidate.

The menu is displayed via STDERR, the end result is outputted to STDOUT.

This allows using get_maintainer.pl in interactive mode via
git send-email --cc-cmd.

---
 scripts/get_maintainer.pl |  119 +++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 115 insertions(+), 4 deletions(-)

changes from v1: I forgot to git add a small bugfix, that made it actually work 

TODO:
 - fixup multiple file case
 - fixup chief_penguin handling
 - make output prettier
 - usability?

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index b228198..2e15c22 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -32,6 +32,7 @@ my $email_git_max_maintainers = 5;
 my $email_git_min_percent = 5;
 my $email_git_since = "1-year-ago";
 my $email_hg_since = "-365";
+my $interactive = 0;
 my $email_remove_duplicates = 1;
 my $output_multiline = 1;
 my $output_separator = ", ";
@@ -51,6 +52,8 @@ my $help = 0;
 
 my $exit = 0;
 
+my %shortlog_buffer;
+
 my @penguin_chief = ();
 push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
 #Andrew wants in on most everything - 2009/01/14
@@ -91,7 +94,8 @@ my %VCS_cmds_git = (
     "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
     "blame_file_cmd" => "git blame -l \$file",
     "commit_pattern" => "^commit [0-9a-f]{40,40}",
-    "blame_commit_pattern" => "^([0-9a-f]+) "
+    "blame_commit_pattern" => "^([0-9a-f]+) ",
+    "shortlog_cmd" => "git log --no-color --oneline --since=\$email_git_since --author=\"\$email\" -- \$file"
 );
 
 my %VCS_cmds_hg = (
@@ -104,7 +108,8 @@ my %VCS_cmds_hg = (
     "blame_range_cmd" => "",		# not supported
     "blame_file_cmd" => "hg blame -c \$file",
     "commit_pattern" => "^commit [0-9a-f]{40,40}",
-    "blame_commit_pattern" => "^([0-9a-f]+):"
+    "blame_commit_pattern" => "^([0-9a-f]+):",
+    "shortlog_cmd" => "ht log --date=\$email_hg_since"
 );
 
 if (-f "${lk_path}.get_maintainer.conf") {
@@ -142,6 +147,7 @@ if (!GetOptions(
 		'git-min-percent=i' => \$email_git_min_percent,
 		'git-since=s' => \$email_git_since,
 		'hg-since=s' => \$email_hg_since,
+		'i|interactive!' => \$interactive,
 		'remove-duplicates!' => \$email_remove_duplicates,
 		'm!' => \$email_maintainer,
 		'n!' => \$email_usename,
@@ -219,6 +225,8 @@ if ($email_git_all_signature_types) {
     $signaturePattern = "(.+?)[Bb][Yy]:";
 }
 
+
+
 ## Read MAINTAINERS for type/value pairs
 
 my @typevalue = ();
@@ -440,10 +448,14 @@ foreach my $file (@files) {
     if ($email && $email_git) {
 	vcs_file_signoffs($file);
     }
-
     if ($email && $email_git_blame) {
 	vcs_file_blame($file);
     }
+    if ($email && $interactive){
+	#FIXME: multiple file case is broken at the moment
+	#FIXME: penguin_chief handlich is broken at the moment
+	vcs_interactive_menu($file);
+    }
 }
 
 if ($keywords) {
@@ -476,6 +488,7 @@ if ($email) {
     }
 }
 
+
 if ($email || $email_list) {
     my @to = ();
     if ($email) {
@@ -545,6 +558,7 @@ MAINTAINER field selection options:
     --git-blame => use git blame to find modified commits for patch or file
     --git-since => git history to use (default: $email_git_since)
     --hg-since => hg history to use (default: $email_hg_since)
+    --interactive => display a menu (mostly useful if used with the --git option)
     --m => include maintainer(s) if any
     --n => include name 'Full Name <addr\@domain.tld>'
     --l => include list(s) if any
@@ -1104,6 +1118,103 @@ sub vcs_exists {
     return 0;
 }
 
+sub vcs_interactive_menu {
+    my ($file) = @_;
+
+    return if (!vcs_exists());
+
+    my %selected;
+    my %shortlog;
+    my $input;
+    my $count = 0;
+
+    #select maintainers by default
+    foreach my $entry (@email_to){
+	    my $role = $entry->[1];
+	    $selected{$count} = ($role =~ /maintainer:/);
+	    $count++;
+    }
+
+    #menu loop
+    do {
+	my $count = 0;
+	foreach my $entry (@email_to){
+	    my $email = $entry->[0];
+	    my $role = $entry->[1];
+	    if ($selected{$count}){
+		print STDERR "* ";
+	    } else {
+		print STDERR "  ";
+	    }
+	    print STDERR "$count: $email,\t\t $role";
+	    print STDERR "\n";
+	    if ($shortlog{$count}){
+		my @lines = @{vcs_get_shortlog($email, $file)};
+		print STDERR "\tauthored commits:" . @lines . ".\n";
+		foreach my $commit (@lines){
+		    print STDERR "\t$commit\n";
+		}
+		print STDERR "\n";
+	    }
+	    $count++;
+	}
+	print STDERR "\n";
+	print STDERR "Choose whom to cc by entering a commaseperated list of numbers and hitting enter.\n";
+	print STDERR "To show a short list of commits, precede the number by a '?',\n";
+	print STDERR "A blank line indicates that you are satisfied with your choice.\n";
+	$input = <STDIN>;
+	chomp($input);
+
+	my @wish = split(/[, ]+/,$input);
+	foreach my $nr (@wish){
+		my $logtoggle = 0;
+		if ($nr =~ /\?/){
+			$nr =~ s/\?//;
+			$logtoggle = 1;
+		}
+
+		#skip out of bounds numbers
+		next unless ($nr <= $count && $nr >= 0);
+
+		if ($logtoggle){
+			$shortlog{$nr} = !$shortlog{$nr};
+		} else {
+			$selected{$nr} = !$selected{$nr};
+		}
+	};
+    } while(length($input) > 0);
+
+    #drop not selected entries
+    $count = 0;
+    my @new_emailto;
+    foreach my $entry (@email_to){
+	if ($selected{$count}){
+		push(@new_emailto,$email_to[$count]);
+		print STDERR "$count: ";
+		print STDERR $email_to[$count]->[0];
+		print STDERR ",\t\t ";
+		print STDERR $email_to[$count]->[1];
+		print STDERR "\n";
+	}
+	$count++;
+    }
+    @email_to = @new_emailto;
+}
+
+sub vcs_get_shortlog {
+	my $email = shift;
+	my ($file) = @_;
+
+	if (!$shortlog_buffer{$email}){
+		my $cmd = $VCS_cmds{"shortlog_cmd"};
+		$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
+
+		my @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
+		$shortlog_buffer{$email}=\@lines;
+	}
+	return $shortlog_buffer{$email};
+}
+
 sub vcs_assign {
     my ($role, $divisor, @lines) = @_;
 
@@ -1179,7 +1290,7 @@ sub vcs_file_blame {
 	my @commit_signers = ();
 
 	my $cmd = $VCS_cmds{"find_commit_signers_cmd"};
-	$cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
+	$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
 
 	($commit_count, @commit_signers) = vcs_find_signers($cmd);
 	push(@signers, @commit_signers);
-- 
1.7.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH RFC v3] scripts/get_maintainer.pl: add interactive mode
  2010-09-15 13:43 ` [v2 PATCH " florian
@ 2010-09-15 15:16   ` florian
  2010-09-15 15:35     ` Joe Perches
  2010-09-20 19:43     ` [RFC PATCH] " Joe Perches
  2010-09-15 15:27   ` [v2 PATCH rfc] scripts/get_maintainer.pl: add interactive mode Joe Perches
  1 sibling, 2 replies; 18+ messages in thread
From: florian @ 2010-09-15 15:16 UTC (permalink / raw)
  To: linux-kernel; +Cc: ebiederm, Florian Mickler, Andrew Morton, Joe Perches

This is a first version of an interactive mode for
scripts/get_maintainer.pl .

It allows the user to interact with the script. Each cc candidate can be
selected and deselected and a shortlog of authored commits can be
displayed for each candidate.

The menu is displayed via STDERR, the end result is outputted to STDOUT.
This unusual mechanism allows using get_maintainer.pl in interactive mode via
git send-email --cc-cmd.

---

v2: small bugfix
v3: 
   o added multifile support
   o fixed chief_penguin_handling for interactive mode
   o added supporter: roles to be selected by default


TODO:
 Btw, --rolestats is somewhat broken in the current script, it assembles the statistics 
 sequentially file by file, meaning that candidates that have only commits on the first 
 file get a higher percentage as candidates also active on a later file...


 scripts/get_maintainer.pl |  146 +++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 141 insertions(+), 5 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index b228198..6deabc6 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -32,6 +32,7 @@ my $email_git_max_maintainers = 5;
 my $email_git_min_percent = 5;
 my $email_git_since = "1-year-ago";
 my $email_hg_since = "-365";
+my $interactive = 0;
 my $email_remove_duplicates = 1;
 my $output_multiline = 1;
 my $output_separator = ", ";
@@ -51,6 +52,8 @@ my $help = 0;
 
 my $exit = 0;
 
+my %shortlog_buffer;
+
 my @penguin_chief = ();
 push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
 #Andrew wants in on most everything - 2009/01/14
@@ -91,7 +94,8 @@ my %VCS_cmds_git = (
     "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
     "blame_file_cmd" => "git blame -l \$file",
     "commit_pattern" => "^commit [0-9a-f]{40,40}",
-    "blame_commit_pattern" => "^([0-9a-f]+) "
+    "blame_commit_pattern" => "^([0-9a-f]+) ",
+    "shortlog_cmd" => "git log --no-color --oneline --since=\$email_git_since --author=\"\$email\" -- \$file"
 );
 
 my %VCS_cmds_hg = (
@@ -104,7 +108,8 @@ my %VCS_cmds_hg = (
     "blame_range_cmd" => "",		# not supported
     "blame_file_cmd" => "hg blame -c \$file",
     "commit_pattern" => "^commit [0-9a-f]{40,40}",
-    "blame_commit_pattern" => "^([0-9a-f]+):"
+    "blame_commit_pattern" => "^([0-9a-f]+):",
+    "shortlog_cmd" => "ht log --date=\$email_hg_since"
 );
 
 if (-f "${lk_path}.get_maintainer.conf") {
@@ -142,6 +147,7 @@ if (!GetOptions(
 		'git-min-percent=i' => \$email_git_min_percent,
 		'git-since=s' => \$email_git_since,
 		'hg-since=s' => \$email_hg_since,
+		'i|interactive!' => \$interactive,
 		'remove-duplicates!' => \$email_remove_duplicates,
 		'm!' => \$email_maintainer,
 		'n!' => \$email_usename,
@@ -219,6 +225,8 @@ if ($email_git_all_signature_types) {
     $signaturePattern = "(.+?)[Bb][Yy]:";
 }
 
+
+
 ## Read MAINTAINERS for type/value pairs
 
 my @typevalue = ();
@@ -440,10 +448,13 @@ foreach my $file (@files) {
     if ($email && $email_git) {
 	vcs_file_signoffs($file);
     }
-
     if ($email && $email_git_blame) {
 	vcs_file_blame($file);
     }
+    if ($email && $interactive){
+	vcs_file_shortlogs($file);
+
+    }
 }
 
 if ($keywords) {
@@ -476,9 +487,13 @@ if ($email) {
     }
 }
 
+
 if ($email || $email_list) {
     my @to = ();
     if ($email) {
+	if ($interactive) {
+	    @email_to = @{vcs_interactive_menu(\@email_to)};
+	}
 	@to = (@to, @email_to);
     }
     if ($email_list) {
@@ -491,7 +506,6 @@ if ($scm) {
     @scm = uniq(@scm);
     output(@scm);
 }
-
 if ($status) {
     @status = uniq(@status);
     output(@status);
@@ -545,6 +559,7 @@ MAINTAINER field selection options:
     --git-blame => use git blame to find modified commits for patch or file
     --git-since => git history to use (default: $email_git_since)
     --hg-since => hg history to use (default: $email_hg_since)
+    --interactive => display a menu (mostly useful if used with the --git option)
     --m => include maintainer(s) if any
     --n => include name 'Full Name <addr\@domain.tld>'
     --l => include list(s) if any
@@ -1104,6 +1119,127 @@ sub vcs_exists {
     return 0;
 }
 
+sub vcs_interactive_menu {
+    my $list_ref = shift;
+    my @list = @$list_ref;
+
+    return if (!vcs_exists());
+
+    my %selected;
+    my %shortlog;
+    my $input;
+    my $count = 0;
+
+    #select maintainers by default
+    foreach my $entry (@list){
+	    my $role = $entry->[1];
+	    $selected{$count} = ($role =~ /maintainer:|supporter:/);
+	    $count++;
+    }
+
+    #menu loop
+    do {
+	my $count = 0;
+	foreach my $entry (@list){
+	    my $email = $entry->[0];
+	    my $role = $entry->[1];
+	    if ($selected{$count}){
+		print STDERR "* ";
+	    } else {
+		print STDERR "  ";
+	    }
+	    print STDERR "$count: $email,\t\t $role";
+	    print STDERR "\n";
+	    if ($shortlog{$count}){
+		my $entries_ref = vcs_get_shortlog($email);
+		foreach my $entry_ref (@{$entries_ref}){
+		    my $filename = @{$entry_ref}[0];
+		    my @shortlog = @{@{$entry_ref}[1]};
+		    print STDERR "\tshortlog for $filename (authored commits: " . @shortlog . ").\n";
+		    foreach my $commit (@shortlog){
+			print STDERR "\t  $commit\n";
+		    }
+		    print STDERR "\n";
+		}
+	    }
+	    $count++;
+	}
+	print STDERR "\n";
+	print STDERR "Choose whom to cc by entering a commaseperated list of numbers and hitting enter.\n";
+	print STDERR "To show a short list of commits, precede the number by a '?',\n";
+	print STDERR "A blank line indicates that you are satisfied with your choice.\n";
+	$input = <STDIN>;
+	chomp($input);
+
+	my @wish = split(/[, ]+/,$input);
+	foreach my $nr (@wish){
+		my $logtoggle = 0;
+		if ($nr =~ /\?/){
+			$nr =~ s/\?//;
+			$logtoggle = 1;
+		}
+
+		#skip out of bounds numbers
+		next unless ($nr <= $count && $nr >= 0);
+
+		if ($logtoggle){
+			$shortlog{$nr} = !$shortlog{$nr};
+		} else {
+			$selected{$nr} = !$selected{$nr};
+
+			#switch shortlog on if an entry get's selected
+			if ($selected{$nr}){
+				$shortlog{$nr}=1;
+			}
+		}
+	};
+    } while(length($input) > 0);
+
+    #drop not selected entries
+    $count = 0;
+    my @new_emailto;
+    foreach my $entry (@list){
+	if ($selected{$count}){
+		push(@new_emailto,$list[$count]);
+		print STDERR "$count: ";
+		print STDERR $email_to[$count]->[0];
+		print STDERR ",\t\t ";
+		print STDERR $email_to[$count]->[1];
+		print STDERR "\n";
+	}
+	$count++;
+    }
+    return \@new_emailto;
+}
+
+sub vcs_get_shortlog {
+    my $arg = shift;
+    my ($name, $address) = parse_email($arg);
+    return $shortlog_buffer{$address};
+}
+
+sub vcs_file_shortlogs {
+    my ($file) = @_;
+    print STDERR "shortlog processing $file:";
+    foreach my $entry (@email_to){
+	my ($name, $address) = parse_email($entry->[0]);
+	print STDERR ".";
+	my $commits_ref = vcs_email_shortlog($address, $file);
+	push(@{$shortlog_buffer{$address}}, [ $file, $commits_ref ]);
+    }
+    print STDERR "\n";
+}
+
+sub vcs_email_shortlog {
+    my $email = shift;
+    my ($file) = @_;
+
+    my $cmd = $VCS_cmds{"shortlog_cmd"};
+    $cmd =~ s/(\$\w+)/$1/eeg;		#substitute variables
+    my @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
+    return \@lines;
+}
+
 sub vcs_assign {
     my ($role, $divisor, @lines) = @_;
 
@@ -1179,7 +1315,7 @@ sub vcs_file_blame {
 	my @commit_signers = ();
 
 	my $cmd = $VCS_cmds{"find_commit_signers_cmd"};
-	$cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
+	$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
 
 	($commit_count, @commit_signers) = vcs_find_signers($cmd);
 	push(@signers, @commit_signers);
-- 
1.7.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [v2 PATCH rfc] scripts/get_maintainer.pl: add interactive mode
  2010-09-15 13:43 ` [v2 PATCH " florian
  2010-09-15 15:16   ` [PATCH RFC v3] " florian
@ 2010-09-15 15:27   ` Joe Perches
  2010-09-16  8:30     ` Florian Mickler
  1 sibling, 1 reply; 18+ messages in thread
From: Joe Perches @ 2010-09-15 15:27 UTC (permalink / raw)
  To: florian; +Cc: linux-kernel, ebiederm, Andrew Morton

On Wed, 2010-09-15 at 15:43 +0200, florian@mickler.org wrote:
> This is a first version of an interactive mode for
> scripts/get_maintainer.pl .
> 
> It allows the user to interact with the script. Each cc candidate can be
> selected and deselected and a shortlog of authored commits can be
> displayed for each candidate.
> 
> The menu is displayed via STDERR, the end result is outputted to STDOUT.
> 
> This allows using get_maintainer.pl in interactive mode via
> git send-email --cc-cmd.
> 
> ---
>  scripts/get_maintainer.pl |  119 +++++++++++++++++++++++++++++++++++++++++++--
>  1 files changed, 115 insertions(+), 4 deletions(-)
> 
> changes from v1: I forgot to git add a small bugfix, that made it actually work 
> 
> TODO:
>  - fixup multiple file case
>  - fixup chief_penguin handling
>  - make output prettier
>  - usability?
> 
> diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
> index b228198..2e15c22 100755
> --- a/scripts/get_maintainer.pl
> +++ b/scripts/get_maintainer.pl
> @@ -32,6 +32,7 @@ my $email_git_max_maintainers = 5;
>  my $email_git_min_percent = 5;
>  my $email_git_since = "1-year-ago";
>  my $email_hg_since = "-365";
> +my $interactive = 0;
>  my $email_remove_duplicates = 1;
>  my $output_multiline = 1;
>  my $output_separator = ", ";
> @@ -51,6 +52,8 @@ my $help = 0;
>  
>  my $exit = 0;
>  
> +my %shortlog_buffer;
> +
>  my @penguin_chief = ();
>  push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
>  #Andrew wants in on most everything - 2009/01/14
> @@ -91,7 +94,8 @@ my %VCS_cmds_git = (
>      "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
>      "blame_file_cmd" => "git blame -l \$file",
>      "commit_pattern" => "^commit [0-9a-f]{40,40}",
> -    "blame_commit_pattern" => "^([0-9a-f]+) "
> +    "blame_commit_pattern" => "^([0-9a-f]+) ",
> +    "shortlog_cmd" => "git log --no-color --oneline --since=\$email_git_since --author=\"\$email\" -- \$file"

Perhaps git shortlog might be better.

This will also miss commits where $email, which is
"name <address>", is slightly different on multiple
commits or when --remove-duplicates is set and the
"name" part of $email matches.

>  );
>  
>  my %VCS_cmds_hg = (
> @@ -104,7 +108,8 @@ my %VCS_cmds_hg = (
>      "blame_range_cmd" => "",		# not supported
>      "blame_file_cmd" => "hg blame -c \$file",
>      "commit_pattern" => "^commit [0-9a-f]{40,40}",
> -    "blame_commit_pattern" => "^([0-9a-f]+):"
> +    "blame_commit_pattern" => "^([0-9a-f]+):",
> +    "shortlog_cmd" => "ht log --date=\$email_hg_since"

hg and don't you want some additional filtering?

>  );
>  
>  if (-f "${lk_path}.get_maintainer.conf") {
> @@ -142,6 +147,7 @@ if (!GetOptions(
>  		'git-min-percent=i' => \$email_git_min_percent,
>  		'git-since=s' => \$email_git_since,
>  		'hg-since=s' => \$email_hg_since,
> +		'i|interactive!' => \$interactive,
>  		'remove-duplicates!' => \$email_remove_duplicates,
>  		'm!' => \$email_maintainer,
>  		'n!' => \$email_usename,
> @@ -219,6 +225,8 @@ if ($email_git_all_signature_types) {
>      $signaturePattern = "(.+?)[Bb][Yy]:";
>  }
>  
> +
> +

Please don't add blank lines.

>  ## Read MAINTAINERS for type/value pairs
>  
>  my @typevalue = ();
> @@ -440,10 +448,14 @@ foreach my $file (@files) {
>      if ($email && $email_git) {
>  	vcs_file_signoffs($file);
>      }
> -

Or remove them.

>      if ($email && $email_git_blame) {
>  	vcs_file_blame($file);
>      }
> +    if ($email && $interactive){
> +	#FIXME: multiple file case is broken at the moment
> +	#FIXME: penguin_chief handlich is broken at the moment
> +	vcs_interactive_menu($file);
> +    }
>  }
>  
>  if ($keywords) {
> @@ -476,6 +488,7 @@ if ($email) {
>      }
>  }
>  
> +

Please don't add blank lines

>  if ($email || $email_list) {
>      my @to = ();
>      if ($email) {
> @@ -545,6 +558,7 @@ MAINTAINER field selection options:
>      --git-blame => use git blame to find modified commits for patch or file
>      --git-since => git history to use (default: $email_git_since)
>      --hg-since => hg history to use (default: $email_hg_since)
> +    --interactive => display a menu (mostly useful if used with the --git option)
>      --m => include maintainer(s) if any
>      --n => include name 'Full Name <addr\@domain.tld>'
>      --l => include list(s) if any
> @@ -1104,6 +1118,103 @@ sub vcs_exists {
>      return 0;
>  }
>  
> +sub vcs_interactive_menu {
> +    my ($file) = @_;
> +
> +    return if (!vcs_exists());
> +
> +    my %selected;
> +    my %shortlog;
> +    my $input;
> +    my $count = 0;
> +
> +    #select maintainers by default
> +    foreach my $entry (@email_to){
> +	    my $role = $entry->[1];
> +	    $selected{$count} = ($role =~ /maintainer:/);

This should probably be:
	$selected{$count} = ($role =~ /(\(supporter/maintainer/open list)/i);

> +	    $count++;
> +    }
> +
> +    #menu loop
> +    do {
> +	my $count = 0;
> +	foreach my $entry (@email_to){
> +	    my $email = $entry->[0];
> +	    my $role = $entry->[1];
> +	    if ($selected{$count}){
> +		print STDERR "* ";
> +	    } else {
> +		print STDERR "  ";
> +	    }
> +	    print STDERR "$count: $email,\t\t $role";

This won't align well, maybe something like
	my $sel;
	$sel = "*" if $selected{$count};
	printf STDERR "%1s %2d %-40s %s\n", $sel, $count, $email, $role;

or just don't try to align it at all.

> +	    print STDERR "\n";
> +	    if ($shortlog{$count}){
> +		my @lines = @{vcs_get_shortlog($email, $file)};

You should do this once before the loop for each non-list candidate,
save the result and display it only if the user asks for details.

It could be a very long list and will scroll off some display.

> +		print STDERR "\tauthored commits:" . @lines . ".\n";
> +		foreach my $commit (@lines){
> +		    print STDERR "\t$commit\n";
> +		}
> +		print STDERR "\n";
> +	    }
> +	    $count++;
> +	}

If you're going to show commits, then perhaps any commit type
(signed-off-by, tested-by, etc) should be shown.

Perhaps the vcs_<foo> commands like vcs_file_signoffs should
be modified to return a list of commit IDs and you could
parse and use that.  That way you wouldn't need to make
multiple vcs/git passes over the same content.

> +	print STDERR "\n";
> +	print STDERR "Choose whom to cc by entering a commaseperated list of numbers and hitting enter.\n";
> +	print STDERR "To show a short list of commits, precede the number by a '?',\n";

Won't necessarily be short.

> +	print STDERR "A blank line indicates that you are satisfied with your choice.\n";
> +	$input = <STDIN>;
> +	chomp($input);
> +
> +	my @wish = split(/[, ]+/,$input);
> +	foreach my $nr (@wish){
> +		my $logtoggle = 0;
> +		if ($nr =~ /\?/){
> +			$nr =~ s/\?//;
> +			$logtoggle = 1;
> +		}
> +
> +		#skip out of bounds numbers
> +		next unless ($nr <= $count && $nr >= 0);
> +
> +		if ($logtoggle){
> +			$shortlog{$nr} = !$shortlog{$nr};
> +		} else {
> +			$selected{$nr} = !$selected{$nr};
> +		}
> +	};
> +    } while(length($input) > 0);
> +
> +    #drop not selected entries
> +    $count = 0;
> +    my @new_emailto;
> +    foreach my $entry (@email_to){
> +	if ($selected{$count}){
> +		push(@new_emailto,$email_to[$count]);
> +		print STDERR "$count: ";
> +		print STDERR $email_to[$count]->[0];
> +		print STDERR ",\t\t ";
> +		print STDERR $email_to[$count]->[1];

printf should be used here too.

> +		print STDERR "\n";
> +	}
> +	$count++;
> +    }
> +    @email_to = @new_emailto;
> +}
> +
> +sub vcs_get_shortlog {
> +	my $email = shift;
> +	my ($file) = @_;
> +
> +	if (!$shortlog_buffer{$email}){
> +		my $cmd = $VCS_cmds{"shortlog_cmd"};
> +		$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
> +
> +		my @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
> +		$shortlog_buffer{$email}=\@lines;
> +	}
> +	return $shortlog_buffer{$email};
> +}
> +
>  sub vcs_assign {
>      my ($role, $divisor, @lines) = @_;
>  
> @@ -1179,7 +1290,7 @@ sub vcs_file_blame {
>  	my @commit_signers = ();
>  
>  	my $cmd = $VCS_cmds{"find_commit_signers_cmd"};
> -	$cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
> +	$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
>  
>  	($commit_count, @commit_signers) = vcs_find_signers($cmd);
>  	push(@signers, @commit_signers);




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC v3] scripts/get_maintainer.pl: add interactive mode
  2010-09-15 15:16   ` [PATCH RFC v3] " florian
@ 2010-09-15 15:35     ` Joe Perches
  2010-09-20 19:43     ` [RFC PATCH] " Joe Perches
  1 sibling, 0 replies; 18+ messages in thread
From: Joe Perches @ 2010-09-15 15:35 UTC (permalink / raw)
  To: florian; +Cc: linux-kernel, ebiederm, Andrew Morton

On Wed, 2010-09-15 at 17:16 +0200, florian@mickler.org wrote:
> TODO:
>  Btw, --rolestats is somewhat broken in the current script, it assembles the statistics 
>  sequentially file by file, meaning that candidates that have only commits on the first 
>  file get a higher percentage as candidates also active on a later file...

Don't think so, statistics are assembled file by file and
the percentages are calculated the same way.

The output list is ordered by first file percentages though.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [v2 PATCH rfc] scripts/get_maintainer.pl: add interactive mode
  2010-09-15 15:27   ` [v2 PATCH rfc] scripts/get_maintainer.pl: add interactive mode Joe Perches
@ 2010-09-16  8:30     ` Florian Mickler
  0 siblings, 0 replies; 18+ messages in thread
From: Florian Mickler @ 2010-09-16  8:30 UTC (permalink / raw)
  To: Joe Perches; +Cc: linux-kernel, ebiederm, Andrew Morton

On Wed, 15 Sep 2010 08:27:49 -0700
Joe Perches <joe@perches.com> wrote:

> On Wed, 2010-09-15 at 15:43 +0200, florian@mickler.org wrote:
> > This is a first version of an interactive mode for
> > scripts/get_maintainer.pl .
> > 
> > It allows the user to interact with the script. Each cc candidate can be
> > selected and deselected and a shortlog of authored commits can be
> > displayed for each candidate.
> > 
> > The menu is displayed via STDERR, the end result is outputted to STDOUT.
> > 
> > This allows using get_maintainer.pl in interactive mode via
> > git send-email --cc-cmd.
> > 
> > ---
> >  scripts/get_maintainer.pl |  119 +++++++++++++++++++++++++++++++++++++++++++--
> >  1 files changed, 115 insertions(+), 4 deletions(-)
> > 
> > changes from v1: I forgot to git add a small bugfix, that made it actually work 
> > 
> > TODO:
> >  - fixup multiple file case
> >  - fixup chief_penguin handling
> >  - make output prettier
> >  - usability?
> > 
> > diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
> > index b228198..2e15c22 100755
> > --- a/scripts/get_maintainer.pl
> > +++ b/scripts/get_maintainer.pl
> > @@ -32,6 +32,7 @@ my $email_git_max_maintainers = 5;
> >  my $email_git_min_percent = 5;
> >  my $email_git_since = "1-year-ago";
> >  my $email_hg_since = "-365";
> > +my $interactive = 0;
> >  my $email_remove_duplicates = 1;
> >  my $output_multiline = 1;
> >  my $output_separator = ", ";
> > @@ -51,6 +52,8 @@ my $help = 0;
> >  
> >  my $exit = 0;
> >  
> > +my %shortlog_buffer;
> > +
> >  my @penguin_chief = ();
> >  push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
> >  #Andrew wants in on most everything - 2009/01/14
> > @@ -91,7 +94,8 @@ my %VCS_cmds_git = (
> >      "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
> >      "blame_file_cmd" => "git blame -l \$file",
> >      "commit_pattern" => "^commit [0-9a-f]{40,40}",
> > -    "blame_commit_pattern" => "^([0-9a-f]+) "
> > +    "blame_commit_pattern" => "^([0-9a-f]+) ",
> > +    "shortlog_cmd" => "git log --no-color --oneline --since=\$email_git_since --author=\"\$email\" -- \$file"
> 
> Perhaps git shortlog might be better.

I want the abbreviated sha1, so that the user can investigate more
easily if he wants to.


> >  );
> >  
> >  my %VCS_cmds_hg = (
> > @@ -104,7 +108,8 @@ my %VCS_cmds_hg = (
> >      "blame_range_cmd" => "",		# not supported
> >      "blame_file_cmd" => "hg blame -c \$file",
> >      "commit_pattern" => "^commit [0-9a-f]{40,40}",
> > -    "blame_commit_pattern" => "^([0-9a-f]+):"
> > +    "blame_commit_pattern" => "^([0-9a-f]+):",
> > +    "shortlog_cmd" => "ht log --date=\$email_hg_since"
> 

> hg and don't you want some additional filtering?

Yes. But I don't have any hg installed over here atm....

> >  
> > +sub vcs_interactive_menu {
> > +    my ($file) = @_;
> > +
> > +    return if (!vcs_exists());
> > +
> > +    my %selected;
> > +    my %shortlog;
> > +    my $input;
> > +    my $count = 0;
> > +
> > +    #select maintainers by default
> > +    foreach my $entry (@email_to){
> > +	    my $role = $entry->[1];
> > +	    $selected{$count} = ($role =~ /maintainer:/);
> 
> This should probably be:
> 	$selected{$count} = ($role =~ /(\(supporter/maintainer/open list)/i);

our mails crossed. see v3.  

> 
> > +	    $count++;
> > +    }
> > +
> > +    #menu loop
> > +    do {
> > +	my $count = 0;
> > +	foreach my $entry (@email_to){
> > +	    my $email = $entry->[0];
> > +	    my $role = $entry->[1];
> > +	    if ($selected{$count}){
> > +		print STDERR "* ";
> > +	    } else {
> > +		print STDERR "  ";
> > +	    }
> > +	    print STDERR "$count: $email,\t\t $role";
> 
> This won't align well, maybe something like
> 	my $sel;
> 	$sel = "*" if $selected{$count};
> 	printf STDERR "%1s %2d %-40s %s\n", $sel, $count, $email, $role;

Yup, something like that is better.


> 
> or just don't try to align it at all.

I think the alignment is beneficial. 

> 
> > +	    print STDERR "\n";
> > +	    if ($shortlog{$count}){
> > +		my @lines = @{vcs_get_shortlog($email, $file)};
> 
> You should do this once before the loop for each non-list candidate,
> save the result and display it only if the user asks for details.
> 
> It could be a very long list and will scroll off some display.

Let's see how it plays out. Maybe wrapping the print STDERR in a
function that keeps count of how many lines it put out and waits after
N lines for user input ("press enter to see the next 80 lines"). 

But perhaps it's better to trim the output to not be too long. If
there are many authored commits, the individual commit is not 
that much new information for this usecase.

> 
> > +		print STDERR "\tauthored commits:" . @lines . ".\n";
> > +		foreach my $commit (@lines){
> > +		    print STDERR "\t$commit\n";
> > +		}
> > +		print STDERR "\n";
> > +	    }
> > +	    $count++;
> > +	}
> 
> If you're going to show commits, then perhaps any commit type
> (signed-off-by, tested-by, etc) should be shown.

The individual commits of Tested-By: and non-author Signed-Off-By: are
probably not that interesting. But the individual counts should be
displayed. 
Individual commits for Reviewed-By: could be interesting, though. 

I think the statistics gathering in get_maintainer.pl needs a little
tweaking. It's the next thing on my ToDo. 

> 
> Perhaps the vcs_<foo> commands like vcs_file_signoffs should
> be modified to return a list of commit IDs and you could
> parse and use that.  That way you wouldn't need to make
> multiple vcs/git passes over the same content.

Yes, it should be a single pass that yields all the information we
need. 


> 
> > +	print STDERR "\n";
> > +	print STDERR "Choose whom to cc by entering a commaseperated list of numbers and hitting enter.\n";
> > +	print STDERR "To show a short list of commits, precede the number by a '?',\n";
>
> Won't necessarily be short.

Indeed. 

> 
> > +	if ($selected{$count}){
> > +		push(@new_emailto,$email_to[$count]);
> > +		print STDERR "$count: ";
> > +		print STDERR $email_to[$count]->[0];
> > +		print STDERR ",\t\t ";
> > +		print STDERR $email_to[$count]->[1];
> 
> printf should be used here too.

Ack.

Thx for your feedback, suggestions and help here and per private mail!


Cheers,
Flo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [RFC PATCH] scripts/get_maintainer.pl: add interactive mode
  2010-09-15 15:16   ` [PATCH RFC v3] " florian
  2010-09-15 15:35     ` Joe Perches
@ 2010-09-20 19:43     ` Joe Perches
  2010-09-20 21:53       ` Florian Mickler
  1 sibling, 1 reply; 18+ messages in thread
From: Joe Perches @ 2010-09-20 19:43 UTC (permalink / raw)
  To: Florian Mickler
  Cc: linux-kernel, Eric W. Biederman, Andrew Morton, Wolfram Sang,
	Mark Brown, Stephen Hemminger, Stefan Richter, Greg KH

On Wed, 2010-09-15 at 17:16 +0200, florian@mickler.org wrote:
> This is a first version of an interactive mode for
> scripts/get_maintainer.pl .

Thank you Flo.

I've used Flo's initial patch and created a rather more
complete version now built on top of mm with the 5
get_maintainer patches posted earlier.

http://lkml.org/lkml/2010/9/16/150

Flo and I have worked together on this and he has some
upcoming enhancements that could improve the rerun
speed quite a bit.

For now, this uses whatever command line options are
passed as well as allows the user to change these
settings from a prompt during use.

There are still defects in this version.

You can see below that Stephen Hemminger's sign-offs
aren't correctly attributed because his MAINTAINER
entry doesn't match how he normally signs-off
(ie: @vyatta.com vs @linux-foundation.org)

For example, the --interactive (or -i) menu looks like:

--------------------------------------------------------------------------------
$ ./scripts/get_maintainer.pl -i -f drivers/net/sky2.c

*  # email/list and role:stats                                        auth sign
*  1 Stephen Hemminger <shemminger@linux-foundation.org>              
     maintainer:SKGE, SKY2 10/100...
*  2 netdev@vger.kernel.org                                           
     open list:SKGE, SKY2 10/100...
*  3 linux-kernel@vger.kernel.org                                     
     open list

#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): 
--------------------------------------------------------------------------------

with options:

Version Control options:
g  use git history      [0]
gf use git-fallback     [1]
b  use git blame        [0]
bs use blame signatures [1]
c# minimum commits      [1]
%# min percent          [5]
d# history to use       [1-year-ago]
x# max maintainers      [5]
t  all signature types  [0]

Additional options:
0  toggle all
f  emails in file       [0]
k  keywords in file     [1]
r  remove duplicates    [1]
p# pattern match depth  [0]

Because git history is now not searched by default
when there is a named maintainer, there are no
commit signers.

If the "G" option is entered, you get:

#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): g
--------------------------------------------------------------------------------
*  # email/list and role:stats                                        auth sign
*  1 Stephen Hemminger <shemminger@linux-foundation.org>                 1    0
     maintainer:SKGE, SKY2 10/100...,commit_signer:38/69=55%
   2 "David S. Miller" <davem@davemloft.net>                             7   57
     commit_signer:57/69=83%
   3 Mike McCormack <mikem@ring3k.org>                                  16   16
     commit_signer:16/69=23%
   4 Joe Perches <joe@perches.com>                                       4    4
     commit_signer:4/69=6%
*  5 netdev@vger.kernel.org                                           
     open list:SKGE, SKY2 10/100...
*  6 linux-kernel@vger.kernel.org                                     
     open list

#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): 
--------------------------------------------------------------------------------

git signers are not selected by default, these can be selected either
individually by #, or all by *, or multiple can be selected/deselected
from a single command like: "2,3"

To see what commits any individual contributor has authored or
signed, enter "A" or "S" with the #(s)

--------------------------------------------------------------------------------
#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): a4

*  # email/list and role:stats                                        auth sign
*  1 Stephen Hemminger <shemminger@linux-foundation.org>                 1    0
     maintainer:SKGE, SKY2 10/100...,commit_signer:38/69=55%
   2 "David S. Miller" <davem@davemloft.net>                             7   57
     commit_signer:57/69=83%
   3 Mike McCormack <mikem@ring3k.org>                                  16   16
     commit_signer:16/69=23%
   4 Joe Perches <joe@perches.com>                                       4    4
     commit_signer:4/69=6%
     Author: drivers/net/sky2.c: Use (pr|netdev)_<level> macro helpers
     Author: drivers/net/sky2: Convert to use netif_printk macros
     Author: drivers/net: Move && and || to end of previous line
     Author: trivial: remove unnecessary semicolons
*  5 netdev@vger.kernel.org                                           
     open list:SKGE, SKY2 10/100...
*  6 linux-kernel@vger.kernel.org                                     
     open list

#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): 
--------------------------------------------------------------------------------

There are also significant improvements in HG runtime.
HG is now faster than git for blame and commit signer
lookups.

Here's the rather large patch:

---
 scripts/get_maintainer.pl |  819 +++++++++++++++++++++++++++++++++++----------
 1 files changed, 638 insertions(+), 181 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index e5a400c..f511760 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -13,7 +13,7 @@
 use strict;
 
 my $P = $0;
-my $V = '0.25';
+my $V = '0.26-beta3';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 
@@ -27,12 +27,14 @@ my $email_git_penguin_chiefs = 0;
 my $email_git = 0;
 my $email_git_all_signature_types = 0;
 my $email_git_blame = 0;
+my $email_git_blame_signatures = 1;
 my $email_git_fallback = 1;
 my $email_git_min_signatures = 1;
 my $email_git_max_maintainers = 5;
 my $email_git_min_percent = 5;
 my $email_git_since = "1-year-ago";
 my $email_hg_since = "-365";
+my $interactive = 0;
 my $email_remove_duplicates = 1;
 my $output_multiline = 1;
 my $output_separator = ", ";
@@ -50,8 +52,13 @@ my $pattern_depth = 0;
 my $version = 0;
 my $help = 0;
 
+my $vcs_used = 0;
+
 my $exit = 0;
 
+my %commit_author_hash;
+my %commit_signer_hash;
+
 my @penguin_chief = ();
 push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
 #Andrew wants in on most everything - 2009/01/14
@@ -74,7 +81,6 @@ my @signature_tags = ();
 push(@signature_tags, "Signed-off-by:");
 push(@signature_tags, "Reviewed-by:");
 push(@signature_tags, "Acked-by:");
-my $signaturePattern = "\(" . join("|", @signature_tags) . "\)";
 
 # rfc822 email address - preloaded methods go here.
 my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
@@ -87,27 +93,62 @@ my %VCS_cmds;
 my %VCS_cmds_git = (
     "execute_cmd" => \&git_execute_cmd,
     "available" => '(which("git") ne "") && (-d ".git")',
-    "find_signers_cmd" => "git log --no-color --since=\$email_git_since -- \$file",
-    "find_commit_signers_cmd" => "git log --no-color -1 \$commit",
-    "find_commit_author_cmd" => "git log -1 --format=\"%an <%ae>\" \$commit",
+    "find_signers_cmd" =>
+	"git log --no-color --since=\$email_git_since " .
+	    '--format="GitCommit: %H%n' .
+		      'GitAuthor: %an <%ae>%n' .
+		      'GitDate: %aD%n' .
+		      'GitSubject: %s%n' .
+		      '%b%n"' .
+	    " -- \$file",
+    "find_commit_signers_cmd" =>
+	"git log --no-color " .
+	    '--format="GitCommit: %H%n' .
+		      'GitAuthor: %an <%ae>%n' .
+		      'GitDate: %aD%n' .
+		      'GitSubject: %s%n' .
+		      '%b%n"' .
+	    " -1 \$commit",
+    "find_commit_author_cmd" =>
+	"git log --no-color " .
+	    '--format="GitCommit: %H%n' .
+		      'GitAuthor: %an <%ae>%n' .
+		      'GitDate: %aD%n' .
+		      'GitSubject: %s%n"' .
+	    " -1 \$commit",
     "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
     "blame_file_cmd" => "git blame -l \$file",
-    "commit_pattern" => "^commit [0-9a-f]{40,40}",
-    "blame_commit_pattern" => "^([0-9a-f]+) "
+    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
+    "blame_commit_pattern" => "^([0-9a-f]+) ",
+    "author_pattern" => "^GitAuthor: (.*)",
+    "subject_pattern" => "^GitSubject: (.*)",
 );
 
 my %VCS_cmds_hg = (
     "execute_cmd" => \&hg_execute_cmd,
     "available" => '(which("hg") ne "") && (-d ".hg")',
     "find_signers_cmd" =>
-	"hg log --date=\$email_hg_since" .
-		" --template='commit {node}\\n{desc}\\n' -- \$file",
-    "find_commit_signers_cmd" => "hg log --template='{desc}\\n' -r \$commit",
-    "find_commit_author_cmd" => "hg log -l 1 --template='{author}\\n' -r \$commit",
+	"hg log --date=\$email_hg_since " .
+	    "--template='HgCommit: {node}\\n" .
+	                "HgAuthor: {author}\\n" .
+			"HgSubject: {desc}\\n'" .
+	    " -- \$file",
+    "find_commit_signers_cmd" =>
+	"hg log " .
+	    "--template='HgSubject: {desc}\\n'" .
+	    " -r \$commit",
+    "find_commit_author_cmd" =>
+	"hg log " .
+	    "--template='HgCommit: {node}\\n" .
+		        "HgAuthor: {author}\\n" .
+			"HgSubject: {desc|firstline}\\n'" .
+	    " -r \$commit",
     "blame_range_cmd" => "",		# not supported
-    "blame_file_cmd" => "hg blame -c \$file",
-    "commit_pattern" => "^commit [0-9a-f]{40,40}",
-    "blame_commit_pattern" => "^([0-9a-f]+):"
+    "blame_file_cmd" => "hg blame -n \$file",
+    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
+    "blame_commit_pattern" => "^([ 0-9a-f]+):",
+    "author_pattern" => "^HgAuthor: (.*)",
+    "subject_pattern" => "^HgSubject: (.*)",
 );
 
 my $conf = which_conf(".get_maintainer.conf");
@@ -141,6 +182,7 @@ if (!GetOptions(
 		'git!' => \$email_git,
 		'git-all-signature-types!' => \$email_git_all_signature_types,
 		'git-blame!' => \$email_git_blame,
+		'git-blame-signatures!' => \$email_git_blame_signatures,
 		'git-fallback!' => \$email_git_fallback,
 		'git-chief-penguins!' => \$email_git_penguin_chiefs,
 		'git-min-signatures=i' => \$email_git_min_signatures,
@@ -148,6 +190,7 @@ if (!GetOptions(
 		'git-min-percent=i' => \$email_git_min_percent,
 		'git-since=s' => \$email_git_since,
 		'hg-since=s' => \$email_hg_since,
+		'i|interactive!' => \$interactive,
 		'remove-duplicates!' => \$email_remove_duplicates,
 		'm!' => \$email_maintainer,
 		'n!' => \$email_usename,
@@ -187,13 +230,9 @@ if (-t STDIN && !@ARGV) {
     die "$P: missing patchfile or -f file - use --help if necessary\n";
 }
 
-if ($output_separator ne ", ") {
-    $output_multiline = 0;
-}
-
-if ($output_rolestats) {
-    $output_roles = 1;
-}
+$output_multiline = 0 if ($output_separator ne ", ");
+$output_rolestats = 1 if ($interactive);
+$output_roles = 1 if ($output_rolestats);
 
 if ($sections) {
     $email = 0;
@@ -221,10 +260,6 @@ if (!top_of_kernel_tree($lk_path)) {
 	. "a linux kernel source tree.\n";
 }
 
-if ($email_git_all_signature_types) {
-    $signaturePattern = "(.+?)[Bb][Yy]:";
-}
-
 ## Read MAINTAINERS for type/value pairs
 
 my @typevalue = ();
@@ -363,162 +398,193 @@ foreach my $file (@ARGV) {
 
 @file_emails = uniq(@file_emails);
 
+my %email_hash_name;
+my %email_hash_address;
 my @email_to = ();
+my %hash_list_to;
 my @list_to = ();
 my @scm = ();
 my @web = ();
 my @subsystem = ();
 my @status = ();
+my $signature_pattern;
 
-# Find responsible parties
+my @to = get_maintainer();
 
-foreach my $file (@files) {
+@to = merge_email(@to);
 
-    my %hash;
-    my $exact_pattern_match = 0;
-    my $tvi = find_first_section();
-    while ($tvi < @typevalue) {
-	my $start = find_starting_index($tvi);
-	my $end = find_ending_index($tvi);
-	my $exclude = 0;
-	my $i;
-
-	#Do not match excluded file patterns
-
-	for ($i = $start; $i < $end; $i++) {
-	    my $line = $typevalue[$i];
-	    if ($line =~ m/^(\C):\s*(.*)/) {
-		my $type = $1;
-		my $value = $2;
-		if ($type eq 'X') {
-		    if (file_match_pattern($file, $value)) {
-			$exclude = 1;
-			last;
-		    }
-		}
-	    }
-	}
+output(@to) if (@to);
+
+if ($scm) {
+    @scm = uniq(@scm);
+    output(@scm);
+}
+
+if ($status) {
+    @status = uniq(@status);
+    output(@status);
+}
+
+if ($subsystem) {
+    @subsystem = uniq(@subsystem);
+    output(@subsystem);
+}
+
+if ($web) {
+    @web = uniq(@web);
+    output(@web);
+}
+
+exit($exit);
+
+sub get_maintainer {
+    %email_hash_name = ();
+    %email_hash_address = ();
+    %commit_author_hash = ();
+    %commit_signer_hash = ();
+    @email_to = ();
+    %hash_list_to = ();
+    @list_to = ();
+    @scm = ();
+    @web = ();
+    @subsystem = ();
+    @status = ();
+
+    if ($email_git_all_signature_types) {
+	$signature_pattern = "(.+?)[Bb][Yy]:";
+    } else {
+	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
+    }
+
+    # Find responsible parties
+
+    foreach my $file (@files) {
+
+	my %hash;
+	my $exact_pattern_match = 0;
+	my $tvi = find_first_section();
+	while ($tvi < @typevalue) {
+	    my $start = find_starting_index($tvi);
+	    my $end = find_ending_index($tvi);
+	    my $exclude = 0;
+	    my $i;
+
+	    #Do not match excluded file patterns
 
-	if (!$exclude) {
 	    for ($i = $start; $i < $end; $i++) {
 		my $line = $typevalue[$i];
 		if ($line =~ m/^(\C):\s*(.*)/) {
 		    my $type = $1;
 		    my $value = $2;
-		    if ($type eq 'F') {
+		    if ($type eq 'X') {
 			if (file_match_pattern($file, $value)) {
-			    my $value_pd = ($value =~ tr@/@@);
-			    my $file_pd = ($file  =~ tr@/@@);
-			    $value_pd++ if (substr($value,-1,1) ne "/");
-			    $value_pd = -1 if ($value =~ /^\.\*/);
-			    $exact_pattern_match = 1 if ($value_pd >= $file_pd);
-			    if ($pattern_depth == 0 ||
-				(($file_pd - $value_pd) < $pattern_depth)) {
-				$hash{$tvi} = $value_pd;
+			    $exclude = 1;
+			    last;
+			}
+		    }
+		}
+	    }
+
+	    if (!$exclude) {
+		for ($i = $start; $i < $end; $i++) {
+		    my $line = $typevalue[$i];
+		    if ($line =~ m/^(\C):\s*(.*)/) {
+			my $type = $1;
+			my $value = $2;
+			if ($type eq 'F') {
+			    if (file_match_pattern($file, $value)) {
+				my $value_pd = ($value =~ tr@/@@);
+				my $file_pd = ($file  =~ tr@/@@);
+				$value_pd++ if (substr($value,-1,1) ne "/");
+				$value_pd = -1 if ($value =~ /^\.\*/);
+				$exact_pattern_match = 1 if ($value_pd >= $file_pd);
+				if ($pattern_depth == 0 ||
+				    (($file_pd - $value_pd) < $pattern_depth)) {
+				    $hash{$tvi} = $value_pd;
+				}
 			    }
 			}
 		    }
 		}
 	    }
+	    $tvi = $end + 1;
 	}
 
-	$tvi = $end + 1;
-    }
-
-    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
-	add_categories($line);
-	if ($sections) {
-	    my $i;
-	    my $start = find_starting_index($line);
-	    my $end = find_ending_index($line);
-	    for ($i = $start; $i < $end; $i++) {
-		my $line = $typevalue[$i];
-		if ($line =~ /^[FX]:/) {		##Restore file patterns
-		    $line =~ s/([^\\])\.([^\*])/$1\?$2/g;
-		    $line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
-		    $line =~ s/\\\./\./g;       	##Convert \. to .
-		    $line =~ s/\.\*/\*/g;       	##Convert .* to *
+	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+	    add_categories($line);
+	    if ($sections) {
+		my $i;
+		my $start = find_starting_index($line);
+		my $end = find_ending_index($line);
+		for ($i = $start; $i < $end; $i++) {
+		    my $line = $typevalue[$i];
+		    if ($line =~ /^[FX]:/) {		##Restore file patterns
+			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
+			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
+			$line =~ s/\\\./\./g;       	##Convert \. to .
+			$line =~ s/\.\*/\*/g;       	##Convert .* to *
+		    }
+		    $line =~ s/^([A-Z]):/$1:\t/g;
+		    print("$line\n");
 		}
-		$line =~ s/^([A-Z]):/$1:\t/g;
-		print("$line\n");
+		print("\n");
 	    }
-	    print("\n");
 	}
-    }
-
-    if ($email &&
-	($email_git || ($email_git_fallback && !$exact_pattern_match))) {
-	vcs_file_signoffs($file);
-    }
 
-    if ($email && $email_git_blame) {
-	vcs_file_blame($file);
+	if ($email && ($email_git ||
+		       ($email_git_fallback && !$exact_pattern_match))) {
+	    vcs_file_signoffs($file);
+	}
+	if ($email && $email_git_blame) {
+	    vcs_file_blame($file);
+	}
     }
-}
 
-if ($keywords) {
-    @keyword_tvi = sort_and_uniq(@keyword_tvi);
-    foreach my $line (@keyword_tvi) {
-	add_categories($line);
+    if ($keywords) {
+	@keyword_tvi = sort_and_uniq(@keyword_tvi);
+	foreach my $line (@keyword_tvi) {
+	    add_categories($line);
+	}
     }
-}
 
-if ($email) {
-    foreach my $chief (@penguin_chief) {
-	if ($chief =~ m/^(.*):(.*)/) {
-	    my $email_address;
+    if ($email) {
+	foreach my $chief (@penguin_chief) {
+	    if ($chief =~ m/^(.*):(.*)/) {
+		my $email_address;
 
-	    $email_address = format_email($1, $2, $email_usename);
-	    if ($email_git_penguin_chiefs) {
-		push(@email_to, [$email_address, 'chief penguin']);
-	    } else {
-		@email_to = grep($_->[0] !~ /${email_address}/, @email_to);
+		$email_address = format_email($1, $2, $email_usename);
+		if ($email_git_penguin_chiefs) {
+		    push(@email_to, [$email_address, 'chief penguin']);
+		} else {
+		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
+		}
 	    }
 	}
-    }
 
-    foreach my $email (@file_emails) {
-	my ($name, $address) = parse_email($email);
+	foreach my $email (@file_emails) {
+	    my ($name, $address) = parse_email($email);
 
-	my $tmp_email = format_email($name, $address, $email_usename);
-	push_email_address($tmp_email, '');
-	add_role($tmp_email, 'in file');
+	    my $tmp_email = format_email($name, $address, $email_usename);
+	    push_email_address($tmp_email, '');
+	    add_role($tmp_email, 'in file');
+	}
     }
-}
 
-if ($email || $email_list) {
     my @to = ();
-    if ($email) {
-	@to = (@to, @email_to);
-    }
-    if ($email_list) {
-	@to = (@to, @list_to);
+    if ($email || $email_list) {
+	if ($email) {
+	    @to = (@to, @email_to);
+	}
+	if ($email_list) {
+	    @to = (@to, @list_to);
+	}
     }
-    output(merge_email(@to));
-}
-
-if ($scm) {
-    @scm = uniq(@scm);
-    output(@scm);
-}
-
-if ($status) {
-    @status = uniq(@status);
-    output(@status);
-}
 
-if ($subsystem) {
-    @subsystem = uniq(@subsystem);
-    output(@subsystem);
-}
+    @to = interactive_get_maintainer(\@to) if ($interactive);
 
-if ($web) {
-    @web = uniq(@web);
-    output(@web);
+    return @to;
 }
 
-exit($exit);
-
 sub file_match_pattern {
     my ($file, $pattern) = @_;
     if (substr($pattern, -1) eq "/") {
@@ -547,7 +613,7 @@ MAINTAINER field selection options:
   --email => print email address(es) if any
     --git => include recent git \*-by: signers
     --git-all-signature-types => include signers regardless of signature type
-        or use only ${signaturePattern} signers (default: $email_git_all_signature_types)
+        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
     --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
     --git-chief-penguins => include ${penguin_chiefs}
     --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
@@ -556,6 +622,7 @@ MAINTAINER field selection options:
     --git-blame => use git blame to find modified commits for patch or file
     --git-since => git history to use (default: $email_git_since)
     --hg-since => hg history to use (default: $email_hg_since)
+    --interactive => display a menu (mostly useful if used with the --git option)
     --m => include maintainer(s) if any
     --n => include name 'Full Name <addr\@domain.tld>'
     --l => include list(s) if any
@@ -832,11 +899,19 @@ sub add_categories {
 		}
 		if ($list_additional =~ m/subscribers-only/) {
 		    if ($email_subscriber_list) {
-			push(@list_to, [$list_address, "subscriber list${list_role}"]);
+			if (!$hash_list_to{$list_address}) {
+			    $hash_list_to{$list_address} = 1;
+			    push(@list_to, [$list_address,
+					    "subscriber list${list_role}"]);
+			}
 		    }
 		} else {
 		    if ($email_list) {
-			push(@list_to, [$list_address, "open list${list_role}"]);
+			if (!$hash_list_to{$list_address}) {
+			    $hash_list_to{$list_address} = 1;
+			    push(@list_to, [$list_address,
+					    "open list${list_role}"]);
+			}
 		    }
 		}
 	    } elsif ($ptype eq "M") {
@@ -867,9 +942,6 @@ sub add_categories {
     }
 }
 
-my %email_hash_name;
-my %email_hash_address;
-
 sub email_inuse {
     my ($name, $address) = @_;
 
@@ -1022,10 +1094,31 @@ sub hg_execute_cmd {
     return @lines;
 }
 
+sub extract_formatted_signatures {
+    my (@signature_lines) = @_;
+
+    my @type = @signature_lines;
+
+    s/\s*(.*):.*/$1/ for (@type);
+
+    # cut -f2- -d":"
+    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
+
+## Reformat email addresses (with names) to avoid badly written signatures
+
+    foreach my $signer (@signature_lines) {
+	my ($name, $address) = parse_email($signer);
+	$signer = format_email($name, $address, 1);
+    }
+
+    return (\@type, \@signature_lines);
+}
+
 sub vcs_find_signers {
     my ($cmd) = @_;
-    my @lines = ();
     my $commits;
+    my @lines = ();
+    my @signatures = ();
 
     @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
 
@@ -1033,24 +1126,20 @@ sub vcs_find_signers {
 
     $commits = grep(/$pattern/, @lines);	# of commits
 
-    @lines = grep(/^[ \t]*${signaturePattern}.*\@.*$/, @lines);
-    if (!$email_git_penguin_chiefs) {
-	@lines = grep(!/${penguin_chiefs}/i, @lines);
-    }
-
-    return (0, @lines) if !@lines;
+    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
 
-    # cut -f2- -d":"
-    s/.*:\s*(.+)\s*/$1/ for (@lines);
+    return (0, @signatures) if !@signatures;
 
-## Reformat email addresses (with names) to avoid badly written signatures
+    save_commits_by_author(@lines) if ($interactive);
+    save_commits_by_signer(@lines) if ($interactive);
 
-    foreach my $line (@lines) {
-	my ($name, $address) = parse_email($line);
-	$line = format_email($name, $address, 1);
+    if (!$email_git_penguin_chiefs) {
+	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
     }
 
-    return ($commits, @lines);
+    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
+
+    return ($commits, @$signers_ref);
 }
 
 sub vcs_find_author {
@@ -1065,14 +1154,20 @@ sub vcs_find_author {
 
     return @lines if !@lines;
 
-## Reformat email addresses (with names) to avoid badly written signatures
-
+    my @authors = ();
     foreach my $line (@lines) {
-	my ($name, $address) = parse_email($line);
-	$line = format_email($name, $address, 1);
+	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
+	    my $author = $1;
+	    my ($name, $address) = parse_email($author);
+	    $author = format_email($name, $address, 1);
+	    push(@authors, $author);
+	}
     }
 
-    return @lines;
+    save_commits_by_author(@lines) if ($interactive);
+    save_commits_by_signer(@lines) if ($interactive);
+
+    return @authors;
 }
 
 sub vcs_save_commits {
@@ -1144,7 +1239,7 @@ sub vcs_exists {
     %VCS_cmds = %VCS_cmds_git;
     return 1 if eval $VCS_cmds{"available"};
     %VCS_cmds = %VCS_cmds_hg;
-    return 1 if eval $VCS_cmds{"available"};
+    return 2 if eval $VCS_cmds{"available"};
     %VCS_cmds = ();
     if (!$printed_novcs) {
 	warn("$P: No supported VCS found.  Add --nogit to options?\n");
@@ -1156,6 +1251,311 @@ sub vcs_exists {
     return 0;
 }
 
+sub vcs_is_git {
+    return $vcs_used == 1;
+}
+
+sub vcs_is_hg {
+    return $vcs_used == 2;
+}
+
+sub interactive_get_maintainer {
+    my ($list_ref) = @_;
+    my @list = @$list_ref;
+
+    vcs_exists();
+
+    my %selected;
+    my %authored;
+    my %signed;
+    my $count = 0;
+
+    #select maintainers by default
+    foreach my $entry (@list){
+	my $role = $entry->[1];
+	$selected{$count} = ($role =~ /^(maintainer|supporter|open list)/);
+	$authored{$count} = 0;
+	$signed{$count} = 0;
+	$count++;
+    }
+
+    #menu loop
+    my $done = 0;
+    my $print_options = 0;
+    my $redraw = 1;
+    while (!$done) {
+	$count = 0;
+	if ($redraw) {
+	    printf STDERR "\n%1s %2s %-65sauth sign\n",
+		"*", "#", "email/list and role:stats";
+	    foreach my $entry (@list) {
+		my $email = $entry->[0];
+		my $role = $entry->[1];
+		my $sel = "";
+		$sel = "*" if ($selected{$count});
+		my $commit_author = $commit_author_hash{$email};
+		my $commit_signer = $commit_signer_hash{$email};
+		my $authored = 0;
+		my $signed = 0;
+		$authored++ for (@{$commit_author});
+		$signed++ for (@{$commit_signer});
+		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
+		printf STDERR "%4d %4d", $authored, $signed
+		    if ($authored > 0 || $signed > 0);
+		printf STDERR "\n     %s\n", $role;
+		if ($authored{$count}) {
+		    my $commit_author = $commit_author_hash{$email};
+		    foreach my $ref (@{$commit_author}) {
+			print STDERR "     Author: @{$ref}[1]\n";
+		    }
+		}
+		if ($signed{$count}) {
+		    my $commit_signer = $commit_signer_hash{$email};
+		    foreach my $ref (@{$commit_signer}) {
+			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
+		    }
+		}
+
+		$count++;
+	    }
+	}
+	my $date_ref = \$email_git_since;
+	$date_ref = \$email_hg_since if (vcs_is_hg());
+	if ($print_options) {
+	    $print_options = 0;
+	    if (vcs_exists()) {
+		print STDERR
+"\nVersion Control options:\n" .
+"g  use git history      [$email_git]\n" .
+"gf use git-fallback     [$email_git_fallback]\n" .
+"b  use git blame        [$email_git_blame]\n" .
+"bs use blame signatures [$email_git_blame_signatures]\n" .
+"c# minimum commits      [$email_git_min_signatures]\n" .
+"%# min percent          [$email_git_min_percent]\n" .
+"d# history to use       [$$date_ref]\n" .
+"x# max maintainers      [$email_git_max_maintainers]\n" .
+"t  all signature types  [$email_git_all_signature_types]\n";
+	    }
+	    print STDERR "\nAdditional options:\n" .
+"0  toggle all\n" .
+"f  emails in file       [$file_emails]\n" .
+"k  keywords in file     [$keywords]\n" .
+"r  remove duplicates    [$email_remove_duplicates]\n" .
+"p# pattern match depth  [$pattern_depth]\n";
+	}
+	print STDERR
+"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
+
+	my $input = <STDIN>;
+	chomp($input);
+
+	$redraw = 1;
+	my $rerun = 0;
+	my @wish = split(/[, ]+/, $input);
+	foreach my $nr (@wish) {
+	    $nr = lc($nr);
+	    my $sel = substr($nr, 0, 1);
+	    my $str = substr($nr, 1);
+	    my $val = 0;
+	    $val = $1 if $str =~ /^(\d+)$/;
+
+	    if ($sel eq "y") {
+		$interactive = 0;
+		$done = 1;
+		$output_rolestats = 0;
+		$output_roles = 0;
+		last;
+	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
+		$selected{$nr - 1} = !$selected{$nr - 1};
+	    } elsif ($sel eq "*" || $sel eq '^') {
+		my $toggle = 0;
+		$toggle = 1 if ($sel eq '*');
+		for (my $i = 0; $i < $count; $i++) {
+		    $selected{$i} = $toggle;
+		}
+	    } elsif ($sel eq "0") {
+		for (my $i = 0; $i < $count; $i++) {
+		    $selected{$i} = !$selected{$i};
+		}
+	    } elsif ($sel eq "a") {
+		if ($val > 0 && $val <= $count) {
+		    $authored{$val - 1} = !$authored{$val - 1};
+		} elsif ($str eq '*' || $str eq '^') {
+		    my $toggle = 0;
+		    $toggle = 1 if ($str eq '*');
+		    for (my $i = 0; $i < $count; $i++) {
+			$authored{$i} = $toggle;
+		    }
+		}
+	    } elsif ($sel eq "s") {
+		if ($val > 0 && $val <= $count) {
+		    $signed{$val - 1} = !$signed{$val - 1};
+		} elsif ($str eq '*' || $str eq '^') {
+		    my $toggle = 0;
+		    $toggle = 1 if ($str eq '*');
+		    for (my $i = 0; $i < $count; $i++) {
+			$signed{$i} = $toggle;
+		    }
+		}
+	    } elsif ($sel eq "o") {
+		$print_options = 1;
+		$redraw = 1;
+	    } elsif ($sel eq "g") {
+		if ($str eq "f") {
+		    bool_invert(\$email_git_fallback);
+		} else {
+		    bool_invert(\$email_git);
+		}
+		$rerun = 1;
+	    } elsif ($sel eq "b") {
+		if ($str eq "s") {
+		    bool_invert(\$email_git_blame_signatures);
+		} else {
+		    bool_invert(\$email_git_blame);
+		}
+		$rerun = 1;
+	    } elsif ($sel eq "c") {
+		if ($val > 0) {
+		    $email_git_min_signatures = $val;
+		    $rerun = 1;
+		}
+	    } elsif ($sel eq "x") {
+		if ($val > 0) {
+		    $email_git_max_maintainers = $val;
+		    $rerun = 1;
+		}
+	    } elsif ($sel eq "%") {
+		if ($str ne "" && $val >= 0) {
+		    $email_git_min_percent = $val;
+		    $rerun = 1;
+		}
+	    } elsif ($sel eq "d") {
+		if (vcs_is_git()) {
+		    $email_git_since = $str;
+		} elsif (vcs_is_hg()) {
+		    $email_hg_since = $str;
+		}
+		$rerun = 1;
+	    } elsif ($sel eq "t") {
+		bool_invert(\$email_git_all_signature_types);
+		$rerun = 1;
+	    } elsif ($sel eq "f") {
+		bool_invert(\$file_emails);
+		$rerun = 1;
+	    } elsif ($sel eq "r") {
+		bool_invert(\$email_remove_duplicates);
+		$rerun = 1;
+	    } elsif ($sel eq "k") {
+		bool_invert(\$keywords);
+		$rerun = 1;
+	    } elsif ($sel eq "p") {
+		if ($str ne "" && $val >= 0) {
+		    $pattern_depth = $val;
+		    $rerun = 1;
+		}
+	    } else {
+		print STDERR "invalid option: '$nr'\n";
+		$redraw = 0;
+	    }
+	}
+	if ($rerun) {
+	    print STDERR "git-blame can be very slow, please have patience..."
+		if ($email_git_blame);
+	    goto &get_maintainer;
+	}
+    }
+
+    #drop not selected entries
+    $count = 0;
+    my @new_emailto = ();
+    foreach my $entry (@list) {
+	if ($selected{$count}) {
+	    push(@new_emailto, $list[$count]);
+	}
+	$count++;
+    }
+    return @new_emailto;
+}
+
+sub bool_invert {
+    my ($bool_ref) = @_;
+
+    if ($$bool_ref) {
+	$$bool_ref = 0;
+    } else {
+	$$bool_ref = 1;
+    }
+}
+
+sub save_commits_by_author {
+    my (@lines) = @_;
+
+    my @authors = ();
+    my @commits = ();
+    my @subjects = ();
+
+    foreach my $line (@lines) {
+	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
+	    my $author = $1;
+	    my ($name, $address) = parse_email($author);
+	    $author = format_email($name, $address, 1);
+	    push(@authors, $author);
+	}
+	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
+	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
+    }
+
+    for (my $i = 0; $i < @authors; $i++) {
+	my $exists = 0;
+	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
+	    if (@{$ref}[0] eq $commits[$i] &&
+		@{$ref}[1] eq $subjects[$i]) {
+		$exists = 1;
+		last;
+	    }
+	}
+	if (!$exists) {
+	    push(@{$commit_author_hash{$authors[$i]}},
+		 [ ($commits[$i], $subjects[$i]) ]);
+	}
+    }
+}
+
+sub save_commits_by_signer {
+    my (@lines) = @_;
+
+    my $commit = "";
+    my $subject = "";
+
+    foreach my $line (@lines) {
+	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
+	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
+	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
+	    my @signatures = ($line);
+	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
+	    my @types = @$types_ref;
+	    my @signers = @$signers_ref;
+
+	    my $type = $types[0];
+	    my $signer = $signers[0];
+
+	    my $exists = 0;
+	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
+		if (@{$ref}[0] eq $commit &&
+		    @{$ref}[1] eq $subject &&
+		    @{$ref}[2] eq $type) {
+		    $exists = 1;
+		    last;
+		}
+	    }
+	    if (!$exists) {
+		push(@{$commit_signer_hash{$signer}},
+		     [ ($commit, $subject, $type) ]);
+	    }
+	}
+    }
+}
+
 sub vcs_assign {
     my ($role, $divisor, @lines) = @_;
 
@@ -1206,7 +1606,8 @@ sub vcs_file_signoffs {
     my @signers = ();
     my $commits;
 
-    return if (!vcs_exists());
+    $vcs_used = vcs_exists();
+    return if (!$vcs_used);
 
     my $cmd = $VCS_cmds{"find_signers_cmd"};
     $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
@@ -1224,37 +1625,93 @@ sub vcs_file_blame {
     my $total_commits;
     my $total_lines;
 
-    return if (!vcs_exists());
+    $vcs_used = vcs_exists();
+    return if (!$vcs_used);
 
     @all_commits = vcs_blame($file);
     @commits = uniq(@all_commits);
     $total_commits = @commits;
     $total_lines = @all_commits;
 
-    foreach my $commit (@commits) {
-	my $commit_count;
-	my @commit_signers = ();
+    if ($email_git_blame_signatures) {
+	if (vcs_is_hg()) {
+	    my $commit_count;
+	    my @commit_signers = ();
+	    my $commit = join(" -r ", @commits);
+	    my $cmd;
+
+	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
+	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
 
-	my $cmd = $VCS_cmds{"find_commit_signers_cmd"};
-	$cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
+	    ($commit_count, @commit_signers) = vcs_find_signers($cmd);
 
-	($commit_count, @commit_signers) = vcs_find_signers($cmd);
+	    push(@signers, @commit_signers);
+	} else {
+	    foreach my $commit (@commits) {
+		my $commit_count;
+		my @commit_signers = ();
+		my $cmd;
 
-	push(@signers, @commit_signers);
+		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
+		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
+
+		($commit_count, @commit_signers) = vcs_find_signers($cmd);
+
+		push(@signers, @commit_signers);
+	    }
+	}
     }
 
     if ($from_filename) {
 	if ($output_rolestats) {
 	    my @blame_signers;
-	    foreach my $commit (@commits) {
-		my $i;
-		my $cmd = $VCS_cmds{"find_commit_author_cmd"};
-		$cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
-		my @author = vcs_find_author($cmd);
-		next if !@author;
-		my $count = grep(/$commit/, @all_commits);
-		for ($i = 0; $i < $count ; $i++) {
-		    push(@blame_signers, $author[0]);
+	    if (vcs_is_hg()) {{		# Double brace for last exit
+		my $commit_count;
+		my @commit_signers = ();
+		@commits = uniq(@commits);
+		@commits = sort(@commits);
+		my $commit = join(" -r ", @commits);
+		my $cmd;
+
+		$cmd = $VCS_cmds{"find_commit_author_cmd"};
+		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
+
+		my @lines = ();
+
+		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
+
+		if (!$email_git_penguin_chiefs) {
+		    @lines = grep(!/${penguin_chiefs}/i, @lines);
+		}
+
+		last if !@lines;
+
+		my @authors = ();
+		foreach my $line (@lines) {
+		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
+			my $author = $1;
+			my ($name, $address) = parse_email($author);
+			$author = format_email($name, $address, 1);
+			push(@authors, $1);
+		    }
+		}
+
+		save_commits_by_author(@lines) if ($interactive);
+		save_commits_by_signer(@lines) if ($interactive);
+
+		push(@signers, @authors);
+	    }}
+	    else {
+		foreach my $commit (@commits) {
+		    my $i;
+		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
+		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
+		    my @author = vcs_find_author($cmd);
+		    next if !@author;
+		    my $count = grep(/$commit/, @all_commits);
+		    for ($i = 0; $i < $count ; $i++) {
+			push(@blame_signers, $author[0]);
+		    }
 		}
 	    }
 	    if (@blame_signers) {



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] scripts/get_maintainer.pl: add interactive mode
  2010-09-20 19:43     ` [RFC PATCH] " Joe Perches
@ 2010-09-20 21:53       ` Florian Mickler
  2010-09-20 22:27         ` [PATCH] scripts/get_maintainer.pl: fix .mailmap handling florian
  2010-09-21  0:38         ` [RFC PATCH] scripts/get_maintainer.pl: add interactive mode Joe Perches
  0 siblings, 2 replies; 18+ messages in thread
From: Florian Mickler @ 2010-09-20 21:53 UTC (permalink / raw)
  To: Joe Perches
  Cc: linux-kernel, Eric W. Biederman, Andrew Morton, Wolfram Sang,
	Mark Brown, Stephen Hemminger, Stefan Richter, Greg KH

On Mon, 20 Sep 2010 12:43:08 -0700
Joe Perches <joe@perches.com> wrote:

> On Wed, 2010-09-15 at 17:16 +0200, florian@mickler.org wrote:
> > This is a first version of an interactive mode for
> > scripts/get_maintainer.pl .
> 
> Thank you Flo.
> 
> I've used Flo's initial patch and created a rather more
> complete version now built on top of mm with the 5
> get_maintainer patches posted earlier.
> 
> http://lkml.org/lkml/2010/9/16/150

Hi Joe!

Thx for wrapping this up. 

> 
> Flo and I have worked together on this and he has some
> upcoming enhancements that could improve the rerun
> speed quite a bit.
> 
> For now, this uses whatever command line options are
> passed as well as allows the user to change these
> settings from a prompt during use.
> 
> There are still defects in this version.
> 
> You can see below that Stephen Hemminger's sign-offs
> aren't correctly attributed because his MAINTAINER
> entry doesn't match how he normally signs-off
> (ie: @vyatta.com vs @linux-foundation.org)

I pondered quite a bit about how to tell developers apart. Here is how
it should work:

It is common for developers to commit via different mailaddresses,
while it is uncommon for two distinct developers to have the same name. 

So the only workable solution is to tell developers apart by their real
name (the name part of an Emailaddress of the form Jane Doe
<j.d@example.com> ).

Then the .mailmap also makes sense. :) 

If a new developer commits and his name clashes with some other
developer, the .mailmap has to be set up to attach a somewhat distinct
name to his commit-mailaddresses.  (I.e. another "Jane Doe" has to
have a .mailmap entry that map's her email-address to "Jane F. Doe" or
"Jane Doe 2" or whatever she prefers.)


 
> For example, the --interactive (or -i) menu looks like:
> 
> --------------------------------------------------------------------------------
> $ ./scripts/get_maintainer.pl -i -f drivers/net/sky2.c
> 
> *  # email/list and role:stats                                        auth sign
> *  1 Stephen Hemminger <shemminger@linux-foundation.org>              
>      maintainer:SKGE, SKY2 10/100...
> *  2 netdev@vger.kernel.org                                           
>      open list:SKGE, SKY2 10/100...
> *  3 linux-kernel@vger.kernel.org                                     
>      open list
> 
> #(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): 
> --------------------------------------------------------------------------------


I don't think the descriptions on the bottom are that descriptive. Nor
can they be. There should probably be just a quick primer like:

"([N]+:select) ([N]-:deselect) ([N]?:toggle infos) (Y:approve) (h:help)"

and on "h" a long description gets displayed.

> 
> with options:
> 
> Version Control options:
> g  use git history      [0]
> gf use git-fallback     [1]
> b  use git blame        [0]
> bs use blame signatures [1]
> c# minimum commits      [1]
> %# min percent          [5]
> d# history to use       [1-year-ago]
> x# max maintainers      [5]
> t  all signature types  [0]

good idea to let the user fumble with the options

> 
> Additional options:
> 0  toggle all
> f  emails in file       [0]
> k  keywords in file     [1]
> r  remove duplicates    [1]
> p# pattern match depth  [0]
> 
> Because git history is now not searched by default
> when there is a named maintainer, there are no
> commit signers.

Don't know if this is intuitive. If there is the possibility to have
them shown but not selected, that would be ideal as it relieves the
user from pressing extra keys while still having a sane behaviour.

> 
> If the "G" option is entered, you get:
> 
> #(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): g
> --------------------------------------------------------------------------------
> *  # email/list and role:stats                                        auth sign
> *  1 Stephen Hemminger <shemminger@linux-foundation.org>                 1    0
>      maintainer:SKGE, SKY2 10/100...,commit_signer:38/69=55%
>    2 "David S. Miller" <davem@davemloft.net>                             7   57
>      commit_signer:57/69=83%
>    3 Mike McCormack <mikem@ring3k.org>                                  16   16
>      commit_signer:16/69=23%
>    4 Joe Perches <joe@perches.com>                                       4    4
>      commit_signer:4/69=6%
> *  5 netdev@vger.kernel.org                                           
>      open list:SKGE, SKY2 10/100...
> *  6 linux-kernel@vger.kernel.org                                     
>      open list

I would try to fit reviewed-by in the statistics on the right, because
that actually says a bit about the ability and willingness to review
code... 

 
> #(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): 
> --------------------------------------------------------------------------------
> 
> git signers are not selected by default, these can be selected either
> individually by #, or all by *, or multiple can be selected/deselected
> from a single command like: "2,3"

I would just use a simple menu command-format (like I did in the patch I
sent you) with the following parts:
 
	o the number  (optionally, when ommitted takes effect for all
		menu items)
	o a '+'(select) or '-' (deselect) sign (optionally, when
		omitted, defaults to toggle action)
	o one of A(show authored commits) or S(show sob-commits)
	(optionally, when omitted, selects/deselects the menu item for
	stdout)

 
> To see what commits any individual contributor has authored or
> signed, enter "A" or "S" with the #(s)
> 
> --------------------------------------------------------------------------------
> #(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): a4
> 
> *  # email/list and role:stats                                        auth sign
> *  1 Stephen Hemminger <shemminger@linux-foundation.org>                 1    0
>      maintainer:SKGE, SKY2 10/100...,commit_signer:38/69=55%
>    2 "David S. Miller" <davem@davemloft.net>                             7   57
>      commit_signer:57/69=83%
>    3 Mike McCormack <mikem@ring3k.org>                                  16   16
>      commit_signer:16/69=23%
>    4 Joe Perches <joe@perches.com>                                       4    4
>      commit_signer:4/69=6%
>      Author: drivers/net/sky2.c: Use (pr|netdev)_<level> macro helpers
>      Author: drivers/net/sky2: Convert to use netif_printk macros
>      Author: drivers/net: Move && and || to end of previous line
>      Author: trivial: remove unnecessary semicolons
> *  5 netdev@vger.kernel.org                                           
>      open list:SKGE, SKY2 10/100...
> *  6 linux-kernel@vger.kernel.org                                     
>      open list
> 
> #(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): 
> --------------------------------------------------------------------------------
> 
> There are also significant improvements in HG runtime.
> HG is now faster than git for blame and commit signer
> lookups.

nice. :) 
 
I  
> -    my %hash;
> -    my $exact_pattern_match = 0;
> -    my $tvi = find_first_section();
> -    while ($tvi < @typevalue) {
> -	my $start = find_starting_index($tvi);
> -	my $end = find_ending_index($tvi);
> -	my $exclude = 0;
> -	my $i;
> -
> -	#Do not match excluded file patterns
> -
> -	for ($i = $start; $i < $end; $i++) {
> -	    my $line = $typevalue[$i];
> -	    if ($line =~ m/^(\C):\s*(.*)/) {
> -		my $type = $1;
> -		my $value = $2;
> -		if ($type eq 'X') {
> -		    if (file_match_pattern($file, $value)) {
> -			$exclude = 1;
> -			last;
> -		    }
> -		}
> -	    }
> -	}

Yes, most of the global code should just land in nicely named
functions. [ Else nobody knows what it does :-) ]

> +sub interactive_get_maintainer {
> +    my ($list_ref) = @_;
> +    my @list = @$list_ref;
> +
> +    vcs_exists();
> +
> +    my %selected;
> +    my %authored;
> +    my %signed;
> +    my $count = 0;
> +
> +    #select maintainers by default
> +    foreach my $entry (@list){
> +	my $role = $entry->[1];
> +	$selected{$count} = ($role =~ /^(maintainer|supporter|open list)/);
> +	$authored{$count} = 0;
> +	$signed{$count} = 0;
> +	$count++;
> +    }
> +
> +    #menu loop
> +    my $done = 0;
> +    my $print_options = 0;
> +    my $redraw = 1;
> +    while (!$done) {
> +	$count = 0;
> +	if ($redraw) {
> +	    printf STDERR "\n%1s %2s %-65sauth sign\n",
> +		"*", "#", "email/list and role:stats";
> +	    foreach my $entry (@list) {
> +		my $email = $entry->[0];
> +		my $role = $entry->[1];
> +		my $sel = "";
> +		$sel = "*" if ($selected{$count});
> +		my $commit_author = $commit_author_hash{$email};
> +		my $commit_signer = $commit_signer_hash{$email};
> +		my $authored = 0;
> +		my $signed = 0;
> +		$authored++ for (@{$commit_author});
> +		$signed++ for (@{$commit_signer});
> +		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
> +		printf STDERR "%4d %4d", $authored, $signed
> +		    if ($authored > 0 || $signed > 0);
> +		printf STDERR "\n     %s\n", $role;
> +		if ($authored{$count}) {
> +		    my $commit_author = $commit_author_hash{$email};
> +		    foreach my $ref (@{$commit_author}) {
> +			print STDERR "     Author: @{$ref}[1]\n";
> +		    }
> +		}
> +		if ($signed{$count}) {
> +		    my $commit_signer = $commit_signer_hash{$email};
> +		    foreach my $ref (@{$commit_signer}) {
> +			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
> +		    }
> +		}
> +
> +		$count++;
> +	    }
> +	}
> +	my $date_ref = \$email_git_since;
> +	$date_ref = \$email_hg_since if (vcs_is_hg());
> +	if ($print_options) {
> +	    $print_options = 0;
> +	    if (vcs_exists()) {
> +		print STDERR
> +"\nVersion Control options:\n" .
> +"g  use git history      [$email_git]\n" .
> +"gf use git-fallback     [$email_git_fallback]\n" .
> +"b  use git blame        [$email_git_blame]\n" .
> +"bs use blame signatures [$email_git_blame_signatures]\n" .
> +"c# minimum commits      [$email_git_min_signatures]\n" .
> +"%# min percent          [$email_git_min_percent]\n" .
> +"d# history to use       [$$date_ref]\n" .
> +"x# max maintainers      [$email_git_max_maintainers]\n" .
> +"t  all signature types  [$email_git_all_signature_types]\n";
> +	    }
> +	    print STDERR "\nAdditional options:\n" .
> +"0  toggle all\n" .
> +"f  emails in file       [$file_emails]\n" .
> +"k  keywords in file     [$keywords]\n" .
> +"r  remove duplicates    [$email_remove_duplicates]\n" .
> +"p# pattern match depth  [$pattern_depth]\n";
> +	}
> +	print STDERR
> +"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
> +
> +	my $input = <STDIN>;
> +	chomp($input);

[snip menu parsing code]

That should probably go in an extra function and be slimmed down, like
I did in a later version I sent you.



> +sub bool_invert {
> +    my ($bool_ref) = @_;
> +
> +    if ($$bool_ref) {
> +	$$bool_ref = 0;
> +    } else {
> +	$$bool_ref = 1;
> +    }
 +}

That should just be $$bool_ref = !$$bool_ref (and probably not a
function)


Regards,
Flo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] scripts/get_maintainer.pl: fix .mailmap handling
  2010-09-20 21:53       ` Florian Mickler
@ 2010-09-20 22:27         ` florian
  2010-09-20 22:35           ` florian
  2010-09-21  0:38         ` [RFC PATCH] scripts/get_maintainer.pl: add interactive mode Joe Perches
  1 sibling, 1 reply; 18+ messages in thread
From: florian @ 2010-09-20 22:27 UTC (permalink / raw)
  To: linux-kernel
  Cc: ebiederm, greg, stefanr, broonie, Florian Mickler, Andrew Morton,
	Joe Perches, Stephen Hemminger

The .mailmap handling was broken.

Implement it like the git-shortlog man page  explains it.

---

With this patch, I think telling developers apart by their Realname should be
the right thing to do.


 scripts/get_maintainer.pl |  148 +++++++++++++++++++++++++++++++++------------
 1 files changed, 110 insertions(+), 38 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index a91ae63..ad92897 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -255,31 +255,77 @@ while (<$maint>) {
 }
 close($maint);
 
-my %mailmap;
 
-if ($email_remove_duplicates) {
-    open(my $mailmap, '<', "${lk_path}.mailmap")
+#
+# Read mail address map
+#
+
+my $mailmap = read_mailmap();
+
+sub read_mailmap {
+    my $mnailmap = {
+	names => {},
+	addresses => {}
+   };
+
+    if (!$email_remove_duplicates) {
+	return $mailmap;
+    }
+
+    open(my $mailmap_file, '<', "${lk_path}.mailmap")
 	or warn "$P: Can't open .mailmap: $!\n";
-    while (<$mailmap>) {
-	my $line = $_;
 
-	next if ($line =~ m/^\s*#/);
-	next if ($line =~ m/^\s*$/);
+    while (<$mailmap_file>) {
+	s/#.*$//; #strip comments
+	s/^\s+|\s+$//g; #trim
 
-	my ($name, $address) = parse_email($line);
-	$line = format_email($name, $address, $email_usename);
+	next if (/^\s*$/); #skip empty lines
+	#print "entry: \"$_\"\n";
+	#entries have one of the following formats:
+	# name1 <mail1>
+	# <mail1> <mail2>
+	# name1 <mail1> <mail2>
+	# name1 <mail1> name2 <mail2>
+	# (see man git-shortlog)
+	if (/^(.+)<(.+)>$/) {
+		my $real_name = $1;
+		my $address = $2;
 
-	next if ($line =~ m/^\s*$/);
+		$real_name =~ s/\s+$//;
+		$mailmap->{names}->{$address} = $real_name;
 
-	if (exists($mailmap{$name})) {
-	    my $obj = $mailmap{$name};
-	    push(@$obj, $address);
-	} else {
-	    my @arr = ($address);
-	    $mailmap{$name} = \@arr;
+	} elsif (/^<([^\s]+)>\s*<([^\s]+)>$/) {
+		my $real_address = $1;
+		my $wrong_address = $2;
+
+		$mailmap->{addresses}->{$wrong_address} = $real_address;
+
+	} elsif (/^(.+)<([^\s]+)>\s*<([^\s]+)>$/) {
+		my $real_name= $1;
+		my $real_address = $2;
+		my $wrong_address = $3;
+
+		$real_name =~ s/\s+$//;
+
+		$mailmap->{names}->{$wrong_address} = $real_name;
+		$mailmap->{addresses}->{$wrong_address} = $real_address;
+
+	} elsif (/^(.+)<([^\s]+)>\s*([^\s].*)<([^\s]+)>$/) {
+		my $real_name = $1;
+		my $real_address = $2;
+		my $wrong_name = $3;
+		my $wrong_address = $4;
+
+		$real_name =~ s/\s+$//;
+		$wrong_name =~ s/\s+$//;
+
+		$mailmap->{names}->{format_email($wrong_name,$wrong_address,1)} = $real_name;
+		$mailmap->{addresses}->{format_email($wrong_name,$wrong_address,1)} = $real_address;
 	}
     }
-    close($mailmap);
+    close($mailmap_file);
+
+    return $mailmap;
 }
 
 ## use the filenames on the command line or find the filenames in the patchfiles
@@ -954,30 +1000,58 @@ sub which {
     return "";
 }
 
-sub mailmap {
-    my (@lines) = @_;
-    my %hash;
+sub mailmap_email {
+	my $line = shift;
 
-    foreach my $line (@lines) {
 	my ($name, $address) = parse_email($line);
-	if (!exists($hash{$name})) {
-	    $hash{$name} = $address;
-	} elsif ($address ne $hash{$name}) {
-	    $address = $hash{$name};
-	    $line = format_email($name, $address, $email_usename);
-	}
-	if (exists($mailmap{$name})) {
-	    my $obj = $mailmap{$name};
-	    foreach my $map_address (@$obj) {
-		if (($map_address eq $address) &&
-		    ($map_address ne $hash{$name})) {
-		    $line = format_email($name, $hash{$name}, $email_usename);
+	my $email = format_email($name, $address, 1);
+	my $real_name = $name;
+	my $real_address = $address;
+
+	if (exists $mailmap->{names}->{$email} || exists $mailmap->{addresses}->{$email}) {
+		if (exists $mailmap->{names}->{$email}) {
+			$real_name = $mailmap->{names}->{$email};
+		}
+		if (exists $mailmap->{addresses}->{$email}) {
+			$real_address = $mailmap->{addresses}->{$email};
+		}
+	} else {
+		if (exists $mailmap->{names}->{$address}) {
+			$real_name = $mailmap->{names}->{$address};
+		}
+		if (exists $mailmap->{addresses}->{$address}) {
+			$real_address = $mailmap->{addresses}->{$address};
 		}
-	    }
 	}
+	return format_email($real_name, $real_address, 1);
+}
+
+sub mailmap {
+    my (@addresses) = @_;
+
+    my @ret = ();
+    foreach my $line (@addresses) {
+	push(@ret, mailmap_email($line), 1);
     }
 
-    return @lines;
+    merge_by_realname(@ret) if $email_remove_duplicates;
+
+    return @ret;
+}
+
+sub merge_by_realname {
+	my %address_map;
+	my (@emails) = @_;
+	foreach my $email (@emails) {
+		my ($name, $address) = parse_email($email);
+		if (!exists $address_map{$name}) {
+			$address_map{$name} = $address;
+		} else {
+			$address = $address_map{$name};
+			$email = format_email($name,$address,1);
+		}
+	}
+
 }
 
 sub git_execute_cmd {
@@ -1148,9 +1222,7 @@ sub vcs_assign {
 	$divisor = 1;
     }
 
-    if ($email_remove_duplicates) {
-	@lines = mailmap(@lines);
-    }
+    @lines = mailmap(@lines);
 
     return if (@lines <= 0);
 
-- 
1.7.3


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH] scripts/get_maintainer.pl: fix .mailmap handling
  2010-09-20 22:27         ` [PATCH] scripts/get_maintainer.pl: fix .mailmap handling florian
@ 2010-09-20 22:35           ` florian
  2010-09-21  0:14             ` Joe Perches
  0 siblings, 1 reply; 18+ messages in thread
From: florian @ 2010-09-20 22:35 UTC (permalink / raw)
  To: linux-kernel
  Cc: ebiederm, greg, stefanr, broonie, florian, Andrew Morton,
	Joe Perches, Stephen Hemminger

From: florian@mickler.org <florian@mickler.org>

The .mailmap handling was broken.

Implement it like the git-shortlog man page  explains it.

Signed-off-by: Florian Mickler <florian@mickler.org>
---
changes: added sign off

    scripts/get_maintainer.pl |  148 +++++++++++++++++++++++++++++++++------------
 1 files changed, 110 insertions(+), 38 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index a91ae63..d6fdd1a 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -255,31 +255,77 @@ while (<$maint>) {
 }
 close($maint);
 
-my %mailmap;
 
-if ($email_remove_duplicates) {
-    open(my $mailmap, '<', "${lk_path}.mailmap")
+#
+# Read mail address map
+#
+
+my $mailmap = read_mailmap();
+
+sub read_mailmap {
+    my $mailmap = {
+	names => {},
+	addresses => {}
+   };
+
+    if (!$email_remove_duplicates) {
+	return $mailmap;
+    }
+
+    open(my $mailmap_file, '<', "${lk_path}.mailmap")
 	or warn "$P: Can't open .mailmap: $!\n";
-    while (<$mailmap>) {
-	my $line = $_;
 
-	next if ($line =~ m/^\s*#/);
-	next if ($line =~ m/^\s*$/);
+    while (<$mailmap_file>) {
+	s/#.*$//; #strip comments
+	s/^\s+|\s+$//g; #trim
 
-	my ($name, $address) = parse_email($line);
-	$line = format_email($name, $address, $email_usename);
+	next if (/^\s*$/); #skip empty lines
+	#print "entry: \"$_\"\n";
+	#entries have one of the following formats:
+	# name1 <mail1>
+	# <mail1> <mail2>
+	# name1 <mail1> <mail2>
+	# name1 <mail1> name2 <mail2>
+	# (see man git-shortlog)
+	if (/^(.+)<(.+)>$/) {
+		my $real_name = $1;
+		my $address = $2;
 
-	next if ($line =~ m/^\s*$/);
+		$real_name =~ s/\s+$//;
+		$mailmap->{names}->{$address} = $real_name;
 
-	if (exists($mailmap{$name})) {
-	    my $obj = $mailmap{$name};
-	    push(@$obj, $address);
-	} else {
-	    my @arr = ($address);
-	    $mailmap{$name} = \@arr;
+	} elsif (/^<([^\s]+)>\s*<([^\s]+)>$/) {
+		my $real_address = $1;
+		my $wrong_address = $2;
+
+		$mailmap->{addresses}->{$wrong_address} = $real_address;
+
+	} elsif (/^(.+)<([^\s]+)>\s*<([^\s]+)>$/) {
+		my $real_name= $1;
+		my $real_address = $2;
+		my $wrong_address = $3;
+
+		$real_name =~ s/\s+$//;
+
+		$mailmap->{names}->{$wrong_address} = $real_name;
+		$mailmap->{addresses}->{$wrong_address} = $real_address;
+
+	} elsif (/^(.+)<([^\s]+)>\s*([^\s].*)<([^\s]+)>$/) {
+		my $real_name = $1;
+		my $real_address = $2;
+		my $wrong_name = $3;
+		my $wrong_address = $4;
+
+		$real_name =~ s/\s+$//;
+		$wrong_name =~ s/\s+$//;
+
+		$mailmap->{names}->{format_email($wrong_name,$wrong_address,1)} = $real_name;
+		$mailmap->{addresses}->{format_email($wrong_name,$wrong_address,1)} = $real_address;
 	}
     }
-    close($mailmap);
+    close($mailmap_file);
+
+    return $mailmap;
 }
 
 ## use the filenames on the command line or find the filenames in the patchfiles
@@ -954,30 +1000,58 @@ sub which {
     return "";
 }
 
-sub mailmap {
-    my (@lines) = @_;
-    my %hash;
+sub mailmap_email {
+	my $line = shift;
 
-    foreach my $line (@lines) {
 	my ($name, $address) = parse_email($line);
-	if (!exists($hash{$name})) {
-	    $hash{$name} = $address;
-	} elsif ($address ne $hash{$name}) {
-	    $address = $hash{$name};
-	    $line = format_email($name, $address, $email_usename);
-	}
-	if (exists($mailmap{$name})) {
-	    my $obj = $mailmap{$name};
-	    foreach my $map_address (@$obj) {
-		if (($map_address eq $address) &&
-		    ($map_address ne $hash{$name})) {
-		    $line = format_email($name, $hash{$name}, $email_usename);
+	my $email = format_email($name, $address, 1);
+	my $real_name = $name;
+	my $real_address = $address;
+
+	if (exists $mailmap->{names}->{$email} || exists $mailmap->{addresses}->{$email}) {
+		if (exists $mailmap->{names}->{$email}) {
+			$real_name = $mailmap->{names}->{$email};
+		}
+		if (exists $mailmap->{addresses}->{$email}) {
+			$real_address = $mailmap->{addresses}->{$email};
+		}
+	} else {
+		if (exists $mailmap->{names}->{$address}) {
+			$real_name = $mailmap->{names}->{$address};
+		}
+		if (exists $mailmap->{addresses}->{$address}) {
+			$real_address = $mailmap->{addresses}->{$address};
 		}
-	    }
 	}
+	return format_email($real_name, $real_address, 1);
+}
+
+sub mailmap {
+    my (@addresses) = @_;
+
+    my @ret = ();
+    foreach my $line (@addresses) {
+	push(@ret, mailmap_email($line), 1);
     }
 
-    return @lines;
+    merge_by_realname(@ret) if $email_remove_duplicates;
+
+    return @ret;
+}
+
+sub merge_by_realname {
+	my %address_map;
+	my (@emails) = @_;
+	foreach my $email (@emails) {
+		my ($name, $address) = parse_email($email);
+		if (!exists $address_map{$name}) {
+			$address_map{$name} = $address;
+		} else {
+			$address = $address_map{$name};
+			$email = format_email($name,$address,1);
+		}
+	}
+
 }
 
 sub git_execute_cmd {
@@ -1148,9 +1222,7 @@ sub vcs_assign {
 	$divisor = 1;
     }
 
-    if ($email_remove_duplicates) {
-	@lines = mailmap(@lines);
-    }
+    @lines = mailmap(@lines);
 
     return if (@lines <= 0);
 
-- 
1.7.3


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] scripts/get_maintainer.pl: fix .mailmap handling
  2010-09-20 22:35           ` florian
@ 2010-09-21  0:14             ` Joe Perches
  2010-09-21  4:59               ` Florian Mickler
  0 siblings, 1 reply; 18+ messages in thread
From: Joe Perches @ 2010-09-21  0:14 UTC (permalink / raw)
  To: florian
  Cc: linux-kernel, ebiederm, greg, stefanr, broonie, Andrew Morton,
	Stephen Hemminger

On Tue, 2010-09-21 at 00:35 +0200, florian@mickler.org wrote:
> From: florian@mickler.org <florian@mickler.org>
> diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl

[]

> -	my ($name, $address) = parse_email($line);
> -	$line = format_email($name, $address, $email_usename);
> +	next if (/^\s*$/); #skip empty lines
> +	#print "entry: \"$_\"\n";

Please do not add commented out debugging code.
Add and use use some centralized debug(foo) call.

[]

> +sub mailmap {
> +    my (@addresses) = @_;
> +
> +    my @ret = ();

Suboptimal naming.  I try to use descriptive names.

cheers, Joe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] scripts/get_maintainer.pl: add interactive mode
  2010-09-20 21:53       ` Florian Mickler
  2010-09-20 22:27         ` [PATCH] scripts/get_maintainer.pl: fix .mailmap handling florian
@ 2010-09-21  0:38         ` Joe Perches
  2010-09-21  5:31           ` Florian Mickler
  1 sibling, 1 reply; 18+ messages in thread
From: Joe Perches @ 2010-09-21  0:38 UTC (permalink / raw)
  To: Florian Mickler
  Cc: linux-kernel, Eric W. Biederman, Andrew Morton, Wolfram Sang,
	Mark Brown, Stephen Hemminger, Stefan Richter, Greg KH

On Mon, 2010-09-20 at 23:53 +0200, Florian Mickler wrote:
> On Mon, 20 Sep 2010 12:43:08 -0700
> Joe Perches <joe@perches.com> wrote:
> > #(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): 
> I don't think the descriptions on the bottom are that descriptive. Nor
> can they be. There should probably be just a quick primer like:
> "([N]+:select) ([N]-:deselect) ([N]?:toggle infos) (Y:approve) (h:help)"
> and on "h" a long description gets displayed.

We disagree.  There could be a useful "H" help option.

> > Because git history is now not searched by default
> > when there is a named maintainer, there are no
> > commit signers.
> 
> Don't know if this is intuitive. If there is the possibility to have
> them shown but not selected, that would be ideal as it relieves the
> user from pressing extra keys while still having a sane behaviour.

Use a command line option: --git.

All command line options apply to create the initial list of
displayed names.

Or add code to set

	$git-fallback = 1 if $interactive;

> > 
> > If the "G" option is entered, you get:
> > 
> > #(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): g
> > --------------------------------------------------------------------------------
> > *  # email/list and role:stats                                        auth sign
> > *  1 Stephen Hemminger <shemminger@linux-foundation.org>                 1    0
> >      maintainer:SKGE, SKY2 10/100...,commit_signer:38/69=55%
> >    2 "David S. Miller" <davem@davemloft.net>                             7   57
> >      commit_signer:57/69=83%
> >    3 Mike McCormack <mikem@ring3k.org>                                  16   16
> >      commit_signer:16/69=23%
> >    4 Joe Perches <joe@perches.com>                                       4    4
> >      commit_signer:4/69=6%
> > *  5 netdev@vger.kernel.org                                           
> >      open list:SKGE, SKY2 10/100...
> > *  6 linux-kernel@vger.kernel.org                                     
> >      open list
> 
> I would try to fit reviewed-by in the statistics on the right, because
> that actually says a bit about the ability and willingness to review
> code... 

I think it's not worth it.

Only about 2% of signatures in git history are reviewed-by:

Using S shows the signature type.

Over the last year:
$ git log --since=1-year-ago | grep -i "by:.*@" | \
  cut -f1 -d":" | sort -i | uniq -ci | sort -rn | head -10
  83413     Signed-off-by
   6544     Acked-by
   2022     Reviewed-by
   1691     Reported-by
   1065     Tested-by
    111     Reported-and-tested-by
     83     Suggested-by
     31     Requested-by
     28         Signed-off-by
     26     Fixed-by

> [snip menu parsing code]
> 
> That should probably go in an extra function and be slimmed down, like
> I did in a later version I sent you.

Maybe.  I think it doesn't matter much though.
Menu handling code tends to get long.

> > +sub bool_invert {
> > +    my ($bool_ref) = @_;
> > +
> > +    if ($$bool_ref) {
> > +	$$bool_ref = 0;
> > +    } else {
> > +	$$bool_ref = 1;
> > +    }
>  +}
> 
> That should just be $$bool_ref = !$$bool_ref (and probably not a
> function)

I think it needs to be a function.
I want a 0 or 1, not "" or 1.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] scripts/get_maintainer.pl: fix .mailmap handling
  2010-09-21  0:14             ` Joe Perches
@ 2010-09-21  4:59               ` Florian Mickler
  2010-09-21  5:20                 ` Joe Perches
  0 siblings, 1 reply; 18+ messages in thread
From: Florian Mickler @ 2010-09-21  4:59 UTC (permalink / raw)
  To: Joe Perches
  Cc: linux-kernel, ebiederm, greg, stefanr, broonie, Andrew Morton,
	Stephen Hemminger

On Mon, 20 Sep 2010 17:14:45 -0700
Joe Perches <joe@perches.com> wrote:

> On Tue, 2010-09-21 at 00:35 +0200, florian@mickler.org wrote:
> > From: florian@mickler.org <florian@mickler.org>
> > diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
> 
> []
> 
> > -	my ($name, $address) = parse_email($line);
> > -	$line = format_email($name, $address, $email_usename);
> > +	next if (/^\s*$/); #skip empty lines
> > +	#print "entry: \"$_\"\n";
> 
> Please do not add commented out debugging code.
> Add and use use some centralized debug(foo) call.

Hm. Yes that might be better in the long run. 

> 
> []
> 
> > +sub mailmap {
> > +    my (@addresses) = @_;
> > +
> > +    my @ret = ();
> 
> Suboptimal naming.  I try to use descriptive names.
> 
> cheers, Joe
> 

:-) 

Come on, Joe! It was "sub mailmap" before. @addresses was @list before
and @ret is the list that get's returned by the function. 

If you think a patch is ok, you actually may say so. 

Regards,
Flo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] scripts/get_maintainer.pl: fix .mailmap handling
  2010-09-21  4:59               ` Florian Mickler
@ 2010-09-21  5:20                 ` Joe Perches
  2010-09-21  6:23                   ` Florian Mickler
  0 siblings, 1 reply; 18+ messages in thread
From: Joe Perches @ 2010-09-21  5:20 UTC (permalink / raw)
  To: Florian Mickler
  Cc: linux-kernel, ebiederm, greg, stefanr, broonie, Andrew Morton,
	Stephen Hemminger

On Tue, 2010-09-21 at 06:59 +0200, Florian Mickler wrote:
> Come on, Joe! It was "sub mailmap" before. @addresses was @list before
> and @ret is the list that get's returned by the function. 

Tell me, what's better as an individual line in perl
with typeless returns
	return @ret;
or
	return @list;

@ret is not descriptive.
Yeah, it's an array.
An array of what?
btw: @list was sub-optimal too.

> If you think a patch is ok, you actually may say so. 

As we discussed privately, yes, .mailmap handling was broken
but both of us seem to think that it's not even necessary as
most .mailmap entries are created manually and it's not
up-to-date.  Name de-duplication is more effective.

.mailmap could be used effectively I suppose, but I asked you
to show a use case where it performs differently than the
existing name de-duplication.

And, I do ack a patch after I think it is ready to be applied.
Until then, I note what I see as shortcomings and defects.

cheers, Joe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] scripts/get_maintainer.pl: add interactive mode
  2010-09-21  0:38         ` [RFC PATCH] scripts/get_maintainer.pl: add interactive mode Joe Perches
@ 2010-09-21  5:31           ` Florian Mickler
  2010-09-21  6:12             ` Joe Perches
  0 siblings, 1 reply; 18+ messages in thread
From: Florian Mickler @ 2010-09-21  5:31 UTC (permalink / raw)
  To: Joe Perches
  Cc: linux-kernel, Eric W. Biederman, Andrew Morton, Wolfram Sang,
	Mark Brown, Stephen Hemminger, Stefan Richter, Greg KH

On Mon, 20 Sep 2010 17:38:52 -0700
Joe Perches <joe@perches.com> wrote:

> On Mon, 2010-09-20 at 23:53 +0200, Florian Mickler wrote:
> > On Mon, 20 Sep 2010 12:43:08 -0700
> > Joe Perches <joe@perches.com> wrote:
> > > #(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): 
> > I don't think the descriptions on the bottom are that descriptive. Nor
> > can they be. There should probably be just a quick primer like:
> > "([N]+:select) ([N]-:deselect) ([N]?:toggle infos) (Y:approve) (h:help)"
> > and on "h" a long description gets displayed.
> 
> We disagree.  There could be a useful "H" help option.

Oh, we don't disagree that much then. After all, the help option is the
meat of the comment above. Bringing up the alternative syntax for the
prompt was just a suggestion.


> > > Because git history is now not searched by default
> > > when there is a named maintainer, there are no
> > > commit signers.
> > 
> > Don't know if this is intuitive. If there is the possibility to have
> > them shown but not selected, that would be ideal as it relieves the
> > user from pressing extra keys while still having a sane behaviour.
> 
> Use a command line option: --git.
> 
> All command line options apply to create the initial list of
> displayed names.
>
> Or add code to set
> 
> 	$git-fallback = 1 if $interactive;

Are you opposed to that second solution?



> 
> > > 
> > > If the "G" option is entered, you get:
> > > 
> > > #(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): g
> > > --------------------------------------------------------------------------------
> > > *  # email/list and role:stats                                        auth sign
> > > *  1 Stephen Hemminger <shemminger@linux-foundation.org>                 1    0
> > >      maintainer:SKGE, SKY2 10/100...,commit_signer:38/69=55%
> > >    2 "David S. Miller" <davem@davemloft.net>                             7   57
> > >      commit_signer:57/69=83%
> > >    3 Mike McCormack <mikem@ring3k.org>                                  16   16
> > >      commit_signer:16/69=23%
> > >    4 Joe Perches <joe@perches.com>                                       4    4
> > >      commit_signer:4/69=6%
> > > *  5 netdev@vger.kernel.org                                           
> > >      open list:SKGE, SKY2 10/100...
> > > *  6 linux-kernel@vger.kernel.org                                     
> > >      open list
> > 
> > I would try to fit reviewed-by in the statistics on the right, because
> > that actually says a bit about the ability and willingness to review
> > code... 
> 
> I think it's not worth it.
> 
> Only about 2% of signatures in git history are reviewed-by:
> 
> Using S shows the signature type.

Yes, that is nice. Did I say, I like the UI over all?

I think showing reviewed-by seperately is good nonetheless, as it might
give an increased awareness to the existence of that line.

In the xserver, no patch is comitted, that doesn't has an explicit
reviewed-by line. That is actually a good thing in my opinion.

> 
> Over the last year:
> $ git log --since=1-year-ago | grep -i "by:.*@" | \
>   cut -f1 -d":" | sort -i | uniq -ci | sort -rn | head -10
>   83413     Signed-off-by
>    6544     Acked-by
>    2022     Reviewed-by
>    1691     Reported-by
>    1065     Tested-by
>     111     Reported-and-tested-by
>      83     Suggested-by
>      31     Requested-by
>      28         Signed-off-by
>      26     Fixed-by


That's just sad. At least there are more Reviewed-By then Reported-By. 
I don't think this actually represents reality of the
development process. 


> > [snip menu parsing code]
> > 
> > That should probably go in an extra function and be slimmed down, like
> > I did in a later version I sent you.
> 
> Maybe.  I think it doesn't matter much though.
> Menu handling code tends to get long.

> > > +sub bool_invert {
> > > +    my ($bool_ref) = @_;
> > > +
> > > +    if ($$bool_ref) {
> > > +	$$bool_ref = 0;
> > > +    } else {
> > > +	$$bool_ref = 1;
> > > +    }
> >  +}
> > 
> > That should just be $$bool_ref = !$$bool_ref (and probably not a
> > function)
> 
> I think it needs to be a function.
> I want a 0 or 1, not "" or 1.

Sorry, I didn't look at the use of that. What about  

$bool = (1 - !!$bool) 

then? (if $bool is always 1 or 0, you can drop the double negation in
front of it)

Cheers,
Flo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] scripts/get_maintainer.pl: add interactive mode
  2010-09-21  5:31           ` Florian Mickler
@ 2010-09-21  6:12             ` Joe Perches
  2010-09-21  6:30               ` [PATCH v2] scripts/get_maintainer.pl: fix mailmap handling florian
  0 siblings, 1 reply; 18+ messages in thread
From: Joe Perches @ 2010-09-21  6:12 UTC (permalink / raw)
  To: Florian Mickler
  Cc: linux-kernel, Eric W. Biederman, Andrew Morton, Wolfram Sang,
	Mark Brown, Stephen Hemminger, Stefan Richter, Greg KH

On Tue, 2010-09-21 at 07:31 +0200, Florian Mickler wrote:
> On Mon, 20 Sep 2010 17:38:52 -0700
> Joe Perches <joe@perches.com> wrote:
> > > > Because git history is now not searched by default
> > > > when there is a named maintainer, there are no
> > > > commit signers.
> > > Don't know if this is intuitive. If there is the possibility to have
> > > them shown but not selected, that would be ideal as it relieves the
> > > user from pressing extra keys while still having a sane behaviour.
> > Use a command line option: --git.
> > All command line options apply to create the initial list of
> > displayed names.
> > Or add code to set
> > 	$git-fallback = 1 if $interactive;
> Are you opposed to that second solution?

Not at all.

I'd prefer to default select all returned entries though.
Maybe add some key to deselect the non-maintainer
git added entries.

> > $ git log --since=1-year-ago | grep -i "by:.*@" | \
> >   cut -f1 -d":" | sort -i | uniq -ci | sort -rn | head -10
> >   83413     Signed-off-by
> >    6544     Acked-by
> >    2022     Reviewed-by
> >    1691     Reported-by
> >    1065     Tested-by
> >     111     Reported-and-tested-by
> >      83     Suggested-by
> >      31     Requested-by
> >      28         Signed-off-by
> >      26     Fixed-by
> That's just sad. At least there are more Reviewed-By then Reported-By. 
> I don't think this actually represents reality of the
> development process. 

I think most all reviewers don't add reviewed-by acks
and most submitters don't collect them.

> > > That should just be $$bool_ref = !$$bool_ref (and probably not a
> > > function)
> > I think it needs to be a function.
> > I want a 0 or 1, not "" or 1.
> Sorry, I didn't look at the use of that. What about  
> $bool = (1 - !!$bool) 
> then? (if $bool is always 1 or 0, you can drop the double negation in
> front of it)

<shrug> Sure.  I don't care which is used.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] scripts/get_maintainer.pl: fix .mailmap handling
  2010-09-21  5:20                 ` Joe Perches
@ 2010-09-21  6:23                   ` Florian Mickler
  0 siblings, 0 replies; 18+ messages in thread
From: Florian Mickler @ 2010-09-21  6:23 UTC (permalink / raw)
  To: Joe Perches
  Cc: linux-kernel, ebiederm, greg, stefanr, broonie, Andrew Morton,
	Stephen Hemminger

On Mon, 20 Sep 2010 22:20:35 -0700
Joe Perches <joe@perches.com> wrote:

> On Tue, 2010-09-21 at 06:59 +0200, Florian Mickler wrote:

> @ret is not descriptive.
> Yeah, it's an array.
> An array of what?
> btw: @list was sub-optimal too.

:) ok. 

> As we discussed privately, yes, .mailmap handling was broken
> but both of us seem to think that it's not even necessary as
> most .mailmap entries are created manually and it's not
> up-to-date.  Name de-duplication is more effective.

No, no. I think .mailmap mangling should be applied before grouping 

name <address>  

together by name.


> And, I do ack a patch after I think it is ready to be applied.
> Until then, I note what I see as shortcomings and defects.

I send the patch with the removed #print as a reply. 

> 
> cheers, Joe
> 

Regards,
Flo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2] scripts/get_maintainer.pl: fix mailmap handling
  2010-09-21  6:12             ` Joe Perches
@ 2010-09-21  6:30               ` florian
  0 siblings, 0 replies; 18+ messages in thread
From: florian @ 2010-09-21  6:30 UTC (permalink / raw)
  To: linux-kernel
  Cc: ebiederm, greg, stefanr, broonie, Florian Mickler, Andrew Morton,
	Joe Perches, Stephen Hemminger

Implement it, like it is described in git-shortlog.

Signed-off-by: Florian Mickler <florian@mickler.org>
---

change: Removed superfluous comment.

 scripts/get_maintainer.pl |  147 +++++++++++++++++++++++++++++++++------------
 1 files changed, 109 insertions(+), 38 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index a91ae63..4219078 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -255,31 +255,76 @@ while (<$maint>) {
 }
 close($maint);
 
-my %mailmap;
 
-if ($email_remove_duplicates) {
-    open(my $mailmap, '<', "${lk_path}.mailmap")
+#
+# Read mail address map
+#
+
+my $mailmap = read_mailmap();
+
+sub read_mailmap {
+    my $mailmap = {
+	names => {},
+	addresses => {}
+   };
+
+    if (!$email_remove_duplicates) {
+	return $mailmap;
+    }
+
+    open(my $mailmap_file, '<', "${lk_path}.mailmap")
 	or warn "$P: Can't open .mailmap: $!\n";
-    while (<$mailmap>) {
-	my $line = $_;
 
-	next if ($line =~ m/^\s*#/);
-	next if ($line =~ m/^\s*$/);
+    while (<$mailmap_file>) {
+	s/#.*$//; #strip comments
+	s/^\s+|\s+$//g; #trim
 
-	my ($name, $address) = parse_email($line);
-	$line = format_email($name, $address, $email_usename);
+	next if (/^\s*$/); #skip empty lines
+	#entries have one of the following formats:
+	# name1 <mail1>
+	# <mail1> <mail2>
+	# name1 <mail1> <mail2>
+	# name1 <mail1> name2 <mail2>
+	# (see man git-shortlog)
+	if (/^(.+)<(.+)>$/) {
+		my $real_name = $1;
+		my $address = $2;
 
-	next if ($line =~ m/^\s*$/);
+		$real_name =~ s/\s+$//;
+		$mailmap->{names}->{$address} = $real_name;
 
-	if (exists($mailmap{$name})) {
-	    my $obj = $mailmap{$name};
-	    push(@$obj, $address);
-	} else {
-	    my @arr = ($address);
-	    $mailmap{$name} = \@arr;
+	} elsif (/^<([^\s]+)>\s*<([^\s]+)>$/) {
+		my $real_address = $1;
+		my $wrong_address = $2;
+
+		$mailmap->{addresses}->{$wrong_address} = $real_address;
+
+	} elsif (/^(.+)<([^\s]+)>\s*<([^\s]+)>$/) {
+		my $real_name= $1;
+		my $real_address = $2;
+		my $wrong_address = $3;
+
+		$real_name =~ s/\s+$//;
+
+		$mailmap->{names}->{$wrong_address} = $real_name;
+		$mailmap->{addresses}->{$wrong_address} = $real_address;
+
+	} elsif (/^(.+)<([^\s]+)>\s*([^\s].*)<([^\s]+)>$/) {
+		my $real_name = $1;
+		my $real_address = $2;
+		my $wrong_name = $3;
+		my $wrong_address = $4;
+
+		$real_name =~ s/\s+$//;
+		$wrong_name =~ s/\s+$//;
+
+		$mailmap->{names}->{format_email($wrong_name,$wrong_address,1)} = $real_name;
+		$mailmap->{addresses}->{format_email($wrong_name,$wrong_address,1)} = $real_address;
 	}
     }
-    close($mailmap);
+    close($mailmap_file);
+
+    return $mailmap;
 }
 
 ## use the filenames on the command line or find the filenames in the patchfiles
@@ -954,30 +999,58 @@ sub which {
     return "";
 }
 
-sub mailmap {
-    my (@lines) = @_;
-    my %hash;
+sub mailmap_email {
+	my $line = shift;
 
-    foreach my $line (@lines) {
 	my ($name, $address) = parse_email($line);
-	if (!exists($hash{$name})) {
-	    $hash{$name} = $address;
-	} elsif ($address ne $hash{$name}) {
-	    $address = $hash{$name};
-	    $line = format_email($name, $address, $email_usename);
-	}
-	if (exists($mailmap{$name})) {
-	    my $obj = $mailmap{$name};
-	    foreach my $map_address (@$obj) {
-		if (($map_address eq $address) &&
-		    ($map_address ne $hash{$name})) {
-		    $line = format_email($name, $hash{$name}, $email_usename);
+	my $email = format_email($name, $address, 1);
+	my $real_name = $name;
+	my $real_address = $address;
+
+	if (exists $mailmap->{names}->{$email} || exists $mailmap->{addresses}->{$email}) {
+		if (exists $mailmap->{names}->{$email}) {
+			$real_name = $mailmap->{names}->{$email};
+		}
+		if (exists $mailmap->{addresses}->{$email}) {
+			$real_address = $mailmap->{addresses}->{$email};
+		}
+	} else {
+		if (exists $mailmap->{names}->{$address}) {
+			$real_name = $mailmap->{names}->{$address};
+		}
+		if (exists $mailmap->{addresses}->{$address}) {
+			$real_address = $mailmap->{addresses}->{$address};
 		}
-	    }
 	}
+	return format_email($real_name, $real_address, 1);
+}
+
+sub mailmap {
+    my (@addresses) = @_;
+
+    my @ret = ();
+    foreach my $line (@addresses) {
+	push(@ret, mailmap_email($line), 1);
     }
 
-    return @lines;
+    merge_by_realname(@ret) if $email_remove_duplicates;
+
+    return @ret;
+}
+
+sub merge_by_realname {
+	my %address_map;
+	my (@emails) = @_;
+	foreach my $email (@emails) {
+		my ($name, $address) = parse_email($email);
+		if (!exists $address_map{$name}) {
+			$address_map{$name} = $address;
+		} else {
+			$address = $address_map{$name};
+			$email = format_email($name,$address,1);
+		}
+	}
+
 }
 
 sub git_execute_cmd {
@@ -1148,9 +1221,7 @@ sub vcs_assign {
 	$divisor = 1;
     }
 
-    if ($email_remove_duplicates) {
-	@lines = mailmap(@lines);
-    }
+    @lines = mailmap(@lines);
 
     return if (@lines <= 0);
 
-- 
1.7.3


^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2010-09-21  6:30 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-15 12:13 [PATCH rfc] scripts/get_maintainer.pl: add interactive mode florian
2010-09-15 13:43 ` [v2 PATCH " florian
2010-09-15 15:16   ` [PATCH RFC v3] " florian
2010-09-15 15:35     ` Joe Perches
2010-09-20 19:43     ` [RFC PATCH] " Joe Perches
2010-09-20 21:53       ` Florian Mickler
2010-09-20 22:27         ` [PATCH] scripts/get_maintainer.pl: fix .mailmap handling florian
2010-09-20 22:35           ` florian
2010-09-21  0:14             ` Joe Perches
2010-09-21  4:59               ` Florian Mickler
2010-09-21  5:20                 ` Joe Perches
2010-09-21  6:23                   ` Florian Mickler
2010-09-21  0:38         ` [RFC PATCH] scripts/get_maintainer.pl: add interactive mode Joe Perches
2010-09-21  5:31           ` Florian Mickler
2010-09-21  6:12             ` Joe Perches
2010-09-21  6:30               ` [PATCH v2] scripts/get_maintainer.pl: fix mailmap handling florian
2010-09-15 15:27   ` [v2 PATCH rfc] scripts/get_maintainer.pl: add interactive mode Joe Perches
2010-09-16  8:30     ` Florian Mickler

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.