All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] get_maintainer: Add a couple more --self-test options
@ 2017-11-06 17:27 Joe Perches
  2017-11-06 19:12 ` Tom Saeger
  0 siblings, 1 reply; 3+ messages in thread
From: Joe Perches @ 2017-11-06 17:27 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Tom Saeger, linux-kernel

Check for duplicate section headers and link reachability.

Miscellanea:

o Add --self-test=<foo> options (sections, patterns and scm for now)
  where the default without options is all tests
o Rename check_maintainers_patterns to self_test
o Rename self_test_pattern_info to self_test_info

Signed-off-by: Joe Perches <joe@perches.com>
cc: Tom Saeger <tom.saeger@oracle.com>
---
 scripts/get_maintainer.pl | 114 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 97 insertions(+), 17 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index c68a5d1ba709..748bff0790a8 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -57,7 +57,7 @@ my $sections = 0;
 my $file_emails = 0;
 my $from_filename = 0;
 my $pattern_depth = 0;
-my $self_test = 0;
+my $self_test = undef;
 my $version = 0;
 my $help = 0;
 my $find_maintainer_files = 0;
@@ -221,7 +221,7 @@ if (-f $ignore_file) {
 
 if ($#ARGV > 0) {
     foreach (@ARGV) {
-        if ($_ eq "-self-test" || $_ eq "--self-test") {
+        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
             die "$P: using --self-test does not allow any other option or argument\n";
         }
     }
@@ -263,7 +263,7 @@ if (!GetOptions(
 		'fe|file-emails!' => \$file_emails,
 		'f|file' => \$from_filename,
 		'find-maintainer-files' => \$find_maintainer_files,
-		'self-test' => \$self_test,
+		'self-test:s' => \$self_test,
 		'v|version' => \$version,
 		'h|help|usage' => \$help,
 		)) {
@@ -280,9 +280,9 @@ if ($version != 0) {
     exit 0;
 }
 
-if ($self_test) {
+if (defined $self_test) {
     read_all_maintainer_files();
-    check_maintainers_patterns();
+    self_test();
     exit 0;
 }
 
@@ -329,7 +329,7 @@ if (!top_of_kernel_tree($lk_path)) {
 my @typevalue = ();
 my %keyword_hash;
 my @mfiles = ();
-my @self_test_pattern_info = ();
+my @self_test_info = ();
 
 sub read_maintainer_file {
     my ($file) = @_;
@@ -339,6 +339,7 @@ sub read_maintainer_file {
     my $i = 1;
     while (<$maint>) {
 	my $line = $_;
+	chomp $line;
 
 	if ($line =~ m/^([A-Z]):\s*(.*)/) {
 	    my $type = $1;
@@ -353,17 +354,16 @@ sub read_maintainer_file {
 		if ((-d $value)) {
 		    $value =~ s@([^/])$@$1/@;
 		}
-		if ($self_test) {
-			push(@self_test_pattern_info, {file=>$file, line=>$line, linenr=>$i, pat=>$value});
-		}
 	    } elsif ($type eq "K") {
 		$keyword_hash{@typevalue} = $value;
 	    }
 	    push(@typevalue, "$type:$value");
 	} elsif (!(/^\s*$/ || /^\s*\#/)) {
-	    $line =~ s/\n$//g;
 	    push(@typevalue, $line);
 	}
+	if (defined $self_test) {
+	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
+	}
 	$i++;
     }
     close($maint);
@@ -614,17 +614,97 @@ if ($web) {
 
 exit($exit);
 
-sub check_maintainers_patterns {
+sub self_test {
     my @lsfiles = ();
+    my @good_links = ();
+    my @bad_links = ();
+    my @section_headers = ();
 
     @lsfiles = vcs_list_files($lk_path);
 
-    for my $x (@self_test_pattern_info) {
-        if (!grep(m@^$x->{pat}@, @lsfiles)) {
-            my $line = $x->{line};
-            chomp($line);
-            print("$x->{file}:$x->{linenr}: warning: no matches $line\n");
-        }
+    for my $x (@self_test_info) {
+
+	## Section header duplication
+	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
+	    $x->{line} =~ /^\S[^:]/) {
+	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
+		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
+	    } else {
+		push(@section_headers, $x->{line});
+	    }
+	}
+	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
+
+	my $type = $1;
+	my $value = $2;
+
+	## Filename pattern matching
+	if (($type eq "F" || $type eq "X") &&
+	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
+	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
+	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
+	    $value =~ s/\?/\./g;         ##Convert ? to .
+	    ##if pattern is a directory and it lacks a trailing slash, add one
+	    if ((-d $value)) {
+		$value =~ s@([^/])$@$1/@;
+	    }
+	    if (!grep(m@^$value@, @lsfiles)) {
+		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
+	    }
+
+	## Link reachability
+	} elsif (($type eq "W" ||
+		  $type eq "B" && $value =~ /^https?:/) &&
+		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
+	    next if (grep(m@^\Q$value\E$@, @good_links));
+	    my $isbad = 0;
+	    if (grep(m@^\Q$value\E$@, @bad_links)) {
+	        $isbad = 1;
+	    } else {
+		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
+		if ($? == 0) {
+		    push(@good_links, $value);
+		} else {
+		    push(@bad_links, $value);
+		    $isbad = 1;
+		}
+	    }
+	    if ($isbad) {
+	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
+	    }
+
+	## SCM reachability
+	} elsif (($type eq "T" && $value =~ /^(?:git|quilt|hg)\s+\S/) &&
+		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
+	    next if (grep(m@^\Q$value\E$@, @good_links));
+	    my $isbad = 0;
+	    if (grep(m@^\Q$value\E$@, @bad_links)) {
+	        $isbad = 1;
+	    } else {
+		if ($value =~ /^git\s+(\S+)/) {
+		    my $url = $1;
+		    my $output = `git ls-remote --exit-code -h "$url" > /dev/null 2>&1`;
+		    if ($? == 0) {
+			push(@good_links, $value);
+		    } else {
+			push(@bad_links, $value);
+			$isbad = 1;
+		    }
+		} elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
+		    my $url = $1;
+		    my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
+		    if ($? == 0) {
+			push(@good_links, $value);
+		    } else {
+			push(@bad_links, $value);
+			$isbad = 1;
+		    }
+		}
+		if ($isbad) {
+		    print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
+		}
+	    }
+	}
     }
 }
 
-- 
2.15.0

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] get_maintainer: Add a couple more --self-test options
  2017-11-06 17:27 [PATCH] get_maintainer: Add a couple more --self-test options Joe Perches
@ 2017-11-06 19:12 ` Tom Saeger
  2017-11-06 19:24   ` Joe Perches
  0 siblings, 1 reply; 3+ messages in thread
From: Tom Saeger @ 2017-11-06 19:12 UTC (permalink / raw)
  To: Joe Perches; +Cc: Andrew Morton, Tom Saeger, linux-kernel

Hi Joe,
    This is good!  I had something similar cooking - specifically for SCM validation.

My SCM attempt caught a few more issues:
    - check git branch if specified
    - check validitiy of "T:" entry, otherwise warn of malformed entry.

Example malformed (current next has two instances):

9740 T:      git://git.infradead.org/nvme.git

Should be:
9740 T:      git git://git.infradead.org/nvme.git


Also - I believe you intended on warning on all bad SCM entries, not just newly discovered ones?
Your change correctly finds a previously $isbad, however the print is enclosed in an else preventing output.

I was going to inline these, but in my haste to understand in incorporate changes I sanitized whitespace
(BTW - I see both tabs and spaces, which is preferred in this file?)

The below git branch special-casing is for these:
567:T:  git git://people.freedesktop.org/~airlied/linux (part of drm maint)
3671:T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)

See bottom for my suggestions.

--Tom



On Mon, Nov 06, 2017 at 09:27:25AM -0800, Joe Perches wrote:
> Check for duplicate section headers and link reachability.
> 
> Miscellanea:
> 
> o Add --self-test=<foo> options (sections, patterns and scm for now)
>   where the default without options is all tests
> o Rename check_maintainers_patterns to self_test
> o Rename self_test_pattern_info to self_test_info
> 
> Signed-off-by: Joe Perches <joe@perches.com>
> cc: Tom Saeger <tom.saeger@oracle.com>
> ---
>  scripts/get_maintainer.pl | 114 +++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 97 insertions(+), 17 deletions(-)
> 
> diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
> index c68a5d1ba709..748bff0790a8 100755
> --- a/scripts/get_maintainer.pl
> +++ b/scripts/get_maintainer.pl
> @@ -57,7 +57,7 @@ my $sections = 0;
>  my $file_emails = 0;
>  my $from_filename = 0;
>  my $pattern_depth = 0;
> -my $self_test = 0;
> +my $self_test = undef;
>  my $version = 0;
>  my $help = 0;
>  my $find_maintainer_files = 0;
> @@ -221,7 +221,7 @@ if (-f $ignore_file) {
>  
>  if ($#ARGV > 0) {
>      foreach (@ARGV) {
> -        if ($_ eq "-self-test" || $_ eq "--self-test") {
> +        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
>              die "$P: using --self-test does not allow any other option or argument\n";
>          }
>      }
> @@ -263,7 +263,7 @@ if (!GetOptions(
>  		'fe|file-emails!' => \$file_emails,
>  		'f|file' => \$from_filename,
>  		'find-maintainer-files' => \$find_maintainer_files,
> -		'self-test' => \$self_test,
> +		'self-test:s' => \$self_test,
>  		'v|version' => \$version,
>  		'h|help|usage' => \$help,
>  		)) {
> @@ -280,9 +280,9 @@ if ($version != 0) {
>      exit 0;
>  }
>  
> -if ($self_test) {
> +if (defined $self_test) {
>      read_all_maintainer_files();
> -    check_maintainers_patterns();
> +    self_test();
>      exit 0;
>  }
>  
> @@ -329,7 +329,7 @@ if (!top_of_kernel_tree($lk_path)) {
>  my @typevalue = ();
>  my %keyword_hash;
>  my @mfiles = ();
> -my @self_test_pattern_info = ();
> +my @self_test_info = ();
>  
>  sub read_maintainer_file {
>      my ($file) = @_;
> @@ -339,6 +339,7 @@ sub read_maintainer_file {
>      my $i = 1;
>      while (<$maint>) {
>  	my $line = $_;
> +	chomp $line;
>  
>  	if ($line =~ m/^([A-Z]):\s*(.*)/) {
>  	    my $type = $1;
> @@ -353,17 +354,16 @@ sub read_maintainer_file {
>  		if ((-d $value)) {
>  		    $value =~ s@([^/])$@$1/@;
>  		}
> -		if ($self_test) {
> -			push(@self_test_pattern_info, {file=>$file, line=>$line, linenr=>$i, pat=>$value});
> -		}
>  	    } elsif ($type eq "K") {
>  		$keyword_hash{@typevalue} = $value;
>  	    }
>  	    push(@typevalue, "$type:$value");
>  	} elsif (!(/^\s*$/ || /^\s*\#/)) {
> -	    $line =~ s/\n$//g;
>  	    push(@typevalue, $line);
>  	}
> +	if (defined $self_test) {
> +	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
> +	}
>  	$i++;
>      }
>      close($maint);
> @@ -614,17 +614,97 @@ if ($web) {
>  
>  exit($exit);
>  
> -sub check_maintainers_patterns {
> +sub self_test {
>      my @lsfiles = ();
> +    my @good_links = ();
> +    my @bad_links = ();
> +    my @section_headers = ();
>  
>      @lsfiles = vcs_list_files($lk_path);
>  
> -    for my $x (@self_test_pattern_info) {
> -        if (!grep(m@^$x->{pat}@, @lsfiles)) {
> -            my $line = $x->{line};
> -            chomp($line);
> -            print("$x->{file}:$x->{linenr}: warning: no matches $line\n");
> -        }
> +    for my $x (@self_test_info) {
> +
> +	## Section header duplication
> +	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
> +	    $x->{line} =~ /^\S[^:]/) {
> +	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
> +		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
> +	    } else {
> +		push(@section_headers, $x->{line});
> +	    }
> +	}
> +	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
> +
> +	my $type = $1;
> +	my $value = $2;
> +
> +	## Filename pattern matching
> +	if (($type eq "F" || $type eq "X") &&
> +	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
> +	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
> +	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
> +	    $value =~ s/\?/\./g;         ##Convert ? to .
> +	    ##if pattern is a directory and it lacks a trailing slash, add one
> +	    if ((-d $value)) {
> +		$value =~ s@([^/])$@$1/@;
> +	    }
> +	    if (!grep(m@^$value@, @lsfiles)) {
> +		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
> +	    }
> +
> +	## Link reachability
> +	} elsif (($type eq "W" ||
> +		  $type eq "B" && $value =~ /^https?:/) &&
> +		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
> +	    next if (grep(m@^\Q$value\E$@, @good_links));
> +	    my $isbad = 0;
> +	    if (grep(m@^\Q$value\E$@, @bad_links)) {
> +	        $isbad = 1;
> +	    } else {
> +		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
> +		if ($? == 0) {
> +		    push(@good_links, $value);
> +		} else {
> +		    push(@bad_links, $value);
> +		    $isbad = 1;
> +		}
> +	    }
> +	    if ($isbad) {
> +	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
> +	    }
> +
> +	## SCM reachability
> +	} elsif (($type eq "T" && $value =~ /^(?:git|quilt|hg)\s+\S/) &&
> +		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
> +	    next if (grep(m@^\Q$value\E$@, @good_links));
> +	    my $isbad = 0;
> +	    if (grep(m@^\Q$value\E$@, @bad_links)) {
> +	        $isbad = 1;
> +	    } else {
> +		if ($value =~ /^git\s+(\S+)/) {
> +		    my $url = $1;
> +		    my $output = `git ls-remote --exit-code -h "$url" > /dev/null 2>&1`;
> +		    if ($? == 0) {
> +			push(@good_links, $value);
> +		    } else {
> +			push(@bad_links, $value);
> +			$isbad = 1;
> +		    }
> +		} elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
> +		    my $url = $1;
> +		    my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
> +		    if ($? == 0) {
> +			push(@good_links, $value);
> +		    } else {
> +			push(@bad_links, $value);
> +			$isbad = 1;
> +		    }
> +		}
> +		if ($isbad) {
> +		    print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
> +		}
> +	    }
>      }
>  }
>  
> -- 
> 2.15.0
> 

Changed SCM portion to this, which picks up a few more warnings...
Checks git branch on remote if specified.
Perhaps a $ismalformed category or some other way to deal with malformed entries?  Or just
move up to first check of SCM?

         ## SCM reachability
         } elsif (($type eq "T") && ($self_test eq "" || $self_test =~ /\bscm\b/)) {
             next if (grep(m@^\Q$value\E$@, @good_links));
             my $isbad = 0;
             if (grep(m@^\Q$value\E$@, @bad_links)) {
                 $isbad = 1;
             } else {
                 if ($value !~ /^(?:git|quilt|hg)\s+\S/) {
                     print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
                 } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
                     my $url = $1;
                     my $branch = "";
                     $branch = $3 if $3;
                     my $output = `git ls-remote --exit-code -h "$url" $branch> /dev/null 2>&1`;
                     if ($? == 0) {
                         push(@good_links, $value);
                     } else {
                         push(@bad_links, $value);
                         $isbad = 1;
                     }
                 } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
                     my $url = $1;
                     my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
                     if ($? == 0) {
                         push(@good_links, $value);
                     } else {
                         push(@bad_links, $value);
                         $isbad = 1;
                     }
                 }
             }
             if ($isbad) {
                 print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
             }
         }
     }
  }

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] get_maintainer: Add a couple more --self-test options
  2017-11-06 19:12 ` Tom Saeger
@ 2017-11-06 19:24   ` Joe Perches
  0 siblings, 0 replies; 3+ messages in thread
From: Joe Perches @ 2017-11-06 19:24 UTC (permalink / raw)
  To: Tom Saeger; +Cc: Andrew Morton, linux-kernel

On Mon, 2017-11-06 at 13:12 -0600, Tom Saeger wrote:
> Hi Joe,
>     This is good!  I had something similar cooking - specifically for SCM validation.
> 
> My SCM attempt caught a few more issues:
>     - check git branch if specified
>     - check validitiy of "T:" entry, otherwise warn of malformed entry.
> 
> Example malformed (current next has two instances):
> 
> 9740 T:      git://git.infradead.org/nvme.git
> 
> Should be:
> 9740 T:      git git://git.infradead.org/nvme.git
> 
> 
> Also - I believe you intended on warning on all bad SCM entries, not just newly discovered ones?
> Your change correctly finds a previously $isbad, however the print is enclosed in an else preventing output.
> 
> I was going to inline these, but in my haste to understand in incorporate changes I sanitized whitespace
> (BTW - I see both tabs and spaces, which is preferred in this file?)

The indent in get_maintainers is supposed to be 4.

There is a mix of 8 char tabs and spaces, but there
shouldn't be any spaces followed by tabs.

> The below git branch special-casing is for these:
> 567:T:  git git://people.freedesktop.org/~airlied/linux (part of drm maint)
> 3671:T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
> 
> See bottom for my suggestions.
[]
> Changed SCM portion to this, which picks up a few more warnings...
> Checks git branch on remote if specified.
> Perhaps a $ismalformed category or some other way to deal with malformed entries?  Or just
> move up to first check of SCM?
> 
>          ## SCM reachability
>          } elsif (($type eq "T") && ($self_test eq "" || $self_test =~ /\bscm\b/)) {
>              next if (grep(m@^\Q$value\E$@, @good_links));
>              my $isbad = 0;
>              if (grep(m@^\Q$value\E$@, @bad_links)) {
>                  $isbad = 1;
>              } else {
>                  if ($value !~ /^(?:git|quilt|hg)\s+\S/) {
>                      print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
>                  } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {

This seems OK.

>                      my $url = $1;
>                      my $branch = "";
>                      $branch = $3 if $3;
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-11-06 19:24 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-06 17:27 [PATCH] get_maintainer: Add a couple more --self-test options Joe Perches
2017-11-06 19:12 ` Tom Saeger
2017-11-06 19:24   ` Joe Perches

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.