linux-kernel-mentees.lists.linuxfoundation.org archive mirror
 help / color / mirror / Atom feed
* [Linux-kernel-mentees] [PATCH v4] checkpatch: add fix and improve warning msg for non-standard signature
@ 2020-11-28 13:05 Aditya Srivastava
  2020-11-28 15:40 ` Joe Perches
  0 siblings, 1 reply; 6+ messages in thread
From: Aditya Srivastava @ 2020-11-28 13:05 UTC (permalink / raw)
  To: joe; +Cc: linux-kernel-mentees, linux-kernel, yashsri421

Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
styles.

A large number of these warnings occur because of typos in
signature tags. An evaluation over v4.13..v5.8 showed that out of 539
warnings due to non-standard signatures, 87 are due to typos.

Following are the standard signature tags which are often incorrectly
used, along with their individual counts of incorrect use (over
v4.13..v5.8):

 Reviewed-by: 42
 Signed-off-by: 25
 Reported-by: 6
 Acked-by: 4
 Tested-by: 4
 Suggested-by: 4

Provide a fix by calculating the Levenshtein distance between the signature
tag and each of the standard signatures, and suggest a standard signature
whose edit distance from the misspelled signature is less than or equal to 2.

Out of the 86 misspelled signatures fixed with this approach, 85 were
found to be good corrections and 1 was a bad correction.

Following was found to be a bad correction:
 Tweeted-by (count: 1) => Tested-by

Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
---
changes in v2: modify commit message: replace specific example with overall evaluation, minor changes

changes in v3: summarize commit message

changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')

 scripts/checkpatch.pl | 85 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index fdfd5ec09be6..2b1afd763d8d 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -506,6 +506,77 @@ our $signature_tags = qr{(?xi:
 	Cc:
 )};
 
+sub get_min {
+	my (@arr) = @_;
+	my $len = scalar @arr;
+	if((scalar @arr) < 1) {
+		# if underflow, return
+		return;
+	}
+	my $min = $arr[0];
+	for my $i (0 .. ($len-1)) {
+		if ($arr[$i] < $min) {
+			$min = $arr[$i];
+		}
+	}
+	return $min;
+}
+
+sub get_edit_distance {
+	my ($str1, $str2) = @_;
+	my $len1 = length($str1);
+	my $len2 = length($str2);
+	# two dimensional array storing minimum edit distance
+	my @distance;
+	for my $i (0 .. $len1) {
+		for my $j (0 .. $len2) {
+			if ($i == 0) {
+				$distance[$i][$j] = $j;
+			}
+			elsif ($j == 0) {
+				$distance[$i][$j] = $i;
+			}
+			elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
+				$distance[$i][$j] = $distance[$i - 1][$j - 1];
+			}
+			else {
+				my $dist1 = $distance[$i][$j - 1]; #insert distance
+				my $dist2 = $distance[$i - 1][$j]; # remove
+				my $dist3 = $distance[$i - 1][$j - 1]; #replace
+				$distance[$i][$j] = 1 + get_min($dist1, $dist2, $dist3);
+			}
+		}
+	}
+	return $distance[$len1][$len2];
+}
+
+sub get_standard_signature {
+	my ($sign_off) = @_;
+	$sign_off = lc($sign_off);
+	$sign_off =~ s/\-//g; # to match with formed hash
+	my @standard_signature_tags = (
+		'signed-off-by:', 'co-developed-by:', 'acked-by:', 'tested-by:',
+		'reviewed-by:', 'reported-by:', 'suggested-by:'
+	);
+	# setting default values
+	my $standard_signature = 'signed-off-by';
+	my $min_edit_distance = 20;
+	my $edit_distance;
+	foreach (@standard_signature_tags) {
+		my $signature = $_;
+		$_ =~ s/\-//g;
+		$edit_distance = get_edit_distance($sign_off, $_);
+		if ($edit_distance < $min_edit_distance) {
+			$min_edit_distance = $edit_distance;
+			$standard_signature = $signature;
+		}
+	}
+        if($min_edit_distance<=2) {
+		return ucfirst($standard_signature);
+        }
+	return "";
+}
+
 our @typeListMisordered = (
 	qr{char\s+(?:un)?signed},
 	qr{int\s+(?:(?:un)?signed\s+)?short\s},
@@ -2773,8 +2844,18 @@ sub process {
 			my $ucfirst_sign_off = ucfirst(lc($sign_off));
 
 			if ($sign_off !~ /$signature_tags/) {
-				WARN("BAD_SIGN_OFF",
-				     "Non-standard signature: $sign_off\n" . $herecurr);
+				my $suggested_signature = get_standard_signature($sign_off);
+				if ($suggested_signature eq "") {
+					WARN("BAD_SIGN_OFF",
+					"Non-standard signature: $sign_off\n" . $herecurr);
+				}
+				else {
+					if (WARN("BAD_SIGN_OFF",
+						 "Non-standard signature: $sign_off. Please use '$suggested_signature' instead\n" . $herecurr) &&
+					    $fix) {
+						$fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
+					}
+				}
 			}
 			if (defined $space_before && $space_before ne "") {
 				if (WARN("BAD_SIGN_OFF",
-- 
2.17.1

_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Linux-kernel-mentees] [PATCH v4] checkpatch: add fix and improve warning msg for non-standard signature
  2020-11-28 13:05 [Linux-kernel-mentees] [PATCH v4] checkpatch: add fix and improve warning msg for non-standard signature Aditya Srivastava
@ 2020-11-28 15:40 ` Joe Perches
  2020-11-28 18:35   ` [Linux-kernel-mentees] [PATCH v5] " Aditya Srivastava
  0 siblings, 1 reply; 6+ messages in thread
From: Joe Perches @ 2020-11-28 15:40 UTC (permalink / raw)
  To: Aditya Srivastava; +Cc: linux-kernel-mentees, linux-kernel

On Sat, 2020-11-28 at 18:35 +0530, Aditya Srivastava wrote:
> Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
> styles.
> 
> A large number of these warnings occur because of typo mistakes in
> signature tags. An evaluation over v4.13..v5.8 showed that out of 539
> warnings due to non-standard signatures, 87 are due to typo mistakes.
> 
> Following are the standard signature tags which are often incorrectly
> used, along with their individual counts of incorrect use (over
> v4.13..v5.8):
> 
>  Reviewed-by: 42
>  Signed-off-by: 25
>  Reported-by: 6
>  Acked-by: 4
>  Tested-by: 4
>  Suggested-by: 4
> 
> Provide a fix by calculating levenshtein distance for the signature tag
> with all the standard signatures and suggest a fix with a signature, whose
> edit distance is less than or equal to 2 with the misspelled signature.
> 
> Out of the 86 mispelled signatures fixed with this approach, 85 were
> found to be good corrections and 1 was bad correction.
> 
> Following was found to be a bad correction:
>  Tweeted-by (count: 1) => Tested-by
> 
> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
> ---
> changes in v2: modify commit message: replace specific example with overall evaluation, minor changes
> 
> changes in v3: summarize commit message
> 
> changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')

Seems OKish but this needs style modifications as there are
several whitespace uses that don't match the typical forms
and perhaps some new function naming could be improved.

> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
[]
> @@ -506,6 +506,77 @@ our $signature_tags = qr{(?xi:
>  	Cc:
>  )};
>  
> 
> +sub get_min {

probably a poor name choice.  Maybe edit_distance_min

> +	my (@arr) = @_;
> +	my $len = scalar @arr;
> +	if((scalar @arr) < 1) {

space after if

> +		# if underflow, return
> +		return;
> +	}
> +	my $min = $arr[0];
> +	for my $i (0 .. ($len-1)) {
> +		if ($arr[$i] < $min) {
> +			$min = $arr[$i];
> +		}
> +	}
> +	return $min;
> +}
> +
> +sub get_edit_distance {
> +	my ($str1, $str2) = @_;

maybe lc($str) =~ s/-//g; here instead of the code in the caller

> +	my $len1 = length($str1);
> +	my $len2 = length($str2);
> +	# two dimensional array storing minimum edit distance
> +	my @distance;
> +	for my $i (0 .. $len1) {
> +		for my $j (0 .. $len2) {
> +			if ($i == 0) {
> +				$distance[$i][$j] = $j;
> +			}
> +			elsif ($j == 0) {

} elsif {

> +				$distance[$i][$j] = $i;
> +			}
> +			elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
> +				$distance[$i][$j] = $distance[$i - 1][$j - 1];
> +			}
> +			else {

} else {

> +				my $dist1 = $distance[$i][$j - 1]; #insert distance
> +				my $dist2 = $distance[$i - 1][$j]; # remove
> +				my $dist3 = $distance[$i - 1][$j - 1]; #replace
> +				$distance[$i][$j] = 1 + get_min($dist1, $dist2, $dist3);
> +			}
> +		}
> +	}
> +	return $distance[$len1][$len2];
> +}
> +
> +sub get_standard_signature {

find_standard_signature ?

> +	my ($sign_off) = @_;
> +	$sign_off = lc($sign_off);
> +	$sign_off =~ s/\-//g; # to match with formed hash

why not strip the dashes in get_edit_distance instead
of using this weird dance with dashes here?

> +	my @standard_signature_tags = (
> +		'signed-off-by:', 'co-developed-by:', 'acked-by:', 'tested-by:',
> +		'reviewed-by:', 'reported-by:', 'suggested-by:'
> +	);
> +	# setting default values
> +	my $standard_signature = 'signed-off-by';

why does this need to be given a value?

> +	my $min_edit_distance = 20;
> +	my $edit_distance;
> +	foreach (@standard_signature_tags) {
> +		my $signature = $_;
> +		$_ =~ s/\-//g;

and this dancing here

> +		$edit_distance = get_edit_distance($sign_off, $_);
> +		if ($edit_distance < $min_edit_distance) {
> +			$min_edit_distance = $edit_distance;
> +			$standard_signature = $signature;
> +		}
> +	}
> +        if($min_edit_distance<=2) {

bad indentation, if (, spaces around test <=

> +		return ucfirst($standard_signature);
> +        }

bad indentation

> +	return "";
> +}
> +
>  our @typeListMisordered = (
>  	qr{char\s+(?:un)?signed},
>  	qr{int\s+(?:(?:un)?signed\s+)?short\s},
> @@ -2773,8 +2844,18 @@ sub process {
>  			my $ucfirst_sign_off = ucfirst(lc($sign_off));
>  
> 
>  			if ($sign_off !~ /$signature_tags/) {
> -				WARN("BAD_SIGN_OFF",
> -				     "Non-standard signature: $sign_off\n" . $herecurr);
> +				my $suggested_signature = get_standard_signature($sign_off);
> +				if ($suggested_signature eq "") {
> +					WARN("BAD_SIGN_OFF",
> +					"Non-standard signature: $sign_off\n" . $herecurr);

bad alignment

> +				}
> +				else {

} else {

> +					if (WARN("BAD_SIGN_OFF",
> +						 "Non-standard signature: $sign_off. Please use '$suggested_signature' instead\n" . $herecurr) &&

"perhaps" rather than "please use" or "likely typo of"

> +					    $fix) {
> +						$fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
> +					}
> +				}
>  			}
>  			if (defined $space_before && $space_before ne "") {
>  				if (WARN("BAD_SIGN_OFF",


_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Linux-kernel-mentees] [PATCH v5] checkpatch: add fix and improve warning msg for non-standard signature
  2020-11-28 15:40 ` Joe Perches
@ 2020-11-28 18:35   ` Aditya Srivastava
  2020-11-28 19:12     ` Joe Perches
  0 siblings, 1 reply; 6+ messages in thread
From: Aditya Srivastava @ 2020-11-28 18:35 UTC (permalink / raw)
  To: joe; +Cc: linux-kernel-mentees, linux-kernel, yashsri421

Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
styles.

A large number of these warnings occur because of typos in
signature tags. An evaluation over v4.13..v5.8 showed that out of 539
warnings due to non-standard signatures, 87 are due to typos.

Following are the standard signature tags which are often incorrectly
used, along with their individual counts of incorrect use (over
v4.13..v5.8):

 Reviewed-by: 42
 Signed-off-by: 25
 Reported-by: 6
 Acked-by: 4
 Tested-by: 4
 Suggested-by: 4

Provide a fix by calculating the Levenshtein distance between the signature
tag and each of the standard signatures, and suggest a standard signature
whose edit distance from the misspelled signature is less than or equal to 2.

Out of the 86 misspelled signatures fixed with this approach, 85 were
found to be good corrections and 1 was a bad correction.

Following was found to be a bad correction:
 Tweeted-by (count: 1) => Tested-by

Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
---
applies on next-20201120

changes in v2: modify commit message: replace specific example with overall evaluation, minor changes

changes in v3: summarize commit message

changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')

changes in v5: modify coding styles: improve function names, whitespaces

 scripts/checkpatch.pl | 79 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index fdfd5ec09be6..e372d26d03dc 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -506,6 +506,72 @@ our $signature_tags = qr{(?xi:
 	Cc:
 )};
 
+sub edit_distance_min {
+	my (@arr) = @_;
+	my $len = scalar @arr;
+	if ((scalar @arr) < 1) {
+		# if underflow, return
+		return;
+	}
+	my $min = $arr[0];
+	for my $i (0 .. ($len-1)) {
+		if ($arr[$i] < $min) {
+			$min = $arr[$i];
+		}
+	}
+	return $min;
+}
+
+sub get_edit_distance {
+	my ($str1, $str2) = @_;
+	$str1 = lc($str1);
+	$str1 =~ s/-//g;
+	$str2 =~ s/-//g;
+	my $len1 = length($str1);
+	my $len2 = length($str2);
+	# two dimensional array storing minimum edit distance
+	my @distance;
+	for my $i (0 .. $len1) {
+		for my $j (0 .. $len2) {
+			if ($i == 0) {
+				$distance[$i][$j] = $j;
+			} elsif ($j == 0) {
+				$distance[$i][$j] = $i;
+			} elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
+				$distance[$i][$j] = $distance[$i - 1][$j - 1];
+			} else {
+				my $dist1 = $distance[$i][$j - 1]; #insert distance
+				my $dist2 = $distance[$i - 1][$j]; # remove
+				my $dist3 = $distance[$i - 1][$j - 1]; #replace
+				$distance[$i][$j] = 1 + edit_distance_min($dist1, $dist2, $dist3);
+			}
+		}
+	}
+	return $distance[$len1][$len2];
+}
+
+sub find_standard_signature {
+	my ($sign_off) = @_;
+	my @standard_signature_tags = (
+		'signed-off-by:', 'co-developed-by:', 'acked-by:', 'tested-by:',
+		'reviewed-by:', 'reported-by:', 'suggested-by:'
+	);
+	my $standard_signature;
+	my $min_edit_distance = 20; # setting default value
+	my $edit_distance;
+	foreach (@standard_signature_tags) {
+		$edit_distance = get_edit_distance($sign_off, $_);
+		if ($edit_distance < $min_edit_distance) {
+			$min_edit_distance = $edit_distance;
+			$standard_signature = $_;
+		}
+	}
+        if ($min_edit_distance <= 2) {
+		return ucfirst($standard_signature);
+	}
+	return "";
+}
+
 our @typeListMisordered = (
 	qr{char\s+(?:un)?signed},
 	qr{int\s+(?:(?:un)?signed\s+)?short\s},
@@ -2773,8 +2839,17 @@ sub process {
 			my $ucfirst_sign_off = ucfirst(lc($sign_off));
 
 			if ($sign_off !~ /$signature_tags/) {
-				WARN("BAD_SIGN_OFF",
-				     "Non-standard signature: $sign_off\n" . $herecurr);
+				my $suggested_signature = find_standard_signature($sign_off);
+				if ($suggested_signature eq "") {
+					WARN("BAD_SIGN_OFF",
+					     "Non-standard signature: $sign_off\n" . $herecurr);
+				} else {
+					if (WARN("BAD_SIGN_OFF",
+						 "Non-standard signature: $sign_off. Perhaps '$suggested_signature'\n" . $herecurr) &&
+					    $fix) {
+						$fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
+					}
+				}
 			}
 			if (defined $space_before && $space_before ne "") {
 				if (WARN("BAD_SIGN_OFF",
-- 
2.17.1

_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Linux-kernel-mentees] [PATCH v5] checkpatch: add fix and improve warning msg for non-standard signature
  2020-11-28 18:35   ` [Linux-kernel-mentees] [PATCH v5] " Aditya Srivastava
@ 2020-11-28 19:12     ` Joe Perches
  2020-11-28 20:43       ` [Linux-kernel-mentees] [PATCH v6] " Aditya Srivastava
  0 siblings, 1 reply; 6+ messages in thread
From: Joe Perches @ 2020-11-28 19:12 UTC (permalink / raw)
  To: Aditya Srivastava; +Cc: linux-kernel-mentees, linux-kernel

On Sun, 2020-11-29 at 00:05 +0530, Aditya Srivastava wrote:
> Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
> styles.

Seems OK, but here are some last trivial notes:

> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
[]
> +sub find_standard_signature {
> +	my ($sign_off) = @_;
> +	my @standard_signature_tags = (
> +		'signed-off-by:', 'co-developed-by:', 'acked-by:', 'tested-by:',
> +		'reviewed-by:', 'reported-by:', 'suggested-by:'

I would change this to the normal signatures:

	my @standard_signature_tags = (
		'Signed-off-by:', 'Co-developed-by:', 'Acked-by:', 'Tested-by:',
		'Reviewed-by:', 'Reported-by:', 'Suggested-by:'

> +	);
> +	my $standard_signature;
> +	my $min_edit_distance = 20; # setting default value

20 seems arbitrary, maybe (~0 << 1) ?

> +	my $edit_distance;

move this into the foreach (or maybe not use this at all)

> +	foreach (@standard_signature_tags) {

foreach style in this code uses foreach my $<something> and not $_

	foreach my $standard (@standard_signature_tags) {

> +		$edit_distance = get_edit_distance($sign_off, $_);

So:

		my $edit_distance = get_edit_distance($sign_off, $standard);

> +		if ($edit_distance < $min_edit_distance) {
> +			$min_edit_distance = $edit_distance;
> +			$standard_signature = $_;
> +		}
> +	}
> +        if ($min_edit_distance <= 2) {
> +		return ucfirst($standard_signature);

	return $standard;

Though maybe it's simpler to test in the loop if it's <= 2 as
the lowercase and dash strip is done inside get_edit_distance
so this seems rather simpler:

	foreach my $standard (@standard_signature_tags) {
		return $standard if (get_edit_distance($sign_off, $standard) <= 2);
	}

	return "";

> @@ -2773,8 +2839,17 @@ sub process {
>  			my $ucfirst_sign_off = ucfirst(lc($sign_off));
>  
> 
>  			if ($sign_off !~ /$signature_tags/) {
> -				WARN("BAD_SIGN_OFF",
> -				     "Non-standard signature: $sign_off\n" . $herecurr);
> +				my $suggested_signature = find_standard_signature($sign_off);
> +				if ($suggested_signature eq "") {
> +					WARN("BAD_SIGN_OFF",
> +					     "Non-standard signature: $sign_off\n" . $herecurr);
> +				} else {
> +					if (WARN("BAD_SIGN_OFF",
> +						 "Non-standard signature: $sign_off. Perhaps '$suggested_signature'\n" . $herecurr) &&

Please use consistent '' or nothing around signatures:

						"Non-standard signature: '$sign_off' - likely typo of '$suggested_signature'\n" . $herecurr) &&

> +					    $fix) {
> +						$fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
> +					}
> +				}
>  			}
>  			if (defined $space_before && $space_before ne "") {
>  				if (WARN("BAD_SIGN_OFF",


_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Linux-kernel-mentees] [PATCH v6] checkpatch: add fix and improve warning msg for non-standard signature
  2020-11-28 19:12     ` Joe Perches
@ 2020-11-28 20:43       ` Aditya Srivastava
  2020-11-28 20:57         ` Joe Perches
  0 siblings, 1 reply; 6+ messages in thread
From: Aditya Srivastava @ 2020-11-28 20:43 UTC (permalink / raw)
  To: joe; +Cc: linux-kernel-mentees, linux-kernel, yashsri421

Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
styles.

A large number of these warnings occur because of typos in
signature tags. An evaluation over v4.13..v5.8 showed that out of 539
warnings due to non-standard signatures, 87 are due to typos.

Following are the standard signature tags which are often incorrectly
used, along with their individual counts of incorrect use (over
v4.13..v5.8):

 Reviewed-by: 42
 Signed-off-by: 25
 Reported-by: 6
 Acked-by: 4
 Tested-by: 4
 Suggested-by: 4

Provide a fix by calculating the Levenshtein distance between the signature
tag and each of the standard signatures, and suggest a standard signature
whose edit distance from the misspelled signature is less than or equal to 2.

Out of the 86 misspelled signatures fixed with this approach, 85 were
found to be good corrections and 1 was a bad correction.

Following was found to be a bad correction:
 Tweeted-by (count: 1) => Tested-by

Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
---
applies on next-20201120

changes in v2: modify commit message: replace specific example with overall evaluation, minor changes

changes in v3: summarize commit message

changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')

changes in v5: modify coding styles: improve function names, whitespaces

changes in v6: Simplify foreach loop; change standard signature tag values to normal ucfirst; modify warning message

 scripts/checkpatch.pl | 71 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index fdfd5ec09be6..4a026926139f 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -506,6 +506,64 @@ our $signature_tags = qr{(?xi:
 	Cc:
 )};
 
+sub edit_distance_min {
+	my (@arr) = @_;
+	my $len = scalar @arr;
+	if ((scalar @arr) < 1) {
+		# if underflow, return
+		return;
+	}
+	my $min = $arr[0];
+	for my $i (0 .. ($len-1)) {
+		if ($arr[$i] < $min) {
+			$min = $arr[$i];
+		}
+	}
+	return $min;
+}
+
+sub get_edit_distance {
+	my ($str1, $str2) = @_;
+	$str1 = lc($str1);
+	$str2 = lc($str2);
+	$str1 =~ s/-//g;
+	$str2 =~ s/-//g;
+	my $len1 = length($str1);
+	my $len2 = length($str2);
+	# two dimensional array storing minimum edit distance
+	my @distance;
+	for my $i (0 .. $len1) {
+		for my $j (0 .. $len2) {
+			if ($i == 0) {
+				$distance[$i][$j] = $j;
+			} elsif ($j == 0) {
+				$distance[$i][$j] = $i;
+			} elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
+				$distance[$i][$j] = $distance[$i - 1][$j - 1];
+			} else {
+				my $dist1 = $distance[$i][$j - 1]; #insert distance
+				my $dist2 = $distance[$i - 1][$j]; # remove
+				my $dist3 = $distance[$i - 1][$j - 1]; #replace
+				$distance[$i][$j] = 1 + edit_distance_min($dist1, $dist2, $dist3);
+			}
+		}
+	}
+	return $distance[$len1][$len2];
+}
+
+sub find_standard_signature {
+	my ($sign_off) = @_;
+	my @standard_signature_tags = (
+		'Signed-off-by:', 'Co-developed-by:', 'Acked-by:', 'Tested-by:',
+		'Reviewed-by:', 'Reported-by:', 'Suggested-by:'
+	);
+	foreach my $signature (@standard_signature_tags) {
+		return $signature if (get_edit_distance($sign_off, $signature) <= 2);
+	}
+
+	return "";
+}
+
 our @typeListMisordered = (
 	qr{char\s+(?:un)?signed},
 	qr{int\s+(?:(?:un)?signed\s+)?short\s},
@@ -2773,8 +2831,17 @@ sub process {
 			my $ucfirst_sign_off = ucfirst(lc($sign_off));
 
 			if ($sign_off !~ /$signature_tags/) {
-				WARN("BAD_SIGN_OFF",
-				     "Non-standard signature: $sign_off\n" . $herecurr);
+				my $suggested_signature = find_standard_signature($sign_off);
+				if ($suggested_signature eq "") {
+					WARN("BAD_SIGN_OFF",
+					     "Non-standard signature: $sign_off\n" . $herecurr);
+				} else {
+					if (WARN("BAD_SIGN_OFF",
+						 "Non-standard signature: '$sign_off' - perhaps '$suggested_signature'?\n" . $herecurr) &&
+					    $fix) {
+						$fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
+					}
+				}
 			}
 			if (defined $space_before && $space_before ne "") {
 				if (WARN("BAD_SIGN_OFF",
-- 
2.17.1

_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Linux-kernel-mentees] [PATCH v6] checkpatch: add fix and improve warning msg for non-standard signature
  2020-11-28 20:43       ` [Linux-kernel-mentees] [PATCH v6] " Aditya Srivastava
@ 2020-11-28 20:57         ` Joe Perches
  0 siblings, 0 replies; 6+ messages in thread
From: Joe Perches @ 2020-11-28 20:57 UTC (permalink / raw)
  To: Aditya Srivastava, Andrew Morton; +Cc: linux-kernel-mentees, linux-kernel

On Sun, 2020-11-29 at 02:13 +0530, Aditya Srivastava wrote:
> Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
> styles.

Thanks.

Acked-by: Joe Perches <joe@perches.com>

> A large number of these warnings occur because of typo mistakes in
> signature tags. An evaluation over v4.13..v5.8 showed that out of 539
> warnings due to non-standard signatures, 87 are due to typo mistakes.
> 
> Following are the standard signature tags which are often incorrectly
> used, along with their individual counts of incorrect use (over
> v4.13..v5.8):
> 
>  Reviewed-by: 42
>  Signed-off-by: 25
>  Reported-by: 6
>  Acked-by: 4
>  Tested-by: 4
>  Suggested-by: 4
> 
> Provide a fix by calculating levenshtein distance for the signature tag
> with all the standard signatures and suggest a fix with a signature, whose
> edit distance is less than or equal to 2 with the misspelled signature.
> 
> Out of the 86 mispelled signatures fixed with this approach, 85 were
> found to be good corrections and 1 was bad correction.
> 
> Following was found to be a bad correction:
>  Tweeted-by (count: 1) => Tested-by
> 
> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
> ---
> applies on next-20201120
> 
> changes in v2: modify commit message: replace specific example with overall evaluation, minor changes
> 
> changes in v3: summarize commit message
> 
> changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')
> 
> changes in v5: modify coding styles: improve function names, whitespaces
> 
> changes in v6: Simplify foreach loop; change standard signature tag values to normal ucfirst; modify warning message
> 
>  scripts/checkpatch.pl | 71 +++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 69 insertions(+), 2 deletions(-)
> 
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index fdfd5ec09be6..4a026926139f 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -506,6 +506,64 @@ our $signature_tags = qr{(?xi:
>  	Cc:
>  )};
>  
> 
> +sub edit_distance_min {
> +	my (@arr) = @_;
> +	my $len = scalar @arr;
> +	if ((scalar @arr) < 1) {
> +		# if underflow, return
> +		return;
> +	}
> +	my $min = $arr[0];
> +	for my $i (0 .. ($len-1)) {
> +		if ($arr[$i] < $min) {
> +			$min = $arr[$i];
> +		}
> +	}
> +	return $min;
> +}
> +
> +sub get_edit_distance {
> +	my ($str1, $str2) = @_;
> +	$str1 = lc($str1);
> +	$str2 = lc($str2);
> +	$str1 =~ s/-//g;
> +	$str2 =~ s/-//g;
> +	my $len1 = length($str1);
> +	my $len2 = length($str2);
> +	# two dimensional array storing minimum edit distance
> +	my @distance;
> +	for my $i (0 .. $len1) {
> +		for my $j (0 .. $len2) {
> +			if ($i == 0) {
> +				$distance[$i][$j] = $j;
> +			} elsif ($j == 0) {
> +				$distance[$i][$j] = $i;
> +			} elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
> +				$distance[$i][$j] = $distance[$i - 1][$j - 1];
> +			} else {
> +				my $dist1 = $distance[$i][$j - 1]; #insert distance
> +				my $dist2 = $distance[$i - 1][$j]; # remove
> +				my $dist3 = $distance[$i - 1][$j - 1]; #replace
> +				$distance[$i][$j] = 1 + edit_distance_min($dist1, $dist2, $dist3);
> +			}
> +		}
> +	}
> +	return $distance[$len1][$len2];
> +}
> +
> +sub find_standard_signature {
> +	my ($sign_off) = @_;
> +	my @standard_signature_tags = (
> +		'Signed-off-by:', 'Co-developed-by:', 'Acked-by:', 'Tested-by:',
> +		'Reviewed-by:', 'Reported-by:', 'Suggested-by:'
> +	);
> +	foreach my $signature (@standard_signature_tags) {
> +		return $signature if (get_edit_distance($sign_off, $signature) <= 2);
> +	}
> +
> +	return "";
> +}
> +
>  our @typeListMisordered = (
>  	qr{char\s+(?:un)?signed},
>  	qr{int\s+(?:(?:un)?signed\s+)?short\s},
> @@ -2773,8 +2831,17 @@ sub process {
>  			my $ucfirst_sign_off = ucfirst(lc($sign_off));
>  
> 
>  			if ($sign_off !~ /$signature_tags/) {
> -				WARN("BAD_SIGN_OFF",
> -				     "Non-standard signature: $sign_off\n" . $herecurr);
> +				my $suggested_signature = find_standard_signature($sign_off);
> +				if ($suggested_signature eq "") {
> +					WARN("BAD_SIGN_OFF",
> +					     "Non-standard signature: $sign_off\n" . $herecurr);
> +				} else {
> +					if (WARN("BAD_SIGN_OFF",
> +						 "Non-standard signature: '$sign_off' - perhaps '$suggested_signature'?\n" . $herecurr) &&
> +					    $fix) {
> +						$fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
> +					}
> +				}
>  			}
>  			if (defined $space_before && $space_before ne "") {
>  				if (WARN("BAD_SIGN_OFF",


_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-11-28 20:58 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-28 13:05 [Linux-kernel-mentees] [PATCH v4] checkpatch: add fix and improve warning msg for non-standard signature Aditya Srivastava
2020-11-28 15:40 ` Joe Perches
2020-11-28 18:35   ` [Linux-kernel-mentees] [PATCH v5] " Aditya Srivastava
2020-11-28 19:12     ` Joe Perches
2020-11-28 20:43       ` [Linux-kernel-mentees] [PATCH v6] " Aditya Srivastava
2020-11-28 20:57         ` Joe Perches

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).