linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables
@ 2021-04-22 19:18 Aditya Srivastava
       [not found] ` <CAKXUXMx9q57cWXkcezKKo-uuh21Sd-Si9M9KydzFEMQ0ELYEng@mail.gmail.com>
  2021-04-23 13:21 ` Matthew Wilcox
  0 siblings, 2 replies; 18+ messages in thread
From: Aditya Srivastava @ 2021-04-22 19:18 UTC (permalink / raw)
  To: corbet
  Cc: yashsri421, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel

There are some regex expressions in the kernel-doc script, which are used
repeatedly in the script.

Reduce such expressions into variables, which can be used everywhere.

A quick manual check found that no errors and warnings were added/removed
in this process.

Suggested-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
---
 scripts/kernel-doc | 89 ++++++++++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 39 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 2a85d34fdcd0..579c9fdd275f 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -406,6 +406,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
 my $doc_inline_end = '^\s*\*/\s*$';
 my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
 my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
+my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
 
 my %parameterdescs;
 my %parameterdesc_start_lines;
@@ -694,7 +695,7 @@ sub output_function_man(%) {
 	    $post = ");";
 	}
 	$type = $args{'parametertypes'}{$parameter};
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$pointer_function/) {
 	    # pointer-to-function
 	    print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
 	} else {
@@ -974,7 +975,7 @@ sub output_function_rst(%) {
 	$count++;
 	$type = $args{'parametertypes'}{$parameter};
 
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$pointer_function/) {
 	    # pointer-to-function
 	    print $1 . $parameter . ") (" . $2 . ")";
 	} else {
@@ -1210,8 +1211,14 @@ sub dump_struct($$) {
     my $decl_type;
     my $members;
     my $type = qr{struct|union};
+    my $packed = qr{__packed};
+    my $aligned = qr{__aligned};
+    my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp};
+    my $cacheline_aligned = qr{____cacheline_aligned};
+    my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
     # For capturing struct/union definition body, i.e. "{members*}qualifiers*"
-    my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
+    my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*};
+    my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
 
     if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
 	$decl_type = $1;
@@ -1235,27 +1242,27 @@ sub dump_struct($$) {
 	# strip comments:
 	$members =~ s/\/\*.*?\*\///gos;
 	# strip attributes
-	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
-	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
-	$members =~ s/\s*__packed\s*/ /gos;
+	$members =~ s/\s*$attribute/ /gi;
+	$members =~ s/\s*$aligned\s*\([^;]*\)/ /gos;
+	$members =~ s/\s*$packed\s*/ /gos;
 	$members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
-	$members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
-	$members =~ s/\s*____cacheline_aligned/ /gos;
+	$members =~ s/\s*$cacheline_aligned_in_smp/ /gos;
+	$members =~ s/\s*$cacheline_aligned/ /gos;
 
+	my $args = qr{([^,)]+)};
 	# replace DECLARE_BITMAP
 	$members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
-	$members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
+	$members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
 	# replace DECLARE_HASHTABLE
-	$members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
+	$members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
 	# replace DECLARE_KFIFO
-	$members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
+	$members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
 	# replace DECLARE_KFIFO_PTR
-	$members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
-
+	$members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
 	my $declaration = $members;
 
 	# Split nested struct/union elements as newer ones
-	while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
+	while ($members =~ m/$struct_members/) {
 		my $newmember;
 		my $maintype = $1;
 		my $ids = $4;
@@ -1315,7 +1322,7 @@ sub dump_struct($$) {
 				}
 			}
 		}
-		$members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
+		$members =~ s/$struct_members/$newmember/;
 	}
 
 	# Ignore other nested elements, like enums
@@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) {
     my $param;
 
     # temporarily replace commas inside function pointer definition
-    while ($args =~ /(\([^\),]+),/) {
-	$args =~ s/(\([^\),]+),/$1#/g;
+    my $arg_expr = qr{\([^\),]+};
+    while ($args =~ /$arg_expr,/) {
+	$args =~ s/($arg_expr),/$1#/g;
     }
 
     foreach my $arg (split($splitter, $args)) {
@@ -1808,8 +1816,11 @@ sub dump_function($$) {
     # - parport_register_device (function pointer parameters)
     # - atomic_set (macro)
     # - pci_match_device, __copy_to_user (long return type)
+    my $name = qr{[a-zA-Z0-9_~:]+};
+    my $prototype_end1 = qr{\(([^\(]*)\)};
+    my $prototype_end2 = qr{\(([^\{]*)\)};
 
-    if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
+    if ($define && $prototype =~ m/^()($name)\s+/) {
         # This is an object-like macro, it has no return type and no parameter
         # list.
         # Function-like macros are not allowed to have spaces between
@@ -1817,23 +1828,23 @@ sub dump_function($$) {
         $return_type = $1;
         $declaration_name = $2;
         $noret = 1;
-    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
+    } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ ||
+	$prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ ||
+	$prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
+	$prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ ||
+	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
+	$prototype =~ m/^()($name)\s*$prototype_end2/ ||
+	$prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ ||
+	$prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
+	$prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
+	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
+	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/)  {
 	$return_type = $1;
 	$declaration_name = $2;
 	my $args = $3;
@@ -2110,12 +2121,12 @@ sub process_name($$) {
     } elsif (/$doc_decl/o) {
 	$identifier = $1;
 	my $is_kernel_comment = 0;
-	my $decl_start = qr{\s*\*};
+	my $decl_start = qr{$doc_com};
 	# test for pointer declaration type, foo * bar() - desc
 	my $fn_type = qr{\w+\s*\*\s*}; 
 	my $parenthesis = qr{\(\w*\)};
 	my $decl_end = qr{[-:].*};
-	if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
+	if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
 	    $identifier = $1;
 	}
 	if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
@@ -2125,8 +2136,8 @@ sub process_name($$) {
 	}
 	# Look for foo() or static void foo() - description; or misspelt
 	# identifier
-	elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
-	    /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
+	elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
+	    /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
 	    $identifier = $1;
 	    $decl_type = 'function';
 	    $identifier =~ s/^define\s+//;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables
       [not found] ` <CAKXUXMx9q57cWXkcezKKo-uuh21Sd-Si9M9KydzFEMQ0ELYEng@mail.gmail.com>
@ 2021-04-23 12:20   ` Aditya Srivastava
  0 siblings, 0 replies; 18+ messages in thread
From: Aditya Srivastava @ 2021-04-23 12:20 UTC (permalink / raw)
  To: Lukas Bulwahn
  Cc: Jonathan Corbet, linux-kernel-mentees, open list:DOCUMENTATION,
	Linux Kernel Mailing List

On 23/4/21 1:03 am, Lukas Bulwahn wrote:
> Aditya Srivastava <yashsri421@gmail.com> schrieb am Do., 22. Apr. 2021,
> 21:18:
> 
>> There are some regex expressions in the kernel-doc script, which are used
>> repeatedly in the script.
>>
>> Reduce such expressions into variables, which can be used everywhere.
>>
>> A quick manual check found that no errors and warnings were added/removed
>> in this process.
>>
>> Suggested-by: Jonathan Corbet <corbet@lwn.net>
>> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
>> ---
>>  scripts/kernel-doc | 89 ++++++++++++++++++++++++++--------------------
>>  1 file changed, 50 insertions(+), 39 deletions(-)
>>
>> diff --git a/scripts/kernel-doc b/scripts/kernel-doc
>> index 2a85d34fdcd0..579c9fdd275f 100755
>> --- a/scripts/kernel-doc
>> +++ b/scripts/kernel-doc
>> @@ -406,6 +406,7 @@ my $doc_inline_sect =
>> '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
>>  my $doc_inline_end = '^\s*\*/\s*$';
>>  my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
>>  my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
>> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
>>
>>  my %parameterdescs;
>>  my %parameterdesc_start_lines;
>> @@ -694,7 +695,7 @@ sub output_function_man(%) {
>>             $post = ");";
>>         }
>>         $type = $args{'parametertypes'}{$parameter};
>> -       if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
>> +       if ($type =~ m/$pointer_function/) {
>>             # pointer-to-function
>>             print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" .
>> $post . "\"\n";
>>         } else {
>> @@ -974,7 +975,7 @@ sub output_function_rst(%) {
>>         $count++;
>>         $type = $args{'parametertypes'}{$parameter};
>>
>> -       if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
>> +       if ($type =~ m/$pointer_function/) {
>>             # pointer-to-function
>>             print $1 . $parameter . ") (" . $2 . ")";
>>         } else {
>> @@ -1210,8 +1211,14 @@ sub dump_struct($$) {
>>      my $decl_type;
>>      my $members;
>>      my $type = qr{struct|union};
>> +    my $packed = qr{__packed};
>> +    my $aligned = qr{__aligned};
>> +    my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp};
>> +    my $cacheline_aligned = qr{____cacheline_aligned};
>> +    my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
>>      # For capturing struct/union definition body, i.e.
>> "{members*}qualifiers*"
>> -    my $definition_body =
>> qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
>> +    my $definition_body =
>> qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*};
>> +    my $struct_members =
>> qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
>>
>>      if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
>>         $decl_type = $1;
>> @@ -1235,27 +1242,27 @@ sub dump_struct($$) {
>>         # strip comments:
>>         $members =~ s/\/\*.*?\*\///gos;
>>         # strip attributes
>> -       $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
>> -       $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
>> -       $members =~ s/\s*__packed\s*/ /gos;
>> +       $members =~ s/\s*$attribute/ /gi;
>> +       $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos;
>> +       $members =~ s/\s*$packed\s*/ /gos;
>>         $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
>> -       $members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
>> -       $members =~ s/\s*____cacheline_aligned/ /gos;
>> +       $members =~ s/\s*$cacheline_aligned_in_smp/ /gos;
>> +       $members =~ s/\s*$cacheline_aligned/ /gos;
>>
>> +       my $args = qr{([^,)]+)};
>>         # replace DECLARE_BITMAP
>>         $members =~
>> s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1,
>> __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
>> -       $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned
>> long $1\[BITS_TO_LONGS($2)\]/gos;
>> +       $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long
>> $1\[BITS_TO_LONGS($2)\]/gos;
>>         # replace DECLARE_HASHTABLE
>> -       $members =~
>> s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2)
>> - 1)\]/gos;
>> +       $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long
>> $1\[1 << (($2) - 1)\]/gos;
>>         # replace DECLARE_KFIFO
>> -       $members =~
>> s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
>> +       $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2
>> \*$1/gos;
>>         # replace DECLARE_KFIFO_PTR
>> -       $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2
>> \*$1/gos;
>> -
>> +       $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
>>         my $declaration = $members;
>>
>>         # Split nested struct/union elements as newer ones
>> -       while ($members =~
>> m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
>> +       while ($members =~ m/$struct_members/) {
>>                 my $newmember;
>>                 my $maintype = $1;
>>                 my $ids = $4;
>> @@ -1315,7 +1322,7 @@ sub dump_struct($$) {
>>                                 }
>>                         }
>>                 }
>> -               $members =~
>> s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
>> +               $members =~ s/$struct_members/$newmember/;
>>         }
>>
>>         # Ignore other nested elements, like enums
>> @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) {
>>      my $param;
>>
>>      # temporarily replace commas inside function pointer definition
>> -    while ($args =~ /(\([^\),]+),/) {
>> -       $args =~ s/(\([^\),]+),/$1#/g;
>> +    my $arg_expr = qr{\([^\),]+};
>> +    while ($args =~ /$arg_expr,/) {
>> +       $args =~ s/($arg_expr),/$1#/g;
>>      }
>>
>>      foreach my $arg (split($splitter, $args)) {
>> @@ -1808,8 +1816,11 @@ sub dump_function($$) {
>>      # - parport_register_device (function pointer parameters)
>>      # - atomic_set (macro)
>>      # - pci_match_device, __copy_to_user (long return type)
>> +    my $name = qr{[a-zA-Z0-9_~:]+};
>> +    my $prototype_end1 = qr{\(([^\(]*)\)};
>> +    my $prototype_end2 = qr{\(([^\{]*)\)};
>>
> 
> Why do you need end1 and end2 here?
> 

Thanks for pointing out, Lukas. I am looking into the possibility of
combining these expressions, and testing against the files.
Please let me know if there are any more improvements possible :)

Thanks
Aditya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-22 19:18 [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables Aditya Srivastava
       [not found] ` <CAKXUXMx9q57cWXkcezKKo-uuh21Sd-Si9M9KydzFEMQ0ELYEng@mail.gmail.com>
@ 2021-04-23 13:21 ` Matthew Wilcox
  2021-04-24 11:57   ` Aditya Srivastava
  1 sibling, 1 reply; 18+ messages in thread
From: Matthew Wilcox @ 2021-04-23 13:21 UTC (permalink / raw)
  To: Aditya Srivastava
  Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel

On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote:
> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};

Is that a pointer-to-function?  Or as people who write C usually call it,
a function pointer?  Wouldn't it be better to call it $function_pointer?

> @@ -1210,8 +1211,14 @@ sub dump_struct($$) {
>      my $decl_type;
>      my $members;
>      my $type = qr{struct|union};
> +    my $packed = qr{__packed};
> +    my $aligned = qr{__aligned};
> +    my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp};
> +    my $cacheline_aligned = qr{____cacheline_aligned};

I don't think those four definitions actually simplify anything.

> +    my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;

... whereas this one definitely does.

> -	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
> -	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
> -	$members =~ s/\s*__packed\s*/ /gos;
> +	$members =~ s/\s*$attribute/ /gi;
> +	$members =~ s/\s*$aligned\s*\([^;]*\)/ /gos;

Maybe put the \s*\([^;]*\) into $aligned?  Then it becomes a useful
abstraction.

> -    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
> +    } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ ||
> +	$prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ ||
> +	$prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
> +	$prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ ||
> +	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
> +	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ ||
> +	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
> +	$prototype =~ m/^()($name)\s*$prototype_end2/ ||
> +	$prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ ||
> +	$prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> +	$prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
> +	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> +	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
> +	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> +	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
> +	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> +	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/)  {

This is probably the best patch I've seen so far this year.

Now, can we go further?  For example:
	$prototype_end = $prototype_end1|$prototype_end2
That would let us cut the number of lines here in half.

Can we create a definition for a variable number of \w and \s and '*'
in the return type?  In fact, can we define a regex that matches a type?
So this would become:

> +    } elsif ($prototype =~ m/^($type)\s*($name)\s*$prototype_end/) {


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-23 13:21 ` Matthew Wilcox
@ 2021-04-24 11:57   ` Aditya Srivastava
  2021-04-24 12:47     ` [RFC v2] " Aditya Srivastava
  2021-04-26 17:31     ` [RFC] " Matthew Wilcox
  0 siblings, 2 replies; 18+ messages in thread
From: Aditya Srivastava @ 2021-04-24 11:57 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel

On 23/4/21 6:51 pm, Matthew Wilcox wrote:
> On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote:
>> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
> 
> Is that a pointer-to-function?  Or as people who write C usually call it,
> a function pointer?  Wouldn't it be better to call it $function_pointer?
> 
Will do it.

>> @@ -1210,8 +1211,14 @@ sub dump_struct($$) {
>>      my $decl_type;
>>      my $members;
>>      my $type = qr{struct|union};
>> +    my $packed = qr{__packed};
>> +    my $aligned = qr{__aligned};
>> +    my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp};
>> +    my $cacheline_aligned = qr{____cacheline_aligned};
> 
> I don't think those four definitions actually simplify anything.
> 
>> +    my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
> 
> ... whereas this one definitely does.
> 
>> -	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
>> -	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
>> -	$members =~ s/\s*__packed\s*/ /gos;
>> +	$members =~ s/\s*$attribute/ /gi;
>> +	$members =~ s/\s*$aligned\s*\([^;]*\)/ /gos;
> 
> Maybe put the \s*\([^;]*\) into $aligned?  Then it becomes a useful
> abstraction.

Actually, I had made these variables as they were repeated here and at
-    my $definition_body =
qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
+    my $definition_body =
qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*};

So, defining them at a place might help.

What do you think?

> 
>> -    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
>> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
>> -	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
>> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
>> -	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
>> +    } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ ||
>> +	$prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ ||
>> +	$prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
>> +	$prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ ||
>> +	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
>> +	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ ||
>> +	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ ||
>> +	$prototype =~ m/^()($name)\s*$prototype_end2/ ||
>> +	$prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ ||
>> +	$prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
>> +	$prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
>> +	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
>> +	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
>> +	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
>> +	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
>> +	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
>> +	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/)  {
> 
> This is probably the best patch I've seen so far this year.
> 
> Now, can we go further?  For example:
> 	$prototype_end = $prototype_end1|$prototype_end2
> That would let us cut the number of lines here in half.
> > Can we create a definition for a variable number of \w and \s and '*'
> in the return type?  In fact, can we define a regex that matches a type?
> So this would become:
> 
>> +    } elsif ($prototype =~ m/^($type)\s*($name)\s*$prototype_end/) {
> 

I have been able to reduce these expressions furthermore. Will send a
v2 in few..

Thanks
Aditya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [RFC v2] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-24 11:57   ` Aditya Srivastava
@ 2021-04-24 12:47     ` Aditya Srivastava
  2021-04-27 15:55       ` Jonathan Corbet
  2021-04-26 17:31     ` [RFC] " Matthew Wilcox
  1 sibling, 1 reply; 18+ messages in thread
From: Aditya Srivastava @ 2021-04-24 12:47 UTC (permalink / raw)
  To: corbet
  Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees,
	linux-doc, linux-kernel

There are some regex expressions in the kernel-doc script, which are used
repeatedly in the script.

Reduce such expressions into variables, which can be used everywhere.

A quick manual check found that no errors and warnings were added/removed
in this process.

Suggested-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
---
Changes in v2:
- Rename $pointer_function to $function_pointer
- Combine elsif-block expressions at "sub dump_function" into lesser regex expressions
- Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end

 scripts/kernel-doc | 80 +++++++++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 2a85d34fdcd0..fe7f51be44e0 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -406,6 +406,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
 my $doc_inline_end = '^\s*\*/\s*$';
 my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
 my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
+my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
 
 my %parameterdescs;
 my %parameterdesc_start_lines;
@@ -694,7 +695,7 @@ sub output_function_man(%) {
 	    $post = ");";
 	}
 	$type = $args{'parametertypes'}{$parameter};
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$function_pointer/) {
 	    # pointer-to-function
 	    print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
 	} else {
@@ -974,7 +975,7 @@ sub output_function_rst(%) {
 	$count++;
 	$type = $args{'parametertypes'}{$parameter};
 
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$function_pointer/) {
 	    # pointer-to-function
 	    print $1 . $parameter . ") (" . $2 . ")";
 	} else {
@@ -1210,8 +1211,14 @@ sub dump_struct($$) {
     my $decl_type;
     my $members;
     my $type = qr{struct|union};
+    my $packed = qr{__packed};
+    my $aligned = qr{__aligned};
+    my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp};
+    my $cacheline_aligned = qr{____cacheline_aligned};
+    my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
     # For capturing struct/union definition body, i.e. "{members*}qualifiers*"
-    my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
+    my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*};
+    my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
 
     if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
 	$decl_type = $1;
@@ -1235,27 +1242,27 @@ sub dump_struct($$) {
 	# strip comments:
 	$members =~ s/\/\*.*?\*\///gos;
 	# strip attributes
-	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
-	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
-	$members =~ s/\s*__packed\s*/ /gos;
+	$members =~ s/\s*$attribute/ /gi;
+	$members =~ s/\s*$aligned\s*\([^;]*\)/ /gos;
+	$members =~ s/\s*$packed\s*/ /gos;
 	$members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
-	$members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
-	$members =~ s/\s*____cacheline_aligned/ /gos;
+	$members =~ s/\s*$cacheline_aligned_in_smp/ /gos;
+	$members =~ s/\s*$cacheline_aligned/ /gos;
 
+	my $args = qr{([^,)]+)};
 	# replace DECLARE_BITMAP
 	$members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
-	$members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
+	$members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
 	# replace DECLARE_HASHTABLE
-	$members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
+	$members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
 	# replace DECLARE_KFIFO
-	$members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
+	$members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
 	# replace DECLARE_KFIFO_PTR
-	$members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
-
+	$members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
 	my $declaration = $members;
 
 	# Split nested struct/union elements as newer ones
-	while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
+	while ($members =~ m/$struct_members/) {
 		my $newmember;
 		my $maintype = $1;
 		my $ids = $4;
@@ -1315,7 +1322,7 @@ sub dump_struct($$) {
 				}
 			}
 		}
-		$members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
+		$members =~ s/$struct_members/$newmember/;
 	}
 
 	# Ignore other nested elements, like enums
@@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) {
     my $param;
 
     # temporarily replace commas inside function pointer definition
-    while ($args =~ /(\([^\),]+),/) {
-	$args =~ s/(\([^\),]+),/$1#/g;
+    my $arg_expr = qr{\([^\),]+};
+    while ($args =~ /$arg_expr,/) {
+	$args =~ s/($arg_expr),/$1#/g;
     }
 
     foreach my $arg (split($splitter, $args)) {
@@ -1808,8 +1816,14 @@ sub dump_function($$) {
     # - parport_register_device (function pointer parameters)
     # - atomic_set (macro)
     # - pci_match_device, __copy_to_user (long return type)
-
-    if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
+    my $name = qr{[a-zA-Z0-9_~:]+};
+    my $prototype_end1 = qr{[^\(]*};
+    my $prototype_end2 = qr{[^\{]*};
+    my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};
+    my $type1 = qr{[\w\s]+};
+    my $type2 = qr{$type1\*+};
+
+    if ($define && $prototype =~ m/^()($name)\s+/) {
         # This is an object-like macro, it has no return type and no parameter
         # list.
         # Function-like macros are not allowed to have spaces between
@@ -1817,23 +1831,9 @@ sub dump_function($$) {
         $return_type = $1;
         $declaration_name = $2;
         $noret = 1;
-    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
+    } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
+	$prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
+	$prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/)  {
 	$return_type = $1;
 	$declaration_name = $2;
 	my $args = $3;
@@ -2110,12 +2110,12 @@ sub process_name($$) {
     } elsif (/$doc_decl/o) {
 	$identifier = $1;
 	my $is_kernel_comment = 0;
-	my $decl_start = qr{\s*\*};
+	my $decl_start = qr{$doc_com};
 	# test for pointer declaration type, foo * bar() - desc
 	my $fn_type = qr{\w+\s*\*\s*}; 
 	my $parenthesis = qr{\(\w*\)};
 	my $decl_end = qr{[-:].*};
-	if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
+	if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
 	    $identifier = $1;
 	}
 	if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
@@ -2125,8 +2125,8 @@ sub process_name($$) {
 	}
 	# Look for foo() or static void foo() - description; or misspelt
 	# identifier
-	elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
-	    /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
+	elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
+	    /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
 	    $identifier = $1;
 	    $decl_type = 'function';
 	    $identifier =~ s/^define\s+//;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-24 11:57   ` Aditya Srivastava
  2021-04-24 12:47     ` [RFC v2] " Aditya Srivastava
@ 2021-04-26 17:31     ` Matthew Wilcox
  1 sibling, 0 replies; 18+ messages in thread
From: Matthew Wilcox @ 2021-04-26 17:31 UTC (permalink / raw)
  To: Aditya Srivastava
  Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel

On Sat, Apr 24, 2021 at 05:27:34PM +0530, Aditya Srivastava wrote:
> On 23/4/21 6:51 pm, Matthew Wilcox wrote:
> > On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote:
> >> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
> > 
> > Is that a pointer-to-function?  Or as people who write C usually call it,
> > a function pointer?  Wouldn't it be better to call it $function_pointer?
> > 
> Will do it.
> 
> >> @@ -1210,8 +1211,14 @@ sub dump_struct($$) {
> >>      my $decl_type;
> >>      my $members;
> >>      my $type = qr{struct|union};
> >> +    my $packed = qr{__packed};
> >> +    my $aligned = qr{__aligned};
> >> +    my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp};
> >> +    my $cacheline_aligned = qr{____cacheline_aligned};
> > 
> > I don't think those four definitions actually simplify anything.
> > 
> >> +    my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
> > 
> > ... whereas this one definitely does.
> > 
> >> -	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
> >> -	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
> >> -	$members =~ s/\s*__packed\s*/ /gos;
> >> +	$members =~ s/\s*$attribute/ /gi;
> >> +	$members =~ s/\s*$aligned\s*\([^;]*\)/ /gos;
> > 
> > Maybe put the \s*\([^;]*\) into $aligned?  Then it becomes a useful
> > abstraction.
> 
> Actually, I had made these variables as they were repeated here and at
> -    my $definition_body =
> qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
> +    my $definition_body =
> qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*};
> 
> So, defining them at a place might help.
> 
> What do you think?

I don't think that seeing $packed is any easier to read than __packed.
Indeed, I think it's harder, because now I have to look up what $packed
is defined as.

Defining a variable, say

	$decorations = qr{__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\))}
	(i didn't count brackets to be sure i got that right)

would be helpful because then we could say:

	my $definition_body = qr{\{(.*)\}...$decorations...

and have a fighting chance of understanding what it means.

Now, this other place we use it, we do the =~ operation a number of times.
Is there a way to use the $decorations variable to do the same thing
with a single operation?


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC v2] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-24 12:47     ` [RFC v2] " Aditya Srivastava
@ 2021-04-27 15:55       ` Jonathan Corbet
  2021-04-27 16:56         ` Matthew Wilcox
  0 siblings, 1 reply; 18+ messages in thread
From: Jonathan Corbet @ 2021-04-27 15:55 UTC (permalink / raw)
  To: Aditya Srivastava
  Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees,
	linux-doc, linux-kernel

Aditya Srivastava <yashsri421@gmail.com> writes:

> There are some regex expressions in the kernel-doc script, which are used
> repeatedly in the script.
>
> Reduce such expressions into variables, which can be used everywhere.
>
> A quick manual check found that no errors and warnings were added/removed
> in this process.
>
> Suggested-by: Jonathan Corbet <corbet@lwn.net>
> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>

So my comments pretty much mirror Willy's ...  there is a lot to really
like here mixed with some stuff that's not as helpful.

> Changes in v2:
> - Rename $pointer_function to $function_pointer
> - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions
> - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end
>
>  scripts/kernel-doc | 80 +++++++++++++++++++++++-----------------------
>  1 file changed, 40 insertions(+), 40 deletions(-)
>
> diff --git a/scripts/kernel-doc b/scripts/kernel-doc
> index 2a85d34fdcd0..fe7f51be44e0 100755
> --- a/scripts/kernel-doc
> +++ b/scripts/kernel-doc
> @@ -406,6 +406,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
>  my $doc_inline_end = '^\s*\*/\s*$';
>  my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
>  my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
> +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
>  
>  my %parameterdescs;
>  my %parameterdesc_start_lines;
> @@ -694,7 +695,7 @@ sub output_function_man(%) {
>  	    $post = ");";
>  	}
>  	$type = $args{'parametertypes'}{$parameter};
> -	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
> +	if ($type =~ m/$function_pointer/) {

This kind of change is wonderful.  The more that we can coalesce this
regex mess and reuse it throughout the script, the more maintainable it
will be.

>  	    # pointer-to-function
>  	    print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
>  	} else {
> @@ -974,7 +975,7 @@ sub output_function_rst(%) {
>  	$count++;
>  	$type = $args{'parametertypes'}{$parameter};
>  
> -	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
> +	if ($type =~ m/$function_pointer/) {
>  	    # pointer-to-function
>  	    print $1 . $parameter . ") (" . $2 . ")";
>  	} else {
> @@ -1210,8 +1211,14 @@ sub dump_struct($$) {
>      my $decl_type;
>      my $members;
>      my $type = qr{struct|union};
> +    my $packed = qr{__packed};
> +    my $aligned = qr{__aligned};
> +    my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp};
> +    my $cacheline_aligned = qr{____cacheline_aligned};
> +    my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
>      # For capturing struct/union definition body, i.e. "{members*}qualifiers*"
> -    my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
> +    my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*};
> +    my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};

This is a bit less helpful - variables like $aligned don't do much for
us.  That can be seen just below:

>  
>      if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
>  	$decl_type = $1;
> @@ -1235,27 +1242,27 @@ sub dump_struct($$) {
>  	# strip comments:
>  	$members =~ s/\/\*.*?\*\///gos;
>  	# strip attributes
> -	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
> -	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
> -	$members =~ s/\s*__packed\s*/ /gos;
> +	$members =~ s/\s*$attribute/ /gi;
> +	$members =~ s/\s*$aligned\s*\([^;]*\)/ /gos;
> +	$members =~ s/\s*$packed\s*/ /gos;

The use of the variables here doesn't really make those expressions more
readable.  

>  	$members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
> -	$members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
> -	$members =~ s/\s*____cacheline_aligned/ /gos;
> +	$members =~ s/\s*$cacheline_aligned_in_smp/ /gos;
> +	$members =~ s/\s*$cacheline_aligned/ /gos;
>  
> +	my $args = qr{([^,)]+)};
>  	# replace DECLARE_BITMAP
>  	$members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
> -	$members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
> +	$members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;

Here too ... this is the kind of stuff that makes me glad that Colorado
is a legal-weed state, and the new version, while better, doesn't change
that basic fact.

>  	# replace DECLARE_HASHTABLE
> -	$members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
> +	$members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
>  	# replace DECLARE_KFIFO
> -	$members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
> +	$members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
>  	# replace DECLARE_KFIFO_PTR
> -	$members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
> -
> +	$members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
>  	my $declaration = $members;
>  
>  	# Split nested struct/union elements as newer ones
> -	while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
> +	while ($members =~ m/$struct_members/) {

...but this is great.

>  		my $newmember;
>  		my $maintype = $1;
>  		my $ids = $4;
> @@ -1315,7 +1322,7 @@ sub dump_struct($$) {
>  				}
>  			}
>  		}
> -		$members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
> +		$members =~ s/$struct_members/$newmember/;
>  	}
>  
>  	# Ignore other nested elements, like enums
> @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) {
>      my $param;
>  
>      # temporarily replace commas inside function pointer definition
> -    while ($args =~ /(\([^\),]+),/) {
> -	$args =~ s/(\([^\),]+),/$1#/g;
> +    my $arg_expr = qr{\([^\),]+};
> +    while ($args =~ /$arg_expr,/) {
> +	$args =~ s/($arg_expr),/$1#/g;
>      }
>  
>      foreach my $arg (split($splitter, $args)) {
> @@ -1808,8 +1816,14 @@ sub dump_function($$) {
>      # - parport_register_device (function pointer parameters)
>      # - atomic_set (macro)
>      # - pci_match_device, __copy_to_user (long return type)
> -
> -    if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
> +    my $name = qr{[a-zA-Z0-9_~:]+};
> +    my $prototype_end1 = qr{[^\(]*};
> +    my $prototype_end2 = qr{[^\{]*};
> +    my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};
> +    my $type1 = qr{[\w\s]+};
> +    my $type2 = qr{$type1\*+};
> +
> +    if ($define && $prototype =~ m/^()($name)\s+/) {
>          # This is an object-like macro, it has no return type and no parameter
>          # list.
>          # Function-like macros are not allowed to have spaces between
> @@ -1817,23 +1831,9 @@ sub dump_function($$) {
>          $return_type = $1;
>          $declaration_name = $2;
>          $noret = 1;
> -    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
> +    } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
> +	$prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
> +	$prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/)  {

It's hard not to be happy about something like that - this is a definite
step toward clarifying this code.

I think I'll stop here; hopefully I've gotten my point across.  I really
like where this work is heading; focusing just a bit more on pulling the
regexes together and making the whole thing more readable would be
wonderful.

Thanks,

jon

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC v2] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-27 15:55       ` Jonathan Corbet
@ 2021-04-27 16:56         ` Matthew Wilcox
  2021-04-29  6:37           ` [RFC v3] " Aditya Srivastava
  0 siblings, 1 reply; 18+ messages in thread
From: Matthew Wilcox @ 2021-04-27 16:56 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: Aditya Srivastava, lukas.bulwahn, linux-kernel-mentees,
	linux-doc, linux-kernel

On Tue, Apr 27, 2021 at 09:55:35AM -0600, Jonathan Corbet wrote:
> The use of the variables here doesn't really make those expressions more
> readable.  
> 
> >  	$members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
> > -	$members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
> > -	$members =~ s/\s*____cacheline_aligned/ /gos;
> > +	$members =~ s/\s*$cacheline_aligned_in_smp/ /gos;
> > +	$members =~ s/\s*$cacheline_aligned/ /gos;
> >  
> > +	my $args = qr{([^,)]+)};
> >  	# replace DECLARE_BITMAP
> >  	$members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
> > -	$members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
> > +	$members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
> 
> Here too ... this is the kind of stuff that makes me glad that Colorado
> is a legal-weed state, and the new version, while better, doesn't change
> that basic fact.

I'm going to have to disagree with you on this one (I agree with you on all
the others).  I find this much easier to read ...

"DECLARE_BITMAP followed by any amount of whitespace, literal open bracket,
an argument, literal comma, whitespace, another argument, literal close bracket"

Before, I get to "DECLARE_BITMAP followed by any amount of whitespace,
then some line noise".

Obviously I'm less experienced at reading regexes than you are, but this
simplification really does help me.

> I think I'll stop here; hopefully I've gotten my point across.  I really
> like where this work is heading; focusing just a bit more on pulling the
> regexes together and making the whole thing more readable would be
> wonderful.

Amen.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-27 16:56         ` Matthew Wilcox
@ 2021-04-29  6:37           ` Aditya Srivastava
  2021-04-29 23:39             ` Jonathan Corbet
  2021-05-01 15:43             ` [RFC v3] " Matthew Wilcox
  0 siblings, 2 replies; 18+ messages in thread
From: Aditya Srivastava @ 2021-04-29  6:37 UTC (permalink / raw)
  To: corbet
  Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees,
	linux-doc, linux-kernel

There are some regex expressions in the kernel-doc script, which are used
repeatedly in the script.

Reduce such expressions into variables, which can be used everywhere.

A quick manual check found that no errors and warnings were added/removed
in this process.

Suggested-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
---
Changes in v3:
- Remove variables for separate qualifiers in "sub dump_struct"
- Make a common variable for all the qualifiers
- Make $attribute global variable to use it at "sub check_sections" as well

Changes in v2:
- Rename $pointer_function to $function_pointer
- Combine elsif-block expressions at "sub dump_function" into lesser regex expressions
- Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end

 scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 2a85d34fdcd0..721005a02e64 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
 my $doc_inline_end = '^\s*\*/\s*$';
 my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
 my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
+my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
+my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
 
 my %parameterdescs;
 my %parameterdesc_start_lines;
@@ -694,7 +696,7 @@ sub output_function_man(%) {
 	    $post = ");";
 	}
 	$type = $args{'parametertypes'}{$parameter};
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$function_pointer/) {
 	    # pointer-to-function
 	    print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
 	} else {
@@ -974,7 +976,7 @@ sub output_function_rst(%) {
 	$count++;
 	$type = $args{'parametertypes'}{$parameter};
 
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$function_pointer/) {
 	    # pointer-to-function
 	    print $1 . $parameter . ") (" . $2 . ")";
 	} else {
@@ -1211,7 +1213,9 @@ sub dump_struct($$) {
     my $members;
     my $type = qr{struct|union};
     # For capturing struct/union definition body, i.e. "{members*}qualifiers*"
-    my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
+    my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned};
+    my $definition_body = qr{\{(.*)\}\s*$qualifiers*};
+    my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
 
     if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
 	$decl_type = $1;
@@ -1235,27 +1239,27 @@ sub dump_struct($$) {
 	# strip comments:
 	$members =~ s/\/\*.*?\*\///gos;
 	# strip attributes
-	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
+	$members =~ s/\s*$attribute/ /gi;
 	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
 	$members =~ s/\s*__packed\s*/ /gos;
 	$members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
 	$members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
 	$members =~ s/\s*____cacheline_aligned/ /gos;
 
+	my $args = qr{([^,)]+)};
 	# replace DECLARE_BITMAP
 	$members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
-	$members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
+	$members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
 	# replace DECLARE_HASHTABLE
-	$members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
+	$members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
 	# replace DECLARE_KFIFO
-	$members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
+	$members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
 	# replace DECLARE_KFIFO_PTR
-	$members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
-
+	$members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
 	my $declaration = $members;
 
 	# Split nested struct/union elements as newer ones
-	while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
+	while ($members =~ m/$struct_members/) {
 		my $newmember;
 		my $maintype = $1;
 		my $ids = $4;
@@ -1315,7 +1319,7 @@ sub dump_struct($$) {
 				}
 			}
 		}
-		$members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
+		$members =~ s/$struct_members/$newmember/;
 	}
 
 	# Ignore other nested elements, like enums
@@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) {
     my $param;
 
     # temporarily replace commas inside function pointer definition
-    while ($args =~ /(\([^\),]+),/) {
-	$args =~ s/(\([^\),]+),/$1#/g;
+    my $arg_expr = qr{\([^\),]+};
+    while ($args =~ /$arg_expr,/) {
+	$args =~ s/($arg_expr),/$1#/g;
     }
 
     foreach my $arg (split($splitter, $args)) {
@@ -1707,7 +1712,7 @@ sub check_sections($$$$$) {
 		foreach $px (0 .. $#prms) {
 			$prm_clean = $prms[$px];
 			$prm_clean =~ s/\[.*\]//;
-			$prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i;
+			$prm_clean =~ s/$attribute//i;
 			# ignore array size in a parameter string;
 			# however, the original param string may contain
 			# spaces, e.g.:  addr[6 + 2]
@@ -1808,8 +1813,14 @@ sub dump_function($$) {
     # - parport_register_device (function pointer parameters)
     # - atomic_set (macro)
     # - pci_match_device, __copy_to_user (long return type)
-
-    if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
+    my $name = qr{[a-zA-Z0-9_~:]+};
+    my $prototype_end1 = qr{[^\(]*};
+    my $prototype_end2 = qr{[^\{]*};
+    my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};
+    my $type1 = qr{[\w\s]+};
+    my $type2 = qr{$type1\*+};
+
+    if ($define && $prototype =~ m/^()($name)\s+/) {
         # This is an object-like macro, it has no return type and no parameter
         # list.
         # Function-like macros are not allowed to have spaces between
@@ -1817,23 +1828,9 @@ sub dump_function($$) {
         $return_type = $1;
         $declaration_name = $2;
         $noret = 1;
-    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
+    } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
+	$prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
+	$prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/)  {
 	$return_type = $1;
 	$declaration_name = $2;
 	my $args = $3;
@@ -2110,12 +2107,12 @@ sub process_name($$) {
     } elsif (/$doc_decl/o) {
 	$identifier = $1;
 	my $is_kernel_comment = 0;
-	my $decl_start = qr{\s*\*};
+	my $decl_start = qr{$doc_com};
 	# test for pointer declaration type, foo * bar() - desc
 	my $fn_type = qr{\w+\s*\*\s*}; 
 	my $parenthesis = qr{\(\w*\)};
 	my $decl_end = qr{[-:].*};
-	if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
+	if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
 	    $identifier = $1;
 	}
 	if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
@@ -2125,8 +2122,8 @@ sub process_name($$) {
 	}
 	# Look for foo() or static void foo() - description; or misspelt
 	# identifier
-	elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
-	    /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
+	elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
+	    /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
 	    $identifier = $1;
 	    $decl_type = 'function';
 	    $identifier =~ s/^define\s+//;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-29  6:37           ` [RFC v3] " Aditya Srivastava
@ 2021-04-29 23:39             ` Jonathan Corbet
  2021-04-30  2:03               ` Joe Perches
  2021-05-01  9:30               ` Aditya Srivastava
  2021-05-01 15:43             ` [RFC v3] " Matthew Wilcox
  1 sibling, 2 replies; 18+ messages in thread
From: Jonathan Corbet @ 2021-04-29 23:39 UTC (permalink / raw)
  To: Aditya Srivastava
  Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees,
	linux-doc, linux-kernel

Aditya Srivastava <yashsri421@gmail.com> writes:

> There are some regex expressions in the kernel-doc script, which are used
> repeatedly in the script.
>
> Reduce such expressions into variables, which can be used everywhere.
>
> A quick manual check found that no errors and warnings were added/removed
> in this process.
>
> Suggested-by: Jonathan Corbet <corbet@lwn.net>
> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
> ---
> Changes in v3:
> - Remove variables for separate qualifiers in "sub dump_struct"
> - Make a common variable for all the qualifiers
> - Make $attribute global variable to use it at "sub check_sections" as well
>
> Changes in v2:
> - Rename $pointer_function to $function_pointer
> - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions
> - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end
>
>  scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------
>  1 file changed, 34 insertions(+), 37 deletions(-)

So this looks good but ... it adds a warning to the build:

/stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6]
  const * v4l2_ctrl_get_menu (u32 id)
  ------^

So it looks like something isn't being parsed quite identically?

Thanks,

jon

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-29 23:39             ` Jonathan Corbet
@ 2021-04-30  2:03               ` Joe Perches
  2021-05-01  9:30               ` Aditya Srivastava
  1 sibling, 0 replies; 18+ messages in thread
From: Joe Perches @ 2021-04-30  2:03 UTC (permalink / raw)
  To: Jonathan Corbet, Aditya Srivastava
  Cc: lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel

On Thu, 2021-04-29 at 17:39 -0600, Jonathan Corbet wrote:
> Aditya Srivastava <yashsri421@gmail.com> writes:
> 
> > There are some regex expressions in the kernel-doc script, which are used
> > repeatedly in the script.
> > 
> > Reduce such expressions into variables, which can be used everywhere.
> > 
> > A quick manual check found that no errors and warnings were added/removed
> > in this process.
> > 
> > Suggested-by: Jonathan Corbet <corbet@lwn.net>
> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
> > ---
> > Changes in v3:
> > - Remove variables for separate qualifiers in "sub dump_struct"
> > - Make a common variable for all the qualifiers
> > - Make $attribute global variable to use it at "sub check_sections" as well
> > 
> > Changes in v2:
> > - Rename $pointer_function to $function_pointer
> > - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions
> > - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end
> > 
> >  scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------
> >  1 file changed, 34 insertions(+), 37 deletions(-)
> 
> So this looks good but ... it adds a warning to the build:
> 
> /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6]
>   const * v4l2_ctrl_get_menu (u32 id)
>   ------^
> 
> So it looks like something isn't being parsed quite identically?

Perhaps a few of the regexes from checkpatch could be used or
maybe a linux specific perl module produced.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-29 23:39             ` Jonathan Corbet
  2021-04-30  2:03               ` Joe Perches
@ 2021-05-01  9:30               ` Aditya Srivastava
  2021-05-01 15:03                 ` Jonathan Corbet
  1 sibling, 1 reply; 18+ messages in thread
From: Aditya Srivastava @ 2021-05-01  9:30 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel

On 30/4/21 5:09 am, Jonathan Corbet wrote:
> Aditya Srivastava <yashsri421@gmail.com> writes:
> 
>> There are some regex expressions in the kernel-doc script, which are used
>> repeatedly in the script.
>>
>> Reduce such expressions into variables, which can be used everywhere.
>>
>> A quick manual check found that no errors and warnings were added/removed
>> in this process.
>>
>> Suggested-by: Jonathan Corbet <corbet@lwn.net>
>> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
>> ---
>> Changes in v3:
>> - Remove variables for separate qualifiers in "sub dump_struct"
>> - Make a common variable for all the qualifiers
>> - Make $attribute global variable to use it at "sub check_sections" as well
>>
>> Changes in v2:
>> - Rename $pointer_function to $function_pointer
>> - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions
>> - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end
>>
>>  scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------
>>  1 file changed, 34 insertions(+), 37 deletions(-)
> 
> So this looks good but ... it adds a warning to the build:
> 
> /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6]
>   const * v4l2_ctrl_get_menu (u32 id)
>   ------^
> 
> So it looks like something isn't being parsed quite identically?
>

Hi Jonathan!
I could not reproduce this error..
Can you suggest me how can I reproduce this error?
I ran kernel-doc -none {$file} over the tree.

Probably, this is not a kernel-doc error

Thanks
Aditya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-05-01  9:30               ` Aditya Srivastava
@ 2021-05-01 15:03                 ` Jonathan Corbet
  2021-05-14 14:42                   ` [RFC v4] " Aditya Srivastava
  0 siblings, 1 reply; 18+ messages in thread
From: Jonathan Corbet @ 2021-05-01 15:03 UTC (permalink / raw)
  To: Aditya Srivastava
  Cc: lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel

Aditya Srivastava <yashsri421@gmail.com> writes:

> On 30/4/21 5:09 am, Jonathan Corbet wrote:
>> Aditya Srivastava <yashsri421@gmail.com> writes:
>> 
>>> There are some regex expressions in the kernel-doc script, which are used
>>> repeatedly in the script.
>>>
>>> Reduce such expressions into variables, which can be used everywhere.
>>>
>>> A quick manual check found that no errors and warnings were added/removed
>>> in this process.
>>>
>>> Suggested-by: Jonathan Corbet <corbet@lwn.net>
>>> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
>>> ---
>>> Changes in v3:
>>> - Remove variables for separate qualifiers in "sub dump_struct"
>>> - Make a common variable for all the qualifiers
>>> - Make $attribute global variable to use it at "sub check_sections" as well
>>>
>>> Changes in v2:
>>> - Rename $pointer_function to $function_pointer
>>> - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions
>>> - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end
>>>
>>>  scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------
>>>  1 file changed, 34 insertions(+), 37 deletions(-)
>> 
>> So this looks good but ... it adds a warning to the build:
>> 
>> /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6]
>>   const * v4l2_ctrl_get_menu (u32 id)
>>   ------^
>> 
>> So it looks like something isn't being parsed quite identically?
>>
>
> Hi Jonathan!
> I could not reproduce this error..
> Can you suggest me how can I reproduce this error?
> I ran kernel-doc -none {$file} over the tree.
>
> Probably, this is not a kernel-doc error

It's a Sphinx error; run "make htmldocs" to see it.  That said, the
error itself should be enough to point at where the problem is.

Thanks,

jon

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-04-29  6:37           ` [RFC v3] " Aditya Srivastava
  2021-04-29 23:39             ` Jonathan Corbet
@ 2021-05-01 15:43             ` Matthew Wilcox
  2021-05-14 16:17               ` Aditya Srivastava
  1 sibling, 1 reply; 18+ messages in thread
From: Matthew Wilcox @ 2021-05-01 15:43 UTC (permalink / raw)
  To: Aditya Srivastava
  Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel

On Thu, Apr 29, 2021 at 12:07:29PM +0530, Aditya Srivastava wrote:
> +    my $name = qr{[a-zA-Z0-9_~:]+};
> +    my $prototype_end1 = qr{[^\(]*};
> +    my $prototype_end2 = qr{[^\{]*};
> +    my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};

Would this be better written as:

	my $prototype_end = qr{\([^\(\{]*\)}

And now that I look at the whole thing, doesn't this fail to parse
a function declared as:

int f(void (*g)(long));

(that is, f takes a single argument, which is a pointer to a function
which takes a long argument and returns void)

Still, I don't think this was parsed correctly before, so it's not an
argument against this patch, just something to take care of later.

> +    my $type1 = qr{[\w\s]+};
> +    my $type2 = qr{$type1\*+};
> +
> +    if ($define && $prototype =~ m/^()($name)\s+/) {
>          # This is an object-like macro, it has no return type and no parameter
>          # list.
>          # Function-like macros are not allowed to have spaces between
> @@ -1817,23 +1828,9 @@ sub dump_function($$) {
>          $return_type = $1;
>          $declaration_name = $2;
>          $noret = 1;
> -    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
> +    } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
> +	$prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
> +	$prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/)  {
>  	$return_type = $1;
>  	$declaration_name = $2;
>  	my $args = $3;

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [RFC v4] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-05-01 15:03                 ` Jonathan Corbet
@ 2021-05-14 14:42                   ` Aditya Srivastava
  2021-05-14 15:10                     ` Aditya Srivastava
  2021-05-17 17:49                     ` Jonathan Corbet
  0 siblings, 2 replies; 18+ messages in thread
From: Aditya Srivastava @ 2021-05-14 14:42 UTC (permalink / raw)
  To: corbet
  Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees,
	linux-doc, linux-kernel

There are some regex expressions in the kernel-doc script, which are used
repeatedly in the script.

Reduce such expressions into variables, which can be used everywhere.

A quick manual check found that no errors and warnings were added/removed
in this process.

Suggested-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
---
Changes in v4:
- Fix htmldocs warning at function parsing, involving repeated $type2 identifiers capture
- Re-tested against all files in kernel tree

Changes in v3:
- Remove variables for separate qualifiers in "sub dump_struct"
- Make a common variable for all the qualifiers
- Make $attribute global variable to use it at "sub check_sections" as well

Changes in v2:
- Rename $pointer_function to $function_pointer
- Combine elsif-block expressions at "sub dump_function" into lesser regex expressions
- Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end

 scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 4840e748fca8..7c4a6a507ac4 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
 my $doc_inline_end = '^\s*\*/\s*$';
 my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
 my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
+my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
+my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
 
 my %parameterdescs;
 my %parameterdesc_start_lines;
@@ -694,7 +696,7 @@ sub output_function_man(%) {
 	    $post = ");";
 	}
 	$type = $args{'parametertypes'}{$parameter};
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$function_pointer/) {
 	    # pointer-to-function
 	    print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
 	} else {
@@ -974,7 +976,7 @@ sub output_function_rst(%) {
 	$count++;
 	$type = $args{'parametertypes'}{$parameter};
 
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$function_pointer/) {
 	    # pointer-to-function
 	    print $1 . $parameter . ") (" . $2 . ")";
 	} else {
@@ -1211,7 +1213,9 @@ sub dump_struct($$) {
     my $members;
     my $type = qr{struct|union};
     # For capturing struct/union definition body, i.e. "{members*}qualifiers*"
-    my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
+    my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned};
+    my $definition_body = qr{\{(.*)\}\s*$qualifiers*};
+    my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
 
     if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
 	$decl_type = $1;
@@ -1235,27 +1239,27 @@ sub dump_struct($$) {
 	# strip comments:
 	$members =~ s/\/\*.*?\*\///gos;
 	# strip attributes
-	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
+	$members =~ s/\s*$attribute/ /gi;
 	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
 	$members =~ s/\s*__packed\s*/ /gos;
 	$members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
 	$members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
 	$members =~ s/\s*____cacheline_aligned/ /gos;
 
+	my $args = qr{([^,)]+)};
 	# replace DECLARE_BITMAP
 	$members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
-	$members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
+	$members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
 	# replace DECLARE_HASHTABLE
-	$members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
+	$members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
 	# replace DECLARE_KFIFO
-	$members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
+	$members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
 	# replace DECLARE_KFIFO_PTR
-	$members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
-
+	$members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
 	my $declaration = $members;
 
 	# Split nested struct/union elements as newer ones
-	while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
+	while ($members =~ m/$struct_members/) {
 		my $newmember;
 		my $maintype = $1;
 		my $ids = $4;
@@ -1315,7 +1319,7 @@ sub dump_struct($$) {
 				}
 			}
 		}
-		$members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
+		$members =~ s/$struct_members/$newmember/;
 	}
 
 	# Ignore other nested elements, like enums
@@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) {
     my $param;
 
     # temporarily replace commas inside function pointer definition
-    while ($args =~ /(\([^\),]+),/) {
-	$args =~ s/(\([^\),]+),/$1#/g;
+    my $arg_expr = qr{\([^\),]+};
+    while ($args =~ /$arg_expr,/) {
+	$args =~ s/($arg_expr),/$1#/g;
     }
 
     foreach my $arg (split($splitter, $args)) {
@@ -1707,7 +1712,7 @@ sub check_sections($$$$$) {
 		foreach $px (0 .. $#prms) {
 			$prm_clean = $prms[$px];
 			$prm_clean =~ s/\[.*\]//;
-			$prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i;
+			$prm_clean =~ s/$attribute//i;
 			# ignore array size in a parameter string;
 			# however, the original param string may contain
 			# spaces, e.g.:  addr[6 + 2]
@@ -1809,8 +1814,14 @@ sub dump_function($$) {
     # - parport_register_device (function pointer parameters)
     # - atomic_set (macro)
     # - pci_match_device, __copy_to_user (long return type)
-
-    if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
+    my $name = qr{[a-zA-Z0-9_~:]+};
+    my $prototype_end1 = qr{[^\(]*};
+    my $prototype_end2 = qr{[^\{]*};
+    my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};
+    my $type1 = qr{[\w\s]+};
+    my $type2 = qr{$type1\*+};
+
+    if ($define && $prototype =~ m/^()($name)\s+/) {
         # This is an object-like macro, it has no return type and no parameter
         # list.
         # Function-like macros are not allowed to have spaces between
@@ -1818,23 +1829,9 @@ sub dump_function($$) {
         $return_type = $1;
         $declaration_name = $2;
         $noret = 1;
-    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
+    } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
+	$prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
+	$prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/)  {
 	$return_type = $1;
 	$declaration_name = $2;
 	my $args = $3;
@@ -2111,12 +2108,12 @@ sub process_name($$) {
     } elsif (/$doc_decl/o) {
 	$identifier = $1;
 	my $is_kernel_comment = 0;
-	my $decl_start = qr{\s*\*};
+	my $decl_start = qr{$doc_com};
 	# test for pointer declaration type, foo * bar() - desc
 	my $fn_type = qr{\w+\s*\*\s*}; 
 	my $parenthesis = qr{\(\w*\)};
 	my $decl_end = qr{[-:].*};
-	if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
+	if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
 	    $identifier = $1;
 	}
 	if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
@@ -2126,8 +2123,8 @@ sub process_name($$) {
 	}
 	# Look for foo() or static void foo() - description; or misspelt
 	# identifier
-	elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
-	    /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
+	elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
+	    /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
 	    $identifier = $1;
 	    $decl_type = 'function';
 	    $identifier =~ s/^define\s+//;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC v4] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-05-14 14:42                   ` [RFC v4] " Aditya Srivastava
@ 2021-05-14 15:10                     ` Aditya Srivastava
  2021-05-17 17:49                     ` Jonathan Corbet
  1 sibling, 0 replies; 18+ messages in thread
From: Aditya Srivastava @ 2021-05-14 15:10 UTC (permalink / raw)
  To: corbet
  Cc: lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel

On 14/5/21 8:12 pm, Aditya Srivastava wrote:
> There are some regex expressions in the kernel-doc script, which are used
> repeatedly in the script.
> 
> Reduce such expressions into variables, which can be used everywhere.
> 
> A quick manual check found that no errors and warnings were added/removed
> in this process.
> 
> Suggested-by: Jonathan Corbet <corbet@lwn.net>
> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
> ---
> Changes in v4:
> - Fix htmldocs warning at function parsing, involving repeated $type2 identifiers capture
> - Re-tested against all files in kernel tree
> 
> Changes in v3:
> - Remove variables for separate qualifiers in "sub dump_struct"
> - Make a common variable for all the qualifiers
> - Make $attribute global variable to use it at "sub check_sections" as well
> 
> Changes in v2:
> - Rename $pointer_function to $function_pointer
> - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions
> - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end
> 
>  scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------
>  1 file changed, 34 insertions(+), 37 deletions(-)
> 
> diff --git a/scripts/kernel-doc b/scripts/kernel-doc
> index 4840e748fca8..7c4a6a507ac4 100755
> --- a/scripts/kernel-doc
> +++ b/scripts/kernel-doc
> @@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
>  my $doc_inline_end = '^\s*\*/\s*$';
>  my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
>  my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
> +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
> +my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
>  
>  my %parameterdescs;
>  my %parameterdesc_start_lines;
> @@ -694,7 +696,7 @@ sub output_function_man(%) {
>  	    $post = ");";
>  	}
>  	$type = $args{'parametertypes'}{$parameter};
> -	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
> +	if ($type =~ m/$function_pointer/) {
>  	    # pointer-to-function
>  	    print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
>  	} else {
> @@ -974,7 +976,7 @@ sub output_function_rst(%) {
>  	$count++;
>  	$type = $args{'parametertypes'}{$parameter};
>  
> -	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
> +	if ($type =~ m/$function_pointer/) {
>  	    # pointer-to-function
>  	    print $1 . $parameter . ") (" . $2 . ")";
>  	} else {
> @@ -1211,7 +1213,9 @@ sub dump_struct($$) {
>      my $members;
>      my $type = qr{struct|union};
>      # For capturing struct/union definition body, i.e. "{members*}qualifiers*"
> -    my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
> +    my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned};
> +    my $definition_body = qr{\{(.*)\}\s*$qualifiers*};
> +    my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
>  
>      if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
>  	$decl_type = $1;
> @@ -1235,27 +1239,27 @@ sub dump_struct($$) {
>  	# strip comments:
>  	$members =~ s/\/\*.*?\*\///gos;
>  	# strip attributes
> -	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
> +	$members =~ s/\s*$attribute/ /gi;
>  	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
>  	$members =~ s/\s*__packed\s*/ /gos;
>  	$members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
>  	$members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
>  	$members =~ s/\s*____cacheline_aligned/ /gos;
>  
> +	my $args = qr{([^,)]+)};
>  	# replace DECLARE_BITMAP
>  	$members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
> -	$members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
> +	$members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
>  	# replace DECLARE_HASHTABLE
> -	$members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
> +	$members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
>  	# replace DECLARE_KFIFO
> -	$members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
> +	$members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
>  	# replace DECLARE_KFIFO_PTR
> -	$members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
> -
> +	$members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
>  	my $declaration = $members;
>  
>  	# Split nested struct/union elements as newer ones
> -	while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
> +	while ($members =~ m/$struct_members/) {
>  		my $newmember;
>  		my $maintype = $1;
>  		my $ids = $4;
> @@ -1315,7 +1319,7 @@ sub dump_struct($$) {
>  				}
>  			}
>  		}
> -		$members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
> +		$members =~ s/$struct_members/$newmember/;
>  	}
>  
>  	# Ignore other nested elements, like enums
> @@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) {
>      my $param;
>  
>      # temporarily replace commas inside function pointer definition
> -    while ($args =~ /(\([^\),]+),/) {
> -	$args =~ s/(\([^\),]+),/$1#/g;
> +    my $arg_expr = qr{\([^\),]+};
> +    while ($args =~ /$arg_expr,/) {
> +	$args =~ s/($arg_expr),/$1#/g;
>      }
>  
>      foreach my $arg (split($splitter, $args)) {
> @@ -1707,7 +1712,7 @@ sub check_sections($$$$$) {
>  		foreach $px (0 .. $#prms) {
>  			$prm_clean = $prms[$px];
>  			$prm_clean =~ s/\[.*\]//;
> -			$prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i;
> +			$prm_clean =~ s/$attribute//i;
>  			# ignore array size in a parameter string;
>  			# however, the original param string may contain
>  			# spaces, e.g.:  addr[6 + 2]
> @@ -1809,8 +1814,14 @@ sub dump_function($$) {
>      # - parport_register_device (function pointer parameters)
>      # - atomic_set (macro)
>      # - pci_match_device, __copy_to_user (long return type)
> -
> -    if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
> +    my $name = qr{[a-zA-Z0-9_~:]+};
> +    my $prototype_end1 = qr{[^\(]*};
> +    my $prototype_end2 = qr{[^\{]*};
> +    my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};
> +    my $type1 = qr{[\w\s]+};
> +    my $type2 = qr{$type1\*+};
> +
> +    if ($define && $prototype =~ m/^()($name)\s+/) {
>          # This is an object-like macro, it has no return type and no parameter
>          # list.
>          # Function-like macros are not allowed to have spaces between
> @@ -1818,23 +1829,9 @@ sub dump_function($$) {
>          $return_type = $1;
>          $declaration_name = $2;
>          $noret = 1;
> -    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
> -	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
> -	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
> +    } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
> +	$prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
> +	$prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/)  {
>  	$return_type = $1;
>  	$declaration_name = $2;
>  	my $args = $3;
> @@ -2111,12 +2108,12 @@ sub process_name($$) {
>      } elsif (/$doc_decl/o) {
>  	$identifier = $1;
>  	my $is_kernel_comment = 0;
> -	my $decl_start = qr{\s*\*};
> +	my $decl_start = qr{$doc_com};
>  	# test for pointer declaration type, foo * bar() - desc
>  	my $fn_type = qr{\w+\s*\*\s*}; 
>  	my $parenthesis = qr{\(\w*\)};
>  	my $decl_end = qr{[-:].*};
> -	if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
> +	if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
>  	    $identifier = $1;
>  	}
>  	if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
> @@ -2126,8 +2123,8 @@ sub process_name($$) {
>  	}
>  	# Look for foo() or static void foo() - description; or misspelt
>  	# identifier
> -	elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
> -	    /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
> +	elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
> +	    /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
>  	    $identifier = $1;
>  	    $decl_type = 'function';
>  	    $identifier =~ s/^define\s+//;
> 

Hi Jonathan!
The warning you mentioned was not showing to me on running "make
htmldocs", for some reason.. As a result, I haven't been able to test
the patch for this warning.. However, I understood the reason for the
error.
It was in this line:
> +	$prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/)  {

Here, $1 was taking only the last captured value, instead of all the
occurrences, as was desired by me.

Just for reference, these were the warnings which I was getting:
https://github.com/AdityaSrivast/kernel-tasks/blob/master/random/kernel-doc/htmldocs_msgs

Thanks
Aditya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-05-01 15:43             ` [RFC v3] " Matthew Wilcox
@ 2021-05-14 16:17               ` Aditya Srivastava
  0 siblings, 0 replies; 18+ messages in thread
From: Aditya Srivastava @ 2021-05-14 16:17 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel

On 1/5/21 9:13 pm, Matthew Wilcox wrote:
> On Thu, Apr 29, 2021 at 12:07:29PM +0530, Aditya Srivastava wrote:
>> +    my $name = qr{[a-zA-Z0-9_~:]+};
>> +    my $prototype_end1 = qr{[^\(]*};
>> +    my $prototype_end2 = qr{[^\{]*};
>> +    my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};
> 
> Would this be better written as:
> 
> 	my $prototype_end = qr{\([^\(\{]*\)}
> 

Hi Matthew
I have actually tried this earlier, but it does not work as expected,
probably because of greedy matching. I have produced the list of
warning differences before and after over the files, when using this
regex:
https://github.com/AdityaSrivast/kernel-tasks/blob/master/random/kernel-doc/diff_on_alt_protend


> And now that I look at the whole thing, doesn't this fail to parse
> a function declared as:
> 
> int f(void (*g)(long));
> 
> (that is, f takes a single argument, which is a pointer to a function
> which takes a long argument and returns void)
> 

I think this will match against:
$prototype =~ m/^($type1)\s+($name)\s*$prototype_end/

Thanks
Aditya



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC v4] scripts: kernel-doc: reduce repeated regex expressions into variables
  2021-05-14 14:42                   ` [RFC v4] " Aditya Srivastava
  2021-05-14 15:10                     ` Aditya Srivastava
@ 2021-05-17 17:49                     ` Jonathan Corbet
  1 sibling, 0 replies; 18+ messages in thread
From: Jonathan Corbet @ 2021-05-17 17:49 UTC (permalink / raw)
  To: Aditya Srivastava
  Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees,
	linux-doc, linux-kernel

Aditya Srivastava <yashsri421@gmail.com> writes:

> There are some regex expressions in the kernel-doc script, which are used
> repeatedly in the script.
>
> Reduce such expressions into variables, which can be used everywhere.
>
> A quick manual check found that no errors and warnings were added/removed
> in this process.
>
> Suggested-by: Jonathan Corbet <corbet@lwn.net>
> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
> ---
> Changes in v4:
> - Fix htmldocs warning at function parsing, involving repeated $type2 identifiers capture
> - Re-tested against all files in kernel tree

Applied, thanks for stickint with this.

jon

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-05-17 17:49 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-22 19:18 [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables Aditya Srivastava
     [not found] ` <CAKXUXMx9q57cWXkcezKKo-uuh21Sd-Si9M9KydzFEMQ0ELYEng@mail.gmail.com>
2021-04-23 12:20   ` Aditya Srivastava
2021-04-23 13:21 ` Matthew Wilcox
2021-04-24 11:57   ` Aditya Srivastava
2021-04-24 12:47     ` [RFC v2] " Aditya Srivastava
2021-04-27 15:55       ` Jonathan Corbet
2021-04-27 16:56         ` Matthew Wilcox
2021-04-29  6:37           ` [RFC v3] " Aditya Srivastava
2021-04-29 23:39             ` Jonathan Corbet
2021-04-30  2:03               ` Joe Perches
2021-05-01  9:30               ` Aditya Srivastava
2021-05-01 15:03                 ` Jonathan Corbet
2021-05-14 14:42                   ` [RFC v4] " Aditya Srivastava
2021-05-14 15:10                     ` Aditya Srivastava
2021-05-17 17:49                     ` Jonathan Corbet
2021-05-01 15:43             ` [RFC v3] " Matthew Wilcox
2021-05-14 16:17               ` Aditya Srivastava
2021-04-26 17:31     ` [RFC] " Matthew Wilcox

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).