* [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-22 19:18 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-22 19:18 UTC (permalink / raw) To: corbet Cc: yashsri421, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel There are some regex expressions in the kernel-doc script, which are used repeatedly in the script. Reduce such expressions into variables, which can be used everywhere. A quick manual check found that no errors and warnings were added/removed in this process. Suggested-by: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> --- scripts/kernel-doc | 89 ++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 2a85d34fdcd0..579c9fdd275f 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -406,6 +406,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; my %parameterdescs; my %parameterdesc_start_lines; @@ -694,7 +695,7 @@ sub output_function_man(%) { $post = ");"; } $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$pointer_function/) { # pointer-to-function print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; } else { @@ -974,7 +975,7 @@ sub output_function_rst(%) { $count++; $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$pointer_function/) { # pointer-to-function print $1 . $parameter . ") (" . $2 . ")"; } else { @@ -1210,8 +1211,14 @@ sub dump_struct($$) { my $decl_type; my $members; my $type = qr{struct|union}; + my $packed = qr{__packed}; + my $aligned = qr{__aligned}; + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; + my $cacheline_aligned = qr{____cacheline_aligned}; + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { $decl_type = $1; @@ -1235,27 +1242,27 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; # strip attributes - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; - $members =~ s/\s*__packed\s*/ /gos; + $members =~ s/\s*$attribute/ /gi; + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; + $members =~ s/\s*$packed\s*/ /gos; $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; - $members =~ s/\s*____cacheline_aligned/ /gos; + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; + $members =~ s/\s*$cacheline_aligned/ /gos; + my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; - + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; my $declaration = $members; # Split nested struct/union elements as newer ones - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { + while ($members =~ m/$struct_members/) { my $newmember; my $maintype = $1; my $ids = $4; @@ -1315,7 +1322,7 @@ sub dump_struct($$) { } } } - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; + $members =~ s/$struct_members/$newmember/; } # Ignore other nested elements, like enums @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) { my $param; # temporarily replace commas inside function pointer definition - while ($args =~ /(\([^\),]+),/) { - $args =~ s/(\([^\),]+),/$1#/g; + my $arg_expr = qr{\([^\),]+}; + while ($args =~ /$arg_expr,/) { + $args =~ s/($arg_expr),/$1#/g; } foreach my $arg (split($splitter, $args)) { @@ -1808,8 +1816,11 @@ sub dump_function($$) { # - parport_register_device (function pointer parameters) # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) + my $name = qr{[a-zA-Z0-9_~:]+}; + my $prototype_end1 = qr{\(([^\(]*)\)}; + my $prototype_end2 = qr{\(([^\{]*)\)}; - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + if ($define && $prototype =~ m/^()($name)\s+/) { # This is an object-like macro, it has no return type and no parameter # list. # Function-like macros are not allowed to have spaces between @@ -1817,23 +1828,23 @@ sub dump_function($$) { $return_type = $1; $declaration_name = $2; $noret = 1; - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || + $prototype =~ m/^()($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/) { $return_type = $1; $declaration_name = $2; my $args = $3; @@ -2110,12 +2121,12 @@ sub process_name($$) { } elsif (/$doc_decl/o) { $identifier = $1; my $is_kernel_comment = 0; - my $decl_start = qr{\s*\*}; + my $decl_start = qr{$doc_com}; # test for pointer declaration type, foo * bar() - desc my $fn_type = qr{\w+\s*\*\s*}; my $parenthesis = qr{\(\w*\)}; my $decl_end = qr{[-:].*}; - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { $identifier = $1; } if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { @@ -2125,8 +2136,8 @@ sub process_name($$) { } # Look for foo() or static void foo() - description; or misspelt # identifier - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { $identifier = $1; $decl_type = 'function'; $identifier =~ s/^define\s+//; -- 2.17.1 ^ permalink raw reply related [flat|nested] 37+ messages in thread
* [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-22 19:18 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-22 19:18 UTC (permalink / raw) To: corbet; +Cc: linux-doc, linux-kernel-mentees, linux-kernel, yashsri421 There are some regex expressions in the kernel-doc script, which are used repeatedly in the script. Reduce such expressions into variables, which can be used everywhere. A quick manual check found that no errors and warnings were added/removed in this process. Suggested-by: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> --- scripts/kernel-doc | 89 ++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 2a85d34fdcd0..579c9fdd275f 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -406,6 +406,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; my %parameterdescs; my %parameterdesc_start_lines; @@ -694,7 +695,7 @@ sub output_function_man(%) { $post = ");"; } $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$pointer_function/) { # pointer-to-function print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; } else { @@ -974,7 +975,7 @@ sub output_function_rst(%) { $count++; $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$pointer_function/) { # pointer-to-function print $1 . $parameter . ") (" . $2 . ")"; } else { @@ -1210,8 +1211,14 @@ sub dump_struct($$) { my $decl_type; my $members; my $type = qr{struct|union}; + my $packed = qr{__packed}; + my $aligned = qr{__aligned}; + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; + my $cacheline_aligned = qr{____cacheline_aligned}; + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { $decl_type = $1; @@ -1235,27 +1242,27 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; # strip attributes - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; - $members =~ s/\s*__packed\s*/ /gos; + $members =~ s/\s*$attribute/ /gi; + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; + $members =~ s/\s*$packed\s*/ /gos; $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; - $members =~ s/\s*____cacheline_aligned/ /gos; + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; + $members =~ s/\s*$cacheline_aligned/ /gos; + my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; - + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; my $declaration = $members; # Split nested struct/union elements as newer ones - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { + while ($members =~ m/$struct_members/) { my $newmember; my $maintype = $1; my $ids = $4; @@ -1315,7 +1322,7 @@ sub dump_struct($$) { } } } - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; + $members =~ s/$struct_members/$newmember/; } # Ignore other nested elements, like enums @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) { my $param; # temporarily replace commas inside function pointer definition - while ($args =~ /(\([^\),]+),/) { - $args =~ s/(\([^\),]+),/$1#/g; + my $arg_expr = qr{\([^\),]+}; + while ($args =~ /$arg_expr,/) { + $args =~ s/($arg_expr),/$1#/g; } foreach my $arg (split($splitter, $args)) { @@ -1808,8 +1816,11 @@ sub dump_function($$) { # - parport_register_device (function pointer parameters) # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) + my $name = qr{[a-zA-Z0-9_~:]+}; + my $prototype_end1 = qr{\(([^\(]*)\)}; + my $prototype_end2 = qr{\(([^\{]*)\)}; - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + if ($define && $prototype =~ m/^()($name)\s+/) { # This is an object-like macro, it has no return type and no parameter # list. # Function-like macros are not allowed to have spaces between @@ -1817,23 +1828,23 @@ sub dump_function($$) { $return_type = $1; $declaration_name = $2; $noret = 1; - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || + $prototype =~ m/^()($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/) { $return_type = $1; $declaration_name = $2; my $args = $3; @@ -2110,12 +2121,12 @@ sub process_name($$) { } elsif (/$doc_decl/o) { $identifier = $1; my $is_kernel_comment = 0; - my $decl_start = qr{\s*\*}; + my $decl_start = qr{$doc_com}; # test for pointer declaration type, foo * bar() - desc my $fn_type = qr{\w+\s*\*\s*}; my $parenthesis = qr{\(\w*\)}; my $decl_end = qr{[-:].*}; - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { $identifier = $1; } if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { @@ -2125,8 +2136,8 @@ sub process_name($$) { } # Look for foo() or static void foo() - description; or misspelt # identifier - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { $identifier = $1; $decl_type = 'function'; $identifier =~ s/^define\s+//; -- 2.17.1 _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-22 19:18 ` Aditya Srivastava (?) @ 2021-04-22 19:33 ` Lukas Bulwahn 2021-04-23 12:20 ` Aditya Srivastava -1 siblings, 1 reply; 37+ messages in thread From: Lukas Bulwahn @ 2021-04-22 19:33 UTC (permalink / raw) To: Aditya Srivastava Cc: open list:DOCUMENTATION, linux-kernel-mentees, Linux Kernel Mailing List, Jonathan Corbet [-- Attachment #1.1: Type: text/plain, Size: 10676 bytes --] Aditya Srivastava <yashsri421@gmail.com> schrieb am Do., 22. Apr. 2021, 21:18: > There are some regex expressions in the kernel-doc script, which are used > repeatedly in the script. > > Reduce such expressions into variables, which can be used everywhere. > > A quick manual check found that no errors and warnings were added/removed > in this process. > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> > --- > scripts/kernel-doc | 89 ++++++++++++++++++++++++++-------------------- > 1 file changed, 50 insertions(+), 39 deletions(-) > > diff --git a/scripts/kernel-doc b/scripts/kernel-doc > index 2a85d34fdcd0..579c9fdd275f 100755 > --- a/scripts/kernel-doc > +++ b/scripts/kernel-doc > @@ -406,6 +406,7 @@ my $doc_inline_sect = > '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; > my $doc_inline_end = '^\s*\*/\s*$'; > my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; > my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; > +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; > > my %parameterdescs; > my %parameterdesc_start_lines; > @@ -694,7 +695,7 @@ sub output_function_man(%) { > $post = ");"; > } > $type = $args{'parametertypes'}{$parameter}; > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$pointer_function/) { > # pointer-to-function > print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . > $post . "\"\n"; > } else { > @@ -974,7 +975,7 @@ sub output_function_rst(%) { > $count++; > $type = $args{'parametertypes'}{$parameter}; > > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$pointer_function/) { > # pointer-to-function > print $1 . $parameter . ") (" . $2 . ")"; > } else { > @@ -1210,8 +1211,14 @@ sub dump_struct($$) { > my $decl_type; > my $members; > my $type = qr{struct|union}; > + my $packed = qr{__packed}; > + my $aligned = qr{__aligned}; > + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; > + my $cacheline_aligned = qr{____cacheline_aligned}; > + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; > # For capturing struct/union definition body, i.e. > "{members*}qualifiers*" > - my $definition_body = > qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; > + my $definition_body = > qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; > + my $struct_members = > qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; > > if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { > $decl_type = $1; > @@ -1235,27 +1242,27 @@ sub dump_struct($$) { > # strip comments: > $members =~ s/\/\*.*?\*\///gos; > # strip attributes > - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; > - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; > - $members =~ s/\s*__packed\s*/ /gos; > + $members =~ s/\s*$attribute/ /gi; > + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; > + $members =~ s/\s*$packed\s*/ /gos; > $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; > - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; > - $members =~ s/\s*____cacheline_aligned/ /gos; > + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; > + $members =~ s/\s*$cacheline_aligned/ /gos; > > + my $args = qr{([^,)]+)}; > # replace DECLARE_BITMAP > $members =~ > s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, > __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; > - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned > long $1\[BITS_TO_LONGS($2)\]/gos; > + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long > $1\[BITS_TO_LONGS($2)\]/gos; > # replace DECLARE_HASHTABLE > - $members =~ > s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) > - 1)\]/gos; > + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long > $1\[1 << (($2) - 1)\]/gos; > # replace DECLARE_KFIFO > - $members =~ > s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; > + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 > \*$1/gos; > # replace DECLARE_KFIFO_PTR > - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 > \*$1/gos; > - > + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; > my $declaration = $members; > > # Split nested struct/union elements as newer ones > - while ($members =~ > m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { > + while ($members =~ m/$struct_members/) { > my $newmember; > my $maintype = $1; > my $ids = $4; > @@ -1315,7 +1322,7 @@ sub dump_struct($$) { > } > } > } > - $members =~ > s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; > + $members =~ s/$struct_members/$newmember/; > } > > # Ignore other nested elements, like enums > @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) { > my $param; > > # temporarily replace commas inside function pointer definition > - while ($args =~ /(\([^\),]+),/) { > - $args =~ s/(\([^\),]+),/$1#/g; > + my $arg_expr = qr{\([^\),]+}; > + while ($args =~ /$arg_expr,/) { > + $args =~ s/($arg_expr),/$1#/g; > } > > foreach my $arg (split($splitter, $args)) { > @@ -1808,8 +1816,11 @@ sub dump_function($$) { > # - parport_register_device (function pointer parameters) > # - atomic_set (macro) > # - pci_match_device, __copy_to_user (long return type) > + my $name = qr{[a-zA-Z0-9_~:]+}; > + my $prototype_end1 = qr{\(([^\(]*)\)}; > + my $prototype_end2 = qr{\(([^\{]*)\)}; > Why do you need end1 and end2 here? - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { > + if ($define && $prototype =~ m/^()($name)\s+/) { > # This is an object-like macro, it has no return type and no > parameter > # list. > # Function-like macros are not allowed to have spaces between > @@ -1817,23 +1828,23 @@ sub dump_function($$) { > $return_type = $1; > $declaration_name = $2; > $noret = 1; > - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ > m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ > m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ > m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ > m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ > m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ > m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ > m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ > m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ > m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ > m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^()($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ > m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ > m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ > m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ > m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/) { > $return_type = $1; > $declaration_name = $2; > my $args = $3; > @@ -2110,12 +2121,12 @@ sub process_name($$) { > } elsif (/$doc_decl/o) { > $identifier = $1; > my $is_kernel_comment = 0; > - my $decl_start = qr{\s*\*}; > + my $decl_start = qr{$doc_com}; > # test for pointer declaration type, foo * bar() - desc > my $fn_type = qr{\w+\s*\*\s*}; > my $parenthesis = qr{\(\w*\)}; > my $decl_end = qr{[-:].*}; > - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { > + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { > $identifier = $1; > } > if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { > @@ -2125,8 +2136,8 @@ sub process_name($$) { > } > # Look for foo() or static void foo() - description; or misspelt > # identifier > - elsif > (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || > - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { > + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ > || > + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { > $identifier = $1; > $decl_type = 'function'; > $identifier =~ s/^define\s+//; > -- > 2.17.1 > > [-- Attachment #1.2: Type: text/html, Size: 12745 bytes --] [-- Attachment #2: Type: text/plain, Size: 201 bytes --] _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-22 19:33 ` Lukas Bulwahn @ 2021-04-23 12:20 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-23 12:20 UTC (permalink / raw) To: Lukas Bulwahn Cc: Jonathan Corbet, linux-kernel-mentees, open list:DOCUMENTATION, Linux Kernel Mailing List On 23/4/21 1:03 am, Lukas Bulwahn wrote: > Aditya Srivastava <yashsri421@gmail.com> schrieb am Do., 22. Apr. 2021, > 21:18: > >> There are some regex expressions in the kernel-doc script, which are used >> repeatedly in the script. >> >> Reduce such expressions into variables, which can be used everywhere. >> >> A quick manual check found that no errors and warnings were added/removed >> in this process. >> >> Suggested-by: Jonathan Corbet <corbet@lwn.net> >> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> >> --- >> scripts/kernel-doc | 89 ++++++++++++++++++++++++++-------------------- >> 1 file changed, 50 insertions(+), 39 deletions(-) >> >> diff --git a/scripts/kernel-doc b/scripts/kernel-doc >> index 2a85d34fdcd0..579c9fdd275f 100755 >> --- a/scripts/kernel-doc >> +++ b/scripts/kernel-doc >> @@ -406,6 +406,7 @@ my $doc_inline_sect = >> '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; >> my $doc_inline_end = '^\s*\*/\s*$'; >> my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; >> my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; >> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; >> >> my %parameterdescs; >> my %parameterdesc_start_lines; >> @@ -694,7 +695,7 @@ sub output_function_man(%) { >> $post = ");"; >> } >> $type = $args{'parametertypes'}{$parameter}; >> - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { >> + if ($type =~ m/$pointer_function/) { >> # pointer-to-function >> print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . >> $post . "\"\n"; >> } else { >> @@ -974,7 +975,7 @@ sub output_function_rst(%) { >> $count++; >> $type = $args{'parametertypes'}{$parameter}; >> >> - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { >> + if ($type =~ m/$pointer_function/) { >> # pointer-to-function >> print $1 . $parameter . ") (" . $2 . ")"; >> } else { >> @@ -1210,8 +1211,14 @@ sub dump_struct($$) { >> my $decl_type; >> my $members; >> my $type = qr{struct|union}; >> + my $packed = qr{__packed}; >> + my $aligned = qr{__aligned}; >> + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; >> + my $cacheline_aligned = qr{____cacheline_aligned}; >> + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; >> # For capturing struct/union definition body, i.e. >> "{members*}qualifiers*" >> - my $definition_body = >> qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; >> + my $definition_body = >> qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; >> + my $struct_members = >> qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; >> >> if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { >> $decl_type = $1; >> @@ -1235,27 +1242,27 @@ sub dump_struct($$) { >> # strip comments: >> $members =~ s/\/\*.*?\*\///gos; >> # strip attributes >> - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; >> - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; >> - $members =~ s/\s*__packed\s*/ /gos; >> + $members =~ s/\s*$attribute/ /gi; >> + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; >> + $members =~ s/\s*$packed\s*/ /gos; >> $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; >> - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; >> - $members =~ s/\s*____cacheline_aligned/ /gos; >> + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; >> + $members =~ s/\s*$cacheline_aligned/ /gos; >> >> + my $args = qr{([^,)]+)}; >> # replace DECLARE_BITMAP >> $members =~ >> s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, >> __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; >> - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned >> long $1\[BITS_TO_LONGS($2)\]/gos; >> + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long >> $1\[BITS_TO_LONGS($2)\]/gos; >> # replace DECLARE_HASHTABLE >> - $members =~ >> s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) >> - 1)\]/gos; >> + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long >> $1\[1 << (($2) - 1)\]/gos; >> # replace DECLARE_KFIFO >> - $members =~ >> s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; >> + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 >> \*$1/gos; >> # replace DECLARE_KFIFO_PTR >> - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 >> \*$1/gos; >> - >> + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; >> my $declaration = $members; >> >> # Split nested struct/union elements as newer ones >> - while ($members =~ >> m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { >> + while ($members =~ m/$struct_members/) { >> my $newmember; >> my $maintype = $1; >> my $ids = $4; >> @@ -1315,7 +1322,7 @@ sub dump_struct($$) { >> } >> } >> } >> - $members =~ >> s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; >> + $members =~ s/$struct_members/$newmember/; >> } >> >> # Ignore other nested elements, like enums >> @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) { >> my $param; >> >> # temporarily replace commas inside function pointer definition >> - while ($args =~ /(\([^\),]+),/) { >> - $args =~ s/(\([^\),]+),/$1#/g; >> + my $arg_expr = qr{\([^\),]+}; >> + while ($args =~ /$arg_expr,/) { >> + $args =~ s/($arg_expr),/$1#/g; >> } >> >> foreach my $arg (split($splitter, $args)) { >> @@ -1808,8 +1816,11 @@ sub dump_function($$) { >> # - parport_register_device (function pointer parameters) >> # - atomic_set (macro) >> # - pci_match_device, __copy_to_user (long return type) >> + my $name = qr{[a-zA-Z0-9_~:]+}; >> + my $prototype_end1 = qr{\(([^\(]*)\)}; >> + my $prototype_end2 = qr{\(([^\{]*)\)}; >> > > Why do you need end1 and end2 here? > Thanks for pointing out, Lukas. I am looking into the possibility of combining these expressions, and testing against the files. Please let me know if there are any more improvements possible :) Thanks Aditya ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-23 12:20 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-23 12:20 UTC (permalink / raw) To: Lukas Bulwahn Cc: open list:DOCUMENTATION, linux-kernel-mentees, Linux Kernel Mailing List, Jonathan Corbet On 23/4/21 1:03 am, Lukas Bulwahn wrote: > Aditya Srivastava <yashsri421@gmail.com> schrieb am Do., 22. Apr. 2021, > 21:18: > >> There are some regex expressions in the kernel-doc script, which are used >> repeatedly in the script. >> >> Reduce such expressions into variables, which can be used everywhere. >> >> A quick manual check found that no errors and warnings were added/removed >> in this process. >> >> Suggested-by: Jonathan Corbet <corbet@lwn.net> >> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> >> --- >> scripts/kernel-doc | 89 ++++++++++++++++++++++++++-------------------- >> 1 file changed, 50 insertions(+), 39 deletions(-) >> >> diff --git a/scripts/kernel-doc b/scripts/kernel-doc >> index 2a85d34fdcd0..579c9fdd275f 100755 >> --- a/scripts/kernel-doc >> +++ b/scripts/kernel-doc >> @@ -406,6 +406,7 @@ my $doc_inline_sect = >> '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; >> my $doc_inline_end = '^\s*\*/\s*$'; >> my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; >> my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; >> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; >> >> my %parameterdescs; >> my %parameterdesc_start_lines; >> @@ -694,7 +695,7 @@ sub output_function_man(%) { >> $post = ");"; >> } >> $type = $args{'parametertypes'}{$parameter}; >> - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { >> + if ($type =~ m/$pointer_function/) { >> # pointer-to-function >> print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . >> $post . "\"\n"; >> } else { >> @@ -974,7 +975,7 @@ sub output_function_rst(%) { >> $count++; >> $type = $args{'parametertypes'}{$parameter}; >> >> - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { >> + if ($type =~ m/$pointer_function/) { >> # pointer-to-function >> print $1 . $parameter . ") (" . $2 . ")"; >> } else { >> @@ -1210,8 +1211,14 @@ sub dump_struct($$) { >> my $decl_type; >> my $members; >> my $type = qr{struct|union}; >> + my $packed = qr{__packed}; >> + my $aligned = qr{__aligned}; >> + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; >> + my $cacheline_aligned = qr{____cacheline_aligned}; >> + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; >> # For capturing struct/union definition body, i.e. >> "{members*}qualifiers*" >> - my $definition_body = >> qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; >> + my $definition_body = >> qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; >> + my $struct_members = >> qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; >> >> if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { >> $decl_type = $1; >> @@ -1235,27 +1242,27 @@ sub dump_struct($$) { >> # strip comments: >> $members =~ s/\/\*.*?\*\///gos; >> # strip attributes >> - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; >> - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; >> - $members =~ s/\s*__packed\s*/ /gos; >> + $members =~ s/\s*$attribute/ /gi; >> + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; >> + $members =~ s/\s*$packed\s*/ /gos; >> $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; >> - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; >> - $members =~ s/\s*____cacheline_aligned/ /gos; >> + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; >> + $members =~ s/\s*$cacheline_aligned/ /gos; >> >> + my $args = qr{([^,)]+)}; >> # replace DECLARE_BITMAP >> $members =~ >> s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, >> __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; >> - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned >> long $1\[BITS_TO_LONGS($2)\]/gos; >> + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long >> $1\[BITS_TO_LONGS($2)\]/gos; >> # replace DECLARE_HASHTABLE >> - $members =~ >> s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) >> - 1)\]/gos; >> + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long >> $1\[1 << (($2) - 1)\]/gos; >> # replace DECLARE_KFIFO >> - $members =~ >> s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; >> + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 >> \*$1/gos; >> # replace DECLARE_KFIFO_PTR >> - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 >> \*$1/gos; >> - >> + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; >> my $declaration = $members; >> >> # Split nested struct/union elements as newer ones >> - while ($members =~ >> m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { >> + while ($members =~ m/$struct_members/) { >> my $newmember; >> my $maintype = $1; >> my $ids = $4; >> @@ -1315,7 +1322,7 @@ sub dump_struct($$) { >> } >> } >> } >> - $members =~ >> s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; >> + $members =~ s/$struct_members/$newmember/; >> } >> >> # Ignore other nested elements, like enums >> @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) { >> my $param; >> >> # temporarily replace commas inside function pointer definition >> - while ($args =~ /(\([^\),]+),/) { >> - $args =~ s/(\([^\),]+),/$1#/g; >> + my $arg_expr = qr{\([^\),]+}; >> + while ($args =~ /$arg_expr,/) { >> + $args =~ s/($arg_expr),/$1#/g; >> } >> >> foreach my $arg (split($splitter, $args)) { >> @@ -1808,8 +1816,11 @@ sub dump_function($$) { >> # - parport_register_device (function pointer parameters) >> # - atomic_set (macro) >> # - pci_match_device, __copy_to_user (long return type) >> + my $name = qr{[a-zA-Z0-9_~:]+}; >> + my $prototype_end1 = qr{\(([^\(]*)\)}; >> + my $prototype_end2 = qr{\(([^\{]*)\)}; >> > > Why do you need end1 and end2 here? > Thanks for pointing out, Lukas. I am looking into the possibility of combining these expressions, and testing against the files. Please let me know if there are any more improvements possible :) Thanks Aditya _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-22 19:18 ` Aditya Srivastava @ 2021-04-23 13:21 ` Matthew Wilcox -1 siblings, 0 replies; 37+ messages in thread From: Matthew Wilcox @ 2021-04-23 13:21 UTC (permalink / raw) To: Aditya Srivastava Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote: > +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; Is that a pointer-to-function? Or as people who write C usually call it, a function pointer? Wouldn't it be better to call it $function_pointer? > @@ -1210,8 +1211,14 @@ sub dump_struct($$) { > my $decl_type; > my $members; > my $type = qr{struct|union}; > + my $packed = qr{__packed}; > + my $aligned = qr{__aligned}; > + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; > + my $cacheline_aligned = qr{____cacheline_aligned}; I don't think those four definitions actually simplify anything. > + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; ... whereas this one definitely does. > - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; > - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; > - $members =~ s/\s*__packed\s*/ /gos; > + $members =~ s/\s*$attribute/ /gi; > + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; Maybe put the \s*\([^;]*\) into $aligned? Then it becomes a useful abstraction. > - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^()($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/) { This is probably the best patch I've seen so far this year. Now, can we go further? For example: $prototype_end = $prototype_end1|$prototype_end2 That would let us cut the number of lines here in half. Can we create a definition for a variable number of \w and \s and '*' in the return type? In fact, can we define a regex that matches a type? So this would become: > + } elsif ($prototype =~ m/^($type)\s*($name)\s*$prototype_end/) { ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-23 13:21 ` Matthew Wilcox 0 siblings, 0 replies; 37+ messages in thread From: Matthew Wilcox @ 2021-04-23 13:21 UTC (permalink / raw) To: Aditya Srivastava; +Cc: linux-doc, linux-kernel-mentees, linux-kernel, corbet On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote: > +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; Is that a pointer-to-function? Or as people who write C usually call it, a function pointer? Wouldn't it be better to call it $function_pointer? > @@ -1210,8 +1211,14 @@ sub dump_struct($$) { > my $decl_type; > my $members; > my $type = qr{struct|union}; > + my $packed = qr{__packed}; > + my $aligned = qr{__aligned}; > + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; > + my $cacheline_aligned = qr{____cacheline_aligned}; I don't think those four definitions actually simplify anything. > + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; ... whereas this one definitely does. > - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; > - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; > - $members =~ s/\s*__packed\s*/ /gos; > + $members =~ s/\s*$attribute/ /gi; > + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; Maybe put the \s*\([^;]*\) into $aligned? Then it becomes a useful abstraction. > - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^()($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/) { This is probably the best patch I've seen so far this year. Now, can we go further? For example: $prototype_end = $prototype_end1|$prototype_end2 That would let us cut the number of lines here in half. Can we create a definition for a variable number of \w and \s and '*' in the return type? In fact, can we define a regex that matches a type? So this would become: > + } elsif ($prototype =~ m/^($type)\s*($name)\s*$prototype_end/) { _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-23 13:21 ` Matthew Wilcox @ 2021-04-24 11:57 ` Aditya Srivastava -1 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-24 11:57 UTC (permalink / raw) To: Matthew Wilcox Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel On 23/4/21 6:51 pm, Matthew Wilcox wrote: > On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote: >> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; > > Is that a pointer-to-function? Or as people who write C usually call it, > a function pointer? Wouldn't it be better to call it $function_pointer? > Will do it. >> @@ -1210,8 +1211,14 @@ sub dump_struct($$) { >> my $decl_type; >> my $members; >> my $type = qr{struct|union}; >> + my $packed = qr{__packed}; >> + my $aligned = qr{__aligned}; >> + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; >> + my $cacheline_aligned = qr{____cacheline_aligned}; > > I don't think those four definitions actually simplify anything. > >> + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; > > ... whereas this one definitely does. > >> - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; >> - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; >> - $members =~ s/\s*__packed\s*/ /gos; >> + $members =~ s/\s*$attribute/ /gi; >> + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; > > Maybe put the \s*\([^;]*\) into $aligned? Then it becomes a useful > abstraction. Actually, I had made these variables as they were repeated here and at - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; So, defining them at a place might help. What do you think? > >> - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { >> + } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || >> + $prototype =~ m/^()($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/) { > > This is probably the best patch I've seen so far this year. > > Now, can we go further? For example: > $prototype_end = $prototype_end1|$prototype_end2 > That would let us cut the number of lines here in half. > > Can we create a definition for a variable number of \w and \s and '*' > in the return type? In fact, can we define a regex that matches a type? > So this would become: > >> + } elsif ($prototype =~ m/^($type)\s*($name)\s*$prototype_end/) { > I have been able to reduce these expressions furthermore. Will send a v2 in few.. Thanks Aditya ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-24 11:57 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-24 11:57 UTC (permalink / raw) To: Matthew Wilcox; +Cc: linux-doc, linux-kernel-mentees, linux-kernel, corbet On 23/4/21 6:51 pm, Matthew Wilcox wrote: > On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote: >> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; > > Is that a pointer-to-function? Or as people who write C usually call it, > a function pointer? Wouldn't it be better to call it $function_pointer? > Will do it. >> @@ -1210,8 +1211,14 @@ sub dump_struct($$) { >> my $decl_type; >> my $members; >> my $type = qr{struct|union}; >> + my $packed = qr{__packed}; >> + my $aligned = qr{__aligned}; >> + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; >> + my $cacheline_aligned = qr{____cacheline_aligned}; > > I don't think those four definitions actually simplify anything. > >> + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; > > ... whereas this one definitely does. > >> - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; >> - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; >> - $members =~ s/\s*__packed\s*/ /gos; >> + $members =~ s/\s*$attribute/ /gi; >> + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; > > Maybe put the \s*\([^;]*\) into $aligned? Then it becomes a useful > abstraction. Actually, I had made these variables as they were repeated here and at - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; So, defining them at a place might help. What do you think? > >> - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || >> - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || >> - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { >> + } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || >> + $prototype =~ m/^()($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ || >> + $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/) { > > This is probably the best patch I've seen so far this year. > > Now, can we go further? For example: > $prototype_end = $prototype_end1|$prototype_end2 > That would let us cut the number of lines here in half. > > Can we create a definition for a variable number of \w and \s and '*' > in the return type? In fact, can we define a regex that matches a type? > So this would become: > >> + } elsif ($prototype =~ m/^($type)\s*($name)\s*$prototype_end/) { > I have been able to reduce these expressions furthermore. Will send a v2 in few.. Thanks Aditya _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* [RFC v2] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-24 11:57 ` Aditya Srivastava @ 2021-04-24 12:47 ` Aditya Srivastava -1 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-24 12:47 UTC (permalink / raw) To: corbet Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel There are some regex expressions in the kernel-doc script, which are used repeatedly in the script. Reduce such expressions into variables, which can be used everywhere. A quick manual check found that no errors and warnings were added/removed in this process. Suggested-by: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> --- Changes in v2: - Rename $pointer_function to $function_pointer - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end scripts/kernel-doc | 80 +++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 2a85d34fdcd0..fe7f51be44e0 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -406,6 +406,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; my %parameterdescs; my %parameterdesc_start_lines; @@ -694,7 +695,7 @@ sub output_function_man(%) { $post = ");"; } $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; } else { @@ -974,7 +975,7 @@ sub output_function_rst(%) { $count++; $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print $1 . $parameter . ") (" . $2 . ")"; } else { @@ -1210,8 +1211,14 @@ sub dump_struct($$) { my $decl_type; my $members; my $type = qr{struct|union}; + my $packed = qr{__packed}; + my $aligned = qr{__aligned}; + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; + my $cacheline_aligned = qr{____cacheline_aligned}; + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { $decl_type = $1; @@ -1235,27 +1242,27 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; # strip attributes - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; - $members =~ s/\s*__packed\s*/ /gos; + $members =~ s/\s*$attribute/ /gi; + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; + $members =~ s/\s*$packed\s*/ /gos; $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; - $members =~ s/\s*____cacheline_aligned/ /gos; + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; + $members =~ s/\s*$cacheline_aligned/ /gos; + my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; - + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; my $declaration = $members; # Split nested struct/union elements as newer ones - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { + while ($members =~ m/$struct_members/) { my $newmember; my $maintype = $1; my $ids = $4; @@ -1315,7 +1322,7 @@ sub dump_struct($$) { } } } - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; + $members =~ s/$struct_members/$newmember/; } # Ignore other nested elements, like enums @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) { my $param; # temporarily replace commas inside function pointer definition - while ($args =~ /(\([^\),]+),/) { - $args =~ s/(\([^\),]+),/$1#/g; + my $arg_expr = qr{\([^\),]+}; + while ($args =~ /$arg_expr,/) { + $args =~ s/($arg_expr),/$1#/g; } foreach my $arg (split($splitter, $args)) { @@ -1808,8 +1816,14 @@ sub dump_function($$) { # - parport_register_device (function pointer parameters) # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) - - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + my $name = qr{[a-zA-Z0-9_~:]+}; + my $prototype_end1 = qr{[^\(]*}; + my $prototype_end2 = qr{[^\{]*}; + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; + my $type1 = qr{[\w\s]+}; + my $type2 = qr{$type1\*+}; + + if ($define && $prototype =~ m/^()($name)\s+/) { # This is an object-like macro, it has no return type and no parameter # list. # Function-like macros are not allowed to have spaces between @@ -1817,23 +1831,9 @@ sub dump_function($$) { $return_type = $1; $declaration_name = $2; $noret = 1; - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { $return_type = $1; $declaration_name = $2; my $args = $3; @@ -2110,12 +2110,12 @@ sub process_name($$) { } elsif (/$doc_decl/o) { $identifier = $1; my $is_kernel_comment = 0; - my $decl_start = qr{\s*\*}; + my $decl_start = qr{$doc_com}; # test for pointer declaration type, foo * bar() - desc my $fn_type = qr{\w+\s*\*\s*}; my $parenthesis = qr{\(\w*\)}; my $decl_end = qr{[-:].*}; - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { $identifier = $1; } if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { @@ -2125,8 +2125,8 @@ sub process_name($$) { } # Look for foo() or static void foo() - description; or misspelt # identifier - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { $identifier = $1; $decl_type = 'function'; $identifier =~ s/^define\s+//; -- 2.17.1 ^ permalink raw reply related [flat|nested] 37+ messages in thread
* [RFC v2] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-24 12:47 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-24 12:47 UTC (permalink / raw) To: corbet; +Cc: linux-doc, yashsri421, linux-kernel, willy, linux-kernel-mentees There are some regex expressions in the kernel-doc script, which are used repeatedly in the script. Reduce such expressions into variables, which can be used everywhere. A quick manual check found that no errors and warnings were added/removed in this process. Suggested-by: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> --- Changes in v2: - Rename $pointer_function to $function_pointer - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end scripts/kernel-doc | 80 +++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 2a85d34fdcd0..fe7f51be44e0 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -406,6 +406,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; my %parameterdescs; my %parameterdesc_start_lines; @@ -694,7 +695,7 @@ sub output_function_man(%) { $post = ");"; } $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; } else { @@ -974,7 +975,7 @@ sub output_function_rst(%) { $count++; $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print $1 . $parameter . ") (" . $2 . ")"; } else { @@ -1210,8 +1211,14 @@ sub dump_struct($$) { my $decl_type; my $members; my $type = qr{struct|union}; + my $packed = qr{__packed}; + my $aligned = qr{__aligned}; + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; + my $cacheline_aligned = qr{____cacheline_aligned}; + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { $decl_type = $1; @@ -1235,27 +1242,27 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; # strip attributes - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; - $members =~ s/\s*__packed\s*/ /gos; + $members =~ s/\s*$attribute/ /gi; + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; + $members =~ s/\s*$packed\s*/ /gos; $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; - $members =~ s/\s*____cacheline_aligned/ /gos; + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; + $members =~ s/\s*$cacheline_aligned/ /gos; + my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; - + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; my $declaration = $members; # Split nested struct/union elements as newer ones - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { + while ($members =~ m/$struct_members/) { my $newmember; my $maintype = $1; my $ids = $4; @@ -1315,7 +1322,7 @@ sub dump_struct($$) { } } } - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; + $members =~ s/$struct_members/$newmember/; } # Ignore other nested elements, like enums @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) { my $param; # temporarily replace commas inside function pointer definition - while ($args =~ /(\([^\),]+),/) { - $args =~ s/(\([^\),]+),/$1#/g; + my $arg_expr = qr{\([^\),]+}; + while ($args =~ /$arg_expr,/) { + $args =~ s/($arg_expr),/$1#/g; } foreach my $arg (split($splitter, $args)) { @@ -1808,8 +1816,14 @@ sub dump_function($$) { # - parport_register_device (function pointer parameters) # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) - - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + my $name = qr{[a-zA-Z0-9_~:]+}; + my $prototype_end1 = qr{[^\(]*}; + my $prototype_end2 = qr{[^\{]*}; + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; + my $type1 = qr{[\w\s]+}; + my $type2 = qr{$type1\*+}; + + if ($define && $prototype =~ m/^()($name)\s+/) { # This is an object-like macro, it has no return type and no parameter # list. # Function-like macros are not allowed to have spaces between @@ -1817,23 +1831,9 @@ sub dump_function($$) { $return_type = $1; $declaration_name = $2; $noret = 1; - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { $return_type = $1; $declaration_name = $2; my $args = $3; @@ -2110,12 +2110,12 @@ sub process_name($$) { } elsif (/$doc_decl/o) { $identifier = $1; my $is_kernel_comment = 0; - my $decl_start = qr{\s*\*}; + my $decl_start = qr{$doc_com}; # test for pointer declaration type, foo * bar() - desc my $fn_type = qr{\w+\s*\*\s*}; my $parenthesis = qr{\(\w*\)}; my $decl_end = qr{[-:].*}; - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { $identifier = $1; } if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { @@ -2125,8 +2125,8 @@ sub process_name($$) { } # Look for foo() or static void foo() - description; or misspelt # identifier - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { $identifier = $1; $decl_type = 'function'; $identifier =~ s/^define\s+//; -- 2.17.1 _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [RFC v2] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-24 12:47 ` Aditya Srivastava @ 2021-04-27 15:55 ` Jonathan Corbet -1 siblings, 0 replies; 37+ messages in thread From: Jonathan Corbet @ 2021-04-27 15:55 UTC (permalink / raw) To: Aditya Srivastava Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel Aditya Srivastava <yashsri421@gmail.com> writes: > There are some regex expressions in the kernel-doc script, which are used > repeatedly in the script. > > Reduce such expressions into variables, which can be used everywhere. > > A quick manual check found that no errors and warnings were added/removed > in this process. > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> So my comments pretty much mirror Willy's ... there is a lot to really like here mixed with some stuff that's not as helpful. > Changes in v2: > - Rename $pointer_function to $function_pointer > - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions > - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end > > scripts/kernel-doc | 80 +++++++++++++++++++++++----------------------- > 1 file changed, 40 insertions(+), 40 deletions(-) > > diff --git a/scripts/kernel-doc b/scripts/kernel-doc > index 2a85d34fdcd0..fe7f51be44e0 100755 > --- a/scripts/kernel-doc > +++ b/scripts/kernel-doc > @@ -406,6 +406,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; > my $doc_inline_end = '^\s*\*/\s*$'; > my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; > my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; > +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; > > my %parameterdescs; > my %parameterdesc_start_lines; > @@ -694,7 +695,7 @@ sub output_function_man(%) { > $post = ");"; > } > $type = $args{'parametertypes'}{$parameter}; > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$function_pointer/) { This kind of change is wonderful. The more that we can coalesce this regex mess and reuse it throughout the script, the more maintainable it will be. > # pointer-to-function > print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; > } else { > @@ -974,7 +975,7 @@ sub output_function_rst(%) { > $count++; > $type = $args{'parametertypes'}{$parameter}; > > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$function_pointer/) { > # pointer-to-function > print $1 . $parameter . ") (" . $2 . ")"; > } else { > @@ -1210,8 +1211,14 @@ sub dump_struct($$) { > my $decl_type; > my $members; > my $type = qr{struct|union}; > + my $packed = qr{__packed}; > + my $aligned = qr{__aligned}; > + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; > + my $cacheline_aligned = qr{____cacheline_aligned}; > + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; > # For capturing struct/union definition body, i.e. "{members*}qualifiers*" > - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; > + my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; > + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; This is a bit less helpful - variables like $aligned don't do much for us. That can be seen just below: > > if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { > $decl_type = $1; > @@ -1235,27 +1242,27 @@ sub dump_struct($$) { > # strip comments: > $members =~ s/\/\*.*?\*\///gos; > # strip attributes > - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; > - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; > - $members =~ s/\s*__packed\s*/ /gos; > + $members =~ s/\s*$attribute/ /gi; > + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; > + $members =~ s/\s*$packed\s*/ /gos; The use of the variables here doesn't really make those expressions more readable. > $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; > - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; > - $members =~ s/\s*____cacheline_aligned/ /gos; > + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; > + $members =~ s/\s*$cacheline_aligned/ /gos; > > + my $args = qr{([^,)]+)}; > # replace DECLARE_BITMAP > $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; > - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; Here too ... this is the kind of stuff that makes me glad that Colorado is a legal-weed state, and the new version, while better, doesn't change that basic fact. > # replace DECLARE_HASHTABLE > - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; > + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; > # replace DECLARE_KFIFO > - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; > + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; > # replace DECLARE_KFIFO_PTR > - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; > - > + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; > my $declaration = $members; > > # Split nested struct/union elements as newer ones > - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { > + while ($members =~ m/$struct_members/) { ...but this is great. > my $newmember; > my $maintype = $1; > my $ids = $4; > @@ -1315,7 +1322,7 @@ sub dump_struct($$) { > } > } > } > - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; > + $members =~ s/$struct_members/$newmember/; > } > > # Ignore other nested elements, like enums > @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) { > my $param; > > # temporarily replace commas inside function pointer definition > - while ($args =~ /(\([^\),]+),/) { > - $args =~ s/(\([^\),]+),/$1#/g; > + my $arg_expr = qr{\([^\),]+}; > + while ($args =~ /$arg_expr,/) { > + $args =~ s/($arg_expr),/$1#/g; > } > > foreach my $arg (split($splitter, $args)) { > @@ -1808,8 +1816,14 @@ sub dump_function($$) { > # - parport_register_device (function pointer parameters) > # - atomic_set (macro) > # - pci_match_device, __copy_to_user (long return type) > - > - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { > + my $name = qr{[a-zA-Z0-9_~:]+}; > + my $prototype_end1 = qr{[^\(]*}; > + my $prototype_end2 = qr{[^\{]*}; > + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; > + my $type1 = qr{[\w\s]+}; > + my $type2 = qr{$type1\*+}; > + > + if ($define && $prototype =~ m/^()($name)\s+/) { > # This is an object-like macro, it has no return type and no parameter > # list. > # Function-like macros are not allowed to have spaces between > @@ -1817,23 +1831,9 @@ sub dump_function($$) { > $return_type = $1; > $declaration_name = $2; > $noret = 1; > - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || > + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || > + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { It's hard not to be happy about something like that - this is a definite step toward clarifying this code. I think I'll stop here; hopefully I've gotten my point across. I really like where this work is heading; focusing just a bit more on pulling the regexes together and making the whole thing more readable would be wonderful. Thanks, jon ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v2] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-27 15:55 ` Jonathan Corbet 0 siblings, 0 replies; 37+ messages in thread From: Jonathan Corbet @ 2021-04-27 15:55 UTC (permalink / raw) To: Aditya Srivastava Cc: linux-doc, yashsri421, linux-kernel, willy, linux-kernel-mentees Aditya Srivastava <yashsri421@gmail.com> writes: > There are some regex expressions in the kernel-doc script, which are used > repeatedly in the script. > > Reduce such expressions into variables, which can be used everywhere. > > A quick manual check found that no errors and warnings were added/removed > in this process. > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> So my comments pretty much mirror Willy's ... there is a lot to really like here mixed with some stuff that's not as helpful. > Changes in v2: > - Rename $pointer_function to $function_pointer > - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions > - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end > > scripts/kernel-doc | 80 +++++++++++++++++++++++----------------------- > 1 file changed, 40 insertions(+), 40 deletions(-) > > diff --git a/scripts/kernel-doc b/scripts/kernel-doc > index 2a85d34fdcd0..fe7f51be44e0 100755 > --- a/scripts/kernel-doc > +++ b/scripts/kernel-doc > @@ -406,6 +406,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; > my $doc_inline_end = '^\s*\*/\s*$'; > my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; > my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; > +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; > > my %parameterdescs; > my %parameterdesc_start_lines; > @@ -694,7 +695,7 @@ sub output_function_man(%) { > $post = ");"; > } > $type = $args{'parametertypes'}{$parameter}; > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$function_pointer/) { This kind of change is wonderful. The more that we can coalesce this regex mess and reuse it throughout the script, the more maintainable it will be. > # pointer-to-function > print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; > } else { > @@ -974,7 +975,7 @@ sub output_function_rst(%) { > $count++; > $type = $args{'parametertypes'}{$parameter}; > > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$function_pointer/) { > # pointer-to-function > print $1 . $parameter . ") (" . $2 . ")"; > } else { > @@ -1210,8 +1211,14 @@ sub dump_struct($$) { > my $decl_type; > my $members; > my $type = qr{struct|union}; > + my $packed = qr{__packed}; > + my $aligned = qr{__aligned}; > + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; > + my $cacheline_aligned = qr{____cacheline_aligned}; > + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; > # For capturing struct/union definition body, i.e. "{members*}qualifiers*" > - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; > + my $definition_body = qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; > + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; This is a bit less helpful - variables like $aligned don't do much for us. That can be seen just below: > > if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { > $decl_type = $1; > @@ -1235,27 +1242,27 @@ sub dump_struct($$) { > # strip comments: > $members =~ s/\/\*.*?\*\///gos; > # strip attributes > - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; > - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; > - $members =~ s/\s*__packed\s*/ /gos; > + $members =~ s/\s*$attribute/ /gi; > + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; > + $members =~ s/\s*$packed\s*/ /gos; The use of the variables here doesn't really make those expressions more readable. > $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; > - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; > - $members =~ s/\s*____cacheline_aligned/ /gos; > + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; > + $members =~ s/\s*$cacheline_aligned/ /gos; > > + my $args = qr{([^,)]+)}; > # replace DECLARE_BITMAP > $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; > - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; Here too ... this is the kind of stuff that makes me glad that Colorado is a legal-weed state, and the new version, while better, doesn't change that basic fact. > # replace DECLARE_HASHTABLE > - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; > + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; > # replace DECLARE_KFIFO > - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; > + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; > # replace DECLARE_KFIFO_PTR > - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; > - > + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; > my $declaration = $members; > > # Split nested struct/union elements as newer ones > - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { > + while ($members =~ m/$struct_members/) { ...but this is great. > my $newmember; > my $maintype = $1; > my $ids = $4; > @@ -1315,7 +1322,7 @@ sub dump_struct($$) { > } > } > } > - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; > + $members =~ s/$struct_members/$newmember/; > } > > # Ignore other nested elements, like enums > @@ -1555,8 +1562,9 @@ sub create_parameterlist($$$$) { > my $param; > > # temporarily replace commas inside function pointer definition > - while ($args =~ /(\([^\),]+),/) { > - $args =~ s/(\([^\),]+),/$1#/g; > + my $arg_expr = qr{\([^\),]+}; > + while ($args =~ /$arg_expr,/) { > + $args =~ s/($arg_expr),/$1#/g; > } > > foreach my $arg (split($splitter, $args)) { > @@ -1808,8 +1816,14 @@ sub dump_function($$) { > # - parport_register_device (function pointer parameters) > # - atomic_set (macro) > # - pci_match_device, __copy_to_user (long return type) > - > - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { > + my $name = qr{[a-zA-Z0-9_~:]+}; > + my $prototype_end1 = qr{[^\(]*}; > + my $prototype_end2 = qr{[^\{]*}; > + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; > + my $type1 = qr{[\w\s]+}; > + my $type2 = qr{$type1\*+}; > + > + if ($define && $prototype =~ m/^()($name)\s+/) { > # This is an object-like macro, it has no return type and no parameter > # list. > # Function-like macros are not allowed to have spaces between > @@ -1817,23 +1831,9 @@ sub dump_function($$) { > $return_type = $1; > $declaration_name = $2; > $noret = 1; > - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || > + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || > + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { It's hard not to be happy about something like that - this is a definite step toward clarifying this code. I think I'll stop here; hopefully I've gotten my point across. I really like where this work is heading; focusing just a bit more on pulling the regexes together and making the whole thing more readable would be wonderful. Thanks, jon _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v2] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-27 15:55 ` Jonathan Corbet @ 2021-04-27 16:56 ` Matthew Wilcox -1 siblings, 0 replies; 37+ messages in thread From: Matthew Wilcox @ 2021-04-27 16:56 UTC (permalink / raw) To: Jonathan Corbet Cc: Aditya Srivastava, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel On Tue, Apr 27, 2021 at 09:55:35AM -0600, Jonathan Corbet wrote: > The use of the variables here doesn't really make those expressions more > readable. > > > $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; > > - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; > > - $members =~ s/\s*____cacheline_aligned/ /gos; > > + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; > > + $members =~ s/\s*$cacheline_aligned/ /gos; > > > > + my $args = qr{([^,)]+)}; > > # replace DECLARE_BITMAP > > $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; > > - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > > + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > > Here too ... this is the kind of stuff that makes me glad that Colorado > is a legal-weed state, and the new version, while better, doesn't change > that basic fact. I'm going to have to disagree with you on this one (I agree with you on all the others). I find this much easier to read ... "DECLARE_BITMAP followed by any amount of whitespace, literal open bracket, an argument, literal comma, whitespace, another argument, literal close bracket" Before, I get to "DECLARE_BITMAP followed by any amount of whitespace, then some line noise". Obviously I'm less experienced at reading regexes than you are, but this simplification really does help me. > I think I'll stop here; hopefully I've gotten my point across. I really > like where this work is heading; focusing just a bit more on pulling the > regexes together and making the whole thing more readable would be > wonderful. Amen. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v2] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-27 16:56 ` Matthew Wilcox 0 siblings, 0 replies; 37+ messages in thread From: Matthew Wilcox @ 2021-04-27 16:56 UTC (permalink / raw) To: Jonathan Corbet Cc: linux-doc, linux-kernel-mentees, linux-kernel, Aditya Srivastava On Tue, Apr 27, 2021 at 09:55:35AM -0600, Jonathan Corbet wrote: > The use of the variables here doesn't really make those expressions more > readable. > > > $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; > > - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; > > - $members =~ s/\s*____cacheline_aligned/ /gos; > > + $members =~ s/\s*$cacheline_aligned_in_smp/ /gos; > > + $members =~ s/\s*$cacheline_aligned/ /gos; > > > > + my $args = qr{([^,)]+)}; > > # replace DECLARE_BITMAP > > $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; > > - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > > + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > > Here too ... this is the kind of stuff that makes me glad that Colorado > is a legal-weed state, and the new version, while better, doesn't change > that basic fact. I'm going to have to disagree with you on this one (I agree with you on all the others). I find this much easier to read ... "DECLARE_BITMAP followed by any amount of whitespace, literal open bracket, an argument, literal comma, whitespace, another argument, literal close bracket" Before, I get to "DECLARE_BITMAP followed by any amount of whitespace, then some line noise". Obviously I'm less experienced at reading regexes than you are, but this simplification really does help me. > I think I'll stop here; hopefully I've gotten my point across. I really > like where this work is heading; focusing just a bit more on pulling the > regexes together and making the whole thing more readable would be > wonderful. Amen. _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-27 16:56 ` Matthew Wilcox @ 2021-04-29 6:37 ` Aditya Srivastava -1 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-29 6:37 UTC (permalink / raw) To: corbet Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel There are some regex expressions in the kernel-doc script, which are used repeatedly in the script. Reduce such expressions into variables, which can be used everywhere. A quick manual check found that no errors and warnings were added/removed in this process. Suggested-by: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> --- Changes in v3: - Remove variables for separate qualifiers in "sub dump_struct" - Make a common variable for all the qualifiers - Make $attribute global variable to use it at "sub check_sections" as well Changes in v2: - Rename $pointer_function to $function_pointer - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 2a85d34fdcd0..721005a02e64 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; +my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; my %parameterdescs; my %parameterdesc_start_lines; @@ -694,7 +696,7 @@ sub output_function_man(%) { $post = ");"; } $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; } else { @@ -974,7 +976,7 @@ sub output_function_rst(%) { $count++; $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print $1 . $parameter . ") (" . $2 . ")"; } else { @@ -1211,7 +1213,9 @@ sub dump_struct($$) { my $members; my $type = qr{struct|union}; # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; + my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { $decl_type = $1; @@ -1235,27 +1239,27 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; # strip attributes - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; + $members =~ s/\s*$attribute/ /gi; $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; $members =~ s/\s*__packed\s*/ /gos; $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; $members =~ s/\s*____cacheline_aligned/ /gos; + my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; - + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; my $declaration = $members; # Split nested struct/union elements as newer ones - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { + while ($members =~ m/$struct_members/) { my $newmember; my $maintype = $1; my $ids = $4; @@ -1315,7 +1319,7 @@ sub dump_struct($$) { } } } - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; + $members =~ s/$struct_members/$newmember/; } # Ignore other nested elements, like enums @@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) { my $param; # temporarily replace commas inside function pointer definition - while ($args =~ /(\([^\),]+),/) { - $args =~ s/(\([^\),]+),/$1#/g; + my $arg_expr = qr{\([^\),]+}; + while ($args =~ /$arg_expr,/) { + $args =~ s/($arg_expr),/$1#/g; } foreach my $arg (split($splitter, $args)) { @@ -1707,7 +1712,7 @@ sub check_sections($$$$$) { foreach $px (0 .. $#prms) { $prm_clean = $prms[$px]; $prm_clean =~ s/\[.*\]//; - $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; + $prm_clean =~ s/$attribute//i; # ignore array size in a parameter string; # however, the original param string may contain # spaces, e.g.: addr[6 + 2] @@ -1808,8 +1813,14 @@ sub dump_function($$) { # - parport_register_device (function pointer parameters) # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) - - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + my $name = qr{[a-zA-Z0-9_~:]+}; + my $prototype_end1 = qr{[^\(]*}; + my $prototype_end2 = qr{[^\{]*}; + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; + my $type1 = qr{[\w\s]+}; + my $type2 = qr{$type1\*+}; + + if ($define && $prototype =~ m/^()($name)\s+/) { # This is an object-like macro, it has no return type and no parameter # list. # Function-like macros are not allowed to have spaces between @@ -1817,23 +1828,9 @@ sub dump_function($$) { $return_type = $1; $declaration_name = $2; $noret = 1; - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { $return_type = $1; $declaration_name = $2; my $args = $3; @@ -2110,12 +2107,12 @@ sub process_name($$) { } elsif (/$doc_decl/o) { $identifier = $1; my $is_kernel_comment = 0; - my $decl_start = qr{\s*\*}; + my $decl_start = qr{$doc_com}; # test for pointer declaration type, foo * bar() - desc my $fn_type = qr{\w+\s*\*\s*}; my $parenthesis = qr{\(\w*\)}; my $decl_end = qr{[-:].*}; - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { $identifier = $1; } if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { @@ -2125,8 +2122,8 @@ sub process_name($$) { } # Look for foo() or static void foo() - description; or misspelt # identifier - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { $identifier = $1; $decl_type = 'function'; $identifier =~ s/^define\s+//; -- 2.17.1 ^ permalink raw reply related [flat|nested] 37+ messages in thread
* [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-29 6:37 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-04-29 6:37 UTC (permalink / raw) To: corbet; +Cc: linux-doc, yashsri421, linux-kernel, willy, linux-kernel-mentees There are some regex expressions in the kernel-doc script, which are used repeatedly in the script. Reduce such expressions into variables, which can be used everywhere. A quick manual check found that no errors and warnings were added/removed in this process. Suggested-by: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> --- Changes in v3: - Remove variables for separate qualifiers in "sub dump_struct" - Make a common variable for all the qualifiers - Make $attribute global variable to use it at "sub check_sections" as well Changes in v2: - Rename $pointer_function to $function_pointer - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 2a85d34fdcd0..721005a02e64 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; +my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; my %parameterdescs; my %parameterdesc_start_lines; @@ -694,7 +696,7 @@ sub output_function_man(%) { $post = ");"; } $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; } else { @@ -974,7 +976,7 @@ sub output_function_rst(%) { $count++; $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print $1 . $parameter . ") (" . $2 . ")"; } else { @@ -1211,7 +1213,9 @@ sub dump_struct($$) { my $members; my $type = qr{struct|union}; # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; + my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { $decl_type = $1; @@ -1235,27 +1239,27 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; # strip attributes - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; + $members =~ s/\s*$attribute/ /gi; $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; $members =~ s/\s*__packed\s*/ /gos; $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; $members =~ s/\s*____cacheline_aligned/ /gos; + my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; - + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; my $declaration = $members; # Split nested struct/union elements as newer ones - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { + while ($members =~ m/$struct_members/) { my $newmember; my $maintype = $1; my $ids = $4; @@ -1315,7 +1319,7 @@ sub dump_struct($$) { } } } - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; + $members =~ s/$struct_members/$newmember/; } # Ignore other nested elements, like enums @@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) { my $param; # temporarily replace commas inside function pointer definition - while ($args =~ /(\([^\),]+),/) { - $args =~ s/(\([^\),]+),/$1#/g; + my $arg_expr = qr{\([^\),]+}; + while ($args =~ /$arg_expr,/) { + $args =~ s/($arg_expr),/$1#/g; } foreach my $arg (split($splitter, $args)) { @@ -1707,7 +1712,7 @@ sub check_sections($$$$$) { foreach $px (0 .. $#prms) { $prm_clean = $prms[$px]; $prm_clean =~ s/\[.*\]//; - $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; + $prm_clean =~ s/$attribute//i; # ignore array size in a parameter string; # however, the original param string may contain # spaces, e.g.: addr[6 + 2] @@ -1808,8 +1813,14 @@ sub dump_function($$) { # - parport_register_device (function pointer parameters) # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) - - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + my $name = qr{[a-zA-Z0-9_~:]+}; + my $prototype_end1 = qr{[^\(]*}; + my $prototype_end2 = qr{[^\{]*}; + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; + my $type1 = qr{[\w\s]+}; + my $type2 = qr{$type1\*+}; + + if ($define && $prototype =~ m/^()($name)\s+/) { # This is an object-like macro, it has no return type and no parameter # list. # Function-like macros are not allowed to have spaces between @@ -1817,23 +1828,9 @@ sub dump_function($$) { $return_type = $1; $declaration_name = $2; $noret = 1; - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { $return_type = $1; $declaration_name = $2; my $args = $3; @@ -2110,12 +2107,12 @@ sub process_name($$) { } elsif (/$doc_decl/o) { $identifier = $1; my $is_kernel_comment = 0; - my $decl_start = qr{\s*\*}; + my $decl_start = qr{$doc_com}; # test for pointer declaration type, foo * bar() - desc my $fn_type = qr{\w+\s*\*\s*}; my $parenthesis = qr{\(\w*\)}; my $decl_end = qr{[-:].*}; - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { $identifier = $1; } if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { @@ -2125,8 +2122,8 @@ sub process_name($$) { } # Look for foo() or static void foo() - description; or misspelt # identifier - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { $identifier = $1; $decl_type = 'function'; $identifier =~ s/^define\s+//; -- 2.17.1 _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-29 6:37 ` Aditya Srivastava @ 2021-04-29 23:39 ` Jonathan Corbet -1 siblings, 0 replies; 37+ messages in thread From: Jonathan Corbet @ 2021-04-29 23:39 UTC (permalink / raw) To: Aditya Srivastava Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel Aditya Srivastava <yashsri421@gmail.com> writes: > There are some regex expressions in the kernel-doc script, which are used > repeatedly in the script. > > Reduce such expressions into variables, which can be used everywhere. > > A quick manual check found that no errors and warnings were added/removed > in this process. > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> > --- > Changes in v3: > - Remove variables for separate qualifiers in "sub dump_struct" > - Make a common variable for all the qualifiers > - Make $attribute global variable to use it at "sub check_sections" as well > > Changes in v2: > - Rename $pointer_function to $function_pointer > - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions > - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end > > scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ > 1 file changed, 34 insertions(+), 37 deletions(-) So this looks good but ... it adds a warning to the build: /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6] const * v4l2_ctrl_get_menu (u32 id) ------^ So it looks like something isn't being parsed quite identically? Thanks, jon ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-29 23:39 ` Jonathan Corbet 0 siblings, 0 replies; 37+ messages in thread From: Jonathan Corbet @ 2021-04-29 23:39 UTC (permalink / raw) To: Aditya Srivastava Cc: linux-doc, yashsri421, linux-kernel, willy, linux-kernel-mentees Aditya Srivastava <yashsri421@gmail.com> writes: > There are some regex expressions in the kernel-doc script, which are used > repeatedly in the script. > > Reduce such expressions into variables, which can be used everywhere. > > A quick manual check found that no errors and warnings were added/removed > in this process. > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> > --- > Changes in v3: > - Remove variables for separate qualifiers in "sub dump_struct" > - Make a common variable for all the qualifiers > - Make $attribute global variable to use it at "sub check_sections" as well > > Changes in v2: > - Rename $pointer_function to $function_pointer > - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions > - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end > > scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ > 1 file changed, 34 insertions(+), 37 deletions(-) So this looks good but ... it adds a warning to the build: /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6] const * v4l2_ctrl_get_menu (u32 id) ------^ So it looks like something isn't being parsed quite identically? Thanks, jon _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-29 23:39 ` Jonathan Corbet @ 2021-04-30 2:03 ` Joe Perches -1 siblings, 0 replies; 37+ messages in thread From: Joe Perches @ 2021-04-30 2:03 UTC (permalink / raw) To: Jonathan Corbet, Aditya Srivastava Cc: lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel On Thu, 2021-04-29 at 17:39 -0600, Jonathan Corbet wrote: > Aditya Srivastava <yashsri421@gmail.com> writes: > > > There are some regex expressions in the kernel-doc script, which are used > > repeatedly in the script. > > > > Reduce such expressions into variables, which can be used everywhere. > > > > A quick manual check found that no errors and warnings were added/removed > > in this process. > > > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> > > --- > > Changes in v3: > > - Remove variables for separate qualifiers in "sub dump_struct" > > - Make a common variable for all the qualifiers > > - Make $attribute global variable to use it at "sub check_sections" as well > > > > Changes in v2: > > - Rename $pointer_function to $function_pointer > > - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions > > - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end > > > > scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ > > 1 file changed, 34 insertions(+), 37 deletions(-) > > So this looks good but ... it adds a warning to the build: > > /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6] > const * v4l2_ctrl_get_menu (u32 id) > ------^ > > So it looks like something isn't being parsed quite identically? Perhaps a few of the regexes from checkpatch could be used or maybe a linux specific perl module produced. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-30 2:03 ` Joe Perches 0 siblings, 0 replies; 37+ messages in thread From: Joe Perches @ 2021-04-30 2:03 UTC (permalink / raw) To: Jonathan Corbet, Aditya Srivastava Cc: linux-kernel-mentees, linux-kernel, willy, linux-doc On Thu, 2021-04-29 at 17:39 -0600, Jonathan Corbet wrote: > Aditya Srivastava <yashsri421@gmail.com> writes: > > > There are some regex expressions in the kernel-doc script, which are used > > repeatedly in the script. > > > > Reduce such expressions into variables, which can be used everywhere. > > > > A quick manual check found that no errors and warnings were added/removed > > in this process. > > > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> > > --- > > Changes in v3: > > - Remove variables for separate qualifiers in "sub dump_struct" > > - Make a common variable for all the qualifiers > > - Make $attribute global variable to use it at "sub check_sections" as well > > > > Changes in v2: > > - Rename $pointer_function to $function_pointer > > - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions > > - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end > > > > scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ > > 1 file changed, 34 insertions(+), 37 deletions(-) > > So this looks good but ... it adds a warning to the build: > > /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6] > const * v4l2_ctrl_get_menu (u32 id) > ------^ > > So it looks like something isn't being parsed quite identically? Perhaps a few of the regexes from checkpatch could be used or maybe a linux specific perl module produced. _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-29 23:39 ` Jonathan Corbet @ 2021-05-01 9:30 ` Aditya Srivastava -1 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-05-01 9:30 UTC (permalink / raw) To: Jonathan Corbet Cc: lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel On 30/4/21 5:09 am, Jonathan Corbet wrote: > Aditya Srivastava <yashsri421@gmail.com> writes: > >> There are some regex expressions in the kernel-doc script, which are used >> repeatedly in the script. >> >> Reduce such expressions into variables, which can be used everywhere. >> >> A quick manual check found that no errors and warnings were added/removed >> in this process. >> >> Suggested-by: Jonathan Corbet <corbet@lwn.net> >> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> >> --- >> Changes in v3: >> - Remove variables for separate qualifiers in "sub dump_struct" >> - Make a common variable for all the qualifiers >> - Make $attribute global variable to use it at "sub check_sections" as well >> >> Changes in v2: >> - Rename $pointer_function to $function_pointer >> - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions >> - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end >> >> scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ >> 1 file changed, 34 insertions(+), 37 deletions(-) > > So this looks good but ... it adds a warning to the build: > > /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6] > const * v4l2_ctrl_get_menu (u32 id) > ------^ > > So it looks like something isn't being parsed quite identically? > Hi Jonathan! I could not reproduce this error.. Can you suggest me how can I reproduce this error? I ran kernel-doc -none {$file} over the tree. Probably, this is not a kernel-doc error Thanks Aditya ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-05-01 9:30 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-05-01 9:30 UTC (permalink / raw) To: Jonathan Corbet; +Cc: linux-kernel-mentees, linux-kernel, willy, linux-doc On 30/4/21 5:09 am, Jonathan Corbet wrote: > Aditya Srivastava <yashsri421@gmail.com> writes: > >> There are some regex expressions in the kernel-doc script, which are used >> repeatedly in the script. >> >> Reduce such expressions into variables, which can be used everywhere. >> >> A quick manual check found that no errors and warnings were added/removed >> in this process. >> >> Suggested-by: Jonathan Corbet <corbet@lwn.net> >> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> >> --- >> Changes in v3: >> - Remove variables for separate qualifiers in "sub dump_struct" >> - Make a common variable for all the qualifiers >> - Make $attribute global variable to use it at "sub check_sections" as well >> >> Changes in v2: >> - Rename $pointer_function to $function_pointer >> - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions >> - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end >> >> scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ >> 1 file changed, 34 insertions(+), 37 deletions(-) > > So this looks good but ... it adds a warning to the build: > > /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6] > const * v4l2_ctrl_get_menu (u32 id) > ------^ > > So it looks like something isn't being parsed quite identically? > Hi Jonathan! I could not reproduce this error.. Can you suggest me how can I reproduce this error? I ran kernel-doc -none {$file} over the tree. Probably, this is not a kernel-doc error Thanks Aditya _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-05-01 9:30 ` Aditya Srivastava @ 2021-05-01 15:03 ` Jonathan Corbet -1 siblings, 0 replies; 37+ messages in thread From: Jonathan Corbet @ 2021-05-01 15:03 UTC (permalink / raw) To: Aditya Srivastava Cc: lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel Aditya Srivastava <yashsri421@gmail.com> writes: > On 30/4/21 5:09 am, Jonathan Corbet wrote: >> Aditya Srivastava <yashsri421@gmail.com> writes: >> >>> There are some regex expressions in the kernel-doc script, which are used >>> repeatedly in the script. >>> >>> Reduce such expressions into variables, which can be used everywhere. >>> >>> A quick manual check found that no errors and warnings were added/removed >>> in this process. >>> >>> Suggested-by: Jonathan Corbet <corbet@lwn.net> >>> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> >>> --- >>> Changes in v3: >>> - Remove variables for separate qualifiers in "sub dump_struct" >>> - Make a common variable for all the qualifiers >>> - Make $attribute global variable to use it at "sub check_sections" as well >>> >>> Changes in v2: >>> - Rename $pointer_function to $function_pointer >>> - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions >>> - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end >>> >>> scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ >>> 1 file changed, 34 insertions(+), 37 deletions(-) >> >> So this looks good but ... it adds a warning to the build: >> >> /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6] >> const * v4l2_ctrl_get_menu (u32 id) >> ------^ >> >> So it looks like something isn't being parsed quite identically? >> > > Hi Jonathan! > I could not reproduce this error.. > Can you suggest me how can I reproduce this error? > I ran kernel-doc -none {$file} over the tree. > > Probably, this is not a kernel-doc error It's a Sphinx error; run "make htmldocs" to see it. That said, the error itself should be enough to point at where the problem is. Thanks, jon ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-05-01 15:03 ` Jonathan Corbet 0 siblings, 0 replies; 37+ messages in thread From: Jonathan Corbet @ 2021-05-01 15:03 UTC (permalink / raw) To: Aditya Srivastava; +Cc: linux-kernel-mentees, linux-kernel, willy, linux-doc Aditya Srivastava <yashsri421@gmail.com> writes: > On 30/4/21 5:09 am, Jonathan Corbet wrote: >> Aditya Srivastava <yashsri421@gmail.com> writes: >> >>> There are some regex expressions in the kernel-doc script, which are used >>> repeatedly in the script. >>> >>> Reduce such expressions into variables, which can be used everywhere. >>> >>> A quick manual check found that no errors and warnings were added/removed >>> in this process. >>> >>> Suggested-by: Jonathan Corbet <corbet@lwn.net> >>> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> >>> --- >>> Changes in v3: >>> - Remove variables for separate qualifiers in "sub dump_struct" >>> - Make a common variable for all the qualifiers >>> - Make $attribute global variable to use it at "sub check_sections" as well >>> >>> Changes in v2: >>> - Rename $pointer_function to $function_pointer >>> - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions >>> - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end >>> >>> scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ >>> 1 file changed, 34 insertions(+), 37 deletions(-) >> >> So this looks good but ... it adds a warning to the build: >> >> /stuff/k/git/kernel/Documentation/driver-api/media/v4l2-controls:823: ./include/media/v4l2-ctrls.h:964: WARNING: Invalid C declaration: Expected identifier in nested name. [error at 6] >> const * v4l2_ctrl_get_menu (u32 id) >> ------^ >> >> So it looks like something isn't being parsed quite identically? >> > > Hi Jonathan! > I could not reproduce this error.. > Can you suggest me how can I reproduce this error? > I ran kernel-doc -none {$file} over the tree. > > Probably, this is not a kernel-doc error It's a Sphinx error; run "make htmldocs" to see it. That said, the error itself should be enough to point at where the problem is. Thanks, jon _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* [RFC v4] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-05-01 15:03 ` Jonathan Corbet @ 2021-05-14 14:42 ` Aditya Srivastava -1 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-05-14 14:42 UTC (permalink / raw) To: corbet Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel There are some regex expressions in the kernel-doc script, which are used repeatedly in the script. Reduce such expressions into variables, which can be used everywhere. A quick manual check found that no errors and warnings were added/removed in this process. Suggested-by: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> --- Changes in v4: - Fix htmldocs warning at function parsing, involving repeated $type2 identifiers capture - Re-tested against all files in kernel tree Changes in v3: - Remove variables for separate qualifiers in "sub dump_struct" - Make a common variable for all the qualifiers - Make $attribute global variable to use it at "sub check_sections" as well Changes in v2: - Rename $pointer_function to $function_pointer - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 4840e748fca8..7c4a6a507ac4 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; +my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; my %parameterdescs; my %parameterdesc_start_lines; @@ -694,7 +696,7 @@ sub output_function_man(%) { $post = ");"; } $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; } else { @@ -974,7 +976,7 @@ sub output_function_rst(%) { $count++; $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print $1 . $parameter . ") (" . $2 . ")"; } else { @@ -1211,7 +1213,9 @@ sub dump_struct($$) { my $members; my $type = qr{struct|union}; # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; + my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { $decl_type = $1; @@ -1235,27 +1239,27 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; # strip attributes - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; + $members =~ s/\s*$attribute/ /gi; $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; $members =~ s/\s*__packed\s*/ /gos; $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; $members =~ s/\s*____cacheline_aligned/ /gos; + my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; - + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; my $declaration = $members; # Split nested struct/union elements as newer ones - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { + while ($members =~ m/$struct_members/) { my $newmember; my $maintype = $1; my $ids = $4; @@ -1315,7 +1319,7 @@ sub dump_struct($$) { } } } - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; + $members =~ s/$struct_members/$newmember/; } # Ignore other nested elements, like enums @@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) { my $param; # temporarily replace commas inside function pointer definition - while ($args =~ /(\([^\),]+),/) { - $args =~ s/(\([^\),]+),/$1#/g; + my $arg_expr = qr{\([^\),]+}; + while ($args =~ /$arg_expr,/) { + $args =~ s/($arg_expr),/$1#/g; } foreach my $arg (split($splitter, $args)) { @@ -1707,7 +1712,7 @@ sub check_sections($$$$$) { foreach $px (0 .. $#prms) { $prm_clean = $prms[$px]; $prm_clean =~ s/\[.*\]//; - $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; + $prm_clean =~ s/$attribute//i; # ignore array size in a parameter string; # however, the original param string may contain # spaces, e.g.: addr[6 + 2] @@ -1809,8 +1814,14 @@ sub dump_function($$) { # - parport_register_device (function pointer parameters) # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) - - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + my $name = qr{[a-zA-Z0-9_~:]+}; + my $prototype_end1 = qr{[^\(]*}; + my $prototype_end2 = qr{[^\{]*}; + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; + my $type1 = qr{[\w\s]+}; + my $type2 = qr{$type1\*+}; + + if ($define && $prototype =~ m/^()($name)\s+/) { # This is an object-like macro, it has no return type and no parameter # list. # Function-like macros are not allowed to have spaces between @@ -1818,23 +1829,9 @@ sub dump_function($$) { $return_type = $1; $declaration_name = $2; $noret = 1; - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || + $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) { $return_type = $1; $declaration_name = $2; my $args = $3; @@ -2111,12 +2108,12 @@ sub process_name($$) { } elsif (/$doc_decl/o) { $identifier = $1; my $is_kernel_comment = 0; - my $decl_start = qr{\s*\*}; + my $decl_start = qr{$doc_com}; # test for pointer declaration type, foo * bar() - desc my $fn_type = qr{\w+\s*\*\s*}; my $parenthesis = qr{\(\w*\)}; my $decl_end = qr{[-:].*}; - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { $identifier = $1; } if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { @@ -2126,8 +2123,8 @@ sub process_name($$) { } # Look for foo() or static void foo() - description; or misspelt # identifier - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { $identifier = $1; $decl_type = 'function'; $identifier =~ s/^define\s+//; -- 2.17.1 ^ permalink raw reply related [flat|nested] 37+ messages in thread
* [RFC v4] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-05-14 14:42 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-05-14 14:42 UTC (permalink / raw) To: corbet; +Cc: linux-doc, yashsri421, linux-kernel, willy, linux-kernel-mentees There are some regex expressions in the kernel-doc script, which are used repeatedly in the script. Reduce such expressions into variables, which can be used everywhere. A quick manual check found that no errors and warnings were added/removed in this process. Suggested-by: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> --- Changes in v4: - Fix htmldocs warning at function parsing, involving repeated $type2 identifiers capture - Re-tested against all files in kernel tree Changes in v3: - Remove variables for separate qualifiers in "sub dump_struct" - Make a common variable for all the qualifiers - Make $attribute global variable to use it at "sub check_sections" as well Changes in v2: - Rename $pointer_function to $function_pointer - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 4840e748fca8..7c4a6a507ac4 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; +my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; my %parameterdescs; my %parameterdesc_start_lines; @@ -694,7 +696,7 @@ sub output_function_man(%) { $post = ");"; } $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; } else { @@ -974,7 +976,7 @@ sub output_function_rst(%) { $count++; $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + if ($type =~ m/$function_pointer/) { # pointer-to-function print $1 . $parameter . ") (" . $2 . ")"; } else { @@ -1211,7 +1213,9 @@ sub dump_struct($$) { my $members; my $type = qr{struct|union}; # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; + my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; + my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { $decl_type = $1; @@ -1235,27 +1239,27 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; # strip attributes - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; + $members =~ s/\s*$attribute/ /gi; $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; $members =~ s/\s*__packed\s*/ /gos; $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; $members =~ s/\s*____cacheline_aligned/ /gos; + my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; - + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; my $declaration = $members; # Split nested struct/union elements as newer ones - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { + while ($members =~ m/$struct_members/) { my $newmember; my $maintype = $1; my $ids = $4; @@ -1315,7 +1319,7 @@ sub dump_struct($$) { } } } - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; + $members =~ s/$struct_members/$newmember/; } # Ignore other nested elements, like enums @@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) { my $param; # temporarily replace commas inside function pointer definition - while ($args =~ /(\([^\),]+),/) { - $args =~ s/(\([^\),]+),/$1#/g; + my $arg_expr = qr{\([^\),]+}; + while ($args =~ /$arg_expr,/) { + $args =~ s/($arg_expr),/$1#/g; } foreach my $arg (split($splitter, $args)) { @@ -1707,7 +1712,7 @@ sub check_sections($$$$$) { foreach $px (0 .. $#prms) { $prm_clean = $prms[$px]; $prm_clean =~ s/\[.*\]//; - $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; + $prm_clean =~ s/$attribute//i; # ignore array size in a parameter string; # however, the original param string may contain # spaces, e.g.: addr[6 + 2] @@ -1809,8 +1814,14 @@ sub dump_function($$) { # - parport_register_device (function pointer parameters) # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) - - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + my $name = qr{[a-zA-Z0-9_~:]+}; + my $prototype_end1 = qr{[^\(]*}; + my $prototype_end2 = qr{[^\{]*}; + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; + my $type1 = qr{[\w\s]+}; + my $type2 = qr{$type1\*+}; + + if ($define && $prototype =~ m/^()($name)\s+/) { # This is an object-like macro, it has no return type and no parameter # list. # Function-like macros are not allowed to have spaces between @@ -1818,23 +1829,9 @@ sub dump_function($$) { $return_type = $1; $declaration_name = $2; $noret = 1; - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || + $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) { $return_type = $1; $declaration_name = $2; my $args = $3; @@ -2111,12 +2108,12 @@ sub process_name($$) { } elsif (/$doc_decl/o) { $identifier = $1; my $is_kernel_comment = 0; - my $decl_start = qr{\s*\*}; + my $decl_start = qr{$doc_com}; # test for pointer declaration type, foo * bar() - desc my $fn_type = qr{\w+\s*\*\s*}; my $parenthesis = qr{\(\w*\)}; my $decl_end = qr{[-:].*}; - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { $identifier = $1; } if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { @@ -2126,8 +2123,8 @@ sub process_name($$) { } # Look for foo() or static void foo() - description; or misspelt # identifier - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { $identifier = $1; $decl_type = 'function'; $identifier =~ s/^define\s+//; -- 2.17.1 _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [RFC v4] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-05-14 14:42 ` Aditya Srivastava @ 2021-05-14 15:10 ` Aditya Srivastava -1 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-05-14 15:10 UTC (permalink / raw) To: corbet Cc: lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel On 14/5/21 8:12 pm, Aditya Srivastava wrote: > There are some regex expressions in the kernel-doc script, which are used > repeatedly in the script. > > Reduce such expressions into variables, which can be used everywhere. > > A quick manual check found that no errors and warnings were added/removed > in this process. > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> > --- > Changes in v4: > - Fix htmldocs warning at function parsing, involving repeated $type2 identifiers capture > - Re-tested against all files in kernel tree > > Changes in v3: > - Remove variables for separate qualifiers in "sub dump_struct" > - Make a common variable for all the qualifiers > - Make $attribute global variable to use it at "sub check_sections" as well > > Changes in v2: > - Rename $pointer_function to $function_pointer > - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions > - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end > > scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ > 1 file changed, 34 insertions(+), 37 deletions(-) > > diff --git a/scripts/kernel-doc b/scripts/kernel-doc > index 4840e748fca8..7c4a6a507ac4 100755 > --- a/scripts/kernel-doc > +++ b/scripts/kernel-doc > @@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; > my $doc_inline_end = '^\s*\*/\s*$'; > my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; > my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; > +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; > +my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; > > my %parameterdescs; > my %parameterdesc_start_lines; > @@ -694,7 +696,7 @@ sub output_function_man(%) { > $post = ");"; > } > $type = $args{'parametertypes'}{$parameter}; > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$function_pointer/) { > # pointer-to-function > print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; > } else { > @@ -974,7 +976,7 @@ sub output_function_rst(%) { > $count++; > $type = $args{'parametertypes'}{$parameter}; > > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$function_pointer/) { > # pointer-to-function > print $1 . $parameter . ") (" . $2 . ")"; > } else { > @@ -1211,7 +1213,9 @@ sub dump_struct($$) { > my $members; > my $type = qr{struct|union}; > # For capturing struct/union definition body, i.e. "{members*}qualifiers*" > - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; > + my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; > + my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; > + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; > > if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { > $decl_type = $1; > @@ -1235,27 +1239,27 @@ sub dump_struct($$) { > # strip comments: > $members =~ s/\/\*.*?\*\///gos; > # strip attributes > - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; > + $members =~ s/\s*$attribute/ /gi; > $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; > $members =~ s/\s*__packed\s*/ /gos; > $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; > $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; > $members =~ s/\s*____cacheline_aligned/ /gos; > > + my $args = qr{([^,)]+)}; > # replace DECLARE_BITMAP > $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; > - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > # replace DECLARE_HASHTABLE > - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; > + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; > # replace DECLARE_KFIFO > - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; > + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; > # replace DECLARE_KFIFO_PTR > - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; > - > + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; > my $declaration = $members; > > # Split nested struct/union elements as newer ones > - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { > + while ($members =~ m/$struct_members/) { > my $newmember; > my $maintype = $1; > my $ids = $4; > @@ -1315,7 +1319,7 @@ sub dump_struct($$) { > } > } > } > - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; > + $members =~ s/$struct_members/$newmember/; > } > > # Ignore other nested elements, like enums > @@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) { > my $param; > > # temporarily replace commas inside function pointer definition > - while ($args =~ /(\([^\),]+),/) { > - $args =~ s/(\([^\),]+),/$1#/g; > + my $arg_expr = qr{\([^\),]+}; > + while ($args =~ /$arg_expr,/) { > + $args =~ s/($arg_expr),/$1#/g; > } > > foreach my $arg (split($splitter, $args)) { > @@ -1707,7 +1712,7 @@ sub check_sections($$$$$) { > foreach $px (0 .. $#prms) { > $prm_clean = $prms[$px]; > $prm_clean =~ s/\[.*\]//; > - $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; > + $prm_clean =~ s/$attribute//i; > # ignore array size in a parameter string; > # however, the original param string may contain > # spaces, e.g.: addr[6 + 2] > @@ -1809,8 +1814,14 @@ sub dump_function($$) { > # - parport_register_device (function pointer parameters) > # - atomic_set (macro) > # - pci_match_device, __copy_to_user (long return type) > - > - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { > + my $name = qr{[a-zA-Z0-9_~:]+}; > + my $prototype_end1 = qr{[^\(]*}; > + my $prototype_end2 = qr{[^\{]*}; > + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; > + my $type1 = qr{[\w\s]+}; > + my $type2 = qr{$type1\*+}; > + > + if ($define && $prototype =~ m/^()($name)\s+/) { > # This is an object-like macro, it has no return type and no parameter > # list. > # Function-like macros are not allowed to have spaces between > @@ -1818,23 +1829,9 @@ sub dump_function($$) { > $return_type = $1; > $declaration_name = $2; > $noret = 1; > - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || > + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || > + $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) { > $return_type = $1; > $declaration_name = $2; > my $args = $3; > @@ -2111,12 +2108,12 @@ sub process_name($$) { > } elsif (/$doc_decl/o) { > $identifier = $1; > my $is_kernel_comment = 0; > - my $decl_start = qr{\s*\*}; > + my $decl_start = qr{$doc_com}; > # test for pointer declaration type, foo * bar() - desc > my $fn_type = qr{\w+\s*\*\s*}; > my $parenthesis = qr{\(\w*\)}; > my $decl_end = qr{[-:].*}; > - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { > + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { > $identifier = $1; > } > if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { > @@ -2126,8 +2123,8 @@ sub process_name($$) { > } > # Look for foo() or static void foo() - description; or misspelt > # identifier > - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || > - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { > + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || > + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { > $identifier = $1; > $decl_type = 'function'; > $identifier =~ s/^define\s+//; > Hi Jonathan! The warning you mentioned was not showing to me on running "make htmldocs", for some reason.. As a result, I haven't been able to test the patch for this warning.. However, I understood the reason for the error. It was in this line: > + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { Here, $1 was taking only the last captured value, instead of all the occurrences, as was desired by me. Just for reference, these were the warnings which I was getting: https://github.com/AdityaSrivast/kernel-tasks/blob/master/random/kernel-doc/htmldocs_msgs Thanks Aditya ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v4] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-05-14 15:10 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-05-14 15:10 UTC (permalink / raw) To: corbet; +Cc: linux-kernel-mentees, linux-kernel, willy, linux-doc On 14/5/21 8:12 pm, Aditya Srivastava wrote: > There are some regex expressions in the kernel-doc script, which are used > repeatedly in the script. > > Reduce such expressions into variables, which can be used everywhere. > > A quick manual check found that no errors and warnings were added/removed > in this process. > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> > --- > Changes in v4: > - Fix htmldocs warning at function parsing, involving repeated $type2 identifiers capture > - Re-tested against all files in kernel tree > > Changes in v3: > - Remove variables for separate qualifiers in "sub dump_struct" > - Make a common variable for all the qualifiers > - Make $attribute global variable to use it at "sub check_sections" as well > > Changes in v2: > - Rename $pointer_function to $function_pointer > - Combine elsif-block expressions at "sub dump_function" into lesser regex expressions > - Combine $prototype_end1,$prototype_end2 expressions into a common $prototype_end > > scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------ > 1 file changed, 34 insertions(+), 37 deletions(-) > > diff --git a/scripts/kernel-doc b/scripts/kernel-doc > index 4840e748fca8..7c4a6a507ac4 100755 > --- a/scripts/kernel-doc > +++ b/scripts/kernel-doc > @@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; > my $doc_inline_end = '^\s*\*/\s*$'; > my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; > my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; > +my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; > +my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; > > my %parameterdescs; > my %parameterdesc_start_lines; > @@ -694,7 +696,7 @@ sub output_function_man(%) { > $post = ");"; > } > $type = $args{'parametertypes'}{$parameter}; > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$function_pointer/) { > # pointer-to-function > print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; > } else { > @@ -974,7 +976,7 @@ sub output_function_rst(%) { > $count++; > $type = $args{'parametertypes'}{$parameter}; > > - if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { > + if ($type =~ m/$function_pointer/) { > # pointer-to-function > print $1 . $parameter . ") (" . $2 . ")"; > } else { > @@ -1211,7 +1213,9 @@ sub dump_struct($$) { > my $members; > my $type = qr{struct|union}; > # For capturing struct/union definition body, i.e. "{members*}qualifiers*" > - my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; > + my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; > + my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; > + my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; > > if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { > $decl_type = $1; > @@ -1235,27 +1239,27 @@ sub dump_struct($$) { > # strip comments: > $members =~ s/\/\*.*?\*\///gos; > # strip attributes > - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; > + $members =~ s/\s*$attribute/ /gi; > $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; > $members =~ s/\s*__packed\s*/ /gos; > $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; > $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; > $members =~ s/\s*____cacheline_aligned/ /gos; > > + my $args = qr{([^,)]+)}; > # replace DECLARE_BITMAP > $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; > - $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > + $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; > # replace DECLARE_HASHTABLE > - $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; > + $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; > # replace DECLARE_KFIFO > - $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; > + $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; > # replace DECLARE_KFIFO_PTR > - $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; > - > + $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; > my $declaration = $members; > > # Split nested struct/union elements as newer ones > - while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { > + while ($members =~ m/$struct_members/) { > my $newmember; > my $maintype = $1; > my $ids = $4; > @@ -1315,7 +1319,7 @@ sub dump_struct($$) { > } > } > } > - $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; > + $members =~ s/$struct_members/$newmember/; > } > > # Ignore other nested elements, like enums > @@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) { > my $param; > > # temporarily replace commas inside function pointer definition > - while ($args =~ /(\([^\),]+),/) { > - $args =~ s/(\([^\),]+),/$1#/g; > + my $arg_expr = qr{\([^\),]+}; > + while ($args =~ /$arg_expr,/) { > + $args =~ s/($arg_expr),/$1#/g; > } > > foreach my $arg (split($splitter, $args)) { > @@ -1707,7 +1712,7 @@ sub check_sections($$$$$) { > foreach $px (0 .. $#prms) { > $prm_clean = $prms[$px]; > $prm_clean =~ s/\[.*\]//; > - $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; > + $prm_clean =~ s/$attribute//i; > # ignore array size in a parameter string; > # however, the original param string may contain > # spaces, e.g.: addr[6 + 2] > @@ -1809,8 +1814,14 @@ sub dump_function($$) { > # - parport_register_device (function pointer parameters) > # - atomic_set (macro) > # - pci_match_device, __copy_to_user (long return type) > - > - if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { > + my $name = qr{[a-zA-Z0-9_~:]+}; > + my $prototype_end1 = qr{[^\(]*}; > + my $prototype_end2 = qr{[^\{]*}; > + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; > + my $type1 = qr{[\w\s]+}; > + my $type2 = qr{$type1\*+}; > + > + if ($define && $prototype =~ m/^()($name)\s+/) { > # This is an object-like macro, it has no return type and no parameter > # list. > # Function-like macros are not allowed to have spaces between > @@ -1818,23 +1829,9 @@ sub dump_function($$) { > $return_type = $1; > $declaration_name = $2; > $noret = 1; > - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || > + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || > + $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) { > $return_type = $1; > $declaration_name = $2; > my $args = $3; > @@ -2111,12 +2108,12 @@ sub process_name($$) { > } elsif (/$doc_decl/o) { > $identifier = $1; > my $is_kernel_comment = 0; > - my $decl_start = qr{\s*\*}; > + my $decl_start = qr{$doc_com}; > # test for pointer declaration type, foo * bar() - desc > my $fn_type = qr{\w+\s*\*\s*}; > my $parenthesis = qr{\(\w*\)}; > my $decl_end = qr{[-:].*}; > - if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) { > + if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { > $identifier = $1; > } > if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { > @@ -2126,8 +2123,8 @@ sub process_name($$) { > } > # Look for foo() or static void foo() - description; or misspelt > # identifier > - elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || > - /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { > + elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || > + /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) { > $identifier = $1; > $decl_type = 'function'; > $identifier =~ s/^define\s+//; > Hi Jonathan! The warning you mentioned was not showing to me on running "make htmldocs", for some reason.. As a result, I haven't been able to test the patch for this warning.. However, I understood the reason for the error. It was in this line: > + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { Here, $1 was taking only the last captured value, instead of all the occurrences, as was desired by me. Just for reference, these were the warnings which I was getting: https://github.com/AdityaSrivast/kernel-tasks/blob/master/random/kernel-doc/htmldocs_msgs Thanks Aditya _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v4] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-05-14 14:42 ` Aditya Srivastava @ 2021-05-17 17:49 ` Jonathan Corbet -1 siblings, 0 replies; 37+ messages in thread From: Jonathan Corbet @ 2021-05-17 17:49 UTC (permalink / raw) To: Aditya Srivastava Cc: yashsri421, lukas.bulwahn, willy, linux-kernel-mentees, linux-doc, linux-kernel Aditya Srivastava <yashsri421@gmail.com> writes: > There are some regex expressions in the kernel-doc script, which are used > repeatedly in the script. > > Reduce such expressions into variables, which can be used everywhere. > > A quick manual check found that no errors and warnings were added/removed > in this process. > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> > --- > Changes in v4: > - Fix htmldocs warning at function parsing, involving repeated $type2 identifiers capture > - Re-tested against all files in kernel tree Applied, thanks for stickint with this. jon ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v4] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-05-17 17:49 ` Jonathan Corbet 0 siblings, 0 replies; 37+ messages in thread From: Jonathan Corbet @ 2021-05-17 17:49 UTC (permalink / raw) To: Aditya Srivastava Cc: linux-doc, yashsri421, linux-kernel, willy, linux-kernel-mentees Aditya Srivastava <yashsri421@gmail.com> writes: > There are some regex expressions in the kernel-doc script, which are used > repeatedly in the script. > > Reduce such expressions into variables, which can be used everywhere. > > A quick manual check found that no errors and warnings were added/removed > in this process. > > Suggested-by: Jonathan Corbet <corbet@lwn.net> > Signed-off-by: Aditya Srivastava <yashsri421@gmail.com> > --- > Changes in v4: > - Fix htmldocs warning at function parsing, involving repeated $type2 identifiers capture > - Re-tested against all files in kernel tree Applied, thanks for stickint with this. jon _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-29 6:37 ` Aditya Srivastava @ 2021-05-01 15:43 ` Matthew Wilcox -1 siblings, 0 replies; 37+ messages in thread From: Matthew Wilcox @ 2021-05-01 15:43 UTC (permalink / raw) To: Aditya Srivastava Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel On Thu, Apr 29, 2021 at 12:07:29PM +0530, Aditya Srivastava wrote: > + my $name = qr{[a-zA-Z0-9_~:]+}; > + my $prototype_end1 = qr{[^\(]*}; > + my $prototype_end2 = qr{[^\{]*}; > + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; Would this be better written as: my $prototype_end = qr{\([^\(\{]*\)} And now that I look at the whole thing, doesn't this fail to parse a function declared as: int f(void (*g)(long)); (that is, f takes a single argument, which is a pointer to a function which takes a long argument and returns void) Still, I don't think this was parsed correctly before, so it's not an argument against this patch, just something to take care of later. > + my $type1 = qr{[\w\s]+}; > + my $type2 = qr{$type1\*+}; > + > + if ($define && $prototype =~ m/^()($name)\s+/) { > # This is an object-like macro, it has no return type and no parameter > # list. > # Function-like macros are not allowed to have spaces between > @@ -1817,23 +1828,9 @@ sub dump_function($$) { > $return_type = $1; > $declaration_name = $2; > $noret = 1; > - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || > + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || > + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { > $return_type = $1; > $declaration_name = $2; > my $args = $3; ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-05-01 15:43 ` Matthew Wilcox 0 siblings, 0 replies; 37+ messages in thread From: Matthew Wilcox @ 2021-05-01 15:43 UTC (permalink / raw) To: Aditya Srivastava; +Cc: linux-doc, linux-kernel-mentees, linux-kernel, corbet On Thu, Apr 29, 2021 at 12:07:29PM +0530, Aditya Srivastava wrote: > + my $name = qr{[a-zA-Z0-9_~:]+}; > + my $prototype_end1 = qr{[^\(]*}; > + my $prototype_end2 = qr{[^\{]*}; > + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; Would this be better written as: my $prototype_end = qr{\([^\(\{]*\)} And now that I look at the whole thing, doesn't this fail to parse a function declared as: int f(void (*g)(long)); (that is, f takes a single argument, which is a pointer to a function which takes a long argument and returns void) Still, I don't think this was parsed correctly before, so it's not an argument against this patch, just something to take care of later. > + my $type1 = qr{[\w\s]+}; > + my $type2 = qr{$type1\*+}; > + > + if ($define && $prototype =~ m/^()($name)\s+/) { > # This is an object-like macro, it has no return type and no parameter > # list. > # Function-like macros are not allowed to have spaces between > @@ -1817,23 +1828,9 @@ sub dump_function($$) { > $return_type = $1; > $declaration_name = $2; > $noret = 1; > - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || > + $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || > + $prototype =~ m/^($type2)+\s*($name)\s*$prototype_end/) { > $return_type = $1; > $declaration_name = $2; > my $args = $3; _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-05-01 15:43 ` Matthew Wilcox @ 2021-05-14 16:17 ` Aditya Srivastava -1 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-05-14 16:17 UTC (permalink / raw) To: Matthew Wilcox Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel On 1/5/21 9:13 pm, Matthew Wilcox wrote: > On Thu, Apr 29, 2021 at 12:07:29PM +0530, Aditya Srivastava wrote: >> + my $name = qr{[a-zA-Z0-9_~:]+}; >> + my $prototype_end1 = qr{[^\(]*}; >> + my $prototype_end2 = qr{[^\{]*}; >> + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; > > Would this be better written as: > > my $prototype_end = qr{\([^\(\{]*\)} > Hi Matthew I have actually tried this earlier, but it does not work as expected, probably because of greedy matching. I have produced the list of warning differences before and after over the files, when using this regex: https://github.com/AdityaSrivast/kernel-tasks/blob/master/random/kernel-doc/diff_on_alt_protend > And now that I look at the whole thing, doesn't this fail to parse > a function declared as: > > int f(void (*g)(long)); > > (that is, f takes a single argument, which is a pointer to a function > which takes a long argument and returns void) > I think this will match against: $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ Thanks Aditya ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC v3] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-05-14 16:17 ` Aditya Srivastava 0 siblings, 0 replies; 37+ messages in thread From: Aditya Srivastava @ 2021-05-14 16:17 UTC (permalink / raw) To: Matthew Wilcox; +Cc: linux-doc, linux-kernel-mentees, linux-kernel, corbet On 1/5/21 9:13 pm, Matthew Wilcox wrote: > On Thu, Apr 29, 2021 at 12:07:29PM +0530, Aditya Srivastava wrote: >> + my $name = qr{[a-zA-Z0-9_~:]+}; >> + my $prototype_end1 = qr{[^\(]*}; >> + my $prototype_end2 = qr{[^\{]*}; >> + my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; > > Would this be better written as: > > my $prototype_end = qr{\([^\(\{]*\)} > Hi Matthew I have actually tried this earlier, but it does not work as expected, probably because of greedy matching. I have produced the list of warning differences before and after over the files, when using this regex: https://github.com/AdityaSrivast/kernel-tasks/blob/master/random/kernel-doc/diff_on_alt_protend > And now that I look at the whole thing, doesn't this fail to parse > a function declared as: > > int f(void (*g)(long)); > > (that is, f takes a single argument, which is a pointer to a function > which takes a long argument and returns void) > I think this will match against: $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ Thanks Aditya _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables 2021-04-24 11:57 ` Aditya Srivastava @ 2021-04-26 17:31 ` Matthew Wilcox -1 siblings, 0 replies; 37+ messages in thread From: Matthew Wilcox @ 2021-04-26 17:31 UTC (permalink / raw) To: Aditya Srivastava Cc: corbet, lukas.bulwahn, linux-kernel-mentees, linux-doc, linux-kernel On Sat, Apr 24, 2021 at 05:27:34PM +0530, Aditya Srivastava wrote: > On 23/4/21 6:51 pm, Matthew Wilcox wrote: > > On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote: > >> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; > > > > Is that a pointer-to-function? Or as people who write C usually call it, > > a function pointer? Wouldn't it be better to call it $function_pointer? > > > Will do it. > > >> @@ -1210,8 +1211,14 @@ sub dump_struct($$) { > >> my $decl_type; > >> my $members; > >> my $type = qr{struct|union}; > >> + my $packed = qr{__packed}; > >> + my $aligned = qr{__aligned}; > >> + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; > >> + my $cacheline_aligned = qr{____cacheline_aligned}; > > > > I don't think those four definitions actually simplify anything. > > > >> + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; > > > > ... whereas this one definitely does. > > > >> - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; > >> - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; > >> - $members =~ s/\s*__packed\s*/ /gos; > >> + $members =~ s/\s*$attribute/ /gi; > >> + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; > > > > Maybe put the \s*\([^;]*\) into $aligned? Then it becomes a useful > > abstraction. > > Actually, I had made these variables as they were repeated here and at > - my $definition_body = > qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; > + my $definition_body = > qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; > > So, defining them at a place might help. > > What do you think? I don't think that seeing $packed is any easier to read than __packed. Indeed, I think it's harder, because now I have to look up what $packed is defined as. Defining a variable, say $decorations = qr{__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\))} (i didn't count brackets to be sure i got that right) would be helpful because then we could say: my $definition_body = qr{\{(.*)\}...$decorations... and have a fighting chance of understanding what it means. Now, this other place we use it, we do the =~ operation a number of times. Is there a way to use the $decorations variable to do the same thing with a single operation? ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables @ 2021-04-26 17:31 ` Matthew Wilcox 0 siblings, 0 replies; 37+ messages in thread From: Matthew Wilcox @ 2021-04-26 17:31 UTC (permalink / raw) To: Aditya Srivastava; +Cc: linux-doc, linux-kernel-mentees, linux-kernel, corbet On Sat, Apr 24, 2021 at 05:27:34PM +0530, Aditya Srivastava wrote: > On 23/4/21 6:51 pm, Matthew Wilcox wrote: > > On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote: > >> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; > > > > Is that a pointer-to-function? Or as people who write C usually call it, > > a function pointer? Wouldn't it be better to call it $function_pointer? > > > Will do it. > > >> @@ -1210,8 +1211,14 @@ sub dump_struct($$) { > >> my $decl_type; > >> my $members; > >> my $type = qr{struct|union}; > >> + my $packed = qr{__packed}; > >> + my $aligned = qr{__aligned}; > >> + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp}; > >> + my $cacheline_aligned = qr{____cacheline_aligned}; > > > > I don't think those four definitions actually simplify anything. > > > >> + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; > > > > ... whereas this one definitely does. > > > >> - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; > >> - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; > >> - $members =~ s/\s*__packed\s*/ /gos; > >> + $members =~ s/\s*$attribute/ /gi; > >> + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos; > > > > Maybe put the \s*\([^;]*\) into $aligned? Then it becomes a useful > > abstraction. > > Actually, I had made these variables as they were repeated here and at > - my $definition_body = > qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*}; > + my $definition_body = > qr{\{(.*)\}(?:\s*(?:$packed|$aligned|$cacheline_aligned_in_smp|$cacheline_aligned|$attribute))*}; > > So, defining them at a place might help. > > What do you think? I don't think that seeing $packed is any easier to read than __packed. Indeed, I think it's harder, because now I have to look up what $packed is defined as. Defining a variable, say $decorations = qr{__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\))} (i didn't count brackets to be sure i got that right) would be helpful because then we could say: my $definition_body = qr{\{(.*)\}...$decorations... and have a fighting chance of understanding what it means. Now, this other place we use it, we do the =~ operation a number of times. Is there a way to use the $decorations variable to do the same thing with a single operation? _______________________________________________ Linux-kernel-mentees mailing list Linux-kernel-mentees@lists.linuxfoundation.org https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees ^ permalink raw reply [flat|nested] 37+ messages in thread
end of thread, other threads:[~2021-05-17 17:49 UTC | newest] Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-04-22 19:18 [RFC] scripts: kernel-doc: reduce repeated regex expressions into variables Aditya Srivastava 2021-04-22 19:18 ` Aditya Srivastava 2021-04-22 19:33 ` Lukas Bulwahn 2021-04-23 12:20 ` Aditya Srivastava 2021-04-23 12:20 ` Aditya Srivastava 2021-04-23 13:21 ` Matthew Wilcox 2021-04-23 13:21 ` Matthew Wilcox 2021-04-24 11:57 ` Aditya Srivastava 2021-04-24 11:57 ` Aditya Srivastava 2021-04-24 12:47 ` [RFC v2] " Aditya Srivastava 2021-04-24 12:47 ` Aditya Srivastava 2021-04-27 15:55 ` Jonathan Corbet 2021-04-27 15:55 ` Jonathan Corbet 2021-04-27 16:56 ` Matthew Wilcox 2021-04-27 16:56 ` Matthew Wilcox 2021-04-29 6:37 ` [RFC v3] " Aditya Srivastava 2021-04-29 6:37 ` Aditya Srivastava 2021-04-29 23:39 ` Jonathan Corbet 2021-04-29 23:39 ` Jonathan Corbet 2021-04-30 2:03 ` Joe Perches 2021-04-30 2:03 ` Joe Perches 2021-05-01 9:30 ` Aditya Srivastava 2021-05-01 9:30 ` Aditya Srivastava 2021-05-01 15:03 ` Jonathan Corbet 2021-05-01 15:03 ` Jonathan Corbet 2021-05-14 14:42 ` [RFC v4] " Aditya Srivastava 2021-05-14 14:42 ` Aditya Srivastava 2021-05-14 15:10 ` Aditya Srivastava 2021-05-14 15:10 ` Aditya Srivastava 2021-05-17 17:49 ` Jonathan Corbet 2021-05-17 17:49 ` Jonathan Corbet 2021-05-01 15:43 ` [RFC v3] " Matthew Wilcox 2021-05-01 15:43 ` Matthew Wilcox 2021-05-14 16:17 ` Aditya Srivastava 2021-05-14 16:17 ` Aditya Srivastava 2021-04-26 17:31 ` [RFC] " Matthew Wilcox 2021-04-26 17:31 ` Matthew Wilcox
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.