All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
To: Linux Doc Mailing List <linux-doc@vger.kernel.org>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>,
	"Jonathan Corbet" <corbet@lwn.net>,
	linux-kernel@vger.kernel.org
Subject: [PATCH 03/13] scripts: get_abi.pl: detect softlinks
Date: Thu, 23 Sep 2021 15:30:01 +0200	[thread overview]
Message-ID: <244fcb2b85b84d1d26e7a7c4cbc6ea17a67a9025.1632402570.git.mchehab+huawei@kernel.org> (raw)
In-Reply-To: <cover.1632402570.git.mchehab+huawei@kernel.org>

The way sysfs works is that the same leave may be present under
/sys/devices, /sys/bus and /sys/class, etc, linked via soft
symlinks.

To make it harder to parse, the ABI definition usually refers
only to one of those locations.

So, improve the logic in order to retrieve the symlinks.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 scripts/get_abi.pl | 207 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 165 insertions(+), 42 deletions(-)

diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl
index e714bf75f5c2..a7cb4be6886c 100755
--- a/scripts/get_abi.pl
+++ b/scripts/get_abi.pl
@@ -8,8 +8,10 @@ use Pod::Usage;
 use Getopt::Long;
 use File::Find;
 use Fcntl ':mode';
+use Cwd 'abs_path';
 
 my $help = 0;
+my $hint = 0;
 my $man = 0;
 my $debug = 0;
 my $enable_lineno = 0;
@@ -28,6 +30,7 @@ GetOptions(
 	"rst-source!" => \$description_is_rst,
 	"dir=s" => \$prefix,
 	'help|?' => \$help,
+	"show-hints" => \$hint,
 	man => \$man
 ) or pod2usage(2);
 
@@ -527,7 +530,7 @@ sub search_symbols {
 }
 
 # Exclude /sys/kernel/debug and /sys/kernel/tracing from the search path
-sub skip_debugfs {
+sub dont_parse_special_attributes {
 	if (($File::Find::dir =~ m,^/sys/kernel,)) {
 		return grep {!/(debug|tracing)/ } @_;
 	}
@@ -540,64 +543,178 @@ sub skip_debugfs {
 }
 
 my %leaf;
+my %aliases;
+my @files;
 
-my $escape_symbols = qr { ([\x01-\x08\x0e-\x1f\x21-\x29\x2b-\x2d\x3a-\x40\x7b-\xff]) }x;
+my $escape_symbols = qr { ([\x01-\x08\x0e-\x1f\x21-\x29\x2b-\x2d\x3a-\x40\x7b-\xfe]) }x;
 sub parse_existing_sysfs {
 	my $file = $File::Find::name;
+	my $mode = (lstat($file))[2];
+	my $abs_file = abs_path($file);
 
-	my $mode = (stat($file))[2];
-	return if ($mode & S_IFDIR);
-
-	my $leave = $file;
-	$leave =~ s,.*/,,;
-
-	if (defined($leaf{$leave})) {
-		# FIXME: need to check if the path makes sense
-		my $what = $leaf{$leave};
-
-		$what =~ s/,/ /g;
-
-		$what =~ s/\<[^\>]+\>/.*/g;
-		$what =~ s/\{[^\}]+\}/.*/g;
-		$what =~ s/\[[^\]]+\]/.*/g;
-		$what =~ s,/\.\.\./,/.*/,g;
-		$what =~ s,/\*/,/.*/,g;
-
-		$what =~ s/\s+/ /g;
-
-		# Escape all other symbols
-		$what =~ s/$escape_symbols/\\$1/g;
-
-		foreach my $i (split / /,$what) {
-			if ($file =~ m#^$i$#) {
-#				print "$file: $i: OK!\n";
-				return;
-			}
-		}
-
-		print "$file: $leave is defined at $what\n";
-
+	if (S_ISLNK($mode)) {
+		$aliases{$file} = $abs_file;
 		return;
 	}
 
-	print "$file not found.\n";
+	return if (S_ISDIR($mode));
+
+	# Trivial: file is defined exactly the same way at ABI What:
+	return if (defined($data{$file}));
+	return if (defined($data{$abs_file}));
+
+	push @files, $abs_file;
+}
+
+sub check_undefined_symbols {
+	foreach my $file (sort @files) {
+
+		# sysfs-module is special, as its definitions are inside
+		# a text. For now, just ignore them.
+		next if ($file =~ m#^/sys/module/#);
+
+		# Ignore cgroup and firmware
+		next if ($file =~ m#^/sys/(fs/cgroup|firmware)/#);
+
+		my $defined = 0;
+		my $exact = 0;
+		my $whats = "";
+
+		my $leave = $file;
+		$leave =~ s,.*/,,;
+
+		my $path = $file;
+		$path =~ s,(.*/).*,$1,;
+
+		if (defined($leaf{$leave})) {
+			my $what = $leaf{$leave};
+			$whats .= " $what" if (!($whats =~ m/$what/));
+
+			foreach my $w (split / /, $what) {
+				if ($file =~ m#^$w$#) {
+					$exact = 1;
+					last;
+				}
+			}
+			# Check for aliases
+			#
+			# TODO: this algorithm is O(w * n²). It can be
+			# improved in the future in order to handle it
+			# faster, by changing parse_existing_sysfs to
+			# store the sysfs inside a tree, at the expense
+			# on making the code less readable and/or using some
+			# additional perl library.
+			foreach my $a (keys %aliases) {
+				my $new = $aliases{$a};
+				my $len = length($new);
+
+				if (substr($file, 0, $len) eq $new) {
+					my $newf = $a . substr($file, $len);
+
+					foreach my $w (split / /, $what) {
+						if ($newf =~ m#^$w$#) {
+							$exact = 1;
+							last;
+						}
+					}
+				}
+			}
+
+			$defined++;
+		}
+		next if ($exact);
+
+		# Ignore some sysfs nodes
+		next if ($file =~ m#/(sections|notes)/#);
+
+		# Would need to check at
+		# Documentation/admin-guide/kernel-parameters.txt, but this
+		# is not easily parseable.
+		next if ($file =~ m#/parameters/#);
+
+		if ($hint && $defined) {
+			print "$leave at $path might be one of:$whats\n";
+			next;
+		}
+		print "$file not found.\n";
+	}
 }
 
 sub undefined_symbols {
+	find({
+		wanted =>\&parse_existing_sysfs,
+		preprocess =>\&dont_parse_special_attributes,
+		no_chdir => 1
+	     }, $sysfs_prefix);
+
 	foreach my $w (sort keys %data) {
 		foreach my $what (split /\xac /,$w) {
+			next if (!($what =~ m/^$sysfs_prefix/));
+
+			# Convert what into regular expressions
+
+			$what =~ s,/\.\.\./,/*/,g;
+			$what =~ s,\*,.*,g;
+
+			# Temporarily change [0-9]+ type of patterns
+			$what =~ s/\[0\-9\]\+/\xff/g;
+
+			# Temporarily change [\d+-\d+] type of patterns
+			$what =~ s/\[0\-\d+\]/\xff/g;
+			$what =~ s/\[(\d+)\]/\xf4$1\xf5/g;
+
+			# Temporarily change [0-9] type of patterns
+			$what =~ s/\[(\d)\-(\d)\]/\xf4$1-$2\xf5/g;
+
+			# Handle multiple option patterns
+			$what =~ s/[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]/($1|$2)/g;
+
+			# Handle wildcards
+			$what =~ s/\<[^\>]+\>/.*/g;
+			$what =~ s/\{[^\}]+\}/.*/g;
+			$what =~ s/\[[^\]]+\]/.*/g;
+
+			$what =~ s/[XYZ]/.*/g;
+
+			# Recover [0-9] type of patterns
+			$what =~ s/\xf4/[/g;
+			$what =~ s/\xf5/]/g;
+
+			# Remove duplicated spaces
+			$what =~ s/\s+/ /g;
+
+			# Special case: this ABI has a parenthesis on it
+			$what =~ s/sqrt\(x^2\+y^2\+z^2\)/sqrt\(x^2\+y^2\+z^2\)/;
+
+			# Special case: drop comparition as in:
+			#	What: foo = <something>
+			# (this happens on a few IIO definitions)
+			$what =~ s,\s*\=.*$,,;
+
 			my $leave = $what;
 			$leave =~ s,.*/,,;
 
-			if (defined($leaf{$leave})) {
-				$leaf{$leave} .= " " . $what;
-			} else {
-				$leaf{$leave} = $what;
+			next if ($leave =~ m/^\.\*/ || $leave eq "");
+
+			# Escape all other symbols
+			$what =~ s/$escape_symbols/\\$1/g;
+			$what =~ s/\\\\/\\/g;
+			$what =~ s/\\([\[\]\(\)\|])/$1/g;
+			$what =~ s/(\d+)\\(-\d+)/$1$2/g;
+
+			$leave =~ s/[\(\)]//g;
+
+			foreach my $l (split /\|/, $leave) {
+				if (defined($leaf{$l})) {
+					next if ($leaf{$l} =~ m/$what/);
+					$leaf{$l} .= " " . $what;
+				} else {
+					$leaf{$l} = $what;
+				}
 			}
 		}
 	}
-
-	find({wanted =>\&parse_existing_sysfs, preprocess =>\&skip_debugfs, no_chdir => 1}, $sysfs_prefix);
+	check_undefined_symbols;
 }
 
 # Ensure that the prefix will always end with a slash
@@ -647,7 +764,8 @@ abi_book.pl - parse the Linux ABI files and produce a ReST book.
 =head1 SYNOPSIS
 
 B<abi_book.pl> [--debug] [--enable-lineno] [--man] [--help]
-	       [--(no-)rst-source] [--dir=<dir>] <COMAND> [<ARGUMENT>]
+	       [--(no-)rst-source] [--dir=<dir>] [--show-hints]
+	       <COMAND> [<ARGUMENT>]
 
 Where <COMMAND> can be:
 
@@ -689,6 +807,11 @@ Enable output of #define LINENO lines.
 Put the script in verbose mode, useful for debugging. Can be called multiple
 times, to increase verbosity.
 
+=item B<--show-hints>
+
+Show hints about possible definitions for the missing ABI symbols.
+Used only when B<undefined>.
+
 =item B<--help>
 
 Prints a brief help message and exits.
-- 
2.31.1


  parent reply	other threads:[~2021-09-23 13:31 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-23 13:29 [PATCH 00/13] get_abi.pl undefined: improve precision and performance Mauro Carvalho Chehab
2021-09-23 13:29 ` [PATCH 01/13] scripts: get_abi.pl: Better handle multiple What parameters Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 02/13] scripts: get_abi.pl: Check for missing symbols at the ABI specs Mauro Carvalho Chehab
2021-09-23 13:30 ` Mauro Carvalho Chehab [this message]
2021-09-23 13:30 ` [PATCH 04/13] scripts: get_abi.pl: add an option to filter undefined results Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 05/13] scripts: get_abi.pl: don't skip what that ends with wildcards Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 06/13] scripts: get_abi.pl: Ignore fs/cgroup sysfs nodes earlier Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 07/13] scripts: get_abi.pl: add a graph to speedup the undefined algorithm Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 08/13] scripts: get_abi.pl: improve debug logic Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 09/13] scripts: get_abi.pl: Better handle leaves with wildcards Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 10/13] scripts: get_abi.pl: ignore some sysfs nodes earlier Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 11/13] scripts: get_abi.pl: stop check loop earlier when regex is found Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 12/13] scripts: get_abi.pl: precompile what match regexes Mauro Carvalho Chehab
2021-09-23 13:30 ` [PATCH 13/13] scripts: get_abi.pl: ensure that "others" regex will be parsed Mauro Carvalho Chehab
2021-09-23 13:58 ` [PATCH 00/13] get_abi.pl undefined: improve precision and performance Greg Kroah-Hartman
2021-09-23 15:41   ` [PATCH 0/8] (REBASED) " Mauro Carvalho Chehab
2021-09-23 15:41     ` [PATCH 1/8] scripts: get_abi.pl: Fix get_abi.pl search output Mauro Carvalho Chehab
2021-09-23 15:41     ` [PATCH 2/8] scripts: get_abi.pl: call get_leave() a little late Mauro Carvalho Chehab
2021-09-23 15:41     ` [PATCH 3/8] scripts: get_abi.pl: improve debug logic Mauro Carvalho Chehab
2021-09-23 15:41     ` [PATCH 4/8] scripts: get_abi.pl: Better handle leaves with wildcards Mauro Carvalho Chehab
2021-09-23 15:41     ` [PATCH 5/8] scripts: get_abi.pl: ignore some sysfs nodes earlier Mauro Carvalho Chehab
2021-09-23 15:41     ` [PATCH 6/8] scripts: get_abi.pl: stop check loop earlier when regex is found Mauro Carvalho Chehab
2021-09-23 15:41     ` [PATCH 7/8] scripts: get_abi.pl: precompile what match regexes Mauro Carvalho Chehab
2021-09-23 15:41     ` [PATCH 8/8] scripts: get_abi.pl: ensure that "others" regex will be parsed Mauro Carvalho Chehab
2021-09-23 17:13     ` [PATCH 0/8] (REBASED) get_abi.pl undefined: improve precision and performance Greg Kroah-Hartman
2021-09-27  8:55       ` Mauro Carvalho Chehab
2021-09-27  9:23         ` Greg Kroah-Hartman
2021-09-27 13:39           ` Mauro Carvalho Chehab
2021-09-27 15:48             ` Greg Kroah-Hartman
2021-09-28 10:03               ` Mauro Carvalho Chehab
2021-09-28 10:43                 ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=244fcb2b85b84d1d26e7a7c4cbc6ea17a67a9025.1632402570.git.mchehab+huawei@kernel.org \
    --to=mchehab+huawei@kernel.org \
    --cc=corbet@lwn.net \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.