[PATCH 8/9] docs: Provide parse-support-md

From: Ian Jackson <ian.jackson@eu.citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross <jgross@suse.com>,
	Lars Kurth <lars.kurth@citrix.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Wei Liu <wei.liu2@citrix.com>,
	George Dunlap <george.dunlap@citrix.com>
Subject: [PATCH 8/9] docs: Provide parse-support-md
Date: Wed, 11 Apr 2018 16:35:38 +0100	[thread overview]
Message-ID: <1523460939-15013-9-git-send-email-ian.jackson@eu.citrix.com> (raw)
In-Reply-To: <1523460939-15013-1-git-send-email-ian.jackson@eu.citrix.com>

This utility reads json format pandoc output, from parsing one or more
SUPPORT.md files, and generates an HTML table element containing the
principal version and feature information.

This is rather hairier than I anticipated when I started out; hence
the 400-odd-line Perl script.

Machinery to assemble the appropriate inputs for parse-support-md
will be in the next commit.

Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
---
v2: New in this version of the series.
---
 docs/parse-support-md | 410 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100755 docs/parse-support-md

diff --git a/docs/parse-support-md b/docs/parse-support-md
new file mode 100755
index 0000000..b882705
--- /dev/null
+++ b/docs/parse-support-md
@@ -0,0 +1,410 @@
+#!/usr/bin/perl -w
+#
+# Written with reference to pandoc_markdown from Debian jessie
+# We require atx-style headers
+#
+# usage:
+#   pandoc -t json SUPPORT.md >j-unstable
+#   git-cat-file ... | pandoc -t json >j-4.10
+#   docs/parse-support-md \
+#            j-unstable https://xenbits/unstable/SUPPORT.html
+#            j-4.10 https://xenbits/4.10/SUPPORT.html
+# or equivalent
+
+use strict;
+use JSON;
+use Tie::IxHash;
+use IO::File;
+use CGI qw(escapeHTML);
+use Data::Dumper;
+
+#---------- accumulating input/output ----------
+
+# This combines information from all of the input files.
+
+sub new_sectlist () { { } };
+our $toplevel_sectlist = new_sectlist();
+# an $sectlist is
+#   { }                 nothing seen yet
+#   a tied hashref      something seen
+# (tied $sectlist)    is an object of type Tie::IxHash
+# $sectlist->{KEY} a $sectnode:
+# $sectlist->{KEY}{Status}[VI] = absent or markdown content
+# $sectlist->{KEY}{HasText}[VI] = trueish iff there was a Para
+# $sectlist->{KEY}{Children} = a further $sectlist
+# $sectlist->{KEY}{Key} = KEY
+# $sectlist->{KEY}{Anchor} = value for < id="" > in the pandoc html
+#
+# A $sectnode represents a single section from the original markdown
+# document.  Its subsections are in Children.
+#
+# Also, the input syntax:
+#    Status, something or other: Supported
+# is treated as a $sectnode, is as if it were a subsection -
+# one called `something or other'.
+#
+# KEY is the Anchor, or derived from the `something or other'.
+# It is used to match up identical features in different versions.
+
+#---------- state for this input file ----------
+
+our $version_index;
+our @version_urls;
+
+our @insections;
+# $insections[]{Key} = string
+# $insections[]{Anchor} = string or undef
+# $insections[]{Headline} = markdown content
+
+our $had_unknown;
+our $current_sectnode;
+# adding new variable ?  it must be reset in r_toplevel
+
+#---------- parsing ----------
+
+sub ri_Header {
+    my ($c) = @_;
+    my ($level, $infos, $hl) = @$c;
+#print STDERR 'RI_HEADER ', Dumper($c, \@c);
+    my ($id) = @$infos;
+    die unless $level >= 1;
+    die unless $level-2 <= $#insections;
+    $#insections = $level-2;
+    push @insections,
+        {
+         Key => $id,
+         Anchor => $id,
+         Headline => $hl,
+        };
+#print STDERR Dumper(\@insections);
+    $current_sectnode = undef;
+}
+
+sub ri_Para {
+    if ($current_sectnode) {
+        $current_sectnode->{HasText}[$version_index] = 1;
+    }
+};
+
+sub parse_feature_entry ($) {
+    my ($value) = @_;
+    die unless @insections;
+
+    my $sectnode;
+    my $anchor = '';
+    foreach my $s (@insections) {
+        my $sectlist = $sectnode
+            ? $sectnode->{Children} : $toplevel_sectlist;
+        my $key = $s->{Key};
+        $anchor = $s->{Anchor} if $s->{Anchor};
+        tie %$sectlist, 'Tie::IxHash' unless tied %$sectlist;
+#print STDERR "PARSE_FEATURE_ENTRY ", Dumper($s);
+        $sectlist->{$key} //=
+            {
+             Children => new_sectlist(),
+             Headline => $s->{Headline},
+             Key => $key,
+             Anchor => $anchor,
+            };
+        $sectnode = $sectlist->{$key};
+    }
+    die unless $sectnode;
+    $sectnode->{Status}[$version_index] = $value;
+    $current_sectnode = $sectnode;
+}
+
+sub ri_CodeBlock {
+    my ($c) = @_;
+    my ($infos, $text) = @$c;
+
+    if ($text =~ m{^(?: Functional\ completeness 
+                   | Functional\ stability
+                   | Interface\ stability
+                   | Security\ supported ) \:}x) {
+        # ignore this
+        return;
+    }
+    die "$had_unknown / $text ?" if $had_unknown;
+
+    my $toplevel = $text =~ m{^Xen-Version:};
+
+    foreach my $l (split /\n/, $text) {
+        $l =~ s/\s*$//;
+        next unless $l =~ m/\S/;
+
+        my ($descr, $value) =
+            $toplevel
+            ? $l =~ m{^([A-Z][-A-Z0-9a-z]+)\:\s+(\S.*)$}
+            : $l =~ m{^(?:Status|Supported)(?:\,\s*([^:]+))?\:\s+(\S.*)$}
+            or die ("$text\n^ cannot parse status codeblock line:".
+                    ($toplevel and 'top').
+                    "\n$l\n ?");
+        if (length $descr) {
+            die unless @insections;
+            my $key = lc $descr;
+            $key =~ y/ /-/;
+            $key =~ y/-0-9A-Za-z//cd;
+            $key = $insections[$#insections]{Anchor}.'--'.$key;
+            push @insections,
+                {
+                 Key => $key,
+                 Headline => [{ t => 'Str', c => $descr }],
+                };
+        }
+        parse_feature_entry $value;
+        if (length $descr) {
+            pop @insections;
+        }
+    }
+}
+
+sub process_unknown {
+    my ($c, $e) = @_;
+    $had_unknown = Dumper($e);
+}
+
+sub r_content ($) {
+    my ($i) = @_;
+    foreach my $e (@$i) {
+        my $f = ${*::}{"ri_$e->{t}"};
+        $f //= \&process_unknown;
+        $f->($e->{c}, $e);
+    }
+}
+
+sub r_toplevel ($) {
+    my ($i) = @_;
+
+    die unless defined $version_index;
+
+    @insections = ();
+    $had_unknown = undef;
+    $current_sectnode = undef;
+
+    foreach my $e (@$i) {
+        next unless ref $e eq 'ARRAY';
+        r_content $e;
+    }
+}
+
+sub read_inputs () {
+    $version_index = 0;
+
+    local $/;
+    undef $/;
+
+    while (my $f = shift @ARGV) {
+        push @version_urls, shift @ARGV;
+        eval {
+            open F, '<', $f or die $!;
+            my $input_toplevel = decode_json <F>;
+            r_toplevel $input_toplevel;
+        };
+        die "$@\nwhile processing input file $f\n" if $@;
+        $version_index++;
+    }
+}
+
+#---------- reprocessing ----------
+
+# variables generated by analyse_reprocess:
+our $maxdepth;
+
+sub pandoc2html_inline ($) {
+    my ($content) = @_;
+
+    my $json_fh = IO::File::new_tmpfile or die $!;
+    print $json_fh to_json([
+                            { unMeta => { } },
+                            [{ t => 'Para', c => $content }],
+                           ]) or die $!;
+    flush $json_fh or die $!;
+    seek $json_fh,0,0 or die $!;
+
+    my $c = open PD, "-|" // die $!;
+    if (!$c) {
+        open STDIN, "<&", $json_fh;
+        exec qw(pandoc -f json) or die $!;
+    }
+
+    local $/;
+    undef $/;
+    my $html = <PD>;
+    $?=$!=0;
+    if (!close PD) {
+        eval {
+            seek $json_fh,0,0 or die $!;
+            open STDIN, '<&', $json_fh or die $!;
+            system 'json_pp';
+        };
+        die "\n $? $!";
+    }
+
+    $html =~ s{^\<p\>}{} or die "$html ?";
+    $html =~ s{\</p\>$}{} or die "$html ?";
+    $html =~ s{\n$}{};
+    return $html;
+}
+
+sub reprocess_sectlist ($$);
+
+sub reprocess_sectnode ($$) {
+    my ($sectnode, $d) = @_;
+
+    $sectnode->{Depth} = $d;
+
+    if ($sectnode->{Status}) {
+        $maxdepth = $d if $d > $maxdepth;
+    }
+
+    if ($sectnode->{Headline}) {
+#            print STDERR Dumper($sectnode);
+        $sectnode->{Headline} =
+            pandoc2html_inline $sectnode->{Headline};
+    }
+
+    reprocess_sectlist $sectnode->{Children}, $d;
+}
+
+sub reprocess_sectlist ($$) {
+    my ($sectlist, $d) = @_;
+    $d++;
+
+    foreach my $sectnode (values %$sectlist) {
+        reprocess_sectnode $sectnode, $d;
+    }
+}
+
+sub count_rows_sectlist ($);
+
+sub count_rows_sectnode ($) {
+    my ($sectnode) = @_;
+    my $rows = 0;
+    $rows++ if $sectnode->{Status};
+    $rows += count_rows_sectlist $sectnode->{Children};
+    $sectnode->{Rows} = $rows;
+    return $rows;
+}
+
+sub count_rows_sectlist ($) {
+    my ($sectlist) = @_;
+    my $rows = 0;
+    foreach my $sectnode (values %$sectlist) {
+        $rows += count_rows_sectnode $sectnode;
+    }
+    return $rows;
+}
+
+# After reprocess_sectlist,
+#    ->{Headline}   is in html
+#    ->{Status}     is in plain text
+
+sub analyse_reprocess () {
+    $maxdepth = 0;
+    reprocess_sectlist $toplevel_sectlist, 0;
+}
+
+#---------- output ----------
+
+sub o { print @_ or die $!; }
+
+our @pending_headings;
+
+sub write_output_row ($) {
+    my ($sectnode) = @_;
+#    print STDERR 'WOR ', Dumper($d, $sectnode);
+    o('<tr>');
+    my $span = sub {
+        my ($rowcol, $n) = @_;
+        o(sprintf ' %sspan="%d"', $rowcol, $n) if $n != 1;
+    };
+    # This is all a bit tricky because (i) the input is hierarchical
+    # with variable depth, whereas the output has to have a fixed
+    # number of heading columns on the LHS; (ii) the HTML
+    # colspan/rowspan system means that when we are writing out, we
+    # have to not write table elements for table entries which have
+    # already been written with a span instruction that covers what we
+    # would write now.
+    while (my $heading = shift @pending_headings) {
+        o('<th valign="top"');
+        o(sprintf ' id="%s"', $heading->{Key});
+        $span->('row', $heading->{Rows});
+        $span->('col', $maxdepth - $heading->{Depth} + 1)
+            if !%{ $heading->{Children} };
+        o(' align="left">');
+        o($heading->{Headline});
+        o('</th>');
+    }
+    if (%{ $sectnode->{Children} }) {
+        # we suppressed the colspan above, but we do need to make the gap
+        my $n = $maxdepth - $sectnode->{Depth};
+        die 'XX '. Dumper($n, $sectnode) if $n<0;
+        if ($n) {
+            o('<td');
+            $span->('col', $n);
+            o('></td>');
+        }
+    }
+    for (my $i=0; $i<@version_urls; $i++) {
+        my $st = $sectnode->{Status}[$i];
+        $st //= '-';
+        o('<td>');
+        my $end_a = '';
+        if ($sectnode->{Key} eq 'release-support--xen-version') {
+            o(sprintf '<a href="%s">', $version_urls[$i]);
+            $end_a = '</a>';
+        }
+        o(escapeHTML($st));
+        if ($sectnode->{HasText}[$i] && $sectnode->{Anchor}) {
+            o(sprintf '<a href="%s#%s">[*]</a>',
+              $version_urls[$i], $sectnode->{Anchor});
+        }
+        o($end_a);
+        o('</td>');
+    }
+    o("</tr>\n");
+}      
+
+sub write_output_sectlist ($);
+sub write_output_sectlist ($) {
+    my ($sectlist) = @_;
+    foreach my $key (keys %$sectlist) {
+        my $sectnode = $sectlist->{$key};
+        push @pending_headings, $sectnode;
+        write_output_row $sectnode if $sectnode->{Status};
+        write_output_sectlist $sectnode->{Children};
+    }
+}
+
+sub write_output () {
+    o('<table rules="all">');
+    write_output_sectlist $toplevel_sectlist;
+    o('</table>');
+}
+
+#---------- main program ----------
+
+open DEBUG, '>', '/dev/null' or die $!;
+if (@ARGV && $ARGV[0] eq '-D') {
+    shift @ARGV;
+    open DEBUG, '>&2' or die $!;
+}
+
+die unless @ARGV;
+die if $ARGV[0] =~ m/^-/;
+die if @ARGV % 2;
+
+read_inputs();
+
+#use Data::Dumper;
+#print DEBUG Dumper($toplevel_sectlist);
+
+analyse_reprocess();
+# Now Headline is in HTML
+
+count_rows_sectlist($toplevel_sectlist);
+
+#use Data::Dumper;
+print DEBUG Dumper($toplevel_sectlist);
+
+write_output();
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel