All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ian Jackson <ian.jackson@eu.citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross <jgross@suse.com>,
	Lars Kurth <lars.kurth@citrix.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Wei Liu <wei.liu2@citrix.com>,
	George Dunlap <george.dunlap@citrix.com>
Subject: [PATCH 8/9] docs: Provide parse-support-md
Date: Wed, 11 Apr 2018 16:35:38 +0100	[thread overview]
Message-ID: <1523460939-15013-9-git-send-email-ian.jackson@eu.citrix.com> (raw)
In-Reply-To: <1523460939-15013-1-git-send-email-ian.jackson@eu.citrix.com>

This utility reads json format pandoc output, from parsing one or more
SUPPORT.md files, and generates an HTML table element containing the
principal version and feature information.

This is rather hairier than I anticipated when I started out; hence
the 400-odd-line Perl script.

Machinery to assemble the appropriate inputs for parse-support-md
will be in the next commit.

Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
---
v2: New in this version of the series.
---
 docs/parse-support-md | 410 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100755 docs/parse-support-md

diff --git a/docs/parse-support-md b/docs/parse-support-md
new file mode 100755
index 0000000..b882705
--- /dev/null
+++ b/docs/parse-support-md
@@ -0,0 +1,410 @@
+#!/usr/bin/perl -w
+#
+# Written with reference to pandoc_markdown from Debian jessie
+# We require atx-style headers
+#
+# usage:
+#   pandoc -t json SUPPORT.md >j-unstable
+#   git-cat-file ... | pandoc -t json >j-4.10
+#   docs/parse-support-md \
+#            j-unstable https://xenbits/unstable/SUPPORT.html
+#            j-4.10 https://xenbits/4.10/SUPPORT.html
+# or equivalent
+
+use strict;
+use JSON;
+use Tie::IxHash;
+use IO::File;
+use CGI qw(escapeHTML);
+use Data::Dumper;
+
+#---------- accumulating input/output ----------
+
+# This combines information from all of the input files.
+
+sub new_sectlist () { { } };
+our $toplevel_sectlist = new_sectlist();
+# an $sectlist is
+#   { }                 nothing seen yet
+#   a tied hashref      something seen
+# (tied $sectlist)    is an object of type Tie::IxHash
+# $sectlist->{KEY} a $sectnode:
+# $sectlist->{KEY}{Status}[VI] = absent or markdown content
+# $sectlist->{KEY}{HasText}[VI] = trueish iff there was a Para
+# $sectlist->{KEY}{Children} = a further $sectlist
+# $sectlist->{KEY}{Key} = KEY
+# $sectlist->{KEY}{Anchor} = value for < id="" > in the pandoc html
+#
+# A $sectnode represents a single section from the original markdown
+# document.  Its subsections are in Children.
+#
+# Also, the input syntax:
+#    Status, something or other: Supported
+# is treated as a $sectnode, is as if it were a subsection -
+# one called `something or other'.
+#
+# KEY is the Anchor, or derived from the `something or other'.
+# It is used to match up identical features in different versions.
+
+#---------- state for this input file ----------
+
+our $version_index;
+our @version_urls;
+
+our @insections;
+# $insections[]{Key} = string
+# $insections[]{Anchor} = string or undef
+# $insections[]{Headline} = markdown content
+
+our $had_unknown;
+our $current_sectnode;
+# adding new variable ?  it must be reset in r_toplevel
+
+#---------- parsing ----------
+
+sub ri_Header {
+    my ($c) = @_;
+    my ($level, $infos, $hl) = @$c;
+#print STDERR 'RI_HEADER ', Dumper($c, \@c);
+    my ($id) = @$infos;
+    die unless $level >= 1;
+    die unless $level-2 <= $#insections;
+    $#insections = $level-2;
+    push @insections,
+        {
+         Key => $id,
+         Anchor => $id,
+         Headline => $hl,
+        };
+#print STDERR Dumper(\@insections);
+    $current_sectnode = undef;
+}
+
+sub ri_Para {
+    if ($current_sectnode) {
+        $current_sectnode->{HasText}[$version_index] = 1;
+    }
+};
+
+sub parse_feature_entry ($) {
+    my ($value) = @_;
+    die unless @insections;
+
+    my $sectnode;
+    my $anchor = '';
+    foreach my $s (@insections) {
+        my $sectlist = $sectnode
+            ? $sectnode->{Children} : $toplevel_sectlist;
+        my $key = $s->{Key};
+        $anchor = $s->{Anchor} if $s->{Anchor};
+        tie %$sectlist, 'Tie::IxHash' unless tied %$sectlist;
+#print STDERR "PARSE_FEATURE_ENTRY ", Dumper($s);
+        $sectlist->{$key} //=
+            {
+             Children => new_sectlist(),
+             Headline => $s->{Headline},
+             Key => $key,
+             Anchor => $anchor,
+            };
+        $sectnode = $sectlist->{$key};
+    }
+    die unless $sectnode;
+    $sectnode->{Status}[$version_index] = $value;
+    $current_sectnode = $sectnode;
+}
+
+sub ri_CodeBlock {
+    my ($c) = @_;
+    my ($infos, $text) = @$c;
+
+    if ($text =~ m{^(?: Functional\ completeness 
+                   | Functional\ stability
+                   | Interface\ stability
+                   | Security\ supported ) \:}x) {
+        # ignore this
+        return;
+    }
+    die "$had_unknown / $text ?" if $had_unknown;
+
+    my $toplevel = $text =~ m{^Xen-Version:};
+
+    foreach my $l (split /\n/, $text) {
+        $l =~ s/\s*$//;
+        next unless $l =~ m/\S/;
+
+        my ($descr, $value) =
+            $toplevel
+            ? $l =~ m{^([A-Z][-A-Z0-9a-z]+)\:\s+(\S.*)$}
+            : $l =~ m{^(?:Status|Supported)(?:\,\s*([^:]+))?\:\s+(\S.*)$}
+            or die ("$text\n^ cannot parse status codeblock line:".
+                    ($toplevel and 'top').
+                    "\n$l\n ?");
+        if (length $descr) {
+            die unless @insections;
+            my $key = lc $descr;
+            $key =~ y/ /-/;
+            $key =~ y/-0-9A-Za-z//cd;
+            $key = $insections[$#insections]{Anchor}.'--'.$key;
+            push @insections,
+                {
+                 Key => $key,
+                 Headline => [{ t => 'Str', c => $descr }],
+                };
+        }
+        parse_feature_entry $value;
+        if (length $descr) {
+            pop @insections;
+        }
+    }
+}
+
+sub process_unknown {
+    my ($c, $e) = @_;
+    $had_unknown = Dumper($e);
+}
+
+sub r_content ($) {
+    my ($i) = @_;
+    foreach my $e (@$i) {
+        my $f = ${*::}{"ri_$e->{t}"};
+        $f //= \&process_unknown;
+        $f->($e->{c}, $e);
+    }
+}
+
+sub r_toplevel ($) {
+    my ($i) = @_;
+
+    die unless defined $version_index;
+
+    @insections = ();
+    $had_unknown = undef;
+    $current_sectnode = undef;
+
+    foreach my $e (@$i) {
+        next unless ref $e eq 'ARRAY';
+        r_content $e;
+    }
+}
+
+sub read_inputs () {
+    $version_index = 0;
+
+    local $/;
+    undef $/;
+
+    while (my $f = shift @ARGV) {
+        push @version_urls, shift @ARGV;
+        eval {
+            open F, '<', $f or die $!;
+            my $input_toplevel = decode_json <F>;
+            r_toplevel $input_toplevel;
+        };
+        die "$@\nwhile processing input file $f\n" if $@;
+        $version_index++;
+    }
+}
+
+#---------- reprocessing ----------
+
+# variables generated by analyse_reprocess:
+our $maxdepth;
+
+sub pandoc2html_inline ($) {
+    my ($content) = @_;
+
+    my $json_fh = IO::File::new_tmpfile or die $!;
+    print $json_fh to_json([
+                            { unMeta => { } },
+                            [{ t => 'Para', c => $content }],
+                           ]) or die $!;
+    flush $json_fh or die $!;
+    seek $json_fh,0,0 or die $!;
+
+    my $c = open PD, "-|" // die $!;
+    if (!$c) {
+        open STDIN, "<&", $json_fh;
+        exec qw(pandoc -f json) or die $!;
+    }
+
+    local $/;
+    undef $/;
+    my $html = <PD>;
+    $?=$!=0;
+    if (!close PD) {
+        eval {
+            seek $json_fh,0,0 or die $!;
+            open STDIN, '<&', $json_fh or die $!;
+            system 'json_pp';
+        };
+        die "\n $? $!";
+    }
+
+    $html =~ s{^\<p\>}{} or die "$html ?";
+    $html =~ s{\</p\>$}{} or die "$html ?";
+    $html =~ s{\n$}{};
+    return $html;
+}
+
+sub reprocess_sectlist ($$);
+
+sub reprocess_sectnode ($$) {
+    my ($sectnode, $d) = @_;
+
+    $sectnode->{Depth} = $d;
+
+    if ($sectnode->{Status}) {
+        $maxdepth = $d if $d > $maxdepth;
+    }
+
+    if ($sectnode->{Headline}) {
+#            print STDERR Dumper($sectnode);
+        $sectnode->{Headline} =
+            pandoc2html_inline $sectnode->{Headline};
+    }
+
+    reprocess_sectlist $sectnode->{Children}, $d;
+}
+
+sub reprocess_sectlist ($$) {
+    my ($sectlist, $d) = @_;
+    $d++;
+
+    foreach my $sectnode (values %$sectlist) {
+        reprocess_sectnode $sectnode, $d;
+    }
+}
+
+sub count_rows_sectlist ($);
+
+sub count_rows_sectnode ($) {
+    my ($sectnode) = @_;
+    my $rows = 0;
+    $rows++ if $sectnode->{Status};
+    $rows += count_rows_sectlist $sectnode->{Children};
+    $sectnode->{Rows} = $rows;
+    return $rows;
+}
+
+sub count_rows_sectlist ($) {
+    my ($sectlist) = @_;
+    my $rows = 0;
+    foreach my $sectnode (values %$sectlist) {
+        $rows += count_rows_sectnode $sectnode;
+    }
+    return $rows;
+}
+
+# After reprocess_sectlist,
+#    ->{Headline}   is in html
+#    ->{Status}     is in plain text
+
+sub analyse_reprocess () {
+    $maxdepth = 0;
+    reprocess_sectlist $toplevel_sectlist, 0;
+}
+
+#---------- output ----------
+
+sub o { print @_ or die $!; }
+
+our @pending_headings;
+
+sub write_output_row ($) {
+    my ($sectnode) = @_;
+#    print STDERR 'WOR ', Dumper($d, $sectnode);
+    o('<tr>');
+    my $span = sub {
+        my ($rowcol, $n) = @_;
+        o(sprintf ' %sspan="%d"', $rowcol, $n) if $n != 1;
+    };
+    # This is all a bit tricky because (i) the input is hierarchical
+    # with variable depth, whereas the output has to have a fixed
+    # number of heading columns on the LHS; (ii) the HTML
+    # colspan/rowspan system means that when we are writing out, we
+    # have to not write table elements for table entries which have
+    # already been written with a span instruction that covers what we
+    # would write now.
+    while (my $heading = shift @pending_headings) {
+        o('<th valign="top"');
+        o(sprintf ' id="%s"', $heading->{Key});
+        $span->('row', $heading->{Rows});
+        $span->('col', $maxdepth - $heading->{Depth} + 1)
+            if !%{ $heading->{Children} };
+        o(' align="left">');
+        o($heading->{Headline});
+        o('</th>');
+    }
+    if (%{ $sectnode->{Children} }) {
+        # we suppressed the colspan above, but we do need to make the gap
+        my $n = $maxdepth - $sectnode->{Depth};
+        die 'XX '. Dumper($n, $sectnode) if $n<0;
+        if ($n) {
+            o('<td');
+            $span->('col', $n);
+            o('></td>');
+        }
+    }
+    for (my $i=0; $i<@version_urls; $i++) {
+        my $st = $sectnode->{Status}[$i];
+        $st //= '-';
+        o('<td>');
+        my $end_a = '';
+        if ($sectnode->{Key} eq 'release-support--xen-version') {
+            o(sprintf '<a href="%s">', $version_urls[$i]);
+            $end_a = '</a>';
+        }
+        o(escapeHTML($st));
+        if ($sectnode->{HasText}[$i] && $sectnode->{Anchor}) {
+            o(sprintf '<a href="%s#%s">[*]</a>',
+              $version_urls[$i], $sectnode->{Anchor});
+        }
+        o($end_a);
+        o('</td>');
+    }
+    o("</tr>\n");
+}      
+
+sub write_output_sectlist ($);
+sub write_output_sectlist ($) {
+    my ($sectlist) = @_;
+    foreach my $key (keys %$sectlist) {
+        my $sectnode = $sectlist->{$key};
+        push @pending_headings, $sectnode;
+        write_output_row $sectnode if $sectnode->{Status};
+        write_output_sectlist $sectnode->{Children};
+    }
+}
+
+sub write_output () {
+    o('<table rules="all">');
+    write_output_sectlist $toplevel_sectlist;
+    o('</table>');
+}
+
+#---------- main program ----------
+
+open DEBUG, '>', '/dev/null' or die $!;
+if (@ARGV && $ARGV[0] eq '-D') {
+    shift @ARGV;
+    open DEBUG, '>&2' or die $!;
+}
+
+die unless @ARGV;
+die if $ARGV[0] =~ m/^-/;
+die if @ARGV % 2;
+
+read_inputs();
+
+#use Data::Dumper;
+#print DEBUG Dumper($toplevel_sectlist);
+
+analyse_reprocess();
+# Now Headline is in HTML
+
+count_rows_sectlist($toplevel_sectlist);
+
+#use Data::Dumper;
+print DEBUG Dumper($toplevel_sectlist);
+
+write_output();
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2018-04-11 15:36 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-11 15:35 [PATCH for-4.11 v2 0/9] Provide support matrix generator Ian Jackson
2018-04-11 15:35 ` [PATCH 1/9] SUPPORT.md: Syntax: Fix some bullet lists Ian Jackson
2018-04-11 15:35 ` [PATCH 2/9] SUPPORT.md: Syntax: Fix a typo "States" Ian Jackson
2018-04-11 15:35 ` [PATCH 3/9] SUPPORT.md: Syntax: Provide a title rather than a spurious empty section Ian Jackson
2018-04-11 15:37   ` George Dunlap
2018-04-11 15:48     ` Ian Jackson
2018-04-11 15:35 ` [PATCH 4/9] docs/gen-html-index: Extract titles from HTML documents Ian Jackson
2018-04-11 15:35 ` [PATCH 5/9] docs/gen-html-index: Support documents at the toplevel Ian Jackson
2018-04-11 15:35 ` [PATCH 6/9] docs/Makefile: Introduce GENERATE_PANDOC_RULE_RAW Ian Jackson
2018-04-11 15:35 ` [PATCH 7/9] docs/Makefile: Format SUPPORT.md into the toplevel Ian Jackson
2018-04-11 15:35 ` Ian Jackson [this message]
2018-04-11 15:39   ` [PATCH 8/9] docs: Provide parse-support-md Juergen Gross
2018-04-11 15:35 ` [PATCH 9/9] docs: Provide support-matrix-generate, to generate a support matrix in HTML Ian Jackson
2018-04-11 15:40   ` Juergen Gross
2018-04-11 17:47 ` [PATCH for-4.11 v2 0/9] Provide support matrix generator Lars Kurth

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1523460939-15013-9-git-send-email-ian.jackson@eu.citrix.com \
    --to=ian.jackson@eu.citrix.com \
    --cc=george.dunlap@citrix.com \
    --cc=jgross@suse.com \
    --cc=lars.kurth@citrix.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.