From 322d1310b9f80dfe625e735d7ce27dc7d80c834c Mon Sep 17 00:00:00 2001 From: Mark Hansen Date: Sat, 16 May 2020 18:08:12 +1000 Subject: [PATCH] Migrate output format doc generation ksh to jinja2 This commit replaces the ksh-based templating with Python Jinja2 templating. Previously output.html was generated with: - output.1 - the output of mkoutput.sh - output.2 My end goal here is to make it much simpler to generate more complex output format docs - however I've made this change so that the output is virtually unchanged, except for HTML-escaping a single apostrophe. My plan is to follow this up with adding some more features to the output format documentation. Templating in general provides a few opportunities: - safe auto-html-escaping - easier editing, easier adding of more data - some separation of logic and templating - inclusion of subtemplates I chose Jinja2 because: - It's the most popular Python templating engine, used by mature products like Ansible and Flask. - Graphviz's docs generation already takes a dependency on Python for jconvert.py. - The Python language is pretty accessible to contributors. - Jinja2 seems pretty stable, going back to 2007. Unlikely to break soon. Alternatives considered: - I could just as well have gone with Go's stdlib html/template but I don't think Go is already used to build Graphviz, and Go isn't as popular as Python, so not as accessible to contributors. 
- Python Django templates: they're pretty similar to Jinja2, but I think Django's templates are more heavyweight --- doc/info/output.html | 2 +- doc/infosrc/Makefile | 22 ++++-- doc/infosrc/mkoutput.py | 65 ++++++++++++++++ doc/infosrc/mkoutput.sh | 107 --------------------------- doc/infosrc/outputs | 22 +++--- doc/infosrc/requirements.txt | 3 + doc/infosrc/templates/output.html.j2 | 30 ++++++++ 7 files changed, 124 insertions(+), 127 deletions(-) create mode 100755 doc/infosrc/mkoutput.py delete mode 100755 doc/infosrc/mkoutput.sh create mode 100644 doc/infosrc/requirements.txt create mode 100644 doc/infosrc/templates/output.html.j2 diff --git a/doc/info/output.html b/doc/info/output.html index 95f541080..36144bc31 100644 --- a/doc/info/output.html +++ b/doc/info/output.html @@ -92,7 +92,7 @@ formats need to be interpreted in this manner. pdf Portable Document Format (PDF) pic -Kernighan's PIC graphics language +Kernighan's PIC graphics language plain
plain-ext Simple text format diff --git a/doc/infosrc/Makefile b/doc/infosrc/Makefile index 9a5b20308..01ce342f4 100644 --- a/doc/infosrc/Makefile +++ b/doc/infosrc/Makefile @@ -6,9 +6,14 @@ # ps_to_png.sh and other make rules rely on netpbm tools # and psconvert, which is part of the GMT library. # -# python is used to run jconvert.py, which converts the json schema -# graphviz_json_schema.json to html. This also relies on the python -# package json2html. +# Install python modules with: +# $ pip install --user -r requirements.txt +# +# python is used to run: +# - jconvert.py, which converts the json schema graphviz_json_schema.json to html. +# This also relies on the python package json2html. +# - mkoutput.py, which converts templates/output.html.j2 to output.html +# This requires the jinja2 python package. # # The main product are 7 web pages: # arrows.html - arrow_grammar @@ -142,14 +147,15 @@ colors.html : colors.1 colors.n ../../lib/common/color_names ../../lib/common/sv cat colors.n >> colors.html rm -rf colortmp -schema.html : jconvert.py graphviz_json_schema.json - ./jconvert.py graphviz_json_schema.json schema.html -output.html : output.1 output.2 outputs mkoutput.sh plugins.png jconvert.py schema.html +output.html : output.1 output.2 outputs mkoutput.py plugins.png jconvert.py schema.html templates/output.html.j2 cat output.1 > output.html - ./mkoutput.sh < outputs >> output.html + ./mkoutput.py < outputs >> output.html cat output.2 >> output.html +schema.html : jconvert.py graphviz_json_schema.json + ./jconvert.py graphviz_json_schema.json schema.html + html.html : html.1 html_grammar html.2 html.3 html1.gif html2.gif html3.gif html4.gif mklang ./mklang html_grammar gramtmp cat html.1 > html.html @@ -222,7 +228,7 @@ distclean : clean (for s in $$(cat shapelist); do rm -f $$s.gif; done) EXTRA_DIST = $(XGIF) mklang.y mkarrows.sh mkattrs.sh mkshapes.sh mkstyles.sh mktapers.sh \ - mktypes.sh mkarrowtbl.sh mkoutput.sh mkshhtml.sh \ + mktypes.sh mkarrowtbl.sh 
mkoutput.py mkshhtml.sh \ ps_to_png.sh arrow_grammar grammar html_grammar \ shapelist attrs.1 colors.1 colors.n \ output.1 output.2 html.1 html.2 html1.dot html.3 \ diff --git a/doc/infosrc/mkoutput.py b/doc/infosrc/mkoutput.py new file mode 100755 index 000000000..ac350c4ef --- /dev/null +++ b/doc/infosrc/mkoutput.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# Takes `outputs` as stdin and generates output.html +# Uses `templates/output.html.j2` +# See `outputs` file for format documentation. + +import jinja2 +import markupsafe +import re +import sys +from typing import Dict, Tuple + +HEADER_RE = re.compile(r'^:(?P<params>[^:]+):(?P<format>.*)') + +# Tuple of command-line-params for an output format, e.g. ('jpg', 'jpeg', 'jpe') +params : Tuple[str, ...] = () + +# Map from tuple of command-line-params to full name of the output format +formats : Dict[Tuple[str, ...], str] = {} + +# Map from tuple of command-line-params to an HTML description string +html_descriptions : Dict[Tuple[str, ...], str] = {} + +for line in sys.stdin: + # Skip comment lines. + if line.startswith('#'): + continue + + m = HEADER_RE.match(line) + if m: + # This is a header line. Grab out the values. + + # Command-line formats are slash-separated. + params = tuple(m.group('params').split('/')) + + # Full format name is plain text + formats[params] = m.group('format') + + # Set an empty string html description, ready to append to. + html_descriptions[params] = '' + else: + # This is an HTML line, possibly a continuation of a previous HTML line. + html_descriptions[params] += line + + +env = jinja2.Environment( + # Load template files from ./templates/ + loader=jinja2.FileSystemLoader('templates'), + # Auto-HTML-escape any html or xml files. + autoescape=jinja2.select_autoescape(['html', 'xml', 'html.j2', 'xml.j2']), + # Whitespace control + trim_blocks=True, + lstrip_blocks=True, + # Raise exception on any attempt to access undefined variables. 
+ undefined=jinja2.StrictUndefined, +) +template = env.get_template('output.html.j2') +print(template.render( + formats=formats, + # Vouch for the HTML descriptions as being safe and not needing auto-HTML-escaping. + # This is reasonable because the HTML descriptions are not attacker-controlled. + descriptions={ + params: markupsafe.Markup(desc) + for params, desc in html_descriptions.items() + } +)) diff --git a/doc/infosrc/mkoutput.sh b/doc/infosrc/mkoutput.sh deleted file mode 100755 index 60a13f481..000000000 --- a/doc/infosrc/mkoutput.sh +++ /dev/null @@ -1,107 +0,0 @@ -#! /bin/ksh -typeset -A desc fullname -name= - -# Given the output specifications as stdin, read each one, -# storing the formats long name and description in the fullname -# and desc arrays, respectively, indexed by the format name. -# The initial line of an item has the format :name:long name -# -# Lines beginning with '#' are treated as comments. -OLDIFS="$IFS" -IFS= -while read line -do - c=${line:0:1} - if [[ $c == '#' ]] - then - continue - elif [[ $c == ':' ]] - then - if [[ -n "$name" ]] - then - desc[$name]=$txt - fullname[$name]=$tag - txt="" - fi - line=${line#:} - if [[ "$line" == *:* ]] - then - name=${line%%:*} - tag=${line#$name:} - else - name=$line - tag="" - fi - else - txt="$txt${line}\n" - fi -done -IFS="$OLDIFS" - -if [[ -n "$name" ]] -then - desc[$name]=$txt - fullname[$name]=$tag - txt="" -fi - -#print ${!fullname[@]} -#print ${desc[@]} -#exit - -set -s ${!desc[@]} - -# Output a brief description of the formats as a table. -# This is followed by a list of the formats, with the long -# description of each. -# The formats are alphabetized on output. -# Note that an item may have multiple names, i.e., the first -# field may have the format name1/name2/name3. -# The output format is given the anchor a: in the -# table and d: in the list. - -print "" -print "" -for i -do - print -n " "; - print -n ""; -done -print "
Command-line
parameter
Format
"; - first=yes - for n in ${i//\// } - do - if [[ -n $first ]] - then - first= - else - print -n "
" - fi - printf "%s\n" $n $n $n - done - print -n "
" - print -n ${fullname[$i]}; print "
" - -print "
" - -#set -s ${!desc[@]} -print "

Format Descriptions

\n
" -for i -do - first=yes - for n in ${i//\// } - do - if [[ -n $first ]] - then - first= - else - print -n "," - fi - printf "
%s\n" $n $n $n - done - print "
${desc[$i]}" -done -print "
\n
" - -exit 0 diff --git a/doc/infosrc/outputs b/doc/infosrc/outputs index df3b1a1d2..1ae6f7398 100644 --- a/doc/infosrc/outputs +++ b/doc/infosrc/outputs @@ -1,8 +1,8 @@ # List of Graphviz output formats -# Each item consists of line of the form :: +# Each item consists of line of the form :[/...]: # followed by a description of the format in HTML -# The name may consist of multiple, related names, separated by '/'. -# The long description is just text. +# The command-line params may consist of multiple, related params, separated by '/'. +# The format is just text. # The items are alphabetized when the page is created. # :eps:Encapsulated PostScript @@ -469,28 +469,28 @@ The only real advantages to these formats is their terseness and their ease of parsing. In general, the dot and xdot are preferable in terms of the quantity of information provided. -:bmp: Windows Bitmap Format +:bmp:Windows Bitmap Format Outputs images in the Windows BMP format. -:ico: Icon Image File Format +:ico:Icon Image File Format Outputs images in the Windows ICO format. -:pdf: Portable Document Format (PDF) +:pdf:Portable Document Format (PDF) Produces PDF output. (This option assumes Graphviz includes the Cairo renderer.) Alternatively, one can use the ps2 option to produce PDF-compatible PostScript, and then use a ps-to-pdf converter. -:tif/tiff: TIFF (Tag Image File Format) +:tif/tiff:TIFF (Tag Image File Format) Produces TIFF output. -:vml/vmlz: Vector Markup Language (VML) +:vml/vmlz:Vector Markup Language (VML) Produces VML output, the latter in compressed format.

See Note. -:gtk: GTK canvas +:gtk:GTK canvas Creates a GTK window and displays the output there. -:webp: Image format for the Web +:webp:Image format for the Web Produces output in the image format for the Web (WEBP) format, optimized for web devices such as tablets. See Wikipedia's WebP or Google's webp pages. -:xlib/x11: Xlib canvas +:xlib/x11:Xlib canvas Creates an Xlib window and displays the output there. diff --git a/doc/infosrc/requirements.txt b/doc/infosrc/requirements.txt new file mode 100644 index 000000000..1097146a3 --- /dev/null +++ b/doc/infosrc/requirements.txt @@ -0,0 +1,3 @@ +json2html==1.3.0 +Jinja2==2.10 +MarkupSafe==1.0 diff --git a/doc/infosrc/templates/output.html.j2 b/doc/infosrc/templates/output.html.j2 new file mode 100644 index 000000000..d82c71263 --- /dev/null +++ b/doc/infosrc/templates/output.html.j2 @@ -0,0 +1,30 @@ + + +{% for params, format in formats | dictsort %} + +{% endfor %} +
Command-line
parameter
Format
+ {%- for p in params -%} + {{p}} + {%- if not loop.last %} + +
+ {%- endif %} + {%- endfor %} + +
{{ format }}
+


+

Format Descriptions

+
+{% for params, description in descriptions | dictsort %} + {% for p in params %} + {% if not loop.first -%} + , + {%- endif -%} +
{{p}} + {% endfor -%} +
+ {{- description }} +{% endfor %} +
+
-- 2.50.1