]> granicus.if.org Git - jq/commitdiff
Remove ruby dependency from manpage build
authorWilliam Langford <wlangfor@gmail.com>
Thu, 21 Feb 2019 01:53:10 +0000 (20:53 -0500)
committerNico Williams <nico@cryptonector.com>
Tue, 26 Feb 2019 17:10:38 +0000 (11:10 -0600)
Makefile.am
docs/Pipfile
docs/Pipfile.lock
docs/build_manpage.py [changed mode: 0644->0755]
docs/content/manual/manual.yml

index 9f2de43566cd331d05be94418e35ed0078dec45f..8cb4a1a6852d222af33679ad4aea21707b4c229f 100644 (file)
@@ -165,8 +165,8 @@ install-binaries: $(BUILT_SOURCES)
        $(MAKE) $(AM_MAKEFLAGS) install-exec
 
 DOC_FILES = docs/content docs/public docs/templates docs/site.yml       \
-        docs/Pipfile docs/Pipfile.lock docs/build_website.py            \
-        docs/README.md jq.1.prebuilt
+        docs/Pipfile docs/Pipfile.lock docs/build_manpage.py            \
+        docs/build_website.py docs/README.md jq.1.prebuilt
 
 EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER)     \
         jq.1.prebuilt jq.spec src/lexer.c src/lexer.h src/parser.c      \
index e68cfbba89a43c72272eab14def9352e79d136e0..24eaf578459e436e90df2896a5d5e4255009f8fe 100644 (file)
@@ -8,4 +8,5 @@ verify_ssl = true
 [packages]
 jinja2 = "*"
 pyyaml = "*"
-markdown = "*"
\ No newline at end of file
+markdown = "*"
+lxml = "*"
\ No newline at end of file
index 33ff75b5652451dbc0fccc0d5b8d6d19e214e772..3c1d2d477e839f6cf1969fe53876985239746628 100644 (file)
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "6cffc39e0d43a6d7c78f39636c7ed1b047f7b819b158213a96d7cbfefd6524d7"
+            "sha256": "16a9ef323592a417142c30be17e58c3cf36dc2bfdbf4757106a796b22262a1ce"
         },
         "pipfile-spec": 6,
         "requires": {},
             "index": "pypi",
             "version": "==2.10"
         },
+        "lxml": {
+            "hashes": [
+                "sha256:0537eee4902e8bf4f41bfee8133f7edf96533dd175930a12086d6a40d62376b2",
+                "sha256:0562ec748abd230ab87d73384e08fa784f9b9cee89e28696087d2d22c052cc27",
+                "sha256:09e91831e749fbf0f24608694e4573be0ef51430229450c39c83176cc2e2d353",
+                "sha256:1ae4c0722fc70c0d4fba43ae33c2885f705e96dce1db41f75ae14a2d2749b428",
+                "sha256:1c630c083d782cbaf1f7f37f6cac87bda9cff643cf2803a5f180f30d97955cef",
+                "sha256:2fe74e3836bd8c0fa7467ffae05545233c7f37de1eb765cacfda15ad20c6574a",
+                "sha256:37af783c2667ead34a811037bda56a0b142ac8438f7ed29ae93f82ddb812fbd6",
+                "sha256:3f2d9eafbb0b24a33f56acd16f39fc935756524dcb3172892721c54713964c70",
+                "sha256:47d8365a8ef14097aa4c65730689be51851b4ade677285a3b2daa03b37893e26",
+                "sha256:510e904079bc56ea784677348e151e1156040dbfb736f1d8ea4b9e6d0ab2d9f4",
+                "sha256:58d0851da422bba31c7f652a7e9335313cf94a641aa6d73b8f3c67602f75b593",
+                "sha256:7940d5c2185ffb989203dacbb28e6ae88b4f1bb25d04e17f94b0edd82232bcbd",
+                "sha256:7cf39bb3a905579836f7a8f3a45320d9eb22f16ab0c1e112efb940ced4d057a5",
+                "sha256:9563a23c1456c0ab550c087833bc13fcc61013a66c6420921d5b70550ea312bf",
+                "sha256:95b392952935947e0786a90b75cc33388549dcb19af716b525dae65b186138fc",
+                "sha256:983129f3fd3cef5c3cf067adcca56e30a169656c00fcc6c648629dbb850b27fa",
+                "sha256:a0b75b1f1854771844c647c464533def3e0a899dd094a85d1d4ed72ecaaee93d",
+                "sha256:b5db89cc0ef624f3a81214b7961a99f443b8c91e88188376b6b322fd10d5b118",
+                "sha256:c0a7751ba1a4bfbe7831920d98cee3ce748007eab8dfda74593d44079568219a",
+                "sha256:c0c5a7d4aafcc30c9b6d8613a362567e32e5f5b708dc41bc3a81dac56f8af8bb",
+                "sha256:d4d63d85eacc6cb37b459b16061e1f100d154bee89dc8d8f9a6128a5a538e92e",
+                "sha256:da5e7e941d6e71c9c9a717c93725cda0708c2474f532e3680ac5e39ec57d224d",
+                "sha256:dccad2b3c583f036f43f80ac99ee212c2fa9a45151358d55f13004d095e683b2",
+                "sha256:df46307d39f2aeaafa1d25309b8a8d11738b73e9861f72d4d0a092528f498baa",
+                "sha256:e70b5e1cb48828ddd2818f99b1662cb9226dc6f57d07fc75485405c77da17436",
+                "sha256:ea825562b8cd057cbc9810d496b8b5dec37a1e2fc7b27bc7c1e72ce94462a09a"
+            ],
+            "index": "pypi",
+            "version": "==4.3.1"
+        },
         "markdown": {
             "hashes": [
                 "sha256:c00429bd503a47ec88d5e30a751e147dcb4c6889663cd3e2ba0afe858e009baa",
old mode 100644 (file)
new mode 100755 (executable)
index ab73048..a0ab4ed
@@ -1,2 +1,235 @@
 #!/usr/bin/env python3
-print("Manpage build not yet supported")
+from datetime import date
+from io import StringIO
+from lxml import etree
+import markdown
+from markdown.extensions import Extension
+import re
+import sys
+import yaml
+
+# Prevent our markdown parser from trying to help by interpreting things in angle brackets as HTML tags.
+class EscapeHtml(Extension):
+  """Markdown extension that switches off raw-HTML handling.
+
+  Removes the 'html_block' preprocessor and the 'html' inline pattern so
+  text in angle brackets is passed through as text.
+  """
+
+  def extendMarkdown(self, md, md_globals=None):
+    # md_globals is accepted but unused: Markdown 2.x passes it, while
+    # Markdown 3.x calls extendMarkdown(md) and removed the parameter in 3.4.
+    for registry, name in ((md.preprocessors, 'html_block'),
+                           (md.inlinePatterns, 'html')):
+      if hasattr(registry, 'deregister'):
+        # Markdown >= 3.0 Registry API; `del registry[name]` was deprecated
+        # there and later removed.
+        registry.deregister(name)
+      else:
+        # Markdown 2.x keeps these in an OrderedDict.
+        del registry[name]
+
+class RoffWalker:
+  """Turns a parsed HTML tree into roff (man page) markup.
+
+  The roff text is accumulated in an in-memory buffer while the tree is
+  walked; walk() post-processes that buffer and writes it to the output
+  stream given to the constructor (sys.stdout by default).
+  """
+
+  def __init__(self, tree, output=sys.stdout):
+    self.tree = tree        # element the walk starts from
+    self.target = output    # stream that receives the final roff text
+    self.f = StringIO()     # staging buffer filled by the __write_* helpers
+
+  def walk(self):
+    """Render the whole tree and write the result to the target stream."""
+    self._walk(self.tree, parent_tag=None)
+    # We don't want to start lines with \. because that can confuse man
+    # For lines that start with \., we need to prefix them with \& so it
+    # knows not to treat that line as a directive
+    data = re.sub(r'^\\\.', r'\&.', self.f.getvalue(), flags=re.MULTILINE)
+    self.target.write(data)
+
+  def _ul_is_special(self, root):
+    """Return True for a list whose only child is an <li> ending in ':'.
+
+    _walk() renders such lists with .TP instead of as bullet lists.
+    """
+    if len(root) != 1:
+      return False
+    child = root[0]
+    if child.tag != 'li':
+      return False
+    msg = ''.join(child.itertext()).strip()
+    return msg.endswith(':')
+
+  def _walk_child(self, root):
+    """Walk root's children, if any.
+
+    Only the first child is passed on; _walk() itself iterates over the
+    following siblings.
+    """
+    if len(root) > 0:
+      self._walk(root[0], parent_tag=root.tag)
+
+  def _write_element(self, root, ensure_newline=True):
+    """Write an element's own text, then its children, then its tail."""
+    if root.text is not None:
+      text = self._sanitize(root.text)
+      self.__write_raw(text)
+    self._walk_child(root)
+    self._write_tail(root, ensure_newline=ensure_newline)
+
+  def _write_tail(self, root, ensure_newline=False, inline=False):
+    """Write the text that follows root's closing tag.
+
+    A tail consisting of a single newline is skipped unless `inline` is
+    set. With `ensure_newline`, a newline is appended afterwards.
+    """
+    if root.tail is not None:
+      if inline or root.tail != '\n':
+        text = self._sanitize(root.tail)
+        # NOTE: _sanitize() turns every newline into a space, so this check
+        # cannot currently succeed; it is kept exactly as written.
+        if text.endswith('\n'):
+          ensure_newline = False
+        self.__write_raw(text)
+    if ensure_newline:
+      self.__write_raw('\n')
+
+  def _walk(self, root, parent_tag=None):
+    """Emit roff for `root` and each of its following siblings.
+
+    `parent_tag` is the tag of the enclosing element (None at the top) and
+    influences paragraph spacing inside list items.
+    """
+    last_tag = None
+    while root is not None:
+      if root.tag == 'h1':
+        # The heading's own text is not used; a fixed title header and NAME
+        # section are written instead.
+        self.__write_cmd('.TH "JQ" "1" "{}" "" ""'.format(date.today().strftime('%B %Y')))
+        self.__write_cmd('.SH "NAME"')
+        # TODO: properly parse this
+        self.__write_raw(r'\fBjq\fR \- Command\-line JSON processor' + "\n")
+
+      elif root.tag == 'h2':
+        self.__write_cmd('.SH "{}"'.format(''.join(root.itertext()).strip()))
+
+      elif root.tag == 'h3':
+        text = ''.join(root.itertext()).strip()
+        self.__write_cmd('.SS "{}"'.format(self._h3_sanitize(text)))
+
+      elif root.tag == 'p':
+        # No .P directly after a heading or inside a list item.
+        if last_tag not in ['h2', 'h3'] and parent_tag not in ['li']:
+          self.__write_cmd('.P')
+        self._write_element(root, ensure_newline=(parent_tag != 'li'))
+
+      elif root.tag == 'ul':
+        if self._ul_is_special(root):
+          li = root[0]
+          self.__write_cmd('.TP')
+          self._write_element(li)
+          # Consume the paragraphs that follow the list as indented
+          # continuation paragraphs; advancing `root` makes the outer loop
+          # resume after them.
+          sibling = root.getnext()
+          while sibling is not None and sibling.tag == 'p':
+            if sibling.getnext() is not None and sibling.getnext().tag == 'pre':
+              # we don't want to .IP these, because it'll look funny with the code indent
+              break
+            self.__write_cmd('.IP')
+            self._write_element(sibling)
+            root = sibling
+            sibling = root.getnext()
+        else:
+          self._walk_child(root)
+          self._write_tail(root)
+          # A pre tag after the end of a list doesn't want two of the indentation commands
+          if root.getnext() is None or root.getnext().tag != 'pre':
+            self.__write_cmd('.IP "" 0')
+
+      elif root.tag == 'li':
+        self.__write_cmd(r'.IP "\(bu" 4')
+        if root.text is not None and root.text.strip() != '':
+          text = self._sanitize(root.text)
+          self.__write_raw(text)
+        self._walk_child(root)
+        self._write_tail(root, ensure_newline=True)
+
+      elif root.tag == 'strong':
+        if root.text is not None:
+          text = self._sanitize(root.text)
+          self.__write_raw('\\fB{}\\fR'.format(text))
+
+        self._write_tail(root, inline=True)
+
+      elif root.tag == 'em':
+        if root.text is not None:
+          text = self._sanitize(root.text)
+          self.__write_raw('\\fI{}\\fR'.format(text))
+        self._write_tail(root, inline=True)
+
+      elif root.tag == 'code':
+        if root.text is not None:
+          text = self._code_sanitize(root.text)
+          self.__write_raw('\\fB{}\\fR'.format(text))
+        self._write_tail(root, inline=True)
+
+      elif root.tag == 'pre':
+        self.__write_cmd('.IP "" 4')
+        self.__write_cmd('.nf\n') # extra newline for spacing reasons
+        # Merge a run of consecutive <pre> siblings into one no-fill block,
+        # separated by blank lines.
+        sibling = root
+        first = True
+        while sibling is not None and sibling.tag == 'pre':
+          if not first:
+            self.__write_raw('\n')
+          text = ''.join(sibling.itertext(with_tail=False))
+          self.__write_raw(self._pre_sanitize(text))
+          first = False
+          root = sibling
+          sibling = sibling.getnext()
+        self.__write_cmd('.fi')
+        self.__write_cmd('.IP "" 0')
+
+      else:
+        # Any other tag contributes nothing itself; only descend into it.
+        self._walk_child(root)
+
+      last_tag = root.tag
+      root = root.getnext()
+
+  def _base_sanitize(self, text):
+    """Escape backslash, '.', "'" and '-' for roff.
+
+    Backslashes are handled first so the backslashes added by the later
+    replacements are not escaped again.
+    """
+    text = text.replace('\\', '\\e')
+    text = text.replace('.', '\\.')
+    text = text.replace("'", "\\'")
+    text = text.replace('-', '\\-')
+    return text
+
+  def _pre_sanitize(self, text):
+    """Escape preformatted text; whitespace is left untouched."""
+    return self._base_sanitize(text)
+
+  def _code_sanitize(self, text):
+    """Escape inline code and turn each whitespace character into a space."""
+    text = self._base_sanitize(text)
+    text = re.sub(r'\s', ' ', text)
+    return text
+
+  def _h3_sanitize(self, text):
+    """Escape a subsection title and fold its newlines into single spaces."""
+    text = self._base_sanitize(text)
+    text = re.sub(' \n|\n ', ' ', text)
+    text = re.sub('\n', ' ', text)
+    return text
+
+  def _sanitize(self, text):
+    """Escape running text.
+
+    Besides the base escapes, <name> becomes italic `name`, runs of spaces
+    collapse to one space and newlines become spaces.
+    """
+    text = self._base_sanitize(text)
+    text = re.sub(r'<([^>]+)>', r'\\fI\1\\fR', text)
+    text = re.sub(r' +', ' ', text)
+    text = re.sub('\n', ' ', text)
+    return text
+
+  def __write_cmd(self, dat):
+    # Writes a line holding a lone '.' followed by the command line itself.
+    print('.', dat, sep='\n', file=self.f)
+
+  def __write_raw(self, dat):
+    # Writes dat verbatim, without a trailing newline.
+    print(dat, sep='', end='', file=self.f)
+
+def load_yml_file(fn):
+  """Parse the YAML file at path `fn` and return the loaded data."""
+  # Explicit encoding so the result does not depend on the build locale.
+  with open(fn, encoding='utf-8') as f:
+    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
+    # and rejected by PyYAML 6; safe_load builds plain Python data only.
+    return yaml.safe_load(f)
+
+def dedent_body(body):
+  """Strip a two-space indent from every line of `body` that has exactly one.
+
+  Only lines starting with two spaces immediately followed by a
+  non-whitespace character are changed; deeper-indented and blank lines are
+  returned as they are.
+  """
+  # One multi-line substitution: '^' anchors at the start of each line and
+  # the lookahead requires the third character to be non-whitespace.
+  return re.sub(r'^  (?=\S)', '', body, flags=re.MULTILINE)
+
+def convert_manual_to_markdown():
+  """Build the markdown source of the man page from content/manual/manual.yml.
+
+  Layout: manpage intro, manual body, then every section as a '##' heading
+  (upper-cased title) with its entries as '###' headings; an entry's
+  examples go into one fenced '~~~~' block. The manpage epilogue comes last.
+  Returns the markdown as a single string.
+  """
+  manual = load_yml_file("content/manual/manual.yml")
+  chunks = [
+    manual.get('manpage_intro', '\n'),
+    dedent_body(manual.get('body', '\n')),
+  ]
+  for section in manual.get('sections', []):
+    chunks.append('## {}\n'.format(section.get('title', '').upper()))
+    chunks.append(dedent_body(section.get('body', '\n')))
+    chunks.append('\n')
+    for entry in section.get('entries', []):
+      chunks.append('### {}\n'.format(entry.get('title', '')))
+      chunks.append(dedent_body(entry.get('body', '\n')))
+      chunks.append('\n')
+      examples = entry.get('examples')
+      if examples is None:
+        continue
+      rendered = []
+      for example in examples:
+        program_line = "jq '{}'\n".format(example.get('program', ''))
+        input_line = "   {}\n".format(example.get('input', ''))
+        results = ', '.join(str(x) for x in example.get('output', []))
+        rendered.append(program_line + input_line + "=> {}\n".format(results))
+      # Examples are separated by one blank line inside a single fence.
+      chunks.append("~~~~\n" + '\n'.join(rendered) + "~~~~\n")
+    chunks.append('\n')
+  chunks.append(manual.get('manpage_epilogue', ''))
+  return ''.join(chunks)
+
+# Script body: these statements run whenever the module is executed.
+
+# Convert manual.yml to our special markdown format
+# (the manual is opened via a relative path, so the working directory matters)
+markdown_data = convert_manual_to_markdown()
+
+# Convert markdown to html; EscapeHtml keeps angle-bracket text from being
+# treated as HTML, 'fenced_code' handles the ~~~~ example blocks
+html_data = markdown.markdown(markdown_data, extensions=[EscapeHtml(), 'fenced_code'])
+
+# Parse the html into a tree so we can walk it
+tr = etree.HTML(html_data, etree.HTMLParser())
+
+# Walk the html tree and write the resulting ROFF to stdout
+# (stdout is RoffWalker's default output)
+RoffWalker(tr).walk()
index a9d2fcd258b68410a90528ac543f43e9dbb27ccf..ef609a649661a9e0d1bbf66d2599ba2bd2373527 100644 (file)
@@ -172,7 +172,7 @@ sections:
         ASCII output with every non-ASCII character replaced with the
         equivalent escape sequence.
 
-      * `--unbuffered`
+      * `--unbuffered`:
 
         Flush the output after each JSON object is printed (useful if
         you're piping a slow data source into jq and piping jq's
@@ -658,10 +658,10 @@ sections:
           - **Strings** are added by being joined into a larger string.
 
           - **Objects** are added by merging, that is, inserting all
-              the key-value pairs from both objects into a single
-              combined object. If both objects contain a value for the
-              same key, the object on the right of the `+` wins. (For
-              recursive merge use the `*` operator.)
+            the key-value pairs from both objects into a single
+            combined object. If both objects contain a value for the
+            same key, the object on the right of the `+` wins. (For
+            recursive merge use the `*` operator.)
 
           `null` can be added to any value, and returns the other
           value unchanged.
@@ -1975,40 +1975,40 @@ sections:
 
           jq provides a few SQL-style operators.
 
-           * INDEX(stream; index_expression):
+          * INDEX(stream; index_expression):
 
-             This builtin produces an object whose keys are computed by
-             the given index expression applied to each value from the
-             given stream.
+            This builtin produces an object whose keys are computed by
+            the given index expression applied to each value from the
+            given stream.
 
-           * JOIN($idx; stream; idx_expr; join_expr):
+          * JOIN($idx; stream; idx_expr; join_expr):
 
-             This builtin joins the values from the given stream to the
-             given index.  The index's keys are computed by applying the
-             given index expression to each value from the given stream.
-             An array of the value in the stream and the corresponding
-             value from the index is fed to the given join expression to
-             produce each result.
+            This builtin joins the values from the given stream to the
+            given index.  The index's keys are computed by applying the
+            given index expression to each value from the given stream.
+            An array of the value in the stream and the corresponding
+            value from the index is fed to the given join expression to
+            produce each result.
 
-           * JOIN($idx; stream; idx_expr):
+          * JOIN($idx; stream; idx_expr):
 
-             Same as `JOIN($idx; stream; idx_expr; .)`.
+            Same as `JOIN($idx; stream; idx_expr; .)`.
 
-           * JOIN($idx; idx_expr):
+          * JOIN($idx; idx_expr):
 
-             This builtin joins the input `.` to the given index, applying
-             the given index expression to `.` to compute the index key.
-             The join operation is as described above.
+            This builtin joins the input `.` to the given index, applying
+            the given index expression to `.` to compute the index key.
+            The join operation is as described above.
 
-           * IN(s):
+          * IN(s):
 
-             This builtin outputs `true` if `.` appears in the given
-             stream, otherwise it outputs `false`.
+            This builtin outputs `true` if `.` appears in the given
+            stream, otherwise it outputs `false`.
 
-           * IN(source; s):
+          * IN(source; s):
 
-             This builtin outputs `true` if any value in the source stream
-             appears in the second stream, otherwise it outputs `false`.
+            This builtin outputs `true` if any value in the source stream
+            appears in the second stream, otherwise it outputs `false`.
 
       - title: "`builtins`"
         body: |
@@ -2643,9 +2643,9 @@ sections:
 
           For example, in the following expression there is a binding
           which is visible "to the right" of it, `... | .*3 as
-          $times_three | [.  + $times_three] | ...`, but not "to the
+          $times_three | [. + $times_three] | ...`, but not "to the
           left".  Consider this expression now, `... | (.*3 as
-          $times_three | [.+ $times_three]) | ...`: here the binding
+          $times_three | [. + $times_three]) | ...`: here the binding
           `$times_three` is _not_ visible past the closing parenthesis.
 
       - title: Reduce
@@ -2868,7 +2868,7 @@ sections:
 
       Two builtins provide minimal output capabilities, `debug`, and
       `stderr`.  (Recall that a jq program's output values are always
-      output as JSON texts on `stdout`.)  The `debug` builtin can have
+      output as JSON texts on `stdout`.) The `debug` builtin can have
       application-specific behavior, such as for executables that use
       the libjq C API but aren't the jq executable itself.  The `stderr`
       builtin outputs its input in raw mode to stderr with no additional