Lots of adjustments to deal with the document content now being stored

author Fred Drake <fdrake@acm.org>

Mon, 10 May 1999 19:36:52 +0000 (19:36 +0000)

committer Fred Drake <fdrake@acm.org>

Mon, 10 May 1999 19:36:52 +0000 (19:36 +0000)
author Fred Drake <fdrake@acm.org>
Mon, 10 May 1999 19:36:52 +0000 (19:36 +0000)
committer Fred Drake <fdrake@acm.org>
Mon, 10 May 1999 19:36:52 +0000 (19:36 +0000)
diff --git a/Doc/tools/sgmlconv/docfixer.py b/Doc/tools/sgmlconv/docfixer.py

index 11c487d8d4a34f882abf9fd60ad1e89f6d26fc2e..f700134a1bd3a46ebd01c2777a13b92519260f4f 100755 (executable)
--- a/Doc/tools/sgmlconv/docfixer.py
+++ b/Doc/tools/sgmlconv/docfixer.py
@@ -12,7 +12,10 @@ import re
  import string
  import sys
  import xml.dom.core
-import xml.dom.esis_builder
+
+from xml.dom.core import \
+     ELEMENT, \
+     TEXT
  
  
  class ConversionError(Exception):
@@ -32,11 +35,11 @@ else:
  # Workaround to deal with invalid documents (multiple root elements).  This
  # does not indicate a bug in the DOM implementation.
  #
-def get_documentElement(self):
+def get_documentElement(doc):
      docelem = None
-    for n in self._node.children:
-        if n.type == xml.dom.core.ELEMENT:
-            docelem = xml.dom.core.Element(n, self, self)
+    for n in doc.childNodes:
+        if n.nodeType == ELEMENT:
+            docelem = n
      return docelem
  
  xml.dom.core.Document.get_documentElement = get_documentElement
@@ -46,15 +49,15 @@ xml.dom.core.Document.get_documentElement = get_documentElement
  # accessed from the Document object via .childNodes (no matter how many
  # levels of access are used) will be given an ownerDocument of None.
  #
-def get_childNodes(self):
-    return xml.dom.core.NodeList(self._node.children, self, self)
+def get_childNodes(doc):
+    return xml.dom.core.NodeList(doc._node.children, doc._node)
  
  xml.dom.core.Document.get_childNodes = get_childNodes
  
  
  def get_first_element(doc, gi):
      for n in doc.childNodes:
-        if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
+        if n.nodeType == ELEMENT and n.tagName == gi:
              return n
  
  def extract_first_element(doc, gi):
@@ -66,10 +69,10 @@ def extract_first_element(doc, gi):
  
  def find_all_elements(doc, gi):
      nodes = []
-    if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi:
+    if doc.nodeType == ELEMENT and doc.tagName == gi:
          nodes.append(doc)
      for child in doc.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT:
+        if child.nodeType == ELEMENT:
              if child.tagName == gi:
                  nodes.append(child)
              for node in child.getElementsByTagName(gi):
@@ -77,36 +80,36 @@ def find_all_elements(doc, gi):
      return nodes        
  
  
-def simplify(doc):
+def simplify(doc, fragment):
      # Try to rationalize the document a bit, since these things are simply
      # not valid SGML/XML documents as they stand, and need a little work.
      documentclass = "document"
      inputs = []
-    node = extract_first_element(doc, "documentclass")
+    node = extract_first_element(fragment, "documentclass")
      if node is not None:
          documentclass = node.getAttribute("classname")
-    node = extract_first_element(doc, "title")
+    node = extract_first_element(fragment, "title")
      if node is not None:
          inputs.append(node)
      # update the name of the root element
-    node = get_first_element(doc, "document")
+    node = get_first_element(fragment, "document")
      if node is not None:
          node._node.name = documentclass
      while 1:
-        node = extract_first_element(doc, "input")
+        node = extract_first_element(fragment, "input")
          if node is None:
              break
          inputs.append(node)
      if inputs:
-        docelem = doc.documentElement
+        docelem = get_documentElement(fragment)
          inputs.reverse()
          for node in inputs:
              text = doc.createTextNode("\n")
              docelem.insertBefore(text, docelem.firstChild)
              docelem.insertBefore(node, text)
          docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
-    while doc.firstChild.nodeType == xml.dom.core.TEXT:
-        doc.removeChild(doc.firstChild)
+    while fragment.firstChild.nodeType == TEXT:
+        fragment.removeChild(fragment.firstChild)
  
  
  def cleanup_root_text(doc):
@@ -115,9 +118,9 @@ def cleanup_root_text(doc):
      for n in doc.childNodes:
          prevskip = skip
          skip = 0
-        if n.nodeType == xml.dom.core.TEXT and not prevskip:
+        if n.nodeType == TEXT and not prevskip:
              discards.append(n)
-        elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
+        elif n.nodeType == ELEMENT and n.tagName == "COMMENT":
              skip = 1
      for node in discards:
          doc.removeChild(node)
@@ -130,8 +133,8 @@ DESCRIPTOR_ELEMENTS = (
      "datadesc", "datadescni",
      )
  
-def fixup_descriptors(doc):
-    sections = find_all_elements(doc, "section")
+def fixup_descriptors(doc, fragment):
+    sections = find_all_elements(fragment, "section")
      for section in sections:
          find_and_fix_descriptors(doc, section)
  
@@ -139,7 +142,7 @@ def fixup_descriptors(doc):
  def find_and_fix_descriptors(doc, container):
      children = container.childNodes
      for child in children:
-        if child.nodeType == xml.dom.core.ELEMENT:
+        if child.nodeType == ELEMENT:
              tagName = child.tagName
              if tagName in DESCRIPTOR_ELEMENTS:
                  rewrite_descriptor(doc, child)
@@ -191,7 +194,7 @@ def rewrite_descriptor(doc, descriptor):
      pos = skip_leading_nodes(children, 0)
      if pos < len(children):
          child = children[pos]
-        if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
+        if child.nodeType == ELEMENT and child.tagName == "args":
              # create an <args> in <signature>:
              args = doc.createElement("args")
              argchildren = []
@@ -205,7 +208,7 @@ def rewrite_descriptor(doc, descriptor):
      # 3, 4.
      pos = skip_leading_nodes(children, pos + 1)
      while pos < len(children) \
-          and children[pos].nodeType == xml.dom.core.ELEMENT \
+          and children[pos].nodeType == ELEMENT \
            and children[pos].tagName in (linename, "versionadded"):
          if children[pos].tagName == linename:
              # this is really a supplemental signature, create <signature>
@@ -222,7 +225,7 @@ def rewrite_descriptor(doc, descriptor):
      newchildren.append(description)
      move_children(descriptor, description, pos)
      last = description.childNodes[-1]
-    if last.nodeType == xml.dom.core.TEXT:
+    if last.nodeType == TEXT:
          last.data = string.rstrip(last.data) + "\n  "
      # 6.
      # should have nothing but whitespace and signature lines in <descriptor>;
@@ -259,16 +262,16 @@ def move_children(origin, dest, start=0):
          dest.appendChild(node)
  
  
-def handle_appendix(doc):
+def handle_appendix(doc, fragment):
      # must be called after simplfy() if document is multi-rooted to begin with
-    docelem = doc.documentElement
+    docelem = get_documentElement(fragment)
      toplevel = docelem.tagName == "manual" and "chapter" or "section"
      appendices = 0
      nodes = []
      for node in docelem.childNodes:
          if appendices:
              nodes.append(node)
-        elif node.nodeType == xml.dom.core.ELEMENT:
+        elif node.nodeType == ELEMENT:
              appnodes = node.getElementsByTagName("appendix")
              if appnodes:
                  appendices = 1
@@ -281,7 +284,7 @@ def handle_appendix(doc):
          back = doc.createElement("back-matter")
          docelem.appendChild(back)
          back.appendChild(doc.createTextNode("\n"))
-        while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
+        while nodes and nodes[0].nodeType == TEXT \
                and not string.strip(nodes[0].data):
              del nodes[0]
          map(back.appendChild, nodes)
@@ -307,28 +310,28 @@ def fixup_trailing_whitespace(doc, wsmap):
      while queue:
          node = queue[0]
          del queue[0]
-        if node.nodeType == xml.dom.core.ELEMENT \
+        if node.nodeType == ELEMENT \
             and wsmap.has_key(node.tagName):
              ws = wsmap[node.tagName]
              children = node.childNodes
              children.reverse()
-            if children[0].nodeType == xml.dom.core.TEXT:
+            if children[0].nodeType == TEXT:
                  data = string.rstrip(children[0].data) + ws
                  children[0].data = data
              children.reverse()
              # hack to get the title in place:
              if node.tagName == "title" \
-               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
+               and node.parentNode.firstChild.nodeType == ELEMENT:
                  node.parentNode.insertBefore(doc.createText("\n  "),
                                               node.parentNode.firstChild)
          for child in node.childNodes:
-            if child.nodeType == xml.dom.core.ELEMENT:
+            if child.nodeType == ELEMENT:
                  queue.append(child)
  
  
  def normalize(doc):
      for node in doc.childNodes:
-        if node.nodeType == xml.dom.core.ELEMENT:
+        if node.nodeType == ELEMENT:
              node.normalize()
  
  
@@ -339,7 +342,7 @@ def cleanup_trailing_parens(doc, element_names):
      rewrite_element = d.has_key
      queue = []
      for node in doc.childNodes:
-        if node.nodeType == xml.dom.core.ELEMENT:
+        if node.nodeType == ELEMENT:
              queue.append(node)
      while queue:
          node = queue[0]
@@ -347,13 +350,13 @@ def cleanup_trailing_parens(doc, element_names):
          if rewrite_element(node.tagName):
              children = node.childNodes
              if len(children) == 1 \
-               and children[0].nodeType == xml.dom.core.TEXT:
+               and children[0].nodeType == TEXT:
                  data = children[0].data
                  if data[-2:] == "()":
                      children[0].data = data[:-2]
          else:
              for child in node.childNodes:
-                if child.nodeType == xml.dom.core.ELEMENT:
+                if child.nodeType == ELEMENT:
                      queue.append(child)
  
  
@@ -366,13 +369,13 @@ def contents_match(left, right):
          nodeType = l.nodeType
          if nodeType != r.nodeType:
              return 0
-        if nodeType == xml.dom.core.ELEMENT:
+        if nodeType == ELEMENT:
              if l.tagName != r.tagName:
                  return 0
              # should check attributes, but that's not a problem here
              if not contents_match(l, r):
                  return 0
-        elif nodeType == xml.dom.core.TEXT:
+        elif nodeType == TEXT:
              if l.data != r.data:
                  return 0
          else:
@@ -388,7 +391,7 @@ def create_module_info(doc, section):
          return
      node._node.name = "synopsis"
      lastchild = node.childNodes[-1]
-    if lastchild.nodeType == xml.dom.core.TEXT \
+    if lastchild.nodeType == TEXT \
         and lastchild.data[-1:] == ".":
          lastchild.data = lastchild.data[:-1]
      modauthor = extract_first_element(section, "moduleauthor")
@@ -423,7 +426,7 @@ def create_module_info(doc, section):
          if title:
              children = title.childNodes
              if len(children) >= 2 \
-               and children[0].nodeType == xml.dom.core.ELEMENT \
+               and children[0].nodeType == ELEMENT \
                 and children[0].tagName == "module" \
                 and children[0].childNodes[0].data == name:
                  # this is it; morph the <title> into <short-synopsis>
@@ -431,7 +434,7 @@ def create_module_info(doc, section):
                  if first_data.data[:4] == " ---":
                      first_data.data = string.lstrip(first_data.data[4:])
                  title._node.name = "short-synopsis"
-                if children[-1].nodeType == xml.dom.core.TEXT \
+                if children[-1].nodeType == TEXT \
                     and children[-1].data[-1:] == ".":
                      children[-1].data = children[-1].data[:-1]
                  section.removeChild(title)
@@ -470,10 +473,10 @@ def create_module_info(doc, section):
          children = section.childNodes
          for i in range(len(children)):
              node = children[i]
-            if node.nodeType == xml.dom.core.ELEMENT \
+            if node.nodeType == ELEMENT \
                 and node.tagName == "moduleinfo":
                  nextnode = children[i+1]
-                if nextnode.nodeType == xml.dom.core.TEXT:
+                if nextnode.nodeType == TEXT:
                      data = nextnode.data
                      if len(string.lstrip(data)) < (len(data) - 4):
                          nextnode.data = "\n\n\n" + string.lstrip(data)
@@ -487,7 +490,7 @@ def cleanup_synopses(doc):
  def remap_element_names(root, name_map):
      queue = []
      for child in root.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT:
+        if child.nodeType == ELEMENT:
              queue.append(child)
      while queue:
          node = queue.pop()
@@ -498,13 +501,13 @@ def remap_element_names(root, name_map):
              for attr, value in attrs.items():
                  node.setAttribute(attr, value)
          for child in node.childNodes:
-            if child.nodeType == xml.dom.core.ELEMENT:
+            if child.nodeType == ELEMENT:
                  queue.append(child)
  
  
-def fixup_table_structures(doc):
+def fixup_table_structures(doc, fragment):
      # must be done after remap_element_names(), or the tables won't be found
-    for table in find_all_elements(doc, "table"):
+    for table in find_all_elements(fragment, "table"):
          fixup_table(doc, table)
  
  
@@ -522,7 +525,7 @@ def fixup_table(doc, table):
      last_was_hline = 0
      children = table.childNodes
      for child in children:
-        if child.nodeType == xml.dom.core.ELEMENT:
+        if child.nodeType == ELEMENT:
              tagName = child.tagName
              if tagName == "hline" and prev_row is not None:
                  prev_row.setAttribute("rowsep", "1")
@@ -535,12 +538,12 @@ def fixup_table(doc, table):
      while children:
          child = children[0]
          nodeType = child.nodeType
-        if nodeType == xml.dom.core.TEXT:
+        if nodeType == TEXT:
              if string.strip(child.data):
                  raise ConversionError("unexpected free data in table")
              table.removeChild(child)
              continue
-        if nodeType == xml.dom.core.ELEMENT:
+        if nodeType == ELEMENT:
              if child.tagName != "hline":
                  raise ConversionError(
                      "unexpected <%s> in table" % child.tagName)
@@ -572,7 +575,7 @@ def fixup_row(doc, row):
  def move_elements_by_name(doc, source, dest, name, sep=None):
      nodes = []
      for child in source.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT and child.tagName == name:
+        if child.nodeType == ELEMENT and child.tagName == name:
              nodes.append(child)
      for node in nodes:
          source.removeChild(node)
@@ -606,13 +609,13 @@ PARA_LEVEL_PRECEEDERS = (
      )
  
  
-def fixup_paras(doc):
-    for child in doc.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT \
+def fixup_paras(doc, fragment):
+    for child in fragment.childNodes:
+        if child.nodeType == ELEMENT \
             and child.tagName in RECURSE_INTO_PARA_CONTAINERS:
              #
              fixup_paras_helper(doc, child)
-    descriptions = find_all_elements(doc, "description")
+    descriptions = find_all_elements(fragment, "description")
      for description in descriptions:
          fixup_paras_helper(doc, description)
  
@@ -628,7 +631,7 @@ def fixup_paras_helper(doc, container, depth=0):
          #
          # Either paragraph material or something to recurse into:
          #
-        if (children[start].nodeType == xml.dom.core.ELEMENT) \
+        if (children[start].nodeType == ELEMENT) \
             and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS):
              fixup_paras_helper(doc, children[start])
              start = skip_leading_nodes(children, start + 1)
@@ -653,11 +656,11 @@ def build_para(doc, parent, start, i):
          after = j + 1
          child = children[j]
          nodeType = child.nodeType
-        if nodeType == xml.dom.core.ELEMENT:
+        if nodeType == ELEMENT:
              if child.tagName in BREAK_ELEMENTS:
                  after = j
                  break
-        elif nodeType == xml.dom.core.TEXT:
+        elif nodeType == TEXT:
              pos = string.find(child.data, "\n\n")
              if pos == 0:
                  after = j
@@ -670,7 +673,7 @@ def build_para(doc, parent, start, i):
      if (start + 1) > after:
          raise ConversionError(
              "build_para() could not identify content to turn into a paragraph")
-    if children[after - 1].nodeType == xml.dom.core.TEXT:
+    if children[after - 1].nodeType == TEXT:
          # we may need to split off trailing white space:
          child = children[after - 1]
          data = child.data
@@ -707,7 +710,7 @@ def skip_leading_nodes(children, start):
          # skip over leading comments and whitespace:
          child = children[start]
          nodeType = child.nodeType
-        if nodeType == xml.dom.core.TEXT:
+        if nodeType == TEXT:
              data = child.data
              shortened = string.lstrip(data)
              if shortened:
@@ -717,7 +720,7 @@ def skip_leading_nodes(children, start):
                      return start + 1
                  return start
              # all whitespace, just skip
-        elif nodeType == xml.dom.core.ELEMENT:
+        elif nodeType == ELEMENT:
              tagName = child.tagName
              if tagName in RECURSE_INTO_PARA_CONTAINERS:
                  return start
@@ -727,15 +730,15 @@ def skip_leading_nodes(children, start):
      return start
  
  
-def fixup_rfc_references(doc):
-    for rfcnode in find_all_elements(doc, "rfc"):
+def fixup_rfc_references(doc, fragment):
+    for rfcnode in find_all_elements(fragment, "rfc"):
          rfcnode.appendChild(doc.createTextNode(
              "RFC " + rfcnode.getAttribute("num")))
  
  
-def fixup_signatures(doc):
-    for child in doc.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT:
+def fixup_signatures(doc, fragment):
+    for child in fragment.childNodes:
+        if child.nodeType == ELEMENT:
              args = child.getElementsByTagName("args")
              for arg in args:
                  fixup_args(doc, arg)
@@ -748,7 +751,7 @@ def fixup_signatures(doc):
  
  def fixup_args(doc, arglist):
      for child in arglist.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT \
+        if child.nodeType == ELEMENT \
             and child.tagName == "optional":
              # found it; fix and return
              arglist.insertBefore(doc.createTextNode("["), child)
@@ -762,8 +765,8 @@ def fixup_args(doc, arglist):
              return fixup_args(doc, arglist)
  
  
-def fixup_sectionauthors(doc):
-    for sectauth in find_all_elements(doc, "sectionauthor"):
+def fixup_sectionauthors(doc, fragment):
+    for sectauth in find_all_elements(fragment, "sectionauthor"):
          section = sectauth.parentNode
          section.removeChild(sectauth)
          sectauth._node.name = "author"
@@ -772,7 +775,7 @@ def fixup_sectionauthors(doc):
          sectauth.removeAttribute("name")
          after = section.childNodes[2]
          title = section.childNodes[1]
-        if title.nodeType == xml.dom.core.ELEMENT and title.tagName != "title":
+        if title.nodeType == ELEMENT and title.tagName != "title":
              after = section.childNodes[0]
          section.insertBefore(doc.createTextNode("\n  "), after)
          section.insertBefore(sectauth, after)
@@ -781,10 +784,9 @@ def fixup_sectionauthors(doc):
  def fixup_verbatims(doc):
      for verbatim in find_all_elements(doc, "verbatim"):
          child = verbatim.childNodes[0]
-        if child.nodeType == xml.dom.core.TEXT \
+        if child.nodeType == TEXT \
             and string.lstrip(child.data)[:3] == ">>>":
-            verbatim._node.name = "interpreter-session"
-            #verbatim.setAttribute("interactive", "interactive")
+            verbatim._node.name = "interactive-session"
  
  
  _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
@@ -792,7 +794,7 @@ _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
  def write_esis(doc, ofp, knownempty):
      for node in doc.childNodes:
          nodeType = node.nodeType
-        if nodeType == xml.dom.core.ELEMENT:
+        if nodeType == ELEMENT:
              gi = node.tagName
              if knownempty(gi):
                  if node.hasChildNodes():
@@ -808,7 +810,7 @@ def write_esis(doc, ofp, knownempty):
              ofp.write("(%s\n" % gi)
              write_esis(node, ofp, knownempty)
              ofp.write(")%s\n" % gi)
-        elif nodeType == xml.dom.core.TEXT:
+        elif nodeType == TEXT:
              ofp.write("-%s\n" % esistools.encode(node.data))
          else:
              raise RuntimeError, "unsupported node type: %s" % nodeType
@@ -818,10 +820,11 @@ def convert(ifp, ofp):
      p = esistools.ExtendedEsisBuilder()
      p.feed(ifp.read())
      doc = p.document
-    normalize(doc)
-    simplify(doc)
-    handle_labels(doc)
-    handle_appendix(doc)
+    fragment = p.fragment
+    normalize(fragment)
+    simplify(doc, fragment)
+    handle_labels(fragment)
+    handle_appendix(doc, fragment)
      fixup_trailing_whitespace(doc, {
          "abstract": "\n",
          "title": "",
@@ -835,12 +838,12 @@ def convert(ifp, ofp):
      cleanup_root_text(doc)
      cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
      cleanup_synopses(doc)
-    fixup_descriptors(doc)
-    fixup_verbatims(doc)
-    normalize(doc)
-    fixup_paras(doc)
-    fixup_sectionauthors(doc)
-    remap_element_names(doc, {
+    fixup_descriptors(doc, fragment)
+    fixup_verbatims(fragment)
+    normalize(fragment)
+    fixup_paras(doc, fragment)
+    fixup_sectionauthors(doc, fragment)
+    remap_element_names(fragment, {
          "tableii": ("table", {"cols": "2"}),
          "tableiii": ("table", {"cols": "3"}),
          "tableiv": ("table", {"cols": "4"}),
@@ -849,9 +852,9 @@ def convert(ifp, ofp):
          "lineiv": ("row", {}),
          "refmodule": ("module", {"link": "link"}),
          })
-    fixup_table_structures(doc)
-    fixup_rfc_references(doc)
-    fixup_signatures(doc)
+    fixup_table_structures(doc, fragment)
+    fixup_rfc_references(doc, fragment)
+    fixup_signatures(doc, fragment)
      #
      d = {}
      for gi in p.get_empties():
@@ -861,7 +864,7 @@ def convert(ifp, ofp):
      knownempty = d.has_key
      #
      try:
-        write_esis(doc, ofp, knownempty)
+        write_esis(fragment, ofp, knownempty)
      except IOError, (err, msg):
          # Ignore EPIPE; it just means that whoever we're writing to stopped
          # reading.  The rest of the output would be ignored.  All other errors
author	Fred Drake <fdrake@acm.org>
	Mon, 10 May 1999 19:36:52 +0000 (19:36 +0000)
committer	Fred Drake <fdrake@acm.org>
	Mon, 10 May 1999 19:36:52 +0000 (19:36 +0000)