import string
import sys
import xml.dom.core
-import xml.dom.esis_builder
+
+from xml.dom.core import \
+ ELEMENT, \
+ TEXT
class ConversionError(Exception):
# Workaround to deal with invalid documents (multiple root elements). This
# does not indicate a bug in the DOM implementation.
#
-def get_documentElement(self):
+def get_documentElement(doc):
docelem = None
- for n in self._node.children:
- if n.type == xml.dom.core.ELEMENT:
- docelem = xml.dom.core.Element(n, self, self)
+ for n in doc.childNodes:
+ if n.nodeType == ELEMENT:
+ docelem = n
return docelem
xml.dom.core.Document.get_documentElement = get_documentElement
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
-def get_childNodes(self):
- return xml.dom.core.NodeList(self._node.children, self, self)
+def get_childNodes(doc):
+ return xml.dom.core.NodeList(doc._node.children, doc._node)
xml.dom.core.Document.get_childNodes = get_childNodes
def get_first_element(doc, gi):
for n in doc.childNodes:
- if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
+ if n.nodeType == ELEMENT and n.tagName == gi:
return n
def extract_first_element(doc, gi):
def find_all_elements(doc, gi):
nodes = []
- if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi:
+ if doc.nodeType == ELEMENT and doc.tagName == gi:
nodes.append(doc)
for child in doc.childNodes:
- if child.nodeType == xml.dom.core.ELEMENT:
+ if child.nodeType == ELEMENT:
if child.tagName == gi:
nodes.append(child)
for node in child.getElementsByTagName(gi):
return nodes
-def simplify(doc):
+def simplify(doc, fragment):
# Try to rationalize the document a bit, since these things are simply
# not valid SGML/XML documents as they stand, and need a little work.
documentclass = "document"
inputs = []
- node = extract_first_element(doc, "documentclass")
+ node = extract_first_element(fragment, "documentclass")
if node is not None:
documentclass = node.getAttribute("classname")
- node = extract_first_element(doc, "title")
+ node = extract_first_element(fragment, "title")
if node is not None:
inputs.append(node)
# update the name of the root element
- node = get_first_element(doc, "document")
+ node = get_first_element(fragment, "document")
if node is not None:
node._node.name = documentclass
while 1:
- node = extract_first_element(doc, "input")
+ node = extract_first_element(fragment, "input")
if node is None:
break
inputs.append(node)
if inputs:
- docelem = doc.documentElement
+ docelem = get_documentElement(fragment)
inputs.reverse()
for node in inputs:
text = doc.createTextNode("\n")
docelem.insertBefore(text, docelem.firstChild)
docelem.insertBefore(node, text)
docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
- while doc.firstChild.nodeType == xml.dom.core.TEXT:
- doc.removeChild(doc.firstChild)
+ while fragment.firstChild.nodeType == TEXT:
+ fragment.removeChild(fragment.firstChild)
def cleanup_root_text(doc):
for n in doc.childNodes:
prevskip = skip
skip = 0
- if n.nodeType == xml.dom.core.TEXT and not prevskip:
+ if n.nodeType == TEXT and not prevskip:
discards.append(n)
- elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
+ elif n.nodeType == ELEMENT and n.tagName == "COMMENT":
skip = 1
for node in discards:
doc.removeChild(node)
"datadesc", "datadescni",
)
-def fixup_descriptors(doc):
- sections = find_all_elements(doc, "section")
+def fixup_descriptors(doc, fragment):
+ sections = find_all_elements(fragment, "section")
for section in sections:
find_and_fix_descriptors(doc, section)
def find_and_fix_descriptors(doc, container):
children = container.childNodes
for child in children:
- if child.nodeType == xml.dom.core.ELEMENT:
+ if child.nodeType == ELEMENT:
tagName = child.tagName
if tagName in DESCRIPTOR_ELEMENTS:
rewrite_descriptor(doc, child)
pos = skip_leading_nodes(children, 0)
if pos < len(children):
child = children[pos]
- if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
+ if child.nodeType == ELEMENT and child.tagName == "args":
# create an <args> in <signature>:
args = doc.createElement("args")
argchildren = []
# 3, 4.
pos = skip_leading_nodes(children, pos + 1)
while pos < len(children) \
- and children[pos].nodeType == xml.dom.core.ELEMENT \
+ and children[pos].nodeType == ELEMENT \
and children[pos].tagName in (linename, "versionadded"):
if children[pos].tagName == linename:
# this is really a supplemental signature, create <signature>
newchildren.append(description)
move_children(descriptor, description, pos)
last = description.childNodes[-1]
- if last.nodeType == xml.dom.core.TEXT:
+ if last.nodeType == TEXT:
last.data = string.rstrip(last.data) + "\n "
# 6.
# should have nothing but whitespace and signature lines in <descriptor>;
dest.appendChild(node)
-def handle_appendix(doc):
+def handle_appendix(doc, fragment):
# must be called after simplify() if document is multi-rooted to begin with
- docelem = doc.documentElement
+ docelem = get_documentElement(fragment)
toplevel = docelem.tagName == "manual" and "chapter" or "section"
appendices = 0
nodes = []
for node in docelem.childNodes:
if appendices:
nodes.append(node)
- elif node.nodeType == xml.dom.core.ELEMENT:
+ elif node.nodeType == ELEMENT:
appnodes = node.getElementsByTagName("appendix")
if appnodes:
appendices = 1
back = doc.createElement("back-matter")
docelem.appendChild(back)
back.appendChild(doc.createTextNode("\n"))
- while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
+ while nodes and nodes[0].nodeType == TEXT \
and not string.strip(nodes[0].data):
del nodes[0]
map(back.appendChild, nodes)
while queue:
node = queue[0]
del queue[0]
- if node.nodeType == xml.dom.core.ELEMENT \
+ if node.nodeType == ELEMENT \
and wsmap.has_key(node.tagName):
ws = wsmap[node.tagName]
children = node.childNodes
children.reverse()
- if children[0].nodeType == xml.dom.core.TEXT:
+ if children[0].nodeType == TEXT:
data = string.rstrip(children[0].data) + ws
children[0].data = data
children.reverse()
# hack to get the title in place:
if node.tagName == "title" \
- and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
+ and node.parentNode.firstChild.nodeType == ELEMENT:
node.parentNode.insertBefore(doc.createTextNode("\n "),
node.parentNode.firstChild)
for child in node.childNodes:
- if child.nodeType == xml.dom.core.ELEMENT:
+ if child.nodeType == ELEMENT:
queue.append(child)
def normalize(doc):
for node in doc.childNodes:
- if node.nodeType == xml.dom.core.ELEMENT:
+ if node.nodeType == ELEMENT:
node.normalize()
rewrite_element = d.has_key
queue = []
for node in doc.childNodes:
- if node.nodeType == xml.dom.core.ELEMENT:
+ if node.nodeType == ELEMENT:
queue.append(node)
while queue:
node = queue[0]
if rewrite_element(node.tagName):
children = node.childNodes
if len(children) == 1 \
- and children[0].nodeType == xml.dom.core.TEXT:
+ and children[0].nodeType == TEXT:
data = children[0].data
if data[-2:] == "()":
children[0].data = data[:-2]
else:
for child in node.childNodes:
- if child.nodeType == xml.dom.core.ELEMENT:
+ if child.nodeType == ELEMENT:
queue.append(child)
nodeType = l.nodeType
if nodeType != r.nodeType:
return 0
- if nodeType == xml.dom.core.ELEMENT:
+ if nodeType == ELEMENT:
if l.tagName != r.tagName:
return 0
# should check attributes, but that's not a problem here
if not contents_match(l, r):
return 0
- elif nodeType == xml.dom.core.TEXT:
+ elif nodeType == TEXT:
if l.data != r.data:
return 0
else:
return
node._node.name = "synopsis"
lastchild = node.childNodes[-1]
- if lastchild.nodeType == xml.dom.core.TEXT \
+ if lastchild.nodeType == TEXT \
and lastchild.data[-1:] == ".":
lastchild.data = lastchild.data[:-1]
modauthor = extract_first_element(section, "moduleauthor")
if title:
children = title.childNodes
if len(children) >= 2 \
- and children[0].nodeType == xml.dom.core.ELEMENT \
+ and children[0].nodeType == ELEMENT \
and children[0].tagName == "module" \
and children[0].childNodes[0].data == name:
# this is it; morph the <title> into <short-synopsis>
if first_data.data[:4] == " ---":
first_data.data = string.lstrip(first_data.data[4:])
title._node.name = "short-synopsis"
- if children[-1].nodeType == xml.dom.core.TEXT \
+ if children[-1].nodeType == TEXT \
and children[-1].data[-1:] == ".":
children[-1].data = children[-1].data[:-1]
section.removeChild(title)
children = section.childNodes
for i in range(len(children)):
node = children[i]
- if node.nodeType == xml.dom.core.ELEMENT \
+ if node.nodeType == ELEMENT \
and node.tagName == "moduleinfo":
nextnode = children[i+1]
- if nextnode.nodeType == xml.dom.core.TEXT:
+ if nextnode.nodeType == TEXT:
data = nextnode.data
if len(string.lstrip(data)) < (len(data) - 4):
nextnode.data = "\n\n\n" + string.lstrip(data)
def remap_element_names(root, name_map):
queue = []
for child in root.childNodes:
- if child.nodeType == xml.dom.core.ELEMENT:
+ if child.nodeType == ELEMENT:
queue.append(child)
while queue:
node = queue.pop()
for attr, value in attrs.items():
node.setAttribute(attr, value)
for child in node.childNodes:
- if child.nodeType == xml.dom.core.ELEMENT:
+ if child.nodeType == ELEMENT:
queue.append(child)
-def fixup_table_structures(doc):
+def fixup_table_structures(doc, fragment):
# must be done after remap_element_names(), or the tables won't be found
- for table in find_all_elements(doc, "table"):
+ for table in find_all_elements(fragment, "table"):
fixup_table(doc, table)
last_was_hline = 0
children = table.childNodes
for child in children:
- if child.nodeType == xml.dom.core.ELEMENT:
+ if child.nodeType == ELEMENT:
tagName = child.tagName
if tagName == "hline" and prev_row is not None:
prev_row.setAttribute("rowsep", "1")
while children:
child = children[0]
nodeType = child.nodeType
- if nodeType == xml.dom.core.TEXT:
+ if nodeType == TEXT:
if string.strip(child.data):
raise ConversionError("unexpected free data in table")
table.removeChild(child)
continue
- if nodeType == xml.dom.core.ELEMENT:
+ if nodeType == ELEMENT:
if child.tagName != "hline":
raise ConversionError(
"unexpected <%s> in table" % child.tagName)
def move_elements_by_name(doc, source, dest, name, sep=None):
nodes = []
for child in source.childNodes:
- if child.nodeType == xml.dom.core.ELEMENT and child.tagName == name:
+ if child.nodeType == ELEMENT and child.tagName == name:
nodes.append(child)
for node in nodes:
source.removeChild(node)
)
-def fixup_paras(doc):
- for child in doc.childNodes:
- if child.nodeType == xml.dom.core.ELEMENT \
+def fixup_paras(doc, fragment):
+ for child in fragment.childNodes:
+ if child.nodeType == ELEMENT \
and child.tagName in RECURSE_INTO_PARA_CONTAINERS:
#
fixup_paras_helper(doc, child)
- descriptions = find_all_elements(doc, "description")
+ descriptions = find_all_elements(fragment, "description")
for description in descriptions:
fixup_paras_helper(doc, description)
#
# Either paragraph material or something to recurse into:
#
- if (children[start].nodeType == xml.dom.core.ELEMENT) \
+ if (children[start].nodeType == ELEMENT) \
and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS):
fixup_paras_helper(doc, children[start])
start = skip_leading_nodes(children, start + 1)
after = j + 1
child = children[j]
nodeType = child.nodeType
- if nodeType == xml.dom.core.ELEMENT:
+ if nodeType == ELEMENT:
if child.tagName in BREAK_ELEMENTS:
after = j
break
- elif nodeType == xml.dom.core.TEXT:
+ elif nodeType == TEXT:
pos = string.find(child.data, "\n\n")
if pos == 0:
after = j
if (start + 1) > after:
raise ConversionError(
"build_para() could not identify content to turn into a paragraph")
- if children[after - 1].nodeType == xml.dom.core.TEXT:
+ if children[after - 1].nodeType == TEXT:
# we may need to split off trailing white space:
child = children[after - 1]
data = child.data
# skip over leading comments and whitespace:
child = children[start]
nodeType = child.nodeType
- if nodeType == xml.dom.core.TEXT:
+ if nodeType == TEXT:
data = child.data
shortened = string.lstrip(data)
if shortened:
return start + 1
return start
# all whitespace, just skip
- elif nodeType == xml.dom.core.ELEMENT:
+ elif nodeType == ELEMENT:
tagName = child.tagName
if tagName in RECURSE_INTO_PARA_CONTAINERS:
return start
return start
-def fixup_rfc_references(doc):
- for rfcnode in find_all_elements(doc, "rfc"):
+def fixup_rfc_references(doc, fragment):
+ for rfcnode in find_all_elements(fragment, "rfc"):
rfcnode.appendChild(doc.createTextNode(
"RFC " + rfcnode.getAttribute("num")))
-def fixup_signatures(doc):
- for child in doc.childNodes:
- if child.nodeType == xml.dom.core.ELEMENT:
+def fixup_signatures(doc, fragment):
+ for child in fragment.childNodes:
+ if child.nodeType == ELEMENT:
args = child.getElementsByTagName("args")
for arg in args:
fixup_args(doc, arg)
def fixup_args(doc, arglist):
for child in arglist.childNodes:
- if child.nodeType == xml.dom.core.ELEMENT \
+ if child.nodeType == ELEMENT \
and child.tagName == "optional":
# found it; fix and return
arglist.insertBefore(doc.createTextNode("["), child)
return fixup_args(doc, arglist)
-def fixup_sectionauthors(doc):
- for sectauth in find_all_elements(doc, "sectionauthor"):
+def fixup_sectionauthors(doc, fragment):
+ for sectauth in find_all_elements(fragment, "sectionauthor"):
section = sectauth.parentNode
section.removeChild(sectauth)
sectauth._node.name = "author"
sectauth.removeAttribute("name")
after = section.childNodes[2]
title = section.childNodes[1]
- if title.nodeType == xml.dom.core.ELEMENT and title.tagName != "title":
+ if title.nodeType == ELEMENT and title.tagName != "title":
after = section.childNodes[0]
section.insertBefore(doc.createTextNode("\n "), after)
section.insertBefore(sectauth, after)
def fixup_verbatims(doc):
for verbatim in find_all_elements(doc, "verbatim"):
child = verbatim.childNodes[0]
- if child.nodeType == xml.dom.core.TEXT \
+ if child.nodeType == TEXT \
and string.lstrip(child.data)[:3] == ">>>":
- verbatim._node.name = "interpreter-session"
- #verbatim.setAttribute("interactive", "interactive")
+ verbatim._node.name = "interactive-session"
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
def write_esis(doc, ofp, knownempty):
for node in doc.childNodes:
nodeType = node.nodeType
- if nodeType == xml.dom.core.ELEMENT:
+ if nodeType == ELEMENT:
gi = node.tagName
if knownempty(gi):
if node.hasChildNodes():
ofp.write("(%s\n" % gi)
write_esis(node, ofp, knownempty)
ofp.write(")%s\n" % gi)
- elif nodeType == xml.dom.core.TEXT:
+ elif nodeType == TEXT:
ofp.write("-%s\n" % esistools.encode(node.data))
else:
raise RuntimeError, "unsupported node type: %s" % nodeType
p = esistools.ExtendedEsisBuilder()
p.feed(ifp.read())
doc = p.document
- normalize(doc)
- simplify(doc)
- handle_labels(doc)
- handle_appendix(doc)
+ fragment = p.fragment
+ normalize(fragment)
+ simplify(doc, fragment)
+ handle_labels(fragment)
+ handle_appendix(doc, fragment)
fixup_trailing_whitespace(doc, {
"abstract": "\n",
"title": "",
cleanup_root_text(doc)
cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
cleanup_synopses(doc)
- fixup_descriptors(doc)
- fixup_verbatims(doc)
- normalize(doc)
- fixup_paras(doc)
- fixup_sectionauthors(doc)
- remap_element_names(doc, {
+ fixup_descriptors(doc, fragment)
+ fixup_verbatims(fragment)
+ normalize(fragment)
+ fixup_paras(doc, fragment)
+ fixup_sectionauthors(doc, fragment)
+ remap_element_names(fragment, {
"tableii": ("table", {"cols": "2"}),
"tableiii": ("table", {"cols": "3"}),
"tableiv": ("table", {"cols": "4"}),
"lineiv": ("row", {}),
"refmodule": ("module", {"link": "link"}),
})
- fixup_table_structures(doc)
- fixup_rfc_references(doc)
- fixup_signatures(doc)
+ fixup_table_structures(doc, fragment)
+ fixup_rfc_references(doc, fragment)
+ fixup_signatures(doc, fragment)
#
d = {}
for gi in p.get_empties():
knownempty = d.has_key
#
try:
- write_esis(doc, ofp, knownempty)
+ write_esis(fragment, ofp, knownempty)
except IOError, (err, msg):
# Ignore EPIPE; it just means that whoever we're writing to stopped
# reading. The rest of the output would be ignored. All other errors