granicus.if.org Git - python/commitdiff
Some adjustments, mostly to make it more general.
author Fred Drake <fdrake@acm.org>
Thu, 18 Feb 1999 16:30:16 +0000 (16:30 +0000)
committer Fred Drake <fdrake@acm.org>
Thu, 18 Feb 1999 16:30:16 +0000 (16:30 +0000)
Doc/tools/sgmlconv/esis2sgml.py

index 762e5ffb4f20c7d8e589c59b22518a59f1052560..aea99626ebda04593a3bd93f05f84fcd3fcba388 100755 (executable)
@@ -5,6 +5,10 @@
 This is limited, but seems sufficient for the ESIS generated by the
 latex2esis.py script when run over the Python documentation.
 """
+
+# This should have an explicit option to indicate whether the *INPUT* was
+# generated from an SGML or an XML application.
+
 __version__ = '$Revision$'
 
 import errno
@@ -16,29 +20,52 @@ import string
 from xml.utils import escape
 
 
+AUTOCLOSE = ()
+
 EMPTIES_FILENAME = "../sgml/empties.dat"
 LIST_EMPTIES = 0
 
 
+_elem_map = {}  # normalized element name -> spelling to write on output
+_attr_map = {}  # normalized attribute name -> spelling to write on output
+_token_map = {}  # NOTE(review): unused in view; main() says _values_map
+
+_normalize_case = str  # leaves names as-is; main() may install string.lower
+
+def map_gi(sgmlgi, map):  # return the spelling to emit for the name sgmlgi
+    uncased = _normalize_case(sgmlgi)  # lookup key, case-folded if enabled
+    try:
+        return map[uncased]
+    except KeyError:  # a dictionary miss raises KeyError, never IndexError
+        map[uncased] = sgmlgi  # first spelling seen becomes the canonical one
+        return sgmlgi
+
+def null_map_gi(sgmlgi, map):  # pass-through stand-in for map_gi()
+    return sgmlgi  # map is accepted only for signature parity and is ignored
+
+
 def format_attrs(attrs, xml=0):
     attrs = attrs.items()
     attrs.sort()
-    s = ''
+    parts = []  # one formatted attribute per entry, joined once at the end
+    append = parts.append  # bound once so the loop skips the attribute lookup
     for name, value in attrs:
         if xml:
-            s = '%s %s="%s"' % (s, name, escape(value))
+            append('%s="%s"' % (name, escape(value)))  # XML: always quoted
         else:
             # this is a little bogus, but should do for now
             if name == value and isnmtoken(value):
-                s = "%s %s" % (s, value)
+                append(value)  # emit the bare value; the name is omitted
             elif istoken(value):
                 if value == "no" + name:
-                    s = "%s %s" % (s, value)
+                    append(value)  # "no<name>" values are also written bare
                 else:
-                    s = "%s %s=%s" % (s, name, value)
+                    append("%s=%s" % (name, value))  # token value: no quotes
             else:
-                s = '%s %s="%s"' % (s, name, escape(value))
-    return s
+                append('%s="%s"' % (name, escape(value)))
+    if parts:  # an empty first item makes join() produce the leading space
+        parts.insert(0, '')
+    return string.join(parts)  # string.join separates items with one space
 
 
 _nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)
@@ -78,6 +105,7 @@ def do_convert(ifp, ofp, xml=0, autoclose=()):
             if data == "COMMENT":
                 ofp.write("<!--")
                 continue
+            data = map_gi(data, _elem_map)
             if knownempty and xml:
                 ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
             else:
@@ -93,6 +121,7 @@ def do_convert(ifp, ofp, xml=0, autoclose=()):
             if data == "COMMENT":
                 ofp.write("-->")
                 continue
+            data = map_gi(data, _elem_map)
             if xml:
                 if not lastempty:
                     ofp.write("</%s>" % data)
@@ -107,19 +136,24 @@ def do_convert(ifp, ofp, xml=0, autoclose=()):
             lastempty = 0
         elif type == "A":
             name, type, value = string.split(data, " ", 2)
+            name = map_gi(name, _attr_map)
             attrs[name] = esistools.decode(value)
         elif type == "e":
             knownempty = 1
 
     if LIST_EMPTIES:
-        knownempties.append("")
-        if os.path.isfile(EMPTIES_FILENAME):
-            mode = "a"
-        else:
-            mode = "w"
-        fp = open(EMPTIES_FILENAME, mode)
-        fp.write(string.join(knownempties, "\n"))
-        fp.close()
+        dump_empty_element_names(knownempties)
+
+
+def dump_empty_element_names(knownempties):
+    # Append the names, one per line, to EMPTIES_FILENAME; mode "a" creates
+    # the file when it is missing, so no separate isfile() check is needed.
+    fp = open(EMPTIES_FILENAME, "a")
+    try:
+        # Join a copy so that the caller's list is not modified.
+        fp.write(string.join(knownempties + [""], "\n"))
+    finally:
+        fp.close()  # close even when the write fails
 
 
 def sgml_convert(ifp, ofp, autoclose):
@@ -130,7 +164,13 @@ def xml_convert(ifp, ofp, autoclose):
     return do_convert(ifp, ofp, xml=1, autoclose=autoclose)
 
 
-AUTOCLOSE = ("para", "term",)
+def update_gi_map(map, names, fromsgml=1):
+    # Record each name in map under the key that map_gi() will look up.
+    if type(names) not in (type([]), type(())):  # "a,b" string, not a list
+        names = string.split(names, ",")
+    fold = fromsgml and string.lower or str  # SGML names ignore case
+    for name in filter(None, names):  # skip blanks left by empty options
+        map[fold(name)] = name
 
 
 def main():
@@ -138,19 +178,39 @@ def main():
     import sys
     #
     autoclose = AUTOCLOSE
-    convert = sgml_convert
-    xml = 0
+    convert = xml_convert
+    xml = 1
     xmldecl = 0
-    opts, args = getopt.getopt(sys.argv[1:], "adx",
-                               ["autoclose", "declare", "xml"])
+    elem_names = ''
+    attr_names = ''
+    value_names = ''
+    opts, args = getopt.getopt(sys.argv[1:], "adesx",
+                               ["autoclose=", "declare", "sgml", "xml",
+                                "elements-map=", "attributes-map",
+                                "values-map="])
     for opt, arg in opts:
         if opt in ("-d", "--declare"):
             xmldecl = 1
+        elif opt == "-e":
+            global LIST_EMPTIES
+            LIST_EMPTIES = 1
+        elif opt in ("-s", "--sgml"):
+            xml = 0
+            convert = sgml_convert
         elif opt in ("-x", "--xml"):
             xml = 1
             convert = xml_convert
         elif opt in ("-a", "--autoclose"):
             autoclose = string.split(arg, ",")
+        elif opt == "--elements-map":
+            elem_names = ("%s,%s" % (elem_names, arg))[1:]
+        elif opt == "--attributes-map":
+            attr_names = ("%s,%s" % (attr_names, arg))[1:]
+        elif opt == "--values-map":
+            value_names = ("%s,%s" % (value_names, arg))[1:]
+    #
+    # open input streams:
+    #
     if len(args) == 0:
         ifp = sys.stdin
         ofp = sys.stdout
@@ -163,7 +223,23 @@ def main():
     else:
         usage()
         sys.exit(2)
-    # knownempties is ignored in the XML version
+    #
+    # setup the name maps:
+    #
+    if elem_names or attr_names or value_names:
+        # assume the origin was SGML; ignore case of the names from the ESIS
+        # stream but set up conversion tables to get the case right on output
+        global _normalize_case
+        _normalize_case = string.lower
+        update_gi_map(_elem_map, string.split(elem_names, ","))
+        update_gi_map(_attr_map, string.split(attr_names, ","))
+        update_gi_map(_values_map, string.split(value_names, ","))
+    else:
+        global map_gi
+        map_gi = null_map_gi
+    #
+    # run the conversion:
+    #
     try:
         if xml and xmldecl:
             opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n')