]> granicus.if.org Git - python/commitdiff
Reference cycle fixes
authorPaul Prescod <prescod@prescod.net>
Sat, 1 Jul 2000 04:58:47 +0000 (04:58 +0000)
committerPaul Prescod <prescod@prescod.net>
Sat, 1 Jul 2000 04:58:47 +0000 (04:58 +0000)
Lib/xml/__init__.py
Lib/xml/dom/minidom.py
Lib/xml/dom/pulldom.py
Lib/xml/sax/__init__.py

index a89277245a38c8cf8e16963b8bdebea74980ef54..7daa4ea8efc1cb4dda1e51793b32c94719c5ac57 100644 (file)
@@ -8,6 +8,6 @@ dom -- The W3C Document Object Model.  This supports DOM Level 1 +
 parser -- Python wrappers for XML parsers (currently only supports Expat).
 
 sax -- The Simple API for XML, developed by XML-Dev, led by David
-       Megginson.  This supports the SAX 2 API.
-
+       Megginson and ported to Python by Lars Marius Garshol.  This 
+       supports the SAX 2 API.
 """
index 32d2d2b1bfa89eb5403af83d810120f600e56f13..0283fee0eb5d231c96556b47194a3ea1367d7578 100644 (file)
@@ -29,11 +29,19 @@ class Node:
     DOCUMENT_FRAGMENT_NODE      = 11
     NOTATION_NODE               = 12
 
-    allnodes=[]
+    allnodes={}
+    _debug=0
+    _makeParentNodes=1
+    debug=None
 
     def __init__( self ):
         self.childNodes=[]
-        Node.allnodes.append( repr( id( self ))+repr( self.__class__ ))
+        if Node._debug: 
+            index=repr( id( self ))+repr( self.__class__ )
+            Node.allnodes[index]=repr( self.__dict__ )
+            if Node.debug==None:
+                Node.debug=open( "debug4.out", "w" )
+            Node.debug.write( "create %s\n"%index )
 
     def __getattr__( self, key ):
         if key[0:2]=="__": raise AttributeError
@@ -72,12 +80,39 @@ class Node:
         if self.childNodes: return 1
         else: return 0
 
+    def _get_firstChild( self ):
+        return self.childNodes[0]
+
+    def _get_lastChild( self ):
+        return self.childNodes[-1]
+
     def insertBefore( self, newChild, refChild):
         index=self.childNodes.index( refChild )
         self.childNodes.insert( index, newChild )
+        if self._makeParentNodes:
+            newChild.parentNode=self
 
     def appendChild( self, node ):
         self.childNodes.append( node )
+        return node
+
+    def replaceChild( self, newChild, oldChild ):
+        index=self.childNodes.index( oldChild ) # raises ValueError if oldChild is not a child
+        self.childNodes[index]=newChild # store the replacement; writing oldChild back was a no-op
+
+    def removeChild( self, oldChild ):
+        index=self.childNodes.index( oldChild )
+        del self.childNodes[index]
+
+    def cloneNode( self, deep ):
+        import new
+        clone=new.instance( self.__class__, self.__dict__.copy() ) # copy: the clone must not share self's attribute dict
+        clone.attributes=self.attributes.copy() # NOTE(review): fails when attributes is None (Document, Attr) - confirm intent
+        if not deep:
+            clone.childNodes=[]
+        else:
+            clone.childNodes=map( lambda x, deep=deep: x.cloneNode( deep ), self.childNodes ) # actually call cloneNode; default arg binds deep
+        return clone
 
     def unlink( self ):
         self.parentNode=None
@@ -86,11 +121,14 @@ class Node:
             del self.childNodes[-1] # probably not most efficient!
         self.childNodes=None
         if self.attributes:
-            for attr in self.attributes.values():
-                attr.unlink()
-        self.attributes=None
-        index=Node.allnodes.index( repr( id( self ))+repr( self.__class__ ))
-        del Node.allnodes[index]
+            for attr in self._attrs.values():
+                self.removeAttributeNode( attr )
+            assert not len( self._attrs )
+            assert not len( self._attrsNS )
+        if Node._debug:
+            index=repr( id( self ))+repr( self.__class__ )
+            self.debug.write( "Deleting: %s\n" % index )
+            del Node.allnodes[index]
 
 def _write_data( writer, data):
     "Writes datachars to writer."
@@ -100,11 +138,6 @@ def _write_data( writer, data):
     data=string.replace(data,">","&gt;")
     writer.write(data)
 
-def _closeElement( element ):
-    del element.parentNode
-    for node in element.elements:
-        _closeElement( node )
-
 def _getElementsByTagNameHelper( parent, name, rc ):
     for node in parent.childNodes:
         if node.nodeType==Node.ELEMENT_NODE and\
@@ -123,17 +156,16 @@ def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ):
 
 class Attr(Node):
     nodeType=Node.ATTRIBUTE_NODE
-    def __init__( self, qName, namespaceURI="", prefix="",
-                  localName=None ):
-        Node.__init__( self )
-        assert qName
+    def __init__( self, qName, namespaceURI="", localName=None,
+prefix=None ):
         # skip setattr for performance
-        self.__dict__["nodeName"] = self.__dict__["name"] = qName
         self.__dict__["localName"]=localName or qName
-        self.__dict__["prefix"]=prefix
+        self.__dict__["nodeName"] = self.__dict__["name"] = qName
         self.__dict__["namespaceURI"]=namespaceURI
-        # nodeValue and value are set elsewhere
+        self.__dict__["prefix"]=prefix
         self.attributes=None
+        Node.__init__( self )
+        # nodeValue and value are set elsewhere
 
     def __setattr__( self, name, value ):
         if name in ("value", "nodeValue" ):
@@ -142,12 +174,13 @@ class Attr(Node):
             self.__dict__[name]=value
 
 class AttributeList:
-    # the attribute list is a transient interface to the underlying dictionaries
-    # mutations here will change the underlying element's dictionary
+    """the attribute list is a transient interface to the underlying
+dictionaries.  mutations here will change the underlying element's
+dictionary"""
     def __init__( self, attrs, attrsNS ):
-        self.__attrs=attrs
-        self.__attrsNS=attrs
-        self.length=len( self.__attrs.keys() )
+        self._attrs=attrs
+        self._attrsNS=attrsNS
+        self.length=len( self._attrs.keys() )
 
     def item( self, index ):
         try:
@@ -157,40 +190,46 @@ class AttributeList:
         
     def items( self ):
         return map( lambda node: (node.tagName, node.value),
-                    self.__attrs.values() )
+                    self._attrs.values() )
 
     def itemsNS( self ):
         return map( lambda node: ((node.URI, node.localName), node.value),
-                    self.__attrs.values() )
+                    self._attrs.values() )
     
     def keys( self ):
-        return self.__attrs.keys()
+        return self._attrs.keys()
 
     def keysNS( self ):
-        return self.__attrsNS.keys()
+        return self._attrsNS.keys()
 
     def values( self ):
-        return self.__attrs.values()
+        return self._attrs.values()
 
     def __len__( self ):
         return self.length
 
     def __cmp__( self, other ):
-        if self.__attrs is other.__attrs: 
+        if self._attrs is getattr( other, "_attrs", None ):
             return 0
         else: 
             return cmp( id( self ), id( other ) )
 
     #FIXME: is it appropriate to return .value?
     def __getitem__( self, attname_or_tuple ):
-        if type( attname_or_tuple ) == type( (1,2) ):
-            return self.__attrsNS[attname_or_tuple].value
+        if type( attname_or_tuple ) == type( () ):
+            return self._attrsNS[attname_or_tuple]
         else:
-            return self.__attrs[attname_or_tuple].value
+            return self._attrs[attname_or_tuple]
 
     def __setitem__( self, attname ):
         raise TypeError, "object does not support item assignment"
-        
+
+    def __delitem__( self, attname_or_tuple ):
+        node=self[attname_or_tuple]
+        node.unlink()
+        del self._attrs[node.name]
+        del self._attrsNS[(node.namespaceURI, node.localName)]
 class Element( Node ):
     nodeType=Node.ELEMENT_NODE
     def __init__( self, tagName, namespaceURI="", prefix="",
@@ -202,18 +241,18 @@ class Element( Node ):
         self.namespaceURI=namespaceURI
         self.nodeValue=None
 
-        self.__attrs={}  # attributes are double-indexed:
-        self.__attrsNS={}#    tagName -> Attribute
+        self._attrs={}  # attributes are double-indexed:
+        self._attrsNS={}#    tagName -> Attribute
                 #    URI,localName -> Attribute
                 # in the future: consider lazy generation of attribute objects
                 #                this is too tricky for now because of headaches
                 #                with namespaces.
 
     def getAttribute( self, attname ):
-        return self.__attrs[attname].value
+        return self._attrs[attname].value
 
     def getAttributeNS( self, namespaceURI, localName ):
-        return self.__attrsNS[(namespaceURI, localName)].value
+        return self._attrsNS[(namespaceURI, localName)].value
     
     def setAttribute( self, attname, value ):
         attr=Attr( attname )
@@ -222,26 +261,37 @@ class Element( Node ):
         self.setAttributeNode( attr )
 
     def setAttributeNS( self, namespaceURI, qualifiedName, value ):
-        attr=createAttributeNS( namespaceURI, qualifiedName )
+        prefix,localname=_nssplit( qualifiedName )
         # for performance
+        attr = Attr( qualifiedName, namespaceURI, localname, prefix )
         attr.__dict__["value"]=attr.__dict__["nodeValue"]=value
         self.setAttributeNode( attr )
 
+    def getAttributeNode( self, attrname ):
+        return self._attrs.get( attrname )
+
+    def getAttributeNodeNS( self, namespaceURI, localName ):
+        return self._attrsNS[(namespaceURI, localName)]
+
     def setAttributeNode( self, attr ):
-        self.__attrs[attr.name]=attr
-        self.__attrsNS[(attr.namespaceURI,attr.localName)]=attr
+        old=self._attrs.get( attr.name, None)
+        if old:
+            old.unlink()
+        self._attrs[attr.name]=attr
+        self._attrsNS[(attr.namespaceURI,attr.localName)]=attr
 
     def removeAttribute( self, name ):
-        attr = self.__attrs[name]
+        attr = self._attrs[name]
         self.removeAttributeNode( attr )
 
     def removeAttributeNS( self, namespaceURI, localName ):
-        attr = self.__attrsNS[(uri, localName)]
+        attr = self._attrsNS[(namespaceURI, localName)]
         self.removeAttributeNode( attr )
 
     def removeAttributeNode( self, node ):
-        del self.__attrs[node.name]
-        del self.__attrsNS[(node.namespaceURI, node.localName)]
+        node.unlink()
+        del self._attrs[node.name]
+        del self._attrsNS[(node.namespaceURI, node.localName)]
         
     def getElementsByTagName( self, name ):
         return _getElementsByTagNameHelper( self, name, [] )
@@ -271,7 +321,7 @@ class Element( Node ):
             writer.write("/>")
 
     def _get_attributes( self ):
-        return AttributeList( self.__attrs, self.__attrsNS )
+        return AttributeList( self._attrs, self._attrsNS )
 
 class Comment( Node ):
     nodeType=Node.COMMENT_NODE
@@ -313,15 +363,30 @@ class Text( Node ):
     def writexml( self, writer ):
         _write_data( writer, self.data )
 
+def _nssplit( qualifiedName ):
+    fields = string.split(qualifiedName, ':')
+    if len(fields) == 2:
+        return fields
+    elif len(fields) == 1:
+        return( '', fields[0] )
+
 class Document( Node ):
     nodeType=Node.DOCUMENT_NODE
+    documentElement=None
     def __init__( self ):
         Node.__init__( self )
-        self.documentElement=None
         self.attributes=None
         self.nodeName="#document"
         self.nodeValue=None
 
+    def appendChild( self, node ):
+        if node.nodeType==Node.ELEMENT_NODE: # only an element may become documentElement
+            if self.documentElement:
+                raise TypeError, "Two document elements disallowed"
+            self.documentElement=node
+        Node.appendChild( self, node ) # other node types are appended without touching documentElement
+        return node
+
     createElement=Element
 
     createTextNode=Text
@@ -333,32 +398,16 @@ class Document( Node ):
     createAttribute=Attr
 
     def createElementNS(self, namespaceURI, qualifiedName):
-        fields = string.split(qualifiedName, ':')
-        if len(fields) == 2:
-            prefix = fields[0]
-            localName = fields[1]
-        elif len(fields) == 1:
-            prefix = ''
-            localName = fields[0]            
-        return Element(self, qualifiedName, namespaceURI, prefix, localName)
+        prefix,localName=_nssplit( qualifiedName )
+        return Element(qualifiedName, namespaceURI, prefix, localName)
 
     def createAttributeNS(self, namespaceURI, qualifiedName):
-        fields = string.split(qualifiedName,':')
-        if len(fields) == 2:
-            localName = fields[1]
-            prefix = fields[0]
-        elif len(fields) == 1:
-            localName = fields[0]
-            prefix = None
-        return Attr(qualifiedName, namespaceURI, prefix, localName)
+        prefix,localName=_nssplit( qualifiedName )
+        return Attr(namespaceURI, qualifiedName, localName, prefix)
 
     def getElementsByTagNameNS(self,namespaceURI,localName):
         _getElementsByTagNameNSHelper( self, namespaceURI, localName )
 
-    def close( self ):
-        for node in self.elements:
-            _closeElement( node )
-
     def unlink( self ):
         self.documentElement=None
         Node.unlink( self )
index 9c856469b1eeef38b0f1e8a5b5aef2455dd3ec5b..0c047f67874aae6a099848885b567c4683c84664 100644 (file)
@@ -2,7 +2,6 @@ import minidom
 import types
 import string
 import sys
-import pyexpat
 from xml.sax import ExpatParser
 
 #todo: SAX2/namespace handling
@@ -140,12 +139,8 @@ class DOMEventStream:
             if cur_node is node: return
             
             if token !=END_ELEMENT:
-                cur_node.parentNode.childNodes.append( cur_node )
+                cur_node.parentNode.appendChild( cur_node )
             event=self.getEvent()
-        if node.nodeType==minidom.Node.DOCUMENT_NODE:
-            for child in node.childNodes:
-                if child.nodeType==minidom.Node.ELEMENT_NODE:
-                    node.documentElement=child
 
     def getEvent( self ):
         if not self.pulldom.firstEvent[1]:
@@ -193,75 +188,7 @@ def parseString( string, parser=None ):
         stringio=StringIO.StringIO
         
     bufsize=len( string )
-    stringio( string )
+    buf=stringio( string )
     parser=_getParser()
     return DOMEventStream( buf, parser, bufsize )
 
-#FIXME: Use Lars' instead!!!
-class SAX_expat:
-    "SAX driver for the Pyexpat C module."
-
-    def __init__(self):
-        self.parser=pyexpat.ParserCreate()
-        self.started=0
-
-    def setDocumentHandler( self, handler ):
-        self.parser.StartElementHandler = handler.startElement
-        self.parser.EndElementHandler = handler.endElement
-        self.parser.CharacterDataHandler = handler.datachars
-        self.parser.ProcessingInstructionHandler = handler.processingInstruction
-        self.doc_handler=handler
-
-    def setErrorHandler( self, handler ):
-        self.err_handler=handler
-
-    # --- Locator methods. Only usable after errors.
-
-    def getLineNumber(self):
-        return self.parser.ErrorLineNumber
-
-    def getColumnNumber(self):
-        return self.parser.ErrorColumnNumber    
-
-    # --- Internal
-
-    def __report_error(self):
-        msg=pyexpat.ErrorString(self.parser.ErrorCode)
-        self.err_handler.fatalError(msg)
-
-    # --- EXPERIMENTAL PYTHON SAX EXTENSIONS
-        
-    def get_parser_name(self):
-        return "pyexpat"
-
-    def get_parser_version(self):
-        return "Unknown"
-
-    def get_driver_version(self):
-        return version
-    
-    def is_validating(self):
-        return 0
-
-    def is_dtd_reading(self):
-        return 0
-
-    def reset(self):
-        self.parser=pyexpat.ParserCreate()
-        self.parser.StartElementHandler = self.startElement
-        self.parser.EndElementHandler = self.endElement
-        self.parser.CharacterDataHandler = self.characters
-        self.parser.ProcessingInstructionHandler = self.processingInstruction
-    
-    def feed(self,data):
-        if not self.started:
-            self.doc_handler.startDocument()
-            self.started=1  
-        if not self.parser.Parse(data):
-            self.__report_error()
-
-    def close(self):
-        if not self.parser.Parse("",1):
-            self.__report_error()
-        self.doc_handler.endDocument()
-        self.parser = None
index 5d0fea5f8a313f4c8fe37f110227dc4dbce14915..324558d0e885bdd1595f4506c65551380b6ccd6c 100644 (file)
@@ -23,3 +23,27 @@ from _exceptions import *
 from saxutils import *
 from _exceptions import SAXParseException
 import xmlreader
+
+def parse( filename_or_stream, handler, errorHandler=ErrorHandler() ):
+    parser=ExpatParser()
+    parser.setContentHandler( handler )
+    parser.setErrorHandler( errorHandler ) # on the parser instance; "parse" is this function itself
+    parser.parse( filename_or_stream )
+
+# this may not work yet...Expat doesn't handle buffer inputs
+def parseString( string, handler, errorHandler=ErrorHandler() ):
+    try:
+        import cStringIO
+        stringio=cStringIO.StringIO
+    except ImportError:
+        import StringIO
+        stringio=StringIO.StringIO
+        
+    bufsize=len( string )
+    buf=stringio( string )
+    parser=ExpatParser()
+    parser.setContentHandler( handler )
+    parser.setErrorHandler( errorHandler ) # on the parser instance; "parse" is the module-level function
+    parser.parse( buf )
+