2000-06-29 19:39:57 +00:00
|
|
|
import minidom
|
|
|
|
import types
|
|
|
|
import string
|
|
|
|
import sys
|
|
|
|
from xml.sax import ExpatParser
|
|
|
|
|
|
|
|
#todo: SAX2/namespace handling
|
|
|
|
|
|
|
|
# Event tokens.  Each queued event is a (token, node) pair whose first
# item is one of these strings.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
|
|
|
|
|
|
|
|
class PullDOM:
    """SAX-style content handler that turns parser callbacks into DOM events.

    Every callback creates a minidom node and queues a ``(token, node)``
    pair.  The queue is a singly linked list of two-item lists
    ``[event, next_cell]``:

    * ``firstEvent`` is a sentinel head cell; ``firstEvent[1]`` is the
      oldest pending cell (or None when the queue is empty).
    * ``lastEvent`` is the tail cell that new events are hooked onto.

    Nodes get their ``parentNode`` set here but are NOT added to the
    parent's ``childNodes``; that is left to the consumer of the events.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent

    def _push(self, token, node):
        """Append a ``(token, node)`` event and advance the tail pointer."""
        cell = [(token, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def _link(self, node):
        """Set *node*'s parent to the current node and chain it to the
        parent's last existing child, if the parent has any children."""
        parent = self.curNode
        node.parentNode = parent
        if parent.childNodes:
            node.previousSibling = parent.childNodes[-1]
            node.previousSibling.nextSibling = node

    def setDocumentLocator(self, locator):
        """Accept and ignore the locator."""
        pass

    def startElement(self, name, tagName, attrs):
        """Create an element node and queue a START_ELEMENT event.

        name    -- not used by this handler.
        tagName -- tag name passed to ``createElement``.
        attrs   -- mapping-like object; every key from ``attrs.keys()``
                   is copied onto the element with ``setAttribute``.

        The new element becomes the current node.
        """
        if not hasattr(self, "curNode"):
            # FIXME: hack!  No document has been started yet, so start one.
            self.startDocument()

        node = self.document.createElement(tagName)  # FIXME namespaces!
        for attr in attrs.keys():
            node.setAttribute(attr, attrs[attr])

        self._link(node)
        self.curNode = node
        # FIXME: do I have to screen namespace attributes
        self._push(START_ELEMENT, node)

    def endElement(self, name, tagName):
        """Queue END_ELEMENT for the current node and step up to its parent.

        Neither *name* nor *tagName* is inspected.
        """
        node = self.curNode
        self._push(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Create a comment node for *s* and queue a COMMENT event."""
        node = self.document.createComment(s)
        self._link(node)
        self._push(COMMENT, node)

    def processingInstruction(self, target, data):
        """Create a processing-instruction node and queue its event."""
        node = self.document.createProcessingInstruction(target, data)
        self._link(node)
        self._push(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Create a text node for *chars* and queue IGNORABLE_WHITESPACE.

        The whole string is used.  The previous code sliced it with
        ``start``/``length`` names that are not parameters of this method
        and raised NameError on every call.
        """
        node = self.document.createTextNode(chars)
        self._link(node)
        self._push(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Create a text node for *chars* and queue a CHARACTERS event.

        Only ``parentNode`` is set; unlike the other node callbacks no
        sibling links are written here.
        """
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._push(CHARACTERS, node)

    def startDocument(self):
        """Create the Document, make it the current node, queue the event."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._push(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element (if any) and queue END_DOCUMENT.

        The event carries the current node, which the assertion below
        requires to have no parent.  The previous code used the loop
        variable instead, which is unbound when the document has no
        children and is otherwise the last child rather than the document.
        """
        assert(not self.curNode.parentNode)
        for node in self.curNode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                self.document.documentElement = node
        #if not self.document.documentElement:
        #    raise Error, "No document element"

        # The tail pointer is not advanced here (unchanged from the
        # previous code); presumably END_DOCUMENT is the final event.
        self.lastEvent[1] = [(END_DOCUMENT, self.curNode), None]
|
|
|
|
|
|
|
|
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        """Print *exception* to standard output and carry on."""
        # Parenthesised form: same output under the Python 2 print
        # statement, and also valid as a Python 3 function call.
        print(exception)

    def error(self, exception):
        """Raise *exception*."""
        raise exception

    def fatalError(self, exception):
        """Raise *exception*."""
        raise exception
|
|
|
|
|
|
|
|
class DOMEventStream:
    """Pull-style sequence of ``(token, node)`` events read from a stream.

    Data is read from *stream* in pieces of *bufsize* and fed to *parser*,
    whose content handler is a PullDOM instance that queues the events.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM as the parser's content handler."""
        handler = PullDOM()
        self.pulldom = handler
        self.parser.setContentHandler(handler)

    def __getitem__(self, pos):
        """Return the next event, ignoring *pos*; IndexError when done."""
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events until one carrying *node* itself is seen.

        Along the way every node from a non-END_ELEMENT event is appended
        to its parent.  Returns None, also when the events run out first.
        """
        while 1:
            event = self.getEvent()
            if not event:
                return
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                cur_node.parentNode.appendChild(cur_node)

    def getEvent(self):
        """Return the oldest queued event, feeding the parser as needed.

        Returns None once the stream yields no more data while the queue
        is empty.
        """
        queue = self.pulldom
        head = queue.firstEvent
        if not head[1]:
            # Queue is empty: point the tail back at the sentinel head so
            # newly produced events are hooked on where we will look.
            queue.lastEvent = head
        while not head[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(chunk)
        cell = head[1]
        head[1] = cell[1]
        return cell[0]
|
|
|
|
|
|
|
|
# FIXME: sax2
|
|
|
|
#def _getParser( ):
|
|
|
|
# from xml.sax.saxexts import make_parser
|
|
|
|
# expat doesn't report errors properly! Figure it out
|
|
|
|
# return make_parser()
|
|
|
|
# return make_parser("xml.sax.drivers.drv_xmllib")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _getParser():
|
|
|
|
return ExpatParser()
|
|
|
|
|
|
|
|
# Default amount requested from the stream per read() call by parse().
default_bufsize = 2 ** 14 - 20
|
|
|
|
# FIXME: move into sax package for common usage
|
|
|
|
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a string is treated as a file name and opened
                        with ``open``; anything else is used as the
                        stream as-is.
    parser           -- parser to feed; when false, ``_getParser()``
                        supplies one.
    bufsize          -- passed through to DOMEventStream.

    A file opened here is not closed by this function.
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses, and the second entry accepts Python 2 unicode names.
    if isinstance(stream_or_string, (type(""), type(u""))):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = _getParser()
    return DOMEventStream(stream, parser, bufsize)
|
|
|
|
|
|
|
|
def parseString(string, parser=None):
    """Return a DOMEventStream over the text in *string*.

    string -- the document text; it is wrapped in an in-memory stream and
              its length is used as the buffer size.
    parser -- parser to feed; when false, ``_getParser()`` supplies one.
              (Previously this argument was always overwritten, so a
              caller-supplied parser was silently ignored.)
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        try:
            from StringIO import StringIO
        except ImportError:
            # Neither Python 2 module is available.
            from io import StringIO

    bufsize = len(string)
    buf = StringIO(string)
    if not parser:
        parser = _getParser()
    return DOMEventStream(buf, parser, bufsize)
|
|
|
|
|