2000-06-29 19:34:54 +00:00
|
|
|
"""
|
|
|
|
SAX driver for the Pyexpat C module. This driver works with
|
|
|
|
pyexpat.__version__ == '1.5'.
|
|
|
|
|
|
|
|
$Id$
|
|
|
|
"""
|
|
|
|
|
|
|
|
# Todo on driver:
|
|
|
|
# - make it support external entities (wait for pyexpat.c)
|
|
|
|
# - enable configuration between reset() and feed() calls
|
|
|
|
# - support lexical events?
|
|
|
|
# - proper inputsource handling
|
|
|
|
# - properties and features
|
|
|
|
|
|
|
|
# Todo on pyexpat.c:
|
|
|
|
# - support XML_ExternalEntityParserCreate
|
|
|
|
# - exceptions in callouts from pyexpat to python code lose position info
|
|
|
|
|
|
|
|
# Version string of this SAX driver module.
version = "0.20"
|
|
|
|
|
|
|
|
from string import split
|
|
|
|
|
|
|
|
from xml.sax import xmlreader
|
|
|
|
import pyexpat
|
|
|
|
import xml.sax
|
|
|
|
|
|
|
|
# --- ExpatParser
|
|
|
|
|
|
|
|
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The driver is both the reader and its own locator: parse() hands
    ``self`` to the content handler via setDocumentLocator().
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create an idle driver.

        namespaceHandling -- true to create the expat parser in namespace
                             mode (see reset()).
        bufsize           -- passed on to xmlreader.IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = None       # value last given to prepareParser()
        self._parser = None       # pyexpat parser, created by reset()
        self._namespaces = namespaceHandling
        self._parsing = 0         # 1 between the first feed() and close()

    # XMLReader methods

    def parse(self, stream_or_string):
        """Parse an XML document from a file name or an open stream.

        A plain string is treated as a file name and opened with open();
        anything else is handed to IncrementalParser.parse() unchanged.

        Raises xml.sax.SAXParseException when expat reports an error.
        """
        opened_here = isinstance(stream_or_string, type(""))
        if opened_here:
            stream = open(stream_or_string)
        else:
            stream = stream_or_string

        try:
            self.reset()
            self._cont_handler.setDocumentLocator(self)
            try:
                xmlreader.IncrementalParser.parse(self, stream)
            except pyexpat.error:
                error_code = self._parser.ErrorCode
                raise xml.sax.SAXParseException(
                    pyexpat.ErrorString(error_code), None, self)
            # No endDocument() here: close() already reports it, and
            # IncrementalParser.parse() is documented as being built on
            # feed() and close().
        finally:
            # Only close what this method opened; a caller-supplied stream
            # stays under the caller's control.
            if opened_here:
                stream.close()

    def prepareParser(self, filename=None):
        """Remember *filename* and, if given, install it as expat's base."""
        self._source = filename

        if filename is not None:
            self._parser.SetBase(filename)

    def getFeature(self, name):
        """Look up the state of a SAX2 feature (none are recognized yet)."""
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature (none are recognized yet)."""
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Look up the value of a SAX2 property (none are recognized yet)."""
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property (none are recognized yet)."""
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data):
        """Pass a chunk of document text to expat.

        The first chunk of a document creates a fresh parser and reports
        startDocument() before any data is parsed.
        """
        if not self._parsing:
            self._parsing = 1
            # NOTE(review): this replaces any parser configured earlier
            # (e.g. the base set by prepareParser()) -- confirm intended.
            self.reset()
            self._cont_handler.startDocument()
        # FIXME: error checking and endDocument()
        self._parser.Parse(data, 0)

    def close(self):
        """Finish the current document.

        Does nothing if no parser has been created yet.  Otherwise expat is
        told that the input is complete and, if a document was in progress,
        endDocument() is reported afterwards.
        """
        if self._parser is None:
            return

        was_parsing = self._parsing
        # Clear the flag first so a failing final parse does not leave the
        # driver stuck in the "parsing" state.
        self._parsing = 0
        # Flush expat before endDocument(): the final Parse call may still
        # deliver events, and endDocument() must be the last one.
        self._parser.Parse("", 1)
        if was_parsing:
            self._cont_handler.endDocument()

    def reset(self):
        """Create a fresh pyexpat parser and attach the event callbacks.

        In namespace mode the parser is created with " " as the namespace
        separator and the *_ns element callbacks are installed.
        """
        if self._namespaces:
            self._parser = pyexpat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = pyexpat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        # These two go straight to the content handler that is current at
        # reset() time, bypassing the wrapper methods further down.
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
#         self._parser.CommentHandler =
#         self._parser.StartCdataSectionHandler =
#         self._parser.EndCdataSectionHandler =
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber."""
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber."""
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the source's public identifier, or None if unknown.

        The stored source is None or a plain file name unless a caller
        supplied an object with a getPublicId() method.
        """
        source = self._source
        if source is None or not hasattr(source, "getPublicId"):
            return None
        return source.getPublicId()

    def getSystemId(self):
        """Return the base most recently set on the expat parser."""
        return self._parser.GetBase()

    # event handlers

    def _split_ns_name(self, name):
        """Turn an expat namespace-mode name into a (uri, localname) tuple.

        Names without a namespace yield (None, name).
        """
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        return tuple(parts)

    def start_element(self, name, attrs):
        """Report a start tag when namespace handling is off."""
        self._cont_handler.startElement(name, name,
                                 xmlreader.AttributesImpl(attrs, attrs))

    def end_element(self, name):
        """Report an end tag when namespace handling is off."""
        self._cont_handler.endElement(name, name)

    def start_element_ns(self, name, attrs):
        """Report a start tag with its name split into (uri, localname)."""
        self._cont_handler.startElement(self._split_ns_name(name), None,
                                 xmlreader.AttributesImpl(attrs, None))

    def end_element_ns(self, name):
        """Report an end tag with its name split into (uri, localname).

        The name has the same shape as the one given to startElement() so
        that handlers can match start and end events (prefixes are not
        implemented yet).
        """
        self._cont_handler.endElement(self._split_ns_name(name), None)

    # this is not used
    def processing_instruction(self, target, data):
        """Forward a processing instruction to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    # this is not used
    def character_data(self, data):
        """Forward character data to the content handler."""
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """Forward the start of a prefix mapping to the content handler."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """Forward the end of a prefix mapping to the content handler."""
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Reject external entity references; they are not supported yet.

        Raises NotImplementedError explicitly instead of relying on an
        assert, which is stripped when Python runs with -O.
        """
        # FIXME: resolve the entity through self._ent_handler, prepare the
        #        resulting input source, create new parser, stack
        #        self._source and self._parser
        # FIXME: reuse code from self.parse(...)
        raise NotImplementedError("external entities are not implemented")
|
|
|
|
|
|
|
|
# ---
|
|
|
|
|
|
|
|
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments.

    Positional and keyword arguments are passed straight through to the
    ExpatParser constructor.
    """
    # Extended call syntax replaces the deprecated apply() built-in.
    return ExpatParser(*args, **kwargs)
|
|
|
|
|
|
|
|
# ---
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: parse a sample document and regenerate it through
    # XMLGenerator, using the default error handler.
    import xml.sax

    demo_parser = create_parser()
    demo_parser.setContentHandler(xml.sax.XMLGenerator())
    demo_parser.setErrorHandler(xml.sax.ErrorHandler())
    demo_parser.parse("../../../hamlet.xml")
|