2000-06-29 19:34:54 +00:00
|
|
|
"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# has to be checked explicitly before attempting the import below.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    # The import succeeded, but the module is only usable if it actually
    # exposes the parser factory.
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)

from xml.sax import xmlreader, saxutils, handler

# Module-level aliases used by the element event handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

import string
|
|
|
|
|
2000-06-29 19:34:54 +00:00
|
|
|
# --- ExpatParser
|
|
|
|
|
2000-09-23 05:32:26 +00:00
|
|
|
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The reader acts as its own Locator: it passes itself to the content
    handler's setDocumentLocator() in parse() and to SAXParseException
    in feed().
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a reader; no expat parser exists until reset() runs.

        namespaceHandling -- initial value of the namespaces feature;
            when true, reset() creates a namespace-aware expat parser.
        bufsize -- passed through to xmlreader.IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        # Initialised here as well as in reset() so the attribute exists
        # on every instance, even before the first reset().
        self._decl_handler_prop = None
        self._parsing = 0
        # (parser, source) pairs saved while an external entity is parsed.
        self._entity_stack = []

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(self)
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        "Give the expat parser the source's system id as its base, if any."
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install a content handler.

        While parsing, the expat callbacks that point straight at the
        content handler are rebound to the new handler as well.
        """
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of the namespaces feature.

        Raises SAXNotRecognizedException for any other feature name.
        """
        if name == handler.feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the namespaces feature.

        Raises SAXNotSupportedException while a parse is in progress and
        SAXNotRecognizedException for any other feature name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == handler.feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """Return the lexical handler property.

        Raises SAXNotRecognizedException for any other property name.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the lexical handler property.

        While parsing, the expat lexical callbacks are rebound at once;
        a value of None removes them.  Raises SAXNotRecognizedException
        for any other property name.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Feed a chunk of the document to expat.

        The first chunk of a document resets the reader and fires
        startDocument().  An expat error is wrapped in a
        SAXParseException and passed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error:
            error_code = self._parser.ErrorCode
            exc = SAXParseException(expat.ErrorString(error_code), None, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document: final feed, endDocument(), drop the parser.

        Does nothing while an external entity is being parsed.
        """
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        self.feed("", isFinal = 1)
        self._cont_handler.endDocument()
        self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        self._parser = None

    def _reset_cont_handler(self):
        "Point the expat PI and character-data callbacks at the content handler."
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Point the expat comment/CDATA callbacks at the lexical handler.

        When the lexical handler property is None the callbacks are
        cleared, so the property can be unset while parsing.
        """
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA

    def reset(self):
        "Create a fresh expat parser and wire all callbacks to this reader."
        if self._namespaces:
            # A single space separates namespace URI and local name in
            # the names expat reports; see _split_ns_name().
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        "Return expat's ErrorColumnNumber, or None when there is no parser."
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return expat's ErrorLineNumber, or 1 when there is no parser."
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        "Return the public id of the source currently being parsed."
        return self._source.getPublicId()

    def getSystemId(self):
        "Return the system id of the source currently being parsed."
        return self._source.getSystemId()

    # event handlers

    def _split_ns_name(self, name):
        """Split a name reported by the namespace-aware parser.

        Returns a (uri, localname) tuple; uri is None when the name has
        no whitespace-separated namespace part.
        """
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        return tuple(parts)

    def start_element(self, name, attrs):
        "expat callback: forward a start tag to startElement()."
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        "expat callback: forward an end tag to endElement()."
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """expat callback: forward a start tag to startElementNS().

        Element and attribute names are converted to (uri, localname)
        tuples; the qname argument is always None and the qname mapping
        of the attributes object is empty.
        """
        newattrs = {}
        for (aname, value) in attrs.items():
            newattrs[self._split_ns_name(aname)] = value

        self._cont_handler.startElementNS(self._split_ns_name(name), None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        "expat callback: forward an end tag to endElementNS()."
        self._cont_handler.endElementNS(self._split_ns_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        "expat callback: forward to startPrefixMapping()."
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        "expat callback: forward to endPrefixMapping()."
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        "expat callback: forward to the DTD handler (base is dropped)."
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        "expat callback: forward to the DTD handler (base is dropped)."
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """expat callback: resolve and parse an external entity.

        The current parser and source are pushed on the entity stack, a
        sub-parser created from context parses the resolved source, and
        the saved pair is restored afterwards.  Returns 1 on success and
        0 if parsing the entity raised.
        """
        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # Deliberately catches everything: returning 0 reports the
            # failure to expat.
            # NOTE(review): the entity stack is not unwound on this path,
            # so _parser/_source still refer to the entity - confirm
            # whether that is intended.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1
|
2000-10-23 18:09:50 +00:00
|
|
|
|
2000-06-29 19:34:54 +00:00
|
|
|
# ---
|
2000-10-23 18:09:50 +00:00
|
|
|
|
2000-06-29 19:34:54 +00:00
|
|
|
def create_parser(*args, **kwargs):
    """Create and return a new ExpatParser.

    All positional and keyword arguments are forwarded unchanged to the
    ExpatParser constructor.
    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return ExpatParser(*args, **kwargs)
|
2000-10-23 18:09:50 +00:00
|
|
|
|
2000-06-29 19:34:54 +00:00
|
|
|
# ---
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: parse a sample document with an XMLGenerator as
    # the content handler.
    import xml.sax
    import xml.sax.saxutils
    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils; it is not an attribute
    # of the xml.sax package itself.
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")
|