2000-06-29 19:34:54 +00:00
|
|
|
"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
|
|
|
|
should be based on this code. """
|
2000-09-21 17:43:48 +00:00
|
|
|
|
|
|
|
from xml.sax._exceptions import SAXNotRecognizedException, SAXNotSupportedException

import handler
|
|
|
|
|
2000-06-29 19:34:54 +00:00
|
|
|
# ===== XMLREADER =====
|
|
|
|
|
|
|
|
class XMLReader:
    """Base class for SAX 2 XML readers (parsers).

    Stores the registered content, DTD, entity and error handlers and
    provides default implementations of the feature/property/locale
    methods that reject every request.  parse() must be overridden.
    """

    def __init__(self):
        self._cont_handler = handler.ContentHandler()
        # No default DTD handler or entity resolver is installed.  The
        # attributes are still created here so that getDTDHandler() and
        # getEntityResolver() return None on a fresh reader instead of
        # raising AttributeError.
        self._dtd_handler = None
        self._ent_handler = None
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler (None if none has been set)."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver (None if none has been set)."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation always raises SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This base implementation knows no features and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This base implementation knows no features and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This base implementation knows no properties and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This base implementation knows no properties and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
|
|
|
|
|
|
|
|
|
|
|
|
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        """Create the parser; bufsize is the size passed to each read()
        call made by parse()."""
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def _parseOpenFile(self, source):
        """Feed everything readable from source to the parser in
        bufsize-sized chunks, then call close() and reset()."""
        chunk = source.read(self._bufsize)
        # Test truthiness rather than comparing with "": a binary stream
        # signals end-of-file with an empty bytes object, which never
        # compares equal to "" and would keep the loop running forever.
        while chunk:
            self.feed(chunk)
            chunk = source.read(self._bufsize)
        self.close()
        self.reset()

    def parse(self, source):
        """Parse source, which is either an object with a read method or
        a file name to be opened.

        prepareParser(source) is called only in the file-name case.
        """
        if hasattr(source, "read"):
            self._parseOpenFile(source)
            return

        #FIXME: how to recognize if it is a URL instead of filename?
        self.prepareParser(source)
        stream = open(source)
        try:
            self._parseOpenFile(stream)
        finally:
            # Close the file we opened even if feed/close/reset raises.
            stream.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
|
|
|
|
|
|
|
|
# ===== LOCATOR =====
|
|
|
|
class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to DocumentHandler methods; at any other time, the
    results are unpredictable.

    The methods of this base class return placeholder values: -1 for
    the numeric positions and None for the identifiers."""

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
|
2000-06-29 19:34:54 +00:00
|
|
|
|
|
|
|
# --- AttributesImpl
|
|
|
|
class AttributesImpl:
    """Read-only, dictionary-like view of an element's attributes.

    attrs maps attribute names to values; rawnames maps qualified
    (raw) names to the corresponding keys of attrs.
    """

    def __init__(self, attrs, rawnames):
        self._attrs = attrs
        self._rawnames = rawnames

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type; always 'CDATA', whatever the name."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value of the attribute with qualified name name."
        return self._attrs[self._rawnames[name]]

    def getNameByQName(self, name):
        "Return the attribute name registered for qualified name name."
        return self._rawnames[name]

    def getNames(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._rawnames.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Lets "name in attrs" work the same way as has_key().
        return name in self._attrs

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return true if an attribute called name is present."
        # Use the "in" operator: the mapping's own has_key() method is
        # not available on Python 3 dictionaries.
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of name, or alternative if it is absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class wrapping the same
        underlying mappings (they are shared, not duplicated)."""
        return self.__class__(self._attrs, self._rawnames)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())
|
|
|
|
|
2000-09-21 17:43:48 +00:00
|
|
|
|
2000-06-29 19:34:54 +00:00
|
|
|
def _test():
    """Smoke test: instantiate every class defined in this module."""
    XMLReader()
    IncrementalParser()
    Locator()
    # AttributesImpl requires both mappings; calling it with no
    # arguments raised TypeError.
    AttributesImpl({}, {})


if __name__ == "__main__":
    _test()
|