#! /usr/bin/env python

"""Perform massive transformations on a document tree created from the LaTeX
of the Python documentation, and dump the ESIS data for the transformed tree.
"""
__version__ = '$Revision$'

import errno
import esistools
import re
import string
import sys
import xml.dom.core
import xml.dom.esis_builder


class ConversionError(Exception):
    """Module-specific exception type."""
    pass


# Set to a true value to have para_msg() write its messages to stderr.
DEBUG_PARA_FIXER = 0

if DEBUG_PARA_FIXER:
    def para_msg(s):
        """Write the debugging message `s` to stderr, prefixed with '*** '."""
        sys.stderr.write("*** %s\n" % s)
else:
    def para_msg(s):
        """Ignore the debugging message `s` (debugging output is disabled)."""
        pass


# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return an Element wrapper for the document's last element child.

    Every child of the underlying node is examined; when several element
    children exist, the last one wins.  Returns None when the document has
    no element children.
    """
    docelem = None
    for n in self._node.children:
        if n.type == xml.dom.core.ELEMENT:
            docelem = xml.dom.core.Element(n, self, self)
    return docelem

xml.dom.core.Document.get_documentElement = get_documentElement


# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return a NodeList over the document's children, owned by `self`."""
    return xml.dom.core.NodeList(self._node.children, self, self)

xml.dom.core.Document.get_childNodes = get_childNodes


def get_first_element(doc, gi):
    """Return the first direct child element of `doc` named `gi`, or None."""
    for n in doc.childNodes:
        if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
            return n
    return None


def extract_first_element(doc, gi):
    """Remove and return the first direct child element of `doc` named `gi`.

    Returns None (and leaves `doc` untouched) when there is no such child.
    """
    node = get_first_element(doc, gi)
    if node is not None:
        doc.removeChild(node)
    return node


def find_all_elements(doc, gi):
    """Return a list of the elements named `gi` found at or below `doc`.

    `doc` itself is included when it is an element named `gi`; each element
    child is then checked, followed by its descendants as reported by
    getElementsByTagName().
    """
    nodes = []
    if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi:
        nodes.append(doc)
    for child in doc.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT:
            if child.tagName == gi:
                nodes.append(child)
            for node in child.getElementsByTagName(gi):
                nodes.append(node)
    return nodes


def simplify(doc):
    """Rework the top level of `doc` so that it has a single usable root.

    The `documentclass` element is removed and its `classname` attribute
    becomes the name of the `document` element; the `title` element and all
    top-level `input` elements are moved to the front of the document
    element; leading text nodes of `doc` are dropped.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Insert in reverse so that the nodes end up in their original order
        # at the front of the document element, each followed by a newline.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Guard against running out of children: firstChild is then no longer a
    # node and has no nodeType to inspect.
    while (doc.firstChild is not None
           and doc.firstChild.nodeType == xml.dom.core.TEXT):
        doc.removeChild(doc.firstChild)


def cleanup_root_text(doc):
    """Remove text nodes that are direct children of `doc`.

    A text node that immediately follows a `COMMENT` element is kept.
    """
    discards = []
    skip = 0
    for n in doc.childNodes:
        prevskip = skip
        skip = 0
        if n.nodeType == xml.dom.core.TEXT and not prevskip:
            discards.append(n)
        elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
            skip = 1
    # Removal is deferred so the child list is not modified while iterating.
    for node in discards:
        doc.removeChild(node)


def handle_args(doc):
    """Restructure the parent of every `args` element found in `doc`.

    The `args` element is replaced by a new `signature` element that holds a
    `name` element (text taken from the parent's `name` attribute, which is
    removed) and, when it has children, the `args` element itself.  All other
    children of the parent are moved into a new `description` element that is
    appended to the parent.
    """
    for node in find_all_elements(doc, "args"):
        parent = node.parentNode
        nodes = []
        for n in parent.childNodes:
            if n.nodeType != xml.dom.core.ELEMENT or n.tagName != "args":
                nodes.append(n)
        signature = doc.createElement("signature")
        signature.appendChild(doc.createTextNode("\n "))
        name = doc.createElement("name")
        name.appendChild(doc.createTextNode(parent.getAttribute("name")))
        parent.removeAttribute("name")
        signature.appendChild(name)
        desc = doc.createElement("description")
        for n in nodes:
            parent.removeChild(n)
            desc.appendChild(n)
        desc.appendChild(doc.createTextNode("\n "))
        parent.replaceChild(signature, node)
        parent.insertBefore(doc.createTextNode("\n "), signature)
        if node.childNodes:
            # keep the args element, newline & indent
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(node)
        # createTextNode() is the DOM factory method used everywhere else in
        # this file; the former createText() spelling was inconsistent.
        parent.appendChild(doc.createTextNode("\n "))
        parent.appendChild(desc)
        parent.appendChild(doc.createTextNode("\n"))
        signature.appendChild(doc.createTextNode("\n "))


def methodline_to_signature(doc, methodline):
    """Detach `methodline` from its parent and return a `signature` element.

    The signature holds a `name` element built from the `name` attribute of
    `methodline`.  When `methodline` has children it is renamed to `args`,
    loses its `name` attribute, and is appended to the signature as well.
    """
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    name = doc.createElement("name")
    name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    signature.appendChild(name)
    methodline.parentNode.removeChild(methodline)
    if len(methodline.childNodes):
        methodline._node.name = "args"
        methodline.removeAttribute("name")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(methodline)
    signature.appendChild(doc.createTextNode("\n "))
    return signature


def handle_appendix(doc):
    """Move everything after the `appendix` marker into a `back-matter` element.

    The first `appendix` element found below a child of the document element
    is removed; all following children of the document element are moved
    into a new `back-matter` element appended to the document element.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = doc.documentElement
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops rather than map(): these calls are made purely for
        # their side effects.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # Drop leading whitespace-only text nodes from the moved content.
        while (nodes and nodes[0].nodeType == xml.dom.core.TEXT
               and not string.strip(nodes[0].data)):
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))


def handle_labels(doc):
    for label in find_all_elements(doc, "label"):
        id = label.getAttribute("id")
        if not id:
            continue
        parent = label.parentNode
        if parent.tagName == "title":
            parent.parentNode.setAttribute("id", id)
        else:
            parent.setAttribute("id", id)
        # now, remove