First cut at a script to generate ESIS events from LaTeX source,

probably only usable for the Python docs.
This commit is contained in:
Fred Drake 1998-11-18 23:27:24 +00:00
parent 691d27a7ae
commit 95f4f92b47
1 changed file with 334 additions and 0 deletions

334
Doc/tools/latex2esis.py Executable file
View File

@ -0,0 +1,334 @@
#! /usr/bin/env python
"""Generate ESIS events based on a LaTeX source document and configuration
data.
"""
__version__ = '$Revision$'
import re
import string
import StringIO
import sys
class Error(Exception):
    """Base class for the exceptions defined by this module."""
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be handled by the converter."""
# Patterns used by subconvert() to recognise LaTeX constructs.  Every one of
# them is applied with .match(), i.e. anchored at the start of the text that
# remains to be converted.

# \begin{name}; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
# \end{name}; group 1 is the environment name.
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# \name or \name*, optionally followed by "{" or one space.  Group 1 is the
# macro name including any "*"; group 2 is the delimiter ("{", " " or "").
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?)({| |)")
# A "%" comment up to and including the newline that ends it; group 1 is the
# comment text without the "%" and the newline.
_comment_rx = re.compile("%([^\n]*)\n")
# A run of ordinary text: anything except "]", "%", "\", "{" and "}".
_text_rx = re.compile(r"[^]%\\{}]+")
# A bracketed optional argument "[...]"; group 1 is the text between the
# brackets, which cannot itself contain "]".
_optional_rx = re.compile(r"[[]([^]]*)[]]")
# A braced parameter "{...}" after optional spaces/newlines; group 1 is the
# text between the braces, which cannot itself contain "}".
_parameter_rx = re.compile("[ \n]*{([^}]*)}")
# A parameter value made of a letter followed by letters, digits, "." or "-";
# such values are written as TOKEN attributes, all others as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# The "{" that opens a group, after optional spaces/newlines.
_start_group_rx = re.compile("[ \n]*{")
# The "[" that opens an optional argument, after optional spaces/newlines.
_start_optional_rx = re.compile("[ \n]*[[]")
_charmap = {}
for c in map(chr, range(256)):
_charmap[c] = c
_charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"
del c
def encode(s):
    """Return `s` with newlines and backslashes escaped for ESIS output.

    Each character is translated through the module-level `_charmap` table.
    A character that has no entry in the table is passed through unchanged
    rather than producing None, which would make the join fail.
    """
    return "".join([_charmap.get(ch, ch) for ch in s])
# Characters that subconvert() accepts directly after a backslash as an
# escaped literal: the pair is consumed and the character itself is written
# as data.
ESCAPED_CHARS = "$%#^ {}"
def subconvert(line, ofp, table, discards, autoclosing, knownempty,
               endchar=None):
    """Convert LaTeX text to ESIS events written to `ofp`.

    The text is consumed from the front; a stack of open element names
    (with "" standing for a bare brace group) tracks what the next closing
    brace or environment end belongs to.

    Parameters:
      line        -- the LaTeX text to convert (a string).
      ofp         -- output object; only its write() method is used.
      table       -- mapping from macro/environment name to either a string
                     (the macro is rewritten as the entity reference
                     "&string;") or a tuple (params, optional, empty).
                     Names missing from the table are treated as
                     ([], 0, 0).  In `params`, a plain string names an
                     attribute taken from a braced (or, for an optional
                     first parameter, bracketed) argument; a 1-tuple or
                     1-element list names a sub-element opened inside the
                     macro's element.
      discards    -- container of names whose start-tag output is written to
                     a throw-away buffer and whose end tag is not written.
      autoclosing -- container of names that, when seen again while already
                     open, close the earlier instance and everything opened
                     inside it.
      knownempty  -- callable taking a name and returning true if the
                     element is empty.
      endchar     -- optional character; when it starts the remaining text
                     while no element is open, conversion stops there.

    Returns the text following `endchar` when that terminator is found, or
    None when the input is exhausted.

    Raises LaTeXFormatError for markup that cannot be interpreted:
    mismatched environment ends, unbalanced closing braces, an unterminated
    verbatim environment or optional argument, a missing parameter group, or
    unrecognised markup.
    """
    stack = []
    while line:
        if line[0] == endchar and not stack:
            # Found the requested terminator outside of any open element.
            return line[1:]

        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            if text:
                ofp.write("(COMMENT\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")COMMENT\n")
            # The newline that ended the comment is kept as data.
            ofp.write("-\\n\n")
            line = line[m.end():]
            continue

        m = _begin_env_rx.match(line)
        if m:
            # Rewrite \begin{name} as \name so the macro handler below
            # deals with it on the next iteration.
            line = r"\%s%s" % (m.group(1), line[m.end():])
            continue

        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            if envname == "document":
                # Special magic: anything still open above the bottom of the
                # stack must be an auto-closing element.
                for n in stack[1:]:
                    if n not in autoclosing:
                        raise LaTeXFormatError(
                            "open element on stack: " + repr(n))
                # should be more careful, but this is easier to code:
                stack = []
                ofp.write(")document\n")
            elif stack and envname == stack[-1]:
                ofp.write(")%s\n" % envname)
                del stack[-1]
            else:
                # Also reached when nothing is open at all.
                raise LaTeXFormatError("environment close doesn't match")
            line = line[m.end():]
            continue

        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "verbatim":
                # Really magic case: copy everything up to the matching
                # \end{verbatim} as data without interpreting it.
                pos = line.find("\\end{verbatim}")
                if pos < 0:
                    raise LaTeXFormatError(
                        "unterminated verbatim environment")
                text = line[m.end(1):pos]
                ofp.write("(verbatim\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")verbatim\n")
                line = line[pos + len("\\end{verbatim}"):]
                continue
            numbered = 1
            if macroname[-1] == "*":
                # Starred form: same element, flagged as unnumbered.
                macroname = macroname[:-1]
                numbered = 0
            real_ofp = ofp
            if macroname in autoclosing and macroname in stack:
                # Close everything opened since the previous instance of
                # this element, then the previous instance itself.
                while stack[-1] != macroname:
                    if stack[-1] and stack[-1] not in discards:
                        ofp.write(")%s\n-\\n\n" % stack[-1])
                    del stack[-1]
                if macroname not in discards:
                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
                del stack[-1]
            if macroname in discards:
                # Start-tag output for discarded macros goes to a scratch
                # buffer that is simply dropped.
                ofp = StringIO.StringIO()
            conversion = table.get(macroname, ([], 0, 0))
            if isinstance(conversion, str):
                # XXX convert to general entity; ESIS cheats!
                # NOTE(review): if a name is both an entity and in
                # `discards`, ofp stays bound to the scratch buffer after
                # this `continue`; main() never configures that overlap.
                line = "&%s;%s" % (conversion, line[m.end(1):])
                continue
            params, optional, empty = conversion
            empty = empty or knownempty(macroname)
            if empty:
                ofp.write("e\n")
            if not numbered:
                ofp.write("Anumbered TOKEN no\n")
            if params and not (optional and len(params) == 1):
                # Keep whatever delimiter followed the name ("{", " " or
                # nothing) so parameter parsing below sees it.  Using the
                # start of group 2 rather than "end - 1" avoids cutting into
                # the macro name when no delimiter was matched.
                line = line[m.start(2):]
            else:
                line = line[m.end():]
            #
            # Very ugly special case to deal with \item[].  The catch is that
            # this needs to occur outside the for loop that handles attribute
            # parsing so we can 'continue' the outer loop.
            #
            if optional and params and isinstance(params[0], tuple):
                # the attribute name isn't used in this special case
                stack.append(macroname)
                ofp.write("(%s\n" % macroname)
                m = _start_optional_rx.match(line)
                if m:
                    # Convert the bracketed content recursively, stopping at
                    # the "]" that closes it.
                    line = subconvert(line[m.end():], ofp, table, discards,
                                      autoclosing, knownempty, endchar="]")
                    if line is None:
                        # Input ran out before the closing "]".
                        raise LaTeXFormatError(
                            "unterminated optional argument: "
                            + repr(macroname))
                # Close the element right away via the "}" handler.
                line = "}" + line
                ofp = real_ofp
                continue
            # handle attribute mappings here:
            for attrname in params:
                if optional:
                    # Only the first parameter can be optional.
                    optional = 0
                    if isinstance(attrname, str):
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            ofp.write("A%s TOKEN %s\n"
                                      % (attrname, encode(m.group(1))))
                elif isinstance(attrname, tuple):
                    # This is a sub-element; but don't place the
                    # element we found on the stack (\section-like)
                    stack.append(macroname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname[0]
                    m = _start_group_rx.match(line)
                    if m:
                        line = line[m.end():]
                elif isinstance(attrname, list):
                    # A normal subelement.
                    # NOTE(review): the sub-element name is pushed here and
                    # again after the loop; no entry in main() uses a list,
                    # so confirm the intent before relying on this branch.
                    attrname = attrname[0]
                    stack.append(macroname)
                    stack.append(attrname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname
                else:
                    # A required braced argument becomes an attribute.
                    m = _parameter_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "could not extract parameter group: "
                            + repr(line))
                    value = m.group(1)
                    if _token_rx.match(value):
                        dtype = "TOKEN"
                    else:
                        dtype = "CDATA"
                    ofp.write("A%s %s %s\n"
                              % (attrname, dtype, encode(value)))
                    line = line[m.end():]
            stack.append(macroname)
            # Entity conversions took the `continue` above, so `conversion`
            # is always a tuple here and the start tag is always written.
            ofp.write("(%s\n" % macroname)
            if empty:
                # Empty elements are closed immediately.
                line = "}" + line
            ofp = real_ofp
            continue

        if line[0] == "}":
            # end of macro
            if not stack:
                raise LaTeXFormatError("unbalanced close brace")
            macroname = stack[-1]
            conversion = table.get(macroname)
            if macroname \
               and macroname not in discards \
               and not isinstance(conversion, str):
                # otherwise, it was just a bare group
                ofp.write(")%s\n" % macroname)
            del stack[-1]
            line = line[1:]
            continue

        if line[0] == "{":
            # Bare group: push a placeholder so the matching "}" pops it.
            stack.append("")
            line = line[1:]
            continue

        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            ofp.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue

        if line[:2] == r"\\":
            ofp.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue

        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            ofp.write("-%s\n" % text)
            line = line[m.end():]
            continue

        # special case because of \item[]
        if line[0] == "]":
            ofp.write("-]\n")
            line = line[1:]
            continue

        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %s%s"
                               % (repr(line[:100]), extra))
    return None
def convert(ifp, ofp, table={}, discards=(), autoclosing=(), knownempties=()):
    """Read all of `ifp` and write the corresponding ESIS events to `ofp`.

    Parameters:
      ifp          -- input object; its read() method supplies the LaTeX text.
      ofp          -- output object; passed through to subconvert().
      table        -- macro conversion table, passed through to subconvert().
                      The shared default dictionary is only ever read.
      discards     -- names whose output is discarded, passed through.
      autoclosing  -- names of auto-closing elements, passed through.
      knownempties -- iterable of names of elements known to be empty.

    Returns whatever subconvert() returns.
    """
    empties = {}
    for gi in knownempties:
        empties[gi] = gi
    # subconvert() wants a predicate; the dictionary's membership test does
    # the job without relying on dict.has_key(), which newer Pythons lack.
    return subconvert(ifp.read(), ofp, table, discards, autoclosing,
                      empties.__contains__)
def usage():
    """Write a one-line usage summary to standard error."""
    if sys.argv:
        program = sys.argv[0]
    else:
        program = "latex2esis.py"
    sys.stderr.write("usage: %s input-file [output-file]\n" % program)


def main():
    """Convert the LaTeX file named on the command line to ESIS events.

    With one argument the events are written to standard output; with two,
    to the file named by the second argument.  Any other argument count
    prints a usage message and exits with status 2.
    """
    nargs = len(sys.argv)
    if nargs not in (2, 3):
        usage()
        sys.exit(2)

    table = {
        # entries are name
        #   -> ([list of attribute names], first_is_optional, empty)
        # A tuple in the attribute list names a sub-element instead of an
        # attribute.
        "cfuncdesc": (["type", "name", ("args",)], 0, 0),
        "chapter": ([("title",)], 0, 0),
        "chapter*": ([("title",)], 0, 0),
        "classdesc": (["name", ("constructor-args",)], 0, 0),
        "ctypedesc": (["name"], 0, 0),
        "cvardesc": (["type", "name"], 0, 0),
        "datadesc": (["name"], 0, 0),
        "declaremodule": (["id", "type", "name"], 1, 1),
        "deprecated": (["release"], 0, 1),
        "documentclass": (["classname"], 0, 1),
        "excdesc": (["name"], 0, 0),
        "funcdesc": (["name", ("args",)], 0, 0),
        "funcdescni": (["name", ("args",)], 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1),
        "input": (["source"], 0, 1),
        "item": ([("leader",)], 1, 0),
        "label": (["id"], 0, 1),
        "manpage": (["name", "section"], 0, 1),
        "memberdesc": (["class", "name"], 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0),
        "opcodedesc": (["name", "var"], 0, 0),
        "par": ([], 0, 1),
        "rfc": (["number"], 0, 1),
        "section": ([("title",)], 0, 0),
        "seemodule": (["ref", "name"], 1, 0),
        "tableii": (["colspec", "style", "head1", "head2"], 0, 0),
        "tableiii": (["colspec", "style", "head1", "head2", "head3"], 0, 0),
        "tableiv": (["colspec", "style", "head1", "head2", "head3", "head4"],
                    0, 0),
        "versionadded": (["version"], 0, 1),
        "versionchanged": (["version"], 0, 1),
        #
        # String values: the macro is replaced by the named entity.
        #
        "ABC": "ABC",
        "ASCII": "ASCII",
        "C": "C",
        "Cpp": "Cpp",
        "EOF": "EOF",
        "e": "backslash",
        "ldots": "ldots",
        "NULL": "NULL",
        "POSIX": "POSIX",
        "UNIX": "Unix",
        #
        # Things that will actually be going away!
        #
        "fi": ([], 0, 1),
        "ifhtml": ([], 0, 1),
        "makeindex": ([], 0, 1),
        "makemodindex": ([], 0, 1),
        "maketitle": ([], 0, 1),
        "noindent": ([], 0, 1),
        "tableofcontents": ([], 0, 1),
        }
    discards = ["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
                "noindent", "tableofcontents"]
    autoclosing = ["chapter", "section", "subsection", "subsubsection",
                   "paragraph", "subparagraph", ]
    knownempties = ["rfc", "declaremodule", "appendix",
                    "maketitle", "makeindex", "makemodindex",
                    "localmoduletable", "manpage", "input"]

    ifp = open(sys.argv[1])
    try:
        if nargs == 3:
            ofp = open(sys.argv[2], "w")
        else:
            ofp = sys.stdout
        try:
            convert(ifp, ofp, table, discards=discards,
                    autoclosing=autoclosing, knownempties=knownempties)
        finally:
            # Close only what was opened here; leave sys.stdout alone.
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        ifp.close()
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()