cpython/Doc/tools/partparse.py

2407 lines
68 KiB
Python
Raw Normal View History

#
# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
# and generate texinfo source.
#
# This is *not* a good example of good programming practices. In fact, this
# file could use a complete rewrite, in order to become faster, more
# easily extensible and maintainable.
#
# However, I added some comments in a few places for the pitiful person who
# would ever need to take a look into this file.
#
# Have I been clear enough??
#
# -jh
#
# Yup. I made some performance improvements and hope this lasts a while;
# I don't want to be the schmuck who ends up rewriting it!
#
# -fld
#
# (sometime later...)
#
# Ok, I've re-worked substantial chunks of this. It's only getting worse.
# It just might be gone before the next source release. (Yeah!)
#
# -fld
import sys, string, regex, getopt, os
1996-09-10 22:19:51 +00:00
from types import IntType, ListType, StringType, TupleType
release_version = string.split(sys.version)[0]
# Different parse modes for phase 1
# Each value selects one branch of parseit(); the_modes is the set of legal
# values checked by the Mode wrapper class.
MODE_REGULAR = 0		# ordinary text
MODE_VERBATIM = 1		# NOTE(review): no branch of the visible parseit() handles this mode
MODE_CS_SCAN = 2		# scanning the name of a control sequence
MODE_COMMENT = 3		# scanning a comment up to its end of line
MODE_MATH = 4			# inside $...$
MODE_DMATH = 5			# inside $$...$$
MODE_GOBBLEWHITE = 6		# skipping white space
the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)
def epsilon(buf, where):
    """Show the neighbourhood of position WHERE in the scanned buffer.

    Returns ' Context <before>.<after>.' where <before> and <after> are
    the repr() of up to ten characters on either side of WHERE, clipped
    to the bounds of BUF.
    """
    lo = max(where - 10, 0)
    hi = min(where + 10, len(buf))
    return ' Context %s.%s.' % (repr(buf[lo:where]), repr(buf[where:hi]))
def lin():
    """Return ' Line N.' for the current value of the global `lineno'.

    The original author notes that this "never worked", i.e. the line
    number reported is not reliable.
    """
    global lineno
    return ' Line %s.' % repr(lineno)
def lv(lvl):
    """Return ' Level N.' describing the recursion level LVL."""
    return ' Level %s.' % repr(lvl)
def lle(lvl, buf, where):
    """Combine lv(), lin() and epsilon() into one diagnostic suffix.

    Used often when building error messages: level, line and context,
    in that order.
    """
    return '%s%s%s' % (lv(lvl), lin(), epsilon(buf, where))
class Mode:
    """Symbolic wrapper around one of the MODE_* parse mode constants.

    Only needed so that a parse mode prints as its name instead of as a
    bare integer.
    """

    # printable name for every legal mode value
    _names = {
        MODE_REGULAR: 'MODE_REGULAR',
        MODE_VERBATIM: 'MODE_VERBATIM',
        MODE_CS_SCAN: 'MODE_CS_SCAN',
        MODE_COMMENT: 'MODE_COMMENT',
        MODE_MATH: 'MODE_MATH',
        MODE_DMATH: 'MODE_DMATH',
        MODE_GOBBLEWHITE: 'MODE_GOBBLEWHITE',
    }

    def __init__(self, arg):
        """Wrap ARG; raises ValueError unless ARG is one of the_modes."""
        if arg not in the_modes:
            raise ValueError('mode not in the_modes')
        self.mode = arg

    def __cmp__(self, other):
        """Compare by mode value; OTHER may also be a raw MODE_* constant,
        which is looked up in the module-level `mode' table first."""
        if type(other) != type(self):
            other = mode[other]
        return cmp(self.mode, other.mode)

    def __repr__(self):
        """Return the symbolic name; ValueError for an unknown value."""
        if not self._names.has_key(self.mode):
            raise ValueError('mode not in the_modes')
        return self._names[self.mode]
# Table mapping every MODE_* constant to its Mode wrapper instance, so the
# rest of the file can write mode[MODE_REGULAR] and friends.
mode = {}
for t in the_modes:
    mode.update({t: Mode(t)})
# After phase 1, the text consists of chunks, with a certain type
# this type will be assigned to the chtype member of the chunk
# the where-field contains the file position where this is found
# and the data field contains (1): a tuple describing start and end
# positions of the substring (can be used as slice for the buf-variable),
# (2) just a string, mostly generated by the changeit routine,
# or (3) a list, describing a (recursive) subgroup of chunks
PLAIN = 0 # ASSUME PLAINTEXT, data = the text
GROUP = 1 # GROUP ({}), data = [chunk, chunk,..]
CSNAME = 2 # CONTROL SEQ TOKEN, data = the command
COMMENT = 3 # data is the actual comment
DMATH = 4 # DISPLAYMATH, data = [chunk, chunk,..]
MATH = 5 # MATH, see DISPLAYMATH
OTHER = 6 # CHAR WITH CATCODE OTHER, data = char
ACTIVE = 7 # ACTIVE CHAR
GOBBLEDWHITE = 8 # Gobbled LWSP, after CSNAME
ENDLINE = 9 # END-OF-LINE, data = '\n'
DENDLINE = 10 # DOUBLE EOL, data='\n', indicates \par
ENV = 11 # LaTeX-environment
# data =(envname,[ch,ch,ch,.])
CSLINE = 12 # for texi: next chunk will be one group
# of args. Will be set all on 1 line
IGNORE = 13 # IGNORE this data
ENDENV = 14 # TEMP END OF GROUP INDICATOR
IF = 15 # IF-directive
# data = (flag,negate,[ch, ch, ch,...])
# the_types is the set of legal chunk type values checked by ChunkType
the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF)
class ChunkType:
    """Symbolic wrapper around one of the chunk type constants.

    Exists only so that a chunk type prints as its name.
    """

    # printable name for every legal chunk type value
    _names = {
        PLAIN: 'PLAIN',
        GROUP: 'GROUP',
        CSNAME: 'CSNAME',
        COMMENT: 'COMMENT',
        DMATH: 'DMATH',
        MATH: 'MATH',
        OTHER: 'OTHER',
        ACTIVE: 'ACTIVE',
        GOBBLEDWHITE: 'GOBBLEDWHITE',
        DENDLINE: 'DENDLINE',
        ENDLINE: 'ENDLINE',
        ENV: 'ENV',
        CSLINE: 'CSLINE',
        IGNORE: 'IGNORE',
        ENDENV: 'ENDENV',
        IF: 'IF',
    }

    def __init__(self, chunk_type):
        """Wrap CHUNK_TYPE; raises ValueError unless it is in the_types."""
        if chunk_type not in the_types:
            raise ValueError('chunk_type not in the_types')
        self.chunk_type = chunk_type

    def __cmp__(self, other):
        """Compare by type value; OTHER may also be a raw constant, which
        is looked up in the module-level `chunk_type' table first."""
        if type(other) != type(self):
            other = chunk_type[other]
        return cmp(self.chunk_type, other.chunk_type)

    def __repr__(self):
        """Return the symbolic name; ValueError for an unknown value."""
        if not self._names.has_key(self.chunk_type):
            raise ValueError('chunk_type not in the_types')
        return self._names[self.chunk_type]
# Table mapping every chunk type constant to its ChunkType wrapper instance.
chunk_type = {}
for t in the_types:
    chunk_type.update({t: ChunkType(t)})

# The type object of a ChunkType instance; Chunk.__init__ uses it to tell
# an already wrapped chunk type from a raw constant.
chunk_type_type = type(chunk_type[PLAIN])
1996-09-10 22:19:51 +00:00
class Chunk:
    """One part of the parsed buffer.

    Attributes:
      chtype -- ChunkType instance (a raw constant is wrapped on creation)
      where  -- position in the buffer where the chunk was found
      data   -- (start, end) slice into the buffer, a string, or a list
                of sub-chunks, depending on chtype
    """

    def __init__(self, chtype, where, data):
        if type(chtype) != chunk_type_type:
            chtype = chunk_type[chtype]
        self.chtype = chtype
        self.where = where
        self.data = data

    # chunk types whose data is text: either a string or a buffer slice
    __datatypes = [chunk_type[CSNAME], chunk_type[PLAIN], chunk_type[CSLINE]]

    def __repr__(self):
        """Return 'chunk(chtype, where, data)'.

        For textual chunks the data is shown as text when possible.  A
        chunk is never given a `buf' attribute by this class, so a
        (start, end) slice can only be resolved when somebody attached
        the buffer to the instance; otherwise the raw slice is shown
        instead of failing with AttributeError.
        """
        data = self.data
        if self.chtype in self.__datatypes:
            buf = getattr(self, 'buf', None)
            if buf is not None or type(data) is StringType:
                data = s(buf, data)
        return 'chunk' + repr((self.chtype, self.where, data))
# and the wrapper
1996-09-10 22:19:51 +00:00
# lower-case alias; the rest of the file creates chunks as chunk(type, where, data)
chunk = Chunk
class error(Exception):
    """Raised for every problem found while parsing or converting.

    This used to be the string exception 'partparse.error'.  A class
    works with the existing `raise error, message' and `except error'
    statements and does not depend on string exceptions.
    """
    pass
#
# TeX's catcodes...
#
# Each constant doubles as the index into a 16-element catcode table
# (see epoch_cc and friends) and into cc_names below.
CC_ESCAPE = 0
CC_LBRACE = 1
CC_RBRACE = 2
CC_MATHSHIFT = 3
CC_ALIGNMENT = 4
CC_ENDLINE = 5
CC_PARAMETER = 6
CC_SUPERSCRIPT = 7
CC_SUBSCRIPT = 8
CC_IGNORE = 9
CC_WHITE = 10
CC_LETTER = 11
CC_OTHER = 12
CC_ACTIVE = 13
CC_COMMENT = 14
CC_INVALID = 15
# and the names
# (kept in the same order as the constants above: cc_names[code] is the
# symbolic name of catcode `code')
cc_names = [
'CC_ESCAPE',
'CC_LBRACE',
'CC_RBRACE',
'CC_MATHSHIFT',
'CC_ALIGNMENT',
'CC_ENDLINE',
'CC_PARAMETER',
'CC_SUPERSCRIPT',
'CC_SUBSCRIPT',
'CC_IGNORE',
'CC_WHITE',
'CC_LETTER',
'CC_OTHER',
'CC_ACTIVE',
'CC_COMMENT',
'CC_INVALID',
]
def pcl(codelist):
    """Show a list of catcodes as '[CC_NAME, CC_NAME, ...]'."""
    names = []
    for code in codelist:
        names.append(cc_names[code])
    return '[' + ', '.join(names) + ']'
def pc(code):
    """Return the symbolic name (CC_ACTIVE, CC_OTHER, etc.) of catcode CODE."""
    name = cc_names[code]
    return name
# Which catcodes make the parser stop parsing regular plaintext
regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]
# same for scanning a control sequence name
# (this list holds the codes that *continue* a name; make_rc_cs_scan negates it)
csname_scancodes = [CC_LETTER]
# same for gobbling LWSP
# (again the codes that are skipped; make_rc_endwhite negates the set)
white_scancodes = [CC_WHITE]
##white_scancodes = [CC_WHITE, CC_ENDLINE]
# make a list of all catcode id's, except for catcode ``other''
# (relies on range() returning a list, from which one element is deleted)
all_but_other_codes = range(16)
del all_but_other_codes[CC_OTHER]
##print all_but_other_codes
# when does a comment end
comment_stopcodes = [CC_ENDLINE]
def code2string(cc, codelist):
    """Gather all characters of the catcodes in CODELIST into one string.

    CC is a catcode table indexed by catcode; entries that are empty or
    None contribute nothing.  The characters appear in CODELIST order.
    """
    pieces = []
    for category in codelist:
        chars = cc[category]
        if chars:
            pieces.append(chars)
    return ''.join(pieces)
def make_other_codes(cc):
    """Generate all characters of catcode ``other'' for catcode table CC.

    The result is the complement, within the 256 character codes 0..255,
    of every character that CC assigns to one of all_but_other_codes.
    The characters are returned as one string in ascending order.
    """
    unclaimed = [1] * 256
    for category in all_but_other_codes:
        if cc[category]:
            for c in cc[category]:
                unclaimed[ord(c)] = 0
    others = []
    for code in range(256):
        if unclaimed[code]:
            others.append(chr(code))
    return ''.join(others)
def dump_cc(name, cc):
    """Catcode dump (which characters have which catcodes).

    The actual printing is disabled; what remains is the sanity check
    that CC has exactly 16 entries, else TypeError is raised.  NAME is
    only used by the disabled output.
    """
    if len(cc) == 16:
        return
    raise TypeError('cc not good cat class')
# In the beginning,....
# (a catcode table is a 16-element list indexed by CC_* constant; each
# entry is None or a string of the characters having that catcode)
epoch_cc = [None] * 16
##dump_cc('epoch_cc', epoch_cc)
# INITEX
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
'\n', '\0', ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
##dump_cc('initex_cc', initex_cc)
# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B' # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01' # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C' # '~' and C-l
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
##dump_cc('lplain_cc', lplain_cc)
# Guido's LaTeX environment catcoded '_' as ``other''
# my own purpose catlist
# (this is the table the parser actually uses: the rc_* regexes and
# parseit() are all driven by my_cc)
my_cc = lplain_cc[:]
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_' # add it to OTHER list
dump_cc('my_cc', my_cc)
# characters that un_re() protects with a backslash
re_meaning = '\\[]^$'

def un_re(str):
    """Quote STR for use as a literal in a regular expression.

    My equivalent for regexp-quote in Emacs: every character of STR that
    occurs in re_meaning gets a backslash in front of it.
    """
    quoted = []
    for ch in str:
        if ch in re_meaning:
            quoted.append('\\' + ch)
        else:
            quoted.append(ch)
    return ''.join(quoted)
# NOTE the negate ('^') operator in *some* of the regexps below
def make_rc_regular(cc):
    """Compile a regex matching any character that ends regular plaintext
    under catcode table CC (the characters of regular_stopcodes)."""
    # problems here if '[]' are included!!
    stopchars = code2string(cc, regular_stopcodes)
    return regex.compile('[' + stopchars + ']')
def make_rc_cs_scan(cc):
    """Compile a regex matching the first character that can NOT be part
    of a control sequence name under catcode table CC."""
    namechars = code2string(cc, csname_scancodes)
    return regex.compile('[^' + namechars + ']')
def make_rc_comment(cc):
    """Compile a regex matching any character that ends a comment under
    catcode table CC (the characters of comment_stopcodes)."""
    stopchars = code2string(cc, comment_stopcodes)
    return regex.compile('[' + stopchars + ']')
def make_rc_endwhite(cc):
    """Compile a regex matching the first character that is NOT white
    space under catcode table CC."""
    whitechars = code2string(cc, white_scancodes)
    return regex.compile('[^' + whitechars + ']')
# The four compiled regexes used by parseit(), all built from my_cc.
# regular: normal mode:
rc_regular = make_rc_regular(my_cc)
# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
rc_cs_scan = make_rc_cs_scan(my_cc)
# comment: finds the end of a comment
rc_comment = make_rc_comment(my_cc)
# endwhite: finds the end of a run of white space
rc_endwhite = make_rc_endwhite(my_cc)
# parseit (BUF, PARSEMODE=mode[MODE_REGULAR], START=0, RECURSION-LEVEL=0)
# RECURSION-LEVEL will is incremented on entry.
# result contains the list of chunks returned
# together with this list, the buffer position is returned
# RECURSION-LEVEL will be set to zero *again*, when recursively a
# {,D}MATH-mode scan has been enetered.
# This has been done in order to better check for environment-mismatches
def parseit(buf, parsemode=mode[MODE_REGULAR], start=0, lvl=0):
global lineno
result = []
end = len(buf)
if lvl == 0 and parsemode == mode[MODE_REGULAR]:
lineno = 1
lvl = lvl + 1
##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
#
# some of the more regular modes...
#
if parsemode in (mode[MODE_REGULAR], mode[MODE_DMATH], mode[MODE_MATH]):
cstate = []
newpos = start
curpmode = parsemode
while 1:
where = newpos
#print '\tnew round: ' + epsilon(buf, where)
if where == end:
if lvl > 1 or curpmode != mode[MODE_REGULAR]:
# not the way we started...
raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
# the real ending of lvl-1 parse
return end, result
pos = rc_regular.search(buf, where)
if pos < 0:
pos = end
if pos != where:
newpos, c = pos, chunk(PLAIN, where, (where, pos))
result.append(c)
continue
#
# ok, pos == where and pos != end
#
foundchar = buf[where]
if foundchar in my_cc[CC_LBRACE]:
# recursive subgroup parse...
newpos, data = parseit(buf, curpmode, where+1, lvl)
result.append(chunk(GROUP, where, data))
elif foundchar in my_cc[CC_RBRACE]:
if lvl <= 1:
raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
if lvl == 1 and mode != mode[MODE_REGULAR]:
raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
return where + 1, result
elif foundchar in my_cc[CC_ESCAPE]:
#
# call the routine that actually deals with
# this problem. If do_ret is None, than
# return the value of do_ret
#
# Note that handle_cs might call this routine
# recursively again...
#
do_ret, newpos = handlecs(buf, where,
curpmode, lvl, result, end)
if do_ret != None:
return do_ret
elif foundchar in my_cc[CC_COMMENT]:
newpos, data = parseit(buf,
mode[MODE_COMMENT], where+1, lvl)
result.append(chunk(COMMENT, where, data))
elif foundchar in my_cc[CC_MATHSHIFT]:
# note that recursive calls to math-mode
# scanning are called with recursion-level 0
# again, in order to check for bad mathend
#
if where + 1 != end and buf[where + 1] in my_cc[CC_MATHSHIFT]:
#
# double mathshift, e.g. '$$'
#
if curpmode == mode[MODE_REGULAR]:
newpos, data = parseit(buf, mode[MODE_DMATH],
where + 2, 0)
result.append(chunk(DMATH, where, data))
elif curpmode == mode[MODE_MATH]:
raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
elif lvl != 1:
raise error, 'bad mathend.' + lle(lvl, buf, where)
else:
return where + 2, result
else:
#
# single math shift, e.g. '$'
#
if curpmode == mode[MODE_REGULAR]:
newpos, data = parseit(buf, mode[MODE_MATH],
where + 1, 0)
result.append(chunk(MATH, where, data))
elif curpmode == mode[MODE_DMATH]:
raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
elif lvl != 1:
raise error, 'bad mathend.' + lv(lvl, buf, where)
else:
return where + 1, result
elif foundchar in my_cc[CC_IGNORE]:
print 'warning: ignored char', `foundchar`
newpos = where + 1
elif foundchar in my_cc[CC_ACTIVE]:
result.append(chunk(ACTIVE, where, foundchar))
newpos = where + 1
elif foundchar in my_cc[CC_INVALID]:
raise error, 'invalid char ' + `foundchar`
newpos = where + 1
elif foundchar in my_cc[CC_ENDLINE]:
#
# after an end of line, eat the rest of
# whitespace on the beginning of the next line
# this is what LaTeX more or less does
#
# also, try to indicate double newlines (\par)
#
lineno = lineno + 1
savedwhere = where
newpos, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], where + 1, lvl)
if newpos != end and buf[newpos] in my_cc[CC_ENDLINE]:
result.append(chunk(DENDLINE, savedwhere, foundchar))
else:
result.append(chunk(ENDLINE, savedwhere, foundchar))
else:
result.append(chunk(OTHER, where, foundchar))
newpos = where + 1
elif parsemode == mode[MODE_CS_SCAN]:
#
# scan for a control sequence token. `\ape', `\nut' or `\%'
#
if start == end:
raise EOFError, 'can\'t find end of csname'
pos = rc_cs_scan.search(buf, start)
if pos < 0:
pos = end
if pos == start:
# first non-letter right where we started the search
# ---> the control sequence name consists of one single
# character. Also: don't eat white space...
if buf[pos] in my_cc[CC_ENDLINE]:
lineno = lineno + 1
pos = pos + 1
return pos, (start, pos)
else:
spos = pos
if buf[pos] == '\n':
lineno = lineno + 1
spos = pos + 1
pos2, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], spos, lvl)
return pos2, (start, pos)
elif parsemode == mode[MODE_GOBBLEWHITE]:
if start == end:
return start, ''
pos = rc_endwhite.search(buf, start)
if pos < 0:
pos = start
return pos, (start, pos)
elif parsemode == mode[MODE_COMMENT]:
pos = rc_comment.search(buf, start)
lineno = lineno + 1
if pos < 0:
print 'no newline perhaps?'
raise EOFError, 'can\'t find end of comment'
pos = pos + 1
pos2, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], pos, lvl)
return pos2, (start, pos)
else:
raise error, 'Unknown mode (' + `parsemode` + ')'
#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
#boxcommands = 'mbox', 'fbox'
#defcommands = 'def', 'newcommand'
# literal text that terminates a verbatim environment, and a regex that
# searches for exactly that text (un_re quotes the backslash)
endverbstr = '\\end{verbatim}'
re_endverb = regex.compile(un_re(endverbstr))
#
# handlecs: helper function for parseit, for the special thing we might
# wanna do after certain command control sequences
# returns: None or return_data, newpos
#
# in the latter case, the calling function is instructed to immediately
# return with the data in return_data
#
def handlecs(buf, where, curpmode, lvl, result, end):
global lineno
# get the control sequence name...
newpos, data = parseit(buf, mode[MODE_CS_SCAN], where+1, lvl)
saveddata = data
s_buf_data = s(buf, data)
if s_buf_data in ('begin', 'end'):
# skip the expected '{' and get the LaTeX-envname '}'
newpos, data = parseit(buf, mode[MODE_REGULAR], newpos+1, lvl)
if len(data) != 1:
raise error, 'expected 1 chunk of data.' + lle(lvl, buf, where)
# yucky, we've got an environment
envname = s(buf, data[0].data)
s_buf_saveddata = s(buf, saveddata)
##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
if s_buf_saveddata == 'begin' and envname == 'verbatim':
# verbatim deserves special treatment
pos = re_endverb.search(buf, newpos)
if pos < 0:
raise error, "%s not found.%s" \
% (`endverbstr`, lle(lvl, buf, where))
result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
newpos = pos + len(endverbstr)
elif s_buf_saveddata == 'begin':
# start parsing recursively... If that parse returns
# from an '\end{...}', then should the last item of
# the returned data be a string containing the ended
# environment
newpos, data = parseit(buf, curpmode, newpos, lvl)
if not data or type(data[-1]) is not StringType:
raise error, "missing 'end'" + lle(lvl, buf, where) \
+ epsilon(buf, newpos)
retenv = data[-1]
del data[-1]
if retenv != envname:
#[`retenv`, `envname`]
raise error, 'environments do not match.%s%s' \
% (lle(lvl, buf, where), epsilon(buf, newpos))
result.append(chunk(ENV, where, (retenv, data)))
else:
# 'end'... append the environment name, as just
# pointed out, and order parsit to return...
result.append(envname)
##print 'POINT of return: ' + epsilon(buf, newpos)
# the tuple will be returned by parseit
return (newpos, result), newpos
# end of \begin ... \end handling
elif s_buf_data[0:2] == 'if':
# another scary monster: the 'if' directive
flag = s_buf_data[2:]
# recursively call parseit, just like environment above..
# the last item of data should contain the if-termination
# e.g., 'else' of 'fi'
newpos, data = parseit(buf, curpmode, newpos, lvl)
if not data or data[-1] not in ('else', 'fi'):
raise error, 'wrong if... termination' + \
lle(lvl, buf, where) + epsilon(buf, newpos)
ifterm = data[-1]
del data[-1]
# 0 means dont_negate flag
result.append(chunk(IF, where, (flag, 0, data)))
if ifterm == 'else':
# do the whole thing again, there is only one way
# to end this one, by 'fi'
newpos, data = parseit(buf, curpmode, newpos, lvl)
if not data or data[-1] not in ('fi', ):
raise error, 'wrong if...else... termination' \
+ lle(lvl, buf, where) \
+ epsilon(buf, newpos)
ifterm = data[-1]
del data[-1]
result.append(chunk(IF, where, (flag, 1, data)))
#done implicitely: return None, newpos
elif s_buf_data in ('else', 'fi'):
result.append(s(buf, data))
# order calling party to return tuple
return (newpos, result), newpos
# end of \if, \else, ... \fi handling
elif s(buf, saveddata) == 'verb':
x2 = saveddata[1]
result.append(chunk(CSNAME, where, data))
if x2 == end:
raise error, 'premature end of command.' + lle(lvl, buf, where)
delimchar = buf[x2]
##print 'VERB: delimchar ' + `delimchar`
pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
if pos < 0:
raise error, 'end of \'verb\' argument (' + \
`delimchar` + ') not found.' + \
lle(lvl, buf, where)
result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
newpos = pos + 1
else:
result.append(chunk(CSNAME, where, data))
return None, newpos
def s(buf, data):
    """Return the string value of chunk data DATA.

    DATA is either a string, which is returned unchanged, or a sequence
    of two integers (start, end), in which case buf[start:end] is
    returned.  Anything else raises TypeError.
    """
    if type(data) is type(''):
        return data
    int_type = type(0)
    if len(data) == 2 and type(data[0]) is int_type \
       and type(data[1]) is int_type:
        x1, x2 = data
        return buf[x1:x2]
    raise TypeError('expected tuple of 2 integers')
1996-09-10 22:19:51 +00:00
##length, data1, i = getnextarg(length, buf, pp, i + 1)
def crcopy(r):
    """Make a deep-copy of the chunk list R (see chunkcopy)."""
    copies = []
    for ch in r:
        copies.append(chunkcopy(ch))
    return copies
1996-09-10 22:19:51 +00:00
def chunkcopy(ch):
    """Copy chunk CH; would better be a method of class Chunk...

    A GROUP chunk gets a recursively copied list of sub-chunks; for
    every other chunk type the data object is shared with the original.
    """
    if ch.chtype != chunk_type[GROUP]:
        return chunk(ch.chtype, ch.where, ch.data)
    members = []
    for sub in ch.data:
        members.append(chunkcopy(sub))
    return chunk(GROUP, ch.where, members)
# get next argument for TeX-macro, flatten a group (insert between)
# or return Command Sequence token, or give back one character
def getnextarg(length, buf, pp, item):
##wobj = Wobj()
##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
while item < length and pp[item].chtype == chunk_type[ENDLINE]:
del pp[item]
length = length - 1
if item >= length:
raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
if pp[item].chtype == chunk_type[GROUP]:
newpp = pp[item].data
del pp[item]
length = length - 1
changeit(buf, newpp)
length = length + len(newpp)
pp[item:item] = newpp
item = item + len(newpp)
if len(newpp) < 10:
wobj = Wobj()
dumpit(buf, wobj.write, newpp)
##print 'GETNEXTARG: inserted ' + `wobj.data`
return length, item
elif pp[item].chtype == chunk_type[PLAIN]:
#grab one char
print 'WARNING: grabbing one char'
if len(s(buf, pp[item].data)) > 1:
pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
item, length = item+1, length+1
pp[item].data = s(buf, pp[item].data)[1:]
else:
item = item+1
return length, item
else:
ch = pp[item]
try:
str = `s(buf, ch.data)`
except TypeError:
str = `ch.data`
if len(str) > 400:
str = str[:400] + '...'
print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
return length, item
# this one is needed to find the end of LaTeX's optional argument, like
# item[...]
re_endopt = regex.compile(']')
# get a LaTeX-optional argument, you know, the square braces '[' and ']'
def getoptarg(length, buf, pp, item):
wobj = Wobj()
dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
if item >= length or \
pp[item].chtype != chunk_type[PLAIN] or \
s(buf, pp[item].data)[0] != '[':
return length, item
pp[item].data = s(buf, pp[item].data)[1:]
if len(pp[item].data) == 0:
del pp[item]
length = length-1
while 1:
if item == length:
raise error, 'No end of optional arg found'
if pp[item].chtype == chunk_type[PLAIN]:
text = s(buf, pp[item].data)
pos = re_endopt.search(text)
if pos >= 0:
pp[item].data = text[:pos]
if pos == 0:
del pp[item]
length = length-1
else:
item=item+1
text = text[pos+1:]
while text and text[0] in ' \t':
text = text[1:]
if text:
pp.insert(item, chunk(PLAIN, 0, text))
length = length + 1
return length, item
item = item+1
class Wobj:
    """Collects write-requests in the ``data'' attribute.

    An instance can be used wherever a write function is wanted: pass
    its `write' method and read the accumulated text from `data'.
    """

    # text written so far; instances start out empty
    data = ''

    def write(self, data):
        """Append DATA to the collected text."""
        collected = self.data
        self.data = collected + data
1995-03-15 11:26:26 +00:00
# ignore these commands
# (flattext() deletes them without printing a warning)
ignoredcommands = ('hline', 'small', '/', 'tableofcontents', 'Large')
# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF', 'LaTeX', 'POSIX', 'TeX',
'SliTeX')
# \{ --> {, \} --> }, etc
themselves = ('{', '}', ',', '.', '@', ' ', '\n') + wordsselves
# these ones also themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')
# this is how *I* would show the difference between emph and strong
# code 1 means: fold to uppercase
# (a tuple value is a (prefix, suffix) pair put around the flattened text)
markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'),
'strong': ('*', '*')}
# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}
# try to remove macros and return flat text
#
# flattext(buf, pp) works on a copy of the chunk list pp (made with crcopy),
# rewrites the chunks of that copy and returns the text that dumpit()
# writes for the result.  `length' is maintained by hand next to len(pp);
# a mismatch is reported as a 'FATAL' (string) exception.
#
# NOTE(review): the indentation of this function is not preserved in this
# copy of the file.  In particular it cannot be told from here whether the
# `if hist.inargs ...' / `elif ... onlylatexspecial' / `elif ... markcmds' /
# `else' chain is nested inside `if convertible_csname(...)' or follows it;
# confirm against a pristine copy before restructuring.
def flattext(buf, pp):
pp = crcopy(pp)
##print '---> FLATTEXT ' + `pp`
wobj = Wobj()
i, length = 0, len(pp)
while 1:
if len(pp) != length:
raise 'FATAL', 'inconsistent length'
if i >= length:
break
ch = pp[i]
i = i+1
# from here on i is the index of the chunk *after* ch
if ch.chtype == chunk_type[PLAIN]:
pass
elif ch.chtype == chunk_type[CSNAME]:
s_buf_data = s(buf, ch.data)
# one-to-one conversions come from the table behind
# convertible_csname / conversion
if convertible_csname(s_buf_data):
ch.chtype, ch.data, nix = conversion(s_buf_data)
if hist.inargs and s_buf_data in inargsselves:
ch.chtype = chunk_type[PLAIN]
elif len(s_buf_data) == 1 \
and s_buf_data in onlylatexspecial:
ch.chtype = chunk_type[PLAIN]
# if it is followed by an empty group,
# remove that group, it was needed for
# a true space
if i < length \
and pp[i].chtype==chunk_type[GROUP] \
and len(pp[i].data) == 0:
del pp[i]
length = length-1
elif s_buf_data in markcmds.keys():
# flatten the argument of the command and mark it up as plain text
length, newi = getnextarg(length, buf, pp, i)
str = flattext(buf, pp[i:newi])
del pp[i:newi]
length = length - (newi - i)
ch.chtype = chunk_type[PLAIN]
markcmd = s_buf_data
x = markcmds[markcmd]
if type(x) == TupleType:
pre, after = x
str = pre+str+after
elif x == 1:
str = string.upper(str)
else:
raise 'FATAL', 'corrupt markcmds'
ch.data = str
else:
# any other command is dropped from the flat text
if s_buf_data not in ignoredcommands:
print 'WARNING: deleting command ' + s_buf_data
print 'PP' + `pp[i-1]`
del pp[i-1]
i, length = i-1, length-1
elif ch.chtype == chunk_type[GROUP]:
# replace the group by its contents and look at those next
length, newi = getnextarg(length, buf, pp, i-1)
i = i-1
## str = flattext(buf, crcopy(pp[i-1:newi]))
## del pp[i:newi]
## length = length - (newi - i)
## ch.chtype = chunk_type[PLAIN]
## ch.data = str
else:
pass
dumpit(buf, wobj.write, pp)
##print 'FLATTEXT: RETURNING ' + `wobj.data`
return wobj.data
def invent_node_names(text):
    """Try to generate a node name (a bit shorter than the chapter title).

    Note that the \\nodename command (see elsewhere) overrules these
    efforts.  The rules, tried in this order (case is ignored for the
    keywords):
      'Built-in X'            -> 'X', unless X is modules or functions
      'A module B'            -> 'B'
      'A object B'            -> 'A object'
      more than four words ending in
      'methods and data attributes' -> the first two words
    Any other TEXT is returned unchanged.
    """
    words = text.split()
    lowered = [word.lower() for word in words]
    count = len(words)
    if count == 2 and lowered[0] == 'built-in' \
       and lowered[1] not in ('modules', 'functions'):
        return words[1]
    if count == 3:
        if lowered[1] == 'module':
            return words[2]
        if lowered[1] == 'object':
            return ' '.join(words[0:2])
    if count > 4 \
       and ' '.join(lowered[-4:]) == 'methods and data attributes':
        return ' '.join(words[:2])
    return text
# characters matched by re_commas_etc: , ` ' @ { }
re_commas_etc = regex.compile('[,`\'@{}]')
# a (possibly empty) run of blanks and tabs; used by next_command_p()
re_whitespace = regex.compile('[ \t]*')
##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
# look if the next non-white stuff is also a command, resulting in skipping
# double endlines (DENDLINE) too, and thus omitting \par's
# Sometimes this is too much, maybe consider DENDLINE's as stop
#
# Scans pp from index i, skipping ENDLINE and DENDLINE chunks and PLAIN
# chunks that consist of blanks and tabs only.  Returns the index just
# after the command if the first other chunk is the control sequence
# cmdname, and -1 if it is something else.  `length' is not used.
#
# NOTE(review): the indentation is not preserved in this copy, so it cannot
# be told from here which of the two final `return -1' lines belongs to the
# loop and hence what is returned when pp runs out; confirm before changing.
def next_command_p(length, buf, pp, i, cmdname):
while 1:
if i >= len(pp):
break
ch = pp[i]
i = i+1
if ch.chtype == chunk_type[ENDLINE]:
continue
if ch.chtype == chunk_type[DENDLINE]:
continue
if ch.chtype == chunk_type[PLAIN]:
# white space only: the pattern matches at 0 and spans the whole text
if re_whitespace.search(s(buf, ch.data)) == 0 and \
re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)):
continue
return -1
if ch.chtype == chunk_type[CSNAME]:
if s(buf, ch.data) == cmdname:
return i # _after_ the command
return -1
return -1
# things that are special to LaTeX, but not to texi..
onlylatexspecial = '_~^$#&%'
# Struct is an empty class used as a plain attribute container
class Struct: pass
# hist and out hold the conversion state; startchange() (re)initialises
# their attributes
hist = Struct()
out = Struct()
def startchange():
    """(Re)initialise the conversion state kept in `hist' and `out'.

    Only sets attributes on the two existing module-level objects.
    """
    # sectioning and environment bookkeeping
    hist.chaptertype = "chapter"
    hist.inenv = []
    hist.nodenames = []
    hist.cindex = []
    hist.inargs = 0
    hist.enumeratenesting = 0
    hist.itemizenesting = 0
    hist.this_module = None

    # names that turned out to be used twice
    out.doublenodes = []
    out.doublecindeces = []
# ready-made one-element chunk lists: a blank, a comma plus blank, and a
# `cindex' command line
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]
# the standard variation in symbols for itemize
itemizesymbols = ['bullet', 'minus', 'dots']
# same for enumerate
enumeratesymbols = ['1', 'A', 'a']
# Map of things that convert one-to-one. Each entry is a 3-tuple:
#
# new_chtype, new_data, nix_trailing_empty_group
#
# The table itself is thrown away at the end; only its has_key and get
# methods stay available, as convertible_csname and conversion.
#
d = {}

# commands that all turn into `code':
for name in ('url', 'module', 'function', 'cfunction',
             'keyword', 'method', 'exception', 'constant',
             'email', 'class', 'member', 'cdata', 'ctype',
             'sectcode', 'verb', 'mimetype', 'newsgroup'):
    d[name] = chunk_type[CSNAME], 'code', 0

# commands that keep their own name:
for name in ('emph', 'var', 'strong', 'code', 'kbd', 'key',
             'dfn', 'samp', 'file', 'r', 'i', 't'):
    d[name] = chunk_type[CSNAME], name, 0

# commands that convert from one name to another:
for name, value in [('character', 'samp'), ('program', 'strong'),
                    ('\\', '*')]:
    d[name] = chunk_type[CSNAME], value, 0

# stuff that converts to text; the entries for wordsselves are written
# last, so for those the trailing empty group is nixed:
for name in themselves + tuple(',[]()'):
    d[name] = chunk_type[PLAIN], name, 0
for name in wordsselves:
    d[name] = chunk_type[PLAIN], name, 1

# a lot of these are LaTeX2e additions
for name, value in [('quotedblbase', ',,'), ('quotesinglbase', ','),
                    ('textquotedbl', '"'), ('LaTeXe', 'LaTeX2e'),
                    ('e', '\\'), ('textquotedblleft', "``"),
                    ('textquotedblright', "''"), ('textquoteleft', "`"),
                    ('textquoteright', "'"), ('textbackslash', '\\'),
                    ('textbar', '|'), ('textless', '<'),
                    ('textgreater', '>'), ('textasciicircum', '^'),
                    ('Cpp', 'C++'), ('copyright', '')]:
    d[name] = chunk_type[PLAIN], value, 1

convertible_csname = d.has_key
conversion = d.get
del d, name, value
##
## \begin{ {func,data,exc}desc }{name}...
## the resulting texi-code is dependent on the contents of indexsubitem
##
# indexsubitem: `['XXX', 'function']
# funcdesc:
# deffn {`idxsi`} NAME (FUNCARGS)
# indexsubitem: `['XXX', 'method']`
# funcdesc:
# defmethod {`idxsi[0]`} NAME (FUNCARGS)
# indexsubitem: `['in', 'module', 'MODNAME']'
# datadesc:
# defcv data {`idxsi[1:]`} NAME
# excdesc:
# defcv exception {`idxsi[1:]`} NAME
# funcdesc:
# deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
# indexsubitem: `['OBJECT', 'attribute']'
# datadesc
# defcv attribute {`OBJECT`} NAME
## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
## or \funcline{NAME}{ARGS}
##
def do_funcdesc(length, buf, pp, i, index=1):
    """Turn the command chunk pp[I-1] plus its two arguments into a
    texinfo `deffn' command line.

    The chunks of the NAME and ARGS arguments are taken out of PP;
    pp[I-1] becomes a CSLINE chunk `deffn' and one GROUP chunk with the
    complete line (category, name, arguments between parentheses) is
    inserted at pp[I].  The category is 'function of MODULE' when
    hist.this_module is set and 'built-in function' otherwise.

    Arguments:
      length -- number of chunks in PP
      buf    -- the complete text the chunks refer to
      pp     -- chunk list, modified in place
      i      -- index of the first argument chunk
      index  -- not used by this routine

    Returns (length, i): the new length of PP and the index after the
    inserted group.  Sets hist.command to 'deffn'.
    """
    ch = pp[i-1]
    wh = ch.where

    # first argument: the function name
    length, newi = getnextarg(length, buf, pp, i)
    funcname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    # second argument: the parameters, converted with hist.inargs set;
    # the flag is restored even when the conversion fails
    save = hist.inargs
    hist.inargs = 1
    try:
        length, newi = getnextarg(length, buf, pp, i)
    finally:
        hist.inargs = save
    the_args = [chunk(PLAIN, wh, '(')] + pp[i:newi] + \
               [chunk(PLAIN, wh, ')')]
    del pp[i:newi]
    length = length - (newi-i)

    command = 'deffn'
    if hist.this_module:
        cat_class = 'function of ' + hist.this_module
    else:
        cat_class = 'built-in function'
    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)]),
                 chunk(PLAIN, wh, ' '),
                 funcname,
                 chunk(PLAIN, wh, ' ')]
    cslinearg[len(cslinearg):] = the_args

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    i, length = i+1, length+1
    hist.command = command
    return length, i
## this routine will be called on \begin{excdesc}{NAME}
## or \excline{NAME}
##
def do_excdesc(length, buf, pp, i):
    """Turn the command chunk pp[I-1] plus its NAME argument into a
    texinfo definition command line.

    The command is `defvr' with category 'exception base class' when
    hist.indexsubitem is ['built-in', 'exception', 'base', 'class'],
    and `defcv' with category 'exception' otherwise.  The chunks of the
    argument are taken out of PP, pp[I-1] becomes the CSLINE chunk and
    one GROUP chunk with the complete line is inserted at pp[I].

    Returns (length, i): the new length of PP and the index after the
    inserted group.  Sets hist.command to the command chosen.
    """
    ch = pp[i-1]
    wh = ch.where

    # the only argument: the exception name
    length, newi = getnextarg(length, buf, pp, i)
    excname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    if hist.indexsubitem == ['built-in', 'exception', 'base', 'class']:
        command, cat_class = 'defvr', 'exception base class'
    else:
        command, cat_class = 'defcv', 'exception'
    # no class is ever given for an exception, so no class group is
    # put on the line
    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)]),
                 chunk(PLAIN, wh, ' '),
                 excname]
    pp.insert(i, chunk(GROUP, wh, cslinearg))
    hist.command = command
    return length + 1, i + 1
## same for datadesc or dataline...
def do_datadesc(length, buf, pp, i, index=1):
    """Turn the command chunk pp[I-1] plus its NAME argument into a
    texinfo `defcv' command line.

    Category and class are derived from the words in hist.indexsubitem:
      [..., 'attribute'] or [..., 'option']
                                  -> that last word, class = the rest
      ['in', 'module', M]         -> 'data', class = 'module M'
      ['data', 'in', 'module', M] -> 'data', class = 'module M'
      anything else               -> 'data', class = all the words
    An empty word list gives category 'data' without a class.

    The chunks of the argument are taken out of PP, pp[I-1] becomes the
    CSLINE chunk and one GROUP chunk with the complete line is inserted
    at pp[I].  INDEX is not used by this routine.

    Returns (length, i): the new length of PP and the index after the
    inserted group.  Sets hist.command to 'defcv'.
    """
    ch = pp[i-1]
    wh = ch.where

    # the only argument: the name of the data item
    length, newi = getnextarg(length, buf, pp, i)
    dataname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    idxsi = hist.indexsubitem       # words
    command = 'defcv'
    cat_class = 'data'
    class_class = ''
    # test for an empty list first: idxsi[-1] would fail on it
    if idxsi and idxsi[-1] in ('attribute', 'option'):
        cat_class = idxsi[-1]
        class_class = string.join(idxsi[:-1])
    elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
        class_class = string.join(idxsi[1:])
    elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
        class_class = string.join(idxsi[2:])
    else:
        class_class = string.join(idxsi)

    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
    cslinearg.append(chunk(PLAIN, wh, ' '))
    if class_class:
        cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
        cslinearg.append(chunk(PLAIN, wh, ' '))
    cslinearg.append(dataname)

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    i, length = i+1, length+1
    hist.command = command
    return length, i
## this routine will be called on \begin{opcodedesc}{NAME}{ARGS}
##
## On entry pp[i-1] is the chunk of the command itself and pp[i] is the
## start of {NAME}.  The command chunk becomes a CSLINE 'deffn' chunk.
## The name argument is removed from pp, and the chunk that then sits at
## pp[i] is wrapped, together with the category text and the name, into a
## single GROUP chunk stored back at pp[i].
##
## Unlike the other do_*desc routines the returned index is NOT advanced
## past that GROUP chunk, so the caller's loop visits it next.
##
## Returns (length, i).  hist.command is set to 'deffn'.
##
def do_opcodedesc(length, buf, pp, i):
    ch = pp[i-1]
    wh = ch.where

    length, newi = getnextarg(length, buf, pp, i)
    dataname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    # use the chunk_type table like every other routine does, instead of
    # storing the bare CSLINE constant
    ch.chtype = chunk_type[CSLINE]
    ch.data = "deffn"

    # NOTE(review): the category text appears twice (once as escaped
    # PLAIN text, once as a GROUP) -- kept as found; confirm whether both
    # are really wanted in the @deffn line.
    cslinearg = [chunk(PLAIN, wh, 'byte\ code\ instruction'),
                 chunk(GROUP, wh, [chunk(PLAIN, wh, "byte code instruction")]),
                 chunk(PLAIN, wh, ' '),
                 dataname,
                 chunk(PLAIN, wh, ' '),
                 pp[i],
                 ]
    pp[i] = chunk(GROUP, wh, cslinearg)
    hist.command = ch.data
    return length, i
def add_module_index(pp, length, i, buf, ch, extra, ref=1):
ch.chtype = chunk_type[CSLINE]
ch.data = 'pindex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
if not ref:
if len(ingroupch) == 1:
hist.this_module = s(buf, ch.data)
else:
hist.this_module = None
print 'add_module_index() error ==>', ingroupch
if extra:
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, extra)]))
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
return length+1, i+1
def yank_indexsubitem(pp, length, i, buf, ch, cmdname):
stuff = pp[i].data
if len(stuff) != 1:
raise error, "first parameter to \\%s too long" % cmdname
if pp[i].chtype != chunk_type[GROUP]:
raise error, "bad chunk type following \\%s" \
"\nexpected GROUP, got %s" + (cmdname, str(ch.chtype))
text = s(buf, stuff[0].data)
if text[:1] != '(' or text[-1:] != ')':
raise error, \
'expected indexsubitem enclosed in parenteses'
hist.indexsubitem = string.split(text[1:-1])
del pp[i-1:i+1]
return length - 2, i - 1
# Index commands whose entries are "regular", i.e. not set in tt font by
# default.
regindices = ('cindex',)
# remove illegal characters from node names
def rm_commas_etc(text):
result = ''
changed = 0
while 1:
pos = re_commas_etc.search(text)
if pos >= 0:
changed = 1
result = result + text[:pos]
text = text[pos+1:]
else:
result = result + text
break
if changed:
print 'Warning: nodename changed to ' + `result`
return result
# Boolean flags that \if... chunks are evaluated against; a flag has to
# be listed here to be usable.
flags = {
    'texi': 1,
}

# Maps the name given in a \label{} to the name of a node.
label_nodes = {}
##
## changeit: the actual routine, that changes the contents of the parsed
## chunks
##
## buf is the text the chunks refer to; pp is the list of chunks, which
## is rewritten IN PLACE.  Nothing is returned.
##
## The loop walks pp with index i; 'length' is maintained by hand and must
## equal len(pp) at the top of every iteration (this is checked).  Note
## that i is incremented right after the current chunk is fetched, so
## inside the branches pp[i-1] is the current chunk and pp[i] the one
## following it.
##
## Chunks are dispatched on their type:
##   GROUP   font-change groups, the \e and fulllineitems special cases,
##           and recursion into the group's contents
##   IF      replaced by its contents, or dropped, depending on 'flags'
##   ENV     replaced by its contents plus a trailing ENDENV chunk;
##           the environment name is pushed on hist.inenv
##   ENDENV  pops hist.inenv and emits the matching `end' line
##   CSNAME  one branch per control sequence name
##
def changeit(buf, pp):
global onlylatexspecial, hist, out
i, length = 0, len(pp)
while 1:
# sanity check: length should always equal len(pp)
if len(pp) != length:
print i, pp[i]
raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
if i >= length:
break
ch = pp[i]
i = i + 1
if type(ch) is StringType:
#normally, only chunks are present in pp,
# but in some cases, some extra info
# has been inserted, e.g., the \end{...} clauses
# NOTE(review): `end` is not assigned anywhere in this function; unless
# it is a module-level name, this line raises NameError rather than
# 'FATAL' -- confirm.
raise 'FATAL', 'got string, probably too many ' + `end`
if ch.chtype == chunk_type[GROUP]:
# check for {\em ...} constructs
data = ch.data
if data and \
data[0].chtype == chunk_type[CSNAME] and \
fontchanges.has_key(s(buf, data[0].data)):
# the font command is taken out of the group and its replacement from
# fontchanges is inserted in front of the group
k = s(buf, data[0].data)
del data[0]
pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
length, i = length+1, i+1
elif data:
if len(data) \
and data[0].chtype == chunk_type[GROUP] \
and len(data[0].data) \
and data[0].data[0].chtype == chunk_type[CSNAME] \
and s(buf, data[0].data[0].data) == 'e':
data[0] = data[0].data[0]
print "invoking \\e magic group transform..."
else:
## print "GROUP -- ch.data[0].data =", ch.data[0].data
k = s(buf, data[0].data)
if k == "fulllineitems":
# splice the rest of the group into pp in place of the group itself and
# revisit from the same position
del data[0]
pp[i-1:i] = data
i = i - 1
length = length + len(data) - 1
continue
# recursively parse the contents of the group
changeit(buf, data)
elif ch.chtype == chunk_type[IF]:
# \if...
flag, negate, data = ch.data
##print 'IF: flag, negate = ' + `flag, negate`
if flag not in flags.keys():
raise error, 'unknown flag ' + `flag`
value = flags[flag]
if negate:
value = (not value)
# the IF chunk itself always goes away ...
del pp[i-1]
length, i = length-1, i-1
# ... and its contents take its place only when the condition holds
if value:
pp[i:i] = data
length = length + len(data)
elif ch.chtype == chunk_type[ENV]:
# \begin{...} ....
envname, data = ch.data
#push this environment name on stack
hist.inenv.insert(0, envname)
#append an endenv chunk after grouped data
data.append(chunk(ENDENV, ch.where, envname))
##[`data`]
#delete this object
del pp[i-1]
i, length = i-1, length-1
#insert found data
pp[i:i] = data
length = length + len(data)
# from here on pp[i] is the first chunk of the environment's contents
if envname == 'verbatim':
pp[i:i] = [chunk(CSLINE, ch.where, 'example'),
chunk(GROUP, ch.where, [])]
length, i = length+2, i+2
elif envname in ('itemize', 'list', 'fulllineitems'):
# NOTE(review): with `>` a nesting level equal to len(itemizesymbols)
# passes this check and is then used as an index into itemizesymbols
# below -- looks like it should be `>=`; confirm.
if hist.itemizenesting > len(itemizesymbols):
raise error, 'too deep itemize nesting'
if envname == 'list':
del pp[i:i+2]
length = length - 2
ingroupch = [chunk(CSNAME, ch.where,
itemizesymbols[hist.itemizenesting])]
hist.itemizenesting = hist.itemizenesting + 1
pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),
chunk(GROUP, ch.where, ingroupch)]
length, i = length+2, i+2
elif envname == 'enumerate':
# NOTE(review): same `>` versus `>=` question as for itemize above.
if hist.enumeratenesting > len(enumeratesymbols):
raise error, 'too deep enumerate nesting'
ingroupch = [chunk(PLAIN, ch.where,
enumeratesymbols[hist.enumeratenesting])]
hist.enumeratenesting = hist.enumeratenesting + 1
pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),
chunk(GROUP, ch.where, ingroupch)]
length, i = length+2, i+2
elif envname == 'description':
ingroupch = [chunk(CSNAME, ch.where, 'b')]
pp[i:i] = [chunk(CSLINE, ch.where, 'table'),
chunk(GROUP, ch.where, ingroupch)]
length, i = length+2, i+2
elif (envname == 'tableiii') or (envname == 'tableii'):
# ltable is the number of heading arguments that follow
if (envname == 'tableii'):
ltable = 2
else:
ltable = 3
wh = ch.where
newcode = []
#delete tabular format description
# e.g., {|l|c|l|}
length, newi = getnextarg(length, buf, pp, i)
del pp[i:newi]
length = length - (newi-i)
newcode.append(chunk(CSLINE, wh, 'table'))
ingroupch = [chunk(CSNAME, wh, 'asis')]
newcode.append(chunk(GROUP, wh, ingroupch))
newcode.append(chunk(CSLINE, wh, 'item'))
#get the name of macro for @item
# e.g., {code}
length, newi = getnextarg(length, buf, pp, i)
if newi-i != 1:
raise error, 'Sorry, expected 1 chunk argument'
if pp[i].chtype != chunk_type[PLAIN]:
raise error, 'Sorry, expected plain text argument'
# remembered for the \lineii / \lineiii commands inside this table
hist.itemargmacro = s(buf, pp[i].data)
if convertible_csname(hist.itemargmacro):
hist.itemargmacro = conversion(hist.itemargmacro)[1]
del pp[i:newi]
length = length - (newi-i)
itembody = []
# each heading argument is wrapped as @emph{...}; the first one becomes
# the @item argument, the others form the item body
for count in range(ltable):
length, newi = getnextarg(length, buf, pp, i)
emphgroup = [
chunk(CSNAME, wh, 'emph'),
chunk(GROUP, 0, pp[i:newi])]
del pp[i:newi]
length = length - (newi-i)
if count == 0:
itemarg = emphgroup
elif count == ltable-1:
itembody = itembody + \
[chunk(PLAIN, wh, ' --- ')] + emphgroup
else:
itembody = emphgroup
newcode.append(chunk(GROUP, wh, itemarg))
newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
pp[i:i] = newcode
l = len(newcode)
length, i = length+l, i+l
del newcode, l
if length != len(pp):
raise 'STILL, SOMETHING wrong', `i`
# For the *desc environments below an empty PLAIN chunk is inserted first;
# the do_*desc routines take pp[i-1] as the command chunk and turn it into
# the CSLINE chunk.
elif envname in ('methoddesc', 'methoddescni'):
# the optional argument is removed and not used any further here
length, newi = getoptarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
#
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_funcdesc(length, buf, pp, i,
envname[-2:] != "ni")
elif envname in ('memberdesc', 'memberdescni'):
length, newi = getoptarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
#
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_datadesc(length, buf, pp, i,
envname[-2:] != "ni")
elif envname in ('funcdesc', 'funcdescni', 'classdesc'):
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_funcdesc(length, buf, pp, i,
envname[-2:] != "ni")
elif envname == 'excdesc':
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_excdesc(length, buf, pp, i)
elif envname in ('datadesc', 'datadescni'):
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_datadesc(length, buf, pp, i,
envname[-2:] != "ni")
elif envname == 'opcodedesc':
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_opcodedesc(length, buf, pp, i)
elif envname == 'seealso':
chunks = [chunk(ENDLINE, ch.where, "\n"),
chunk(CSNAME, ch.where, "b"),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, "See also: ")]),
chunk(ENDLINE, ch.where, "\n"),
chunk(ENDLINE, ch.where, "\n")]
pp[i-1:i] = chunks
length = length + len(chunks) - 1
i = i + len(chunks) - 1
elif envname in ('sloppypar', 'flushleft', 'document'):
pass
else:
print 'WARNING: don\'t know what to do with env ' + `envname`
elif ch.chtype == chunk_type[ENDENV]:
# end of an environment: it has to match the innermost open one
envname = ch.data
if envname != hist.inenv[0]:
raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
del hist.inenv[0]
del pp[i-1]
i, length = i-1, length-1
if envname == 'verbatim':
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'example')])]
i, length = i+2, length+2
elif envname in ('itemize', 'list', 'fulllineitems'):
hist.itemizenesting = hist.itemizenesting - 1
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'itemize')])]
i, length = i+2, length+2
elif envname == 'enumerate':
hist.enumeratenesting = hist.enumeratenesting-1
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'enumerate')])]
i, length = i+2, length+2
elif envname == 'description':
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'table')])]
i, length = i+2, length+2
elif (envname == 'tableiii') or (envname == 'tableii'):
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'table')])]
i, length = i+2, length + 2
pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
i, length = i+1, length+1
elif envname in ('funcdesc', 'excdesc', 'datadesc', 'classdesc',
'funcdescni', 'datadescni',
'methoddesc', 'memberdesc',
'methoddescni', 'memberdescni',
):
# hist.command is whatever the last do_*desc call stored
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, hist.command)])]
i, length = i+2, length+2
elif envname == 'opcodedesc':
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, "deffn")])]
i, length = i+2, length+2
elif envname in ('seealso', 'sloppypar', 'flushleft', 'document'):
pass
else:
print 'WARNING: ending env %s has no actions' % `envname`
elif ch.chtype == chunk_type[CSNAME]:
# control name transformations
s_buf_data = s(buf, ch.data)
if s_buf_data == 'optional':
# \optional{...} --> [...]
pp[i-1].chtype = chunk_type[PLAIN]
pp[i-1].data = '['
if (i < length) and \
(pp[i].chtype == chunk_type[GROUP]):
cp=pp[i].data
pp[i:i+1]=cp + [
chunk(PLAIN, ch.where, ']')]
length = length+len(cp)
elif s_buf_data in ignoredcommands:
del pp[i-1]
i, length = i-1, length-1
elif s_buf_data == '@' and \
i != length and \
pp[i].chtype == chunk_type[PLAIN] and \
s(buf, pp[i].data)[0] == '.':
# \@. --> \. --> @.
ch.data = '.'
del pp[i]
length = length - 1
elif convertible_csname(s_buf_data):
ch.chtype, ch.data, nix = conversion(s_buf_data)
# when 'nix' is set, an empty group directly after the command is dropped
try:
if nix and pp[i].chtype == chunk_type[GROUP] \
and len(pp[i].data) == 0:
del pp[i]
length = length - 1
except IndexError:
pass
elif s_buf_data == '\\':
# \\ --> \* --> @*
ch.data = '*'
elif len(s_buf_data) == 1 and \
s_buf_data in onlylatexspecial:
ch.chtype = chunk_type[PLAIN]
# check if such a command is followed by
# an empty group: e.g., `\%{}'. If so, remove
# this empty group too
if i < length and \
pp[i].chtype == chunk_type[GROUP] \
and len(pp[i].data) == 0:
del pp[i]
length = length-1
elif s_buf_data == "appendix":
hist.chaptertype = "appendix"
del pp[i-1]
i, length = i-1, length-1
elif hist.inargs and s_buf_data in inargsselves:
# This is the special processing of the
# arguments of the \begin{funcdesc}... or
# \funcline... arguments
# \, --> , \[ --> [, \] --> ]
ch.chtype = chunk_type[PLAIN]
elif s_buf_data == 'setindexsubitem':
length, i = yank_indexsubitem(pp, length, i, buf, ch,
'setindexsubitem')
elif s_buf_data == 'withsubitem':
# the new subitem only applies while the second argument is processed;
# the previous one is restored afterwards, also on errors
oldsubitem = hist.indexsubitem
try:
length, i = yank_indexsubitem(pp, length, i, buf, ch,
'withsubitem')
stuff = pp[i].data
del pp[i]
length = length - 1
changeit(buf, stuff)
stuff = None
finally:
hist.indexsubitem = oldsubitem
elif s_buf_data in ('textrm', 'pytype'):
# command and group are replaced by the bare contents of the group
stuff = pp[i].data
pp[i-1:i+1] = stuff
length = length - 2 + len(stuff)
stuff = None
i = i - 1
elif s_buf_data == 'newcommand':
# the command and the two chunks following it are dropped
print "ignoring definition of \\" + s(buf, pp[i].data[0].data)
del pp[i-1:i+2]
i = i - 1
length = length - 3
elif s_buf_data == 'renewcommand':
print "ignoring redefinition of \\" \
+ s(buf, pp[i].data[0].data)
del pp[i-1:i+2]
i = i - 1
length = length - 3
elif s_buf_data == 'mbox':
stuff = pp[i].data
pp[i-1:i+1] = stuff
i = i - 1
length = length + len(stuff) - 2
stuff = None
elif s_buf_data == 'version':
ch.chtype = chunk_type[PLAIN]
ch.data = release_version
elif s_buf_data == 'item':
ch.chtype = chunk_type[CSLINE]
length, newi = getoptarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
changeit(buf, ingroupch) # catch stuff inside the optional arg
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
i, length = i+1, length+1
elif s_buf_data == 'ttindex':
# choose the index command from the words of the current index subitem
idxsi = hist.indexsubitem
cat_class = ''
if len(idxsi) >= 2 and idxsi[1] in \
('method', 'function', 'protocol'):
command = 'findex'
elif len(idxsi) >= 2 and idxsi[1] in \
('exception', 'object'):
command = 'vindex'
elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
command = 'cindex'
elif len(idxsi) == 3 and idxsi[:2] == ['class', 'in']:
command = 'findex'
else:
print 'WARNING: can\'t categorize ' + `idxsi` \
+ ' for \'ttindex\' command'
command = 'cindex'
if not cat_class:
cat_class = '(%s)' % string.join(idxsi)
ch.chtype = chunk_type[CSLINE]
ch.data = command
length, newi = getnextarg(length, buf, pp, i)
arg = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
cat_arg = [chunk(PLAIN, ch.where, cat_class)]
# determine what should be set in roman, and
# what in tt-font
if command in regindices:
arg = [chunk(CSNAME, ch.where, 't'),
chunk(GROUP, ch.where, arg)]
else:
cat_arg = [chunk(CSNAME, ch.where, 'r'),
chunk(GROUP, ch.where, cat_arg)]
ingroupch = arg + \
[chunk(PLAIN, ch.where, ' ')] + \
cat_arg
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'ldots':
# \ldots --> \dots{} --> @dots{}
ch.data = 'dots'
if i == length \
or pp[i].chtype != chunk_type[GROUP] \
or pp[i].data != []:
pp.insert(i, chunk(GROUP, ch.where, []))
i, length = i+1, length+1
elif s_buf_data in themselves:
# \UNIX --> &UNIX;
ch.chtype = chunk_type[PLAIN]
if i != length \
and pp[i].chtype == chunk_type[GROUP] \
and pp[i].data == []:
del pp[i]
length = length-1
elif s_buf_data == 'manpage':
# \manpage{name}{section}: the command becomes `emph' and the second
# argument becomes the plain text "(section)"
ch.data = 'emph'
sect = s(buf, pp[i+1].data[0].data)
pp[i+1].data = "(%s)" % sect
pp[i+1].chtype = chunk_type[PLAIN]
elif s_buf_data == 'envvar':
# this should do stuff in the index, too...
ch.data = "$"
ch.chtype = chunk_type[PLAIN]
pp[i] = pp[i].data[0]
elif s_buf_data == 'regexp':
# the command becomes `code' and a double quote is put on either side of
# the command-plus-argument pair
ch.data = 'code'
pp.insert(i+1, chunk(PLAIN, ch.where, '"'))
pp.insert(i-1, chunk(PLAIN, ch.where, '"'))
length = length + 2
i = i + 1
elif s_buf_data in ('lineiii', 'lineii'):
# This is the most tricky one
# \lineiii{a1}{a2}[{a3}] -->
# @item @<cts. of itemargmacro>{a1}
# a2 [ -- a3]
#
if not hist.inenv:
raise error, 'no environment for lineiii'
if (hist.inenv[0] != 'tableiii') and \
(hist.inenv[0] != 'tableii'):
raise error, \
'wrong command (%s) in wrong environment (%s)' \
% (s_buf_data, `hist.inenv[0]`)
ch.chtype = chunk_type[CSLINE]
ch.data = 'item'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = [chunk(CSNAME, 0, hist.itemargmacro),
chunk(GROUP, 0, pp[i:newi])]
del pp[i:newi]
length = length - (newi-i)
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
grouppos = i
i, length = i+1, length+1
# step over the second argument, then look for a third one
length, i = getnextarg(length, buf, pp, i)
length, newi = getnextarg(length, buf, pp, i)
if newi > i:
# we have a 3rd arg
pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
i = newi + 1
length = length + 1
if length != len(pp):
raise 'IN LINEIII IS THE ERR', `i`
elif s_buf_data in ('chapter', 'section',
'subsection', 'subsubsection'):
#\xxxsection{A} ---->
# @node A, , ,
# @xxxsection A
## also: remove commas and quotes
hist.this_module = None
if s_buf_data == "chapter":
ch.data = hist.chaptertype
ch.chtype = chunk_type[CSLINE]
length, newi = getnextarg(length, buf, pp, i)
# the node name is taken from a following `nodename' command when
# next_command_p() finds one, otherwise from the section title itself
afternodenamecmd = next_command_p(length, buf,
pp, newi, 'nodename')
if afternodenamecmd < 0:
cp1 = crcopy(pp[i:newi])
pp[i:newi] = [chunk(GROUP, ch.where, pp[i:newi])]
length, newi = length - (newi-i) + 1, i+1
text = flattext(buf, cp1)
text = invent_node_names(text)
else:
length, endarg = getnextarg(length, buf,
pp, afternodenamecmd)
cp1 = crcopy(pp[afternodenamecmd:endarg])
del pp[newi:endarg]
length = length - (endarg-newi)
pp[i:newi] = [chunk(GROUP, ch.where, pp[i:newi])]
length, newi = length - (newi-i) + 1, i + 1
text = flattext(buf, cp1)
if text[-1] == '.':
text = text[:-1]
if text in hist.nodenames:
print 'WARNING: node name ' + `text` + ' already used'
out.doublenodes.append(text)
else:
hist.nodenames.append(text)
text = rm_commas_etc(text)
# the @node line goes in front of the sectioning command
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'node'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, text+', , ,')
])]
i, length = newi+2, length+2
elif s_buf_data == 'funcline':
# fold it to a very short environment
# i.e. first close the running description with an @end line, then
# start a new one
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, hist.command)])]
i, length = i+2, length+2
length, i = do_funcdesc(length, buf, pp, i)
elif s_buf_data == 'dataline':
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, hist.command)])]
i, length = i+2, length+2
length, i = do_datadesc(length, buf, pp, i)
elif s_buf_data == 'excline':
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, hist.command)])]
i, length = i+2, length+2
length, i = do_excdesc(length, buf, pp, i)
elif s_buf_data == 'index':
#\index{A} --->
# @cindex A
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'bifuncindex':
#\bifuncindex{A} --->
# @findex A @r{(built-in function)}
ch.chtype = chunk_type[CSLINE]
ch.data = 'findex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where,
'(built-in function)')]))
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'obindex':
#\obindex{A} --->
# @findex A @r{(object)}
ch.chtype = chunk_type[CSLINE]
ch.data = 'findex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where,
'(object)')]))
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'opindex':
#\opindex{A} --->
# @findex A @r{(operator)}
ch.chtype = chunk_type[CSLINE]
ch.data = 'findex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where,
'(operator)')]))
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
# the module index commands share add_module_index(); the last argument
# is true for the `ref...' variants
elif s_buf_data in ('bimodindex', 'refbimodindex'):
length, i = add_module_index(
pp, length, i, buf, ch, '(built-in)',
(s_buf_data[:3] == 'ref'))
elif s_buf_data in ('modindex', 'refmodindex'):
length, i = add_module_index(
pp, length, i, buf, ch, '',
(s_buf_data[:3] == 'ref'))
elif s_buf_data in ('stmodindex', 'refstmodindex'):
length, i = add_module_index(
pp, length, i, buf, ch, '(standard)',
(s_buf_data[:3] == 'ref'))
elif s_buf_data in ('exmodindex', 'refexmodindex'):
length, i = add_module_index(
pp, length, i, buf, ch, '(extension)',
(s_buf_data[:3] == 'ref'))
elif s_buf_data == 'stindex':
# XXX must actually go to newindex st
# s_buf_data is 'stindex' in this branch, so `what' is always "statement"
what = (s_buf_data[:2] == "st") and "statement" or "keyword"
wh = ch.where
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = [chunk(CSNAME, wh, 'code'),
chunk(GROUP, wh, pp[i:newi])]
del pp[i:newi]
length = length - (newi-i)
# two entries:  @cindex @code{A} statement  and  @cindex statement, @code{A}
t = ingroupch[:]
t.append(chunk(PLAIN, wh, ' ' + what))
pp.insert(i, chunk(GROUP, wh, t))
i, length = i+1, length+1
pp.insert(i, chunk(CSLINE, wh, 'cindex'))
i, length = i+1, length+1
t = ingroupch[:]
t.insert(0, chunk(PLAIN, wh, what + ', '))
pp.insert(i, chunk(GROUP, wh, t))
i, length = i+1, length+1
elif s_buf_data == 'indexii':
#\indexii{A}{B} --->
# @cindex A B
# @cindex B, A
length, newi = getnextarg(length, buf, pp, i)
cp11 = pp[i:newi]
cp21 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp12 = pp[i:newi]
cp22 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
pp.insert(i, chunk(GROUP, ch.where, cp11 + [
chunk(PLAIN, ch.where, ' ')] + cp12))
i, length = i+1, length+1
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
chunk(GROUP, ch.where, cp22 + [
chunk(PLAIN, ch.where, ', ')]+ cp21)]
i, length = i+2, length+2
elif s_buf_data == 'indexiii':
#\indexiii{A}{B}{C} --->
# @cindex A B C
# @cindex B C, A
# @cindex C, A B
# (cpNM is the copy of argument M that is used in entry N)
length, newi = getnextarg(length, buf, pp, i)
cp11 = pp[i:newi]
cp21 = crcopy(pp[i:newi])
cp31 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp12 = pp[i:newi]
cp22 = crcopy(pp[i:newi])
cp32 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp13 = pp[i:newi]
cp23 = crcopy(pp[i:newi])
cp33 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
pp.insert(i, chunk(GROUP, ch.where, cp11 + [
chunk(PLAIN, ch.where, ' ')] + cp12
+ [chunk(PLAIN, ch.where, ' ')]
+ cp13))
i, length = i+1, length+1
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
chunk(GROUP, ch.where, cp22 + [
chunk(PLAIN, ch.where, ' ')]+ cp23
+ [chunk(PLAIN, ch.where, ', ')] +
cp21)]
i, length = i+2, length+2
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
chunk(GROUP, ch.where, cp33 + [
chunk(PLAIN, ch.where, ', ')]+ cp31
+ [chunk(PLAIN, ch.where, ' ')] +
cp32)]
i, length = i+2, length+2
elif s_buf_data == 'indexiv':
# same scheme with four arguments and four entries; spacech, commach and
# cindexch are defined outside this function
length, newi = getnextarg(length, buf, pp, i)
cp11 = pp[i:newi]
cp21 = crcopy(pp[i:newi])
cp31 = crcopy(pp[i:newi])
cp41 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp12 = pp[i:newi]
cp22 = crcopy(pp[i:newi])
cp32 = crcopy(pp[i:newi])
cp42 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp13 = pp[i:newi]
cp23 = crcopy(pp[i:newi])
cp33 = crcopy(pp[i:newi])
cp43 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp14 = pp[i:newi]
cp24 = crcopy(pp[i:newi])
cp34 = crcopy(pp[i:newi])
cp44 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
ingroupch = cp11 + \
spacech + cp12 + \
spacech + cp13 + \
spacech + cp14
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
i, length = i+1, length+1
ingroupch = cp22 + \
spacech + cp23 + \
spacech + cp24 + \
commach + cp21
pp[i:i] = cindexch + [
chunk(GROUP, ch.where, ingroupch)]
i, length = i+2, length+2
ingroupch = cp33 + \
spacech + cp34 + \
commach + cp31 + \
spacech + cp32
pp[i:i] = cindexch + [
chunk(GROUP, ch.where, ingroupch)]
i, length = i+2, length+2
ingroupch = cp44 + \
commach + cp41 + \
spacech + cp42 + \
spacech + cp43
pp[i:i] = cindexch + [
chunk(GROUP, ch.where, ingroupch)]
i, length = i+2, length+2
elif s_buf_data == 'seemodule':
# discard optional arg first:
length, newi = getoptarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
#
# the command becomes `code' (applying to pp[i]); the contents of the
# chunk after that are spliced in between " (" and ")"
ch.data = "code"
data = pp[i+1].data
data.insert(0, chunk(PLAIN, ch.where, " ("))
data.append(chunk(PLAIN, ch.where, ")"))
pp[i+1:i+2] = data
length = length + len(data) - 1
elif s_buf_data == 'seetext':
# command and group are replaced by a newline plus the group's contents
data = pp[i].data
data.insert(0, chunk(ENDLINE, ch.where, "\n"))
pp[i-1:i+1] = data
i = i - 1
length = length + len(data) - 2
elif s_buf_data == 'deprecated':
# \deprecated{VERSION}{ACTION} --->
# @strong{Deprecated since release VERSION.} ACTION
# (only the first chunk of the VERSION argument is used)
length, newi = getnextarg(length, buf, pp, i)
version = pp[i:newi][0]
length, newi2 = getnextarg(length, buf, pp, newi)
action = pp[newi:newi2]
del pp[i-1:newi2]
length = length - (newi2 - i) - 1
stuff = [chunk(PLAIN, ch.where, 'Deprecated since release '),
version,
chunk(PLAIN, ch.where, '.')]
chunks = [chunk(CSNAME, ch.where, 'strong'),
chunk(GROUP, ch.where, stuff),
chunk(PLAIN, ch.where, ' ')] + action \
+ [chunk(DENDLINE, ch.where, '\n')]
stuff = None
i = i - 1
pp[i:i] = chunks
length = length + len(chunks)
elif s_buf_data == "quad":
# NOTE(review): every other branch assigns chunk_type[PLAIN]; here the
# bare PLAIN constant is assigned -- confirm this is intended.
ch.chtype = PLAIN
ch.data = " "
elif s_buf_data in ('usepackage', 'input'):
del pp[i-1:i+1]
i, length = i-1, length-2
elif s_buf_data in ('noindent', 'indexsubitem', 'footnote'):
pass
elif s_buf_data == 'label':
# remember under which node (the most recently recorded node name) the
# label was defined; command and argument are dropped
name = s(buf, pp[i].data[0].data)
del pp[i-1:i+1]
length = length - 2
i = i - 1
label_nodes[name] = hist.nodenames[-1]
elif s_buf_data == 'rfc':
ch.chtype = chunk_type[PLAIN]
ch.data = "RFC " + s(buf, pp[i].data[0].data)
del pp[i]
length = length - 1
elif s_buf_data == 'ref':
# a known label is replaced by the node name recorded for it; an unknown
# one turns the whole reference into a plain-text notice
name = s(buf, pp[i].data[0].data)
if label_nodes.has_key(name):
pp[i].data[0].data = label_nodes[name]
else:
pp[i-1:i+1] = [
chunk(PLAIN, ch.where,
"(unknown node reference: %s)" % name)]
length = length - 1
print "WARNING: unknown node label", `name`
else:
print "don't know what to do with keyword " + s_buf_data
# characters that get an `@' put in front of them when plain text is
# written out
re_atsign = regex.compile('[@{}]')
# used to replace newlines inside the argument of a CSLINE command
re_newline = regex.compile('\n')
##
## dumpit: write the chunk list pp out as texinfo text
##
## buf is the text the chunks refer to; wm is called with each piece of
## output text (main() passes the write method of the output file, and
## the CSLINE branch below passes the write method of a Wobj buffer).
## pp is not modified; the length check at the top of the loop guards
## against that.  Nothing is returned.
##
def dumpit(buf, wm, pp):
global out
i, length = 0, len(pp)
# addspace is set after a control word; dospace carries it over to the
# chunk that follows
addspace = 0
while 1:
if len(pp) != length:
raise 'FATAL', 'inconsistent length'
if i == length:
break
ch = pp[i]
i = i + 1
dospace = addspace
addspace = 0
if ch.chtype == chunk_type[CSNAME]:
s_buf_data = s(buf, ch.data)
## if s_buf_data == 'e':
## wm('\\')
## continue
## if s_buf_data == '$':
## wm('$')
## continue
wm('@' + s_buf_data)
# NOTE(review): pp[i] is read here without checking i against length,
# and zfill is used unqualified although only `import string' is
# visible at the top of the file -- confirm both.
if s_buf_data == 'node' and \
pp[i].chtype == chunk_type[PLAIN] and \
s(buf, pp[i].data) in out.doublenodes:
##XXX doesnt work yet??
wm(' ZZZ-' + zfill(`i`, 4))
# a command name starting with a letter needs a separating space before
# plain text that follows
if s_buf_data[0] in string.letters:
addspace = 1
elif ch.chtype == chunk_type[PLAIN]:
if dospace and s(buf, ch.data) not in (' ', '\t'):
wm(' ')
text = s(buf, ch.data)
# write the text with an `@' in front of every `@', `{' and `}'
while 1:
pos = re_atsign.search(text)
if pos < 0:
break
wm(text[:pos] + '@' + text[pos])
text = text[pos+1:]
wm(text)
elif ch.chtype == chunk_type[GROUP]:
wm('{')
dumpit(buf, wm, ch.data)
wm('}')
elif ch.chtype == chunk_type[DENDLINE]:
# one empty line, however many line ends follow
wm('\n\n')
while i != length and pp[i].chtype in \
(chunk_type[DENDLINE], chunk_type[ENDLINE]):
i = i + 1
elif ch.chtype == chunk_type[OTHER]:
wm(s(buf, ch.data))
elif ch.chtype == chunk_type[ACTIVE]:
wm(s(buf, ch.data))
elif ch.chtype == chunk_type[ENDLINE]:
wm('\n')
elif ch.chtype == chunk_type[CSLINE]:
# a line command: start a new line first unless the previous chunk
# already ended one
# NOTE(review): [-1] here and [0] further down raise IndexError when a
# neighbouring PLAIN chunk holds an empty string -- confirm that cannot
# occur.
if i >= 2 and pp[i-2].chtype not in \
(chunk_type[ENDLINE], chunk_type[DENDLINE]) \
and (pp[i-2].chtype != chunk_type[PLAIN]
or s(buf, pp[i-2].data)[-1] != '\n'):
wm('\n')
wm('@' + s(buf, ch.data))
if i == length:
raise error, 'CSLINE expected another chunk'
if pp[i].chtype != chunk_type[GROUP]:
raise error, 'CSLINE expected GROUP'
if type(pp[i].data) != ListType:
raise error, 'GROUP chould contain []-data'
# the argument group is dumped into a buffer first, so that it can be
# put on a single line
wobj = Wobj()
dumpit(buf, wobj.write, pp[i].data)
i = i + 1
text = wobj.data
del wobj
if text:
wm(' ')
while 1:
pos = re_newline.search(text)
if pos < 0:
break
# these seem to be completely harmless, so don't warn:
## print 'WARNING: found newline in csline arg (%s)' \
## % s(buf, ch.data)
wm(text[:pos] + ' ')
text = text[pos+1:]
wm(text)
# end the line, unless this was the last chunk's argument being followed
# by another line command, a line end, or plain text starting with a
# newline
if i >= length or \
pp[i].chtype not in (chunk_type[CSLINE],
chunk_type[ENDLINE], chunk_type[DENDLINE]) \
and (pp[i].chtype != chunk_type[PLAIN]
or s(buf, pp[i].data)[0] != '\n'):
wm('\n')
elif ch.chtype == chunk_type[COMMENT]:
# comments that are empty or consist of blanks and tabs only are not
# written at all
if s(buf, ch.data) and \
regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
if i >= 2 \
and pp[i-2].chtype not in (chunk_type[ENDLINE],
chunk_type[DENDLINE]) \
and not (pp[i-2].chtype == chunk_type[PLAIN]
and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
wm('\n')
wm('@c ' + s(buf, ch.data))
elif ch.chtype == chunk_type[IGNORE]:
pass
else:
# unknown chunk type: report it with (at most 400 characters of) its data
try:
str = `s(buf, ch.data)`
except TypeError:
str = `ch.data`
if len(str) > 400:
str = str[:400] + '...'
print 'warning:', ch.chtype, 'not handled, data ' + str
def main():
global release_version
outfile = None
headerfile = 'texipre.dat'
trailerfile = 'texipost.dat'
try:
opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:v:')
except getopt.error:
args = []
if not args:
print 'usage: partparse [-o outfile] [-h headerfile]',
print '[-t trailerfile] file ...'
sys.exit(2)
for opt, arg in opts:
if opt == '-o': outfile = arg
if opt == '-h': headerfile = arg
if opt == '-t': trailerfile = arg
if opt == '-v': release_version = arg
if not outfile:
root, ext = os.path.splitext(args[0])
outfile = root + '.texi'
if outfile in args:
print 'will not overwrite input file', outfile
sys.exit(2)
outf = open(outfile, 'w')
outf.write(open(headerfile, 'r').read())
for file in args:
if len(args) > 1: print '='*20, file, '='*20
buf = open(file, 'r').read()
chunk.buf = buf
w, pp = parseit(buf)
startchange()
changeit(buf, pp)
dumpit(buf, outf.write, pp)
outf.write(open(trailerfile, 'r').read())
outf.close()
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()