cpython/Doc/partparse.py

#
# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
#     and generate texinfo source.
#
# This is *not* a good example of good programming practices. In fact, this
#     file could use a complete rewrite, in order to become faster, more
#     easily extensible and maintainable.
#
# However, I added some comments in a few places for the pitiful person who
#     would ever need to take a look into this file.
#
# Have I been clear enough??
#
# -jh
#
# Yup.  I made some performance improvements and hope this lasts a while;
#     I don't want to be the schmuck who ends up rewriting it!
#
# -fld
#
# (sometime later...)
#
#  Ok, I've re-worked substantial chunks of this.  It's only getting worse.
#     It just might be gone before the next source release.  (Yeah!)
#
# -fld

import sys, string, regex, getopt, os

from types import IntType, ListType, StringType, TupleType

# The first three characters of the running interpreter's version string.
release_version = sys.version[:3]

# Different parse modes for phase 1 (the `parsemode' argument of parseit)
MODE_REGULAR = 0  # ordinary text
MODE_VERBATIM = 1  # no branch in parseit; verbatim is special-cased in handlecs
MODE_CS_SCAN = 2  # scanning the name of a control sequence
MODE_COMMENT = 3  # scanning a comment up to the end of the line
MODE_MATH = 4  # between single math shifts ('$')
MODE_DMATH = 5  # between double math shifts ('$$')
MODE_GOBBLEWHITE = 6  # skipping white space

# every valid mode; Mode.__init__ rejects anything not listed here
the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
	     MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)

# Describe the neighbourhood of position `where' in `buf': the repr of (at
# most) the 10 characters in front of it, followed by the repr of (at most)
# the 10 characters starting at it.  Used to give context in error messages.
def epsilon(buf, where):
    before = buf[max(where - 10, 0):where]
    after = buf[where:min(where + 10, len(buf))]
    return ' Context ' + repr(before) + '.' + repr(after) + '.'

# Format the module-level line counter `lineno' as a message fragment.
# The original author's note: this should give the line number, but it
# never worked.
def lin():
    return ' Line ' + repr(lineno) + '.'

# Format the recursion level `lvl' as a message fragment.
def lv(lvl):
    return ' Level %s.' % repr(lvl)

# Level + line + context in one message fragment (lv, lin and epsilon
# glued together, in that order).  Used in most error messages.
def lle(lvl, buf, where):
    return '%s%s%s' % (lv(lvl), lin(), epsilon(buf, where))


# This class is only needed for _symbolic_ representation of the parse mode.
class Mode:
    def __init__(self, arg):
	if arg not in the_modes:
	    raise ValueError, 'mode not in the_modes'
	self.mode = arg

    def __cmp__(self, other):
	if type(self) != type(other):
	    other = mode[other]
	return cmp(self.mode, other.mode)

    def __repr__(self):
	if self.mode == MODE_REGULAR:
	    return 'MODE_REGULAR'
	elif self.mode == MODE_VERBATIM:
	    return 'MODE_VERBATIM'
	elif self.mode == MODE_CS_SCAN:
	    return 'MODE_CS_SCAN'
	elif self.mode == MODE_COMMENT:
	    return 'MODE_COMMENT'
	elif self.mode == MODE_MATH:
	    return 'MODE_MATH'
	elif self.mode == MODE_DMATH:
	    return 'MODE_DMATH'
	elif self.mode == MODE_GOBBLEWHITE:
	    return 'MODE_GOBBLEWHITE'
	else:
	    raise ValueError, 'mode not in the_modes'

# lookup table: MODE_* constant -> the Mode instance wrapping it
# (the loop variable `t' is left behind as a module-level name)
mode = {}
for t in the_modes:
    mode[t] = Mode(t)


# After phase 1, the text consists of chunks, each with a certain type.
# This type is stored in the chtype member of the chunk,
# the where-field contains the buffer position where the chunk was found,
# and the data field contains one of: (1) a tuple giving the start and end
# positions of the substring (usable as a slice of the buf-variable),
# (2) just a string, mostly generated by the changeit routine,
# or (3) a list, describing a (recursive) subgroup of chunks
PLAIN = 0			# ASSUME PLAINTEXT, data = the text
GROUP = 1			# GROUP ({}), data = [chunk, chunk,..]
CSNAME = 2			# CONTROL SEQ TOKEN, data = the command
COMMENT = 3			# data is the actual comment
DMATH = 4			# DISPLAYMATH, data = [chunk, chunk,..]
MATH = 5			# MATH, see DISPLAYMATH
OTHER = 6			# CHAR WITH CATCODE OTHER, data = char
ACTIVE = 7			# ACTIVE CHAR
GOBBLEDWHITE = 8		# Gobbled LWSP, after CSNAME
ENDLINE = 9			# END-OF-LINE, data = '\n'
DENDLINE = 10			# DOUBLE EOL, data='\n', indicates \par
ENV = 11			# LaTeX-environment
				# data =(envname,[ch,ch,ch,.])
CSLINE = 12			# for texi: next chunk will be one group
				# of args. Will be set all on 1 line
IGNORE = 13			# IGNORE this data
ENDENV = 14			# TEMP END OF GROUP INDICATOR
IF = 15				# IF-directive
				# data = (flag,negate,[ch, ch, ch,...])

# every valid chunk type; ChunkType.__init__ rejects anything not listed
the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
	     GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF)

# class, just to display symbolic name
class ChunkType:
    def __init__(self, chunk_type):
	if chunk_type not in the_types:
	    raise ValueError, 'chunk_type not in the_types'
	self.chunk_type = chunk_type

    def __cmp__(self, other):
	if type(self) != type(other):
	    other = chunk_type[other]
	return cmp(self.chunk_type, other.chunk_type)

    def __repr__(self):
	if self.chunk_type == PLAIN:
	    return 'PLAIN'
	elif self.chunk_type == GROUP:
	    return 'GROUP'
	elif self.chunk_type == CSNAME:
	    return 'CSNAME'
	elif self.chunk_type == COMMENT:
	    return 'COMMENT'
	elif self.chunk_type == DMATH:
	    return 'DMATH'
	elif self.chunk_type == MATH:
	    return 'MATH'
	elif self.chunk_type == OTHER:
	    return 'OTHER'
	elif self.chunk_type == ACTIVE:
	    return 'ACTIVE'
	elif self.chunk_type == GOBBLEDWHITE:
	    return 'GOBBLEDWHITE'
	elif self.chunk_type == DENDLINE:
	    return 'DENDLINE'
	elif self.chunk_type == ENDLINE:
	    return 'ENDLINE'
	elif self.chunk_type == ENV:
	    return 'ENV'
	elif self.chunk_type == CSLINE:
	    return 'CSLINE'
	elif self.chunk_type == IGNORE:
	    return 'IGNORE'
	elif self.chunk_type == ENDENV:
	    return 'ENDENV'
	elif self.chunk_type == IF:
	    return 'IF'
	else:
	    raise ValueError, 'chunk_type not in the_types'

# lookup table: chunk type constant -> the ChunkType instance wrapping it
# (the loop variable `t' is left behind as a module-level name)
chunk_type = {}
for t in the_types:
    chunk_type[t] = ChunkType(t)

# the type object of a ChunkType instance; Chunk.__init__ uses it to tell
# a wrapped type from a bare constant
chunk_type_type = type(chunk_type[PLAIN])

# A Chunk is one piece of the parsed buffer: its type (chtype, always
# stored as an entry of the chunk_type table), the buffer position it was
# found at (where), and its payload (data).
class Chunk:
    def __init__(self, chtype, where, data):
        if type(chtype) == chunk_type_type:
            self.chtype = chtype
        else:
            # a bare type constant: store its ChunkType wrapper instead
            self.chtype = chunk_type[chtype]
        self.where = where
        self.data = data

    def __repr__(self):
        return 'chunk' + repr((self.chtype, self.where, self.data))

# lower-case alias; the rest of this file creates chunks as chunk(...)
chunk = Chunk


# string exception raised by this module (`raise error, message')
error = 'partparse.error'

#
# TeX's catcodes...
# (each value doubles as the index into a 16-entry catcode table such as
# my_cc, and into cc_names below)
#
CC_ESCAPE = 0
CC_LBRACE = 1
CC_RBRACE = 2
CC_MATHSHIFT = 3
CC_ALIGNMENT = 4
CC_ENDLINE = 5
CC_PARAMETER = 6
CC_SUPERSCRIPT = 7
CC_SUBSCRIPT = 8
CC_IGNORE = 9
CC_WHITE = 10
CC_LETTER = 11
CC_OTHER = 12
CC_ACTIVE = 13
CC_COMMENT = 14
CC_INVALID = 15

# and the names, in catcode order: cc_names[CC_xxx] == 'CC_xxx'
cc_names = [
	  'CC_ESCAPE',
	  'CC_LBRACE',
	  'CC_RBRACE',
	  'CC_MATHSHIFT',
	  'CC_ALIGNMENT',
	  'CC_ENDLINE',
	  'CC_PARAMETER',
	  'CC_SUPERSCRIPT',
	  'CC_SUBSCRIPT',
	  'CC_IGNORE',
	  'CC_WHITE',
	  'CC_LETTER',
	  'CC_OTHER',
	  'CC_ACTIVE',
	  'CC_COMMENT',
	  'CC_INVALID',
	  ]

# Render a list of catcodes as a bracketed, comma-separated list of their
# symbolic names, e.g. '[CC_ESCAPE, CC_LETTER]' ('[]' for an empty list).
def pcl(codelist):
    names = []
    for code in codelist:
        names.append(cc_names[code])
    return '[' + string.join(names, ', ') + ']'

# Symbolic name ('CC_ACTIVE', 'CC_OTHER', etc.) of a single catcode.
def pc(code):
    name = cc_names[code]
    return name


# Which catcodes make the parser stop parsing regular plaintext
# (every catcode except CC_WHITE, CC_LETTER and CC_OTHER)
regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
	  CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
	  CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]

# catcodes of the characters that can make up a control sequence name
csname_scancodes = [CC_LETTER]

# catcodes of the characters gobbled as white space (LWSP)
white_scancodes = [CC_WHITE]
##white_scancodes = [CC_WHITE, CC_ENDLINE]

# make a list of all catcode id's, except for catcode ``other''
all_but_other_codes = range(16)
del all_but_other_codes[CC_OTHER]
##print all_but_other_codes

# when does a comment end
comment_stopcodes = [CC_ENDLINE]

# Concatenate, in the order given by `codelist', the character strings
# that the catcode table `cc' holds for those catcodes.  Catcodes without
# characters (None or '') contribute nothing.
def code2string(cc, codelist):
    collected = ''
    for category in codelist:
        chars = cc[category]
        if not chars:
            continue
        collected = collected + chars
    return collected

# Generate the characters of catcode ``other'': every character with code
# 0..255 that the catcode table `cc' does not list under any of the
# catcodes in all_but_other_codes.  The result is in ascending code order.
def make_other_codes(cc):
    claimed = [0] * 256
    for category in all_but_other_codes:
        chars = cc[category]
        if chars:
            for c in chars:
                claimed[ord(c)] = 1
    others = ''
    for code in range(256):
        if not claimed[code]:
            others = others + chr(code)
    return others

# Catcode dump (which characters have which catcodes).  The actual
# printing is disabled; all that is left is the sanity check that `cc'
# has exactly 16 entries, one per catcode.
def dump_cc(name, cc):
    ##print '\t' + name
    ##print '=' * (8+len(name))
    if len(cc) == 16:
        return
    raise TypeError('cc not good cat class')
##      for i in range(16):
##              print pc(i) + '\t' + `cc[i]`


# Catcode tables: 16-entry lists indexed by the CC_* constants; each entry
# is a string of the characters having that catcode, or None.
# Each table below starts as a copy of the previous one.

# In the beginning,....
epoch_cc = [None] * 16
##dump_cc('epoch_cc', epoch_cc)


# INITEX
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
	  '\n', '\0', ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
##dump_cc('initex_cc', initex_cc)


# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B'	# '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01'	# '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C'		# '~' and C-l
# must come last: ``other'' is the complement of all entries set above
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
##dump_cc('lplain_cc', lplain_cc)


# Guido's LaTeX environment catcoded '_' as ``other''
# my own purpose catlist; this is the table the parser below uses
my_cc = lplain_cc[:]
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'	      # add it to OTHER list
# printing is disabled in dump_cc; this only checks the table has 16 entries
dump_cc('my_cc', my_cc)


# needed for un_re, my equivalent for regexp-quote in Emacs:
# the characters that have a special meaning in a regex-module (Emacs
# style) regular expression and therefore need a backslash in front of
# them to be matched literally.  Besides backslash, brackets and the
# anchors this includes '.', '*', '+' and '?'; without those a \verb
# delimiter such as '.' or '*' would be compiled as an operator instead
# of being searched for literally.
re_meaning = '\\[]^$.*+?'

# Return a copy of `str' in which every character with a special regular
# expression meaning is preceded by a backslash, so that the result,
# compiled as a pattern, matches exactly the text of `str'.
def un_re(str):
    result = ''
    for ch in str:
        if ch in re_meaning:
            result = result + '\\'
        result = result + ch
    return result

# NOTE the negate ('^') operator in *some* of the regexps below

# Compile a pattern matching any one character whose catcode (according
# to `cc') is in regular_stopcodes.
def make_rc_regular(cc):
    # problems here if '[]' are included!!
    stopchars = code2string(cc, regular_stopcodes)
    return regex.compile('[' + stopchars + ']')

# Compile a pattern matching any one character whose catcode (according
# to `cc') is NOT in csname_scancodes.
def make_rc_cs_scan(cc):
    namechars = code2string(cc, csname_scancodes)
    return regex.compile('[^' + namechars + ']')

# Compile a pattern matching any one character whose catcode (according
# to `cc') is in comment_stopcodes.
def make_rc_comment(cc):
    stopchars = code2string(cc, comment_stopcodes)
    return regex.compile('[' + stopchars + ']')

# Compile a pattern matching any one character whose catcode (according
# to `cc') is NOT in white_scancodes.
def make_rc_endwhite(cc):
    whitechars = code2string(cc, white_scancodes)
    return regex.compile('[^' + whitechars + ']')


# The compiled patterns parseit searches with, all built from my_cc.

# regular: normal mode: finds the next character that ends plain text
rc_regular = make_rc_regular(my_cc)

# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
# (finds the first non-letter)
rc_cs_scan = make_rc_cs_scan(my_cc)
# finds the end-of-line character that ends a comment
rc_comment = make_rc_comment(my_cc)
# finds the first character that is not white space
rc_endwhite = make_rc_endwhite(my_cc)


# parseit (BUF, PARSEMODE=mode[MODE_REGULAR], START=0, RECURSION-LEVEL=0)
#     RECURSION-LEVEL will is incremented on entry.
#     result contains the list of chunks returned
#     together with this list, the buffer position is returned

#     RECURSION-LEVEL will be set to zero *again*, when recursively a
#     {,D}MATH-mode scan has been enetered.
#     This has been done in order to better check for environment-mismatches

def parseit(buf, parsemode=mode[MODE_REGULAR], start=0, lvl=0):
    global lineno

    result = []
    end = len(buf)
    if lvl == 0 and parsemode == mode[MODE_REGULAR]:
	lineno = 1
    lvl = lvl + 1

    ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'

    #
    # some of the more regular modes...
    #

    if parsemode in (mode[MODE_REGULAR], mode[MODE_DMATH], mode[MODE_MATH]):
	cstate = []
	newpos = start
	curpmode = parsemode
	while 1:
	    where = newpos
	    #print '\tnew round: ' + epsilon(buf, where)
	    if where == end:
		if lvl > 1 or curpmode != mode[MODE_REGULAR]:
		    # not the way we started...
		    raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
		# the real ending of lvl-1 parse
		return end, result

	    pos = rc_regular.search(buf, where)

	    if pos < 0:
		pos = end

	    if pos != where:
		newpos, c = pos, chunk(PLAIN, where, (where, pos))
		result.append(c)
		continue


	    #
	    # ok, pos == where and pos != end
	    #
	    foundchar = buf[where]
	    if foundchar in my_cc[CC_LBRACE]:
		# recursive subgroup parse...
		newpos, data = parseit(buf, curpmode, where+1, lvl)
		result.append(chunk(GROUP, where, data))

	    elif foundchar in my_cc[CC_RBRACE]:
		if lvl <= 1:
		    raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
		if  lvl == 1 and mode != mode[MODE_REGULAR]:
		    raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
		return where + 1, result

	    elif foundchar in my_cc[CC_ESCAPE]:
		#
		# call the routine that actually deals with
		#     this problem. If do_ret is None, than
		#     return the value of do_ret
		#
		# Note that handle_cs might call this routine
		#     recursively again...
		#
		do_ret, newpos = handlecs(buf, where,
			  curpmode, lvl, result, end)
		if do_ret != None:
		    return do_ret

	    elif foundchar in my_cc[CC_COMMENT]:
		newpos, data = parseit(buf,
			  mode[MODE_COMMENT], where+1, lvl)
		result.append(chunk(COMMENT, where, data))

	    elif foundchar in my_cc[CC_MATHSHIFT]:
		# note that recursive calls to math-mode
		# scanning are called with recursion-level 0
		# again, in order to check for bad mathend
		#
		if where + 1 != end and buf[where + 1] in my_cc[CC_MATHSHIFT]:
		    #
		    # double mathshift, e.g. '$$'
		    #
		    if curpmode == mode[MODE_REGULAR]:
			newpos, data = parseit(buf, mode[MODE_DMATH],
					       where + 2, 0)
			result.append(chunk(DMATH, where, data))
		    elif curpmode == mode[MODE_MATH]:
			raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
		    elif lvl != 1:
			raise error, 'bad mathend.' + lle(lvl, buf, where)
		    else:
			return where + 2, result
		else:
		    #
		    # single math shift, e.g. '$'
		    #
		    if curpmode == mode[MODE_REGULAR]:
			newpos, data = parseit(buf, mode[MODE_MATH],
					       where + 1, 0)
			result.append(chunk(MATH, where, data))
		    elif curpmode == mode[MODE_DMATH]:
			raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
		    elif lvl != 1:
			raise error, 'bad mathend.' + lv(lvl, buf, where)
		    else:
			return where + 1, result

	    elif foundchar in my_cc[CC_IGNORE]:
		print 'warning: ignored char', `foundchar`
		newpos = where + 1

	    elif foundchar in my_cc[CC_ACTIVE]:
		result.append(chunk(ACTIVE, where, foundchar))
		newpos = where + 1

	    elif foundchar in my_cc[CC_INVALID]:
		raise error, 'invalid char ' + `foundchar`
		newpos = where + 1

	    elif foundchar in my_cc[CC_ENDLINE]:
		#
		# after an end of line, eat the rest of
		# whitespace on the beginning of the next line
		# this is what LaTeX more or less does
		#
		# also, try to indicate double newlines (\par)
		#
		lineno = lineno + 1
		savedwhere = where
		newpos, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], where + 1, lvl)
		if newpos != end and buf[newpos] in my_cc[CC_ENDLINE]:
		    result.append(chunk(DENDLINE, savedwhere, foundchar))
		else:
		    result.append(chunk(ENDLINE, savedwhere, foundchar))
	    else:
		result.append(chunk(OTHER, where, foundchar))
		newpos = where + 1

    elif parsemode == mode[MODE_CS_SCAN]:
	#
	# scan for a control sequence token. `\ape', `\nut' or `\%'
	#
	if start == end:
	    raise EOFError, 'can\'t find end of csname'
	pos = rc_cs_scan.search(buf, start)
	if pos < 0:
	    pos = end
	if pos == start:
	    # first non-letter right where we started the search
	    # ---> the control sequence name consists of one single
	    # character. Also: don't eat white space...
	    if buf[pos] in my_cc[CC_ENDLINE]:
		lineno = lineno + 1
	    pos = pos + 1
	    return pos, (start, pos)
	else:
	    spos = pos
	    if buf[pos] == '\n':
		lineno = lineno + 1
		spos = pos + 1
	    pos2, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], spos, lvl)
	    return pos2, (start, pos)

    elif parsemode == mode[MODE_GOBBLEWHITE]:
	if start == end:
	    return start, ''
	pos = rc_endwhite.search(buf, start)
	if pos < 0:
	    pos = start
	return pos, (start, pos)

    elif parsemode == mode[MODE_COMMENT]:
	pos = rc_comment.search(buf, start)
	lineno = lineno + 1
	if pos < 0:
	    print 'no newline perhaps?'
	    raise EOFError, 'can\'t find end of comment'
	pos = pos + 1
	pos2, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], pos, lvl)
	return pos2, (start, pos)

    else:
	raise error, 'Unknown mode (' + `parsemode` + ')'


#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)

#boxcommands = 'mbox', 'fbox'
#defcommands = 'def', 'newcommand'

# the literal text that ends a verbatim environment...
endverbstr = '\\end{verbatim}'

# ...and the pattern handlecs uses to search for it
re_endverb = regex.compile(un_re(endverbstr))

#
# handlecs: helper function for parseit, for the special thing we might
#     wanna do after certain command control sequences
# returns: None or return_data, newpos
#
# in the latter case, the calling function is instructed to immediately
# return with the data in return_data
#
def handlecs(buf, where, curpmode, lvl, result, end):
    global lineno

    # get the control sequence name...
    newpos, data = parseit(buf, mode[MODE_CS_SCAN], where+1, lvl)
    saveddata = data
    s_buf_data = s(buf, data)

    if s_buf_data in ('begin', 'end'):
	# skip the expected '{' and get the LaTeX-envname '}'
	newpos, data = parseit(buf, mode[MODE_REGULAR], newpos+1, lvl)
	if len(data) != 1:
	    raise error, 'expected 1 chunk of data.' + lle(lvl, buf, where)

	# yucky, we've got an environment
	envname = s(buf, data[0].data)
	s_buf_saveddata = s(buf, saveddata)
	##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
	if s_buf_saveddata == 'begin' and envname == 'verbatim':
	    # verbatim deserves special treatment
	    pos = re_endverb.search(buf, newpos)
	    if pos < 0:
		raise error, "%s not found.%s" \
		      % (`endverbstr`, lle(lvl, buf, where))
	    result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
	    newpos = pos + len(endverbstr)

	elif s_buf_saveddata == 'begin':
	    # start parsing recursively... If that parse returns
	    # from an '\end{...}', then should the last item of
	    # the returned data be a string containing the ended
	    # environment
	    newpos, data = parseit(buf, curpmode, newpos, lvl)
	    if not data or type(data[-1]) is not StringType:
		raise error, "missing 'end'" + lle(lvl, buf, where) \
		      + epsilon(buf, newpos)
	    retenv = data[-1]
	    del data[-1]
	    if retenv != envname:
		#[`retenv`, `envname`]
		raise error, 'environments do not match.%s%s' \
		      % (lle(lvl, buf, where), epsilon(buf, newpos))
	    result.append(chunk(ENV, where, (retenv, data)))
	else:
	    # 'end'... append the environment name, as just
	    # pointed out, and order parsit to return...
	    result.append(envname)
	    ##print 'POINT of return: ' + epsilon(buf, newpos)
	    # the tuple will be returned by parseit
	    return (newpos, result), newpos

    # end of \begin ... \end handling

    elif s_buf_data[0:2] == 'if':
	# another scary monster: the 'if' directive
	flag = s_buf_data[2:]

	# recursively call parseit, just like environment above..
	# the last item of data should contain the if-termination
	# e.g., 'else' of 'fi'
	newpos, data = parseit(buf, curpmode, newpos, lvl)
	if not data or data[-1] not in ('else', 'fi'):
	    raise error, 'wrong if... termination' + \
		      lle(lvl, buf, where) + epsilon(buf, newpos)

	ifterm = data[-1]
	del data[-1]
	# 0 means dont_negate flag
	result.append(chunk(IF, where, (flag, 0, data)))
	if ifterm == 'else':
	    # do the whole thing again, there is only one way
	    # to end this one, by 'fi'
	    newpos, data = parseit(buf, curpmode, newpos, lvl)
	    if not data or data[-1] not in ('fi', ):
		raise error, 'wrong if...else... termination' \
		      + lle(lvl, buf, where) \
		      + epsilon(buf, newpos)

	    ifterm = data[-1]
	    del data[-1]
	    result.append(chunk(IF, where, (flag, 1, data)))
	#done implicitely: return None, newpos

    elif s_buf_data in ('else', 'fi'):
	result.append(s(buf, data))
	# order calling party to return tuple
	return (newpos, result), newpos

    # end of \if, \else, ... \fi handling

    elif s(buf, saveddata) == 'verb':
	x2 = saveddata[1]
	result.append(chunk(CSNAME, where, data))
	if x2 == end:
	    raise error, 'premature end of command.' + lle(lvl, buf, where)
	delimchar = buf[x2]
	##print 'VERB: delimchar ' + `delimchar`
	pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
	if pos < 0:
	    raise error, 'end of \'verb\' argument (' + \
		  `delimchar` + ') not found.' + \
		  lle(lvl, buf, where)
	result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
	newpos = pos + 1
    else:
	result.append(chunk(CSNAME, where, data))
    return None, newpos

# Get the string value of a chunk's data: a string is returned as it is,
# a tuple of two integers (x1, x2) selects the slice buf[x1:x2].
# Anything else raises TypeError.
def s(buf, data):
    if type(data) is type(''):
        return data
    if len(data) == 2 and type(data[0]) is type(data[1]) is type(0):
        return buf[data[0]:data[1]]
    raise TypeError('expected tuple of 2 integers')


##length, data1, i = getnextarg(length, buf, pp, i + 1)

# Copy a list of chunks: returns a new list holding the chunkcopy of each
# chunk in `r'.
def crcopy(r):
    copies = []
    for ch in r:
        copies.append(chunkcopy(ch))
    return copies


# Copy a chunk, would better be a method of class Chunk...
# A GROUP chunk gets a new list with copies of its members (recursively);
# any other chunk becomes a new Chunk sharing the original's data object.
def chunkcopy(ch):
    if ch.chtype != chunk_type[GROUP]:
        return chunk(ch.chtype, ch.where, ch.data)
    members = []
    for member in ch.data:
        members.append(chunkcopy(member))
    return chunk(GROUP, ch.where, members)


# get next argument for TeX-macro, flatten a group (insert between)
# or return Command Sequence token, or give back one character
def getnextarg(length, buf, pp, item):

    ##wobj = Wobj()
    ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
    ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'

    while item < length and pp[item].chtype == chunk_type[ENDLINE]:
	del pp[item]
	length = length - 1
    if item >= length:
	raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
    if pp[item].chtype == chunk_type[GROUP]:
	newpp = pp[item].data
	del pp[item]
	length = length - 1
	changeit(buf, newpp)
	length = length + len(newpp)
	pp[item:item] = newpp
	item = item + len(newpp)
	if len(newpp) < 10:
	    wobj = Wobj()
	    dumpit(buf, wobj.write, newpp)
	    ##print 'GETNEXTARG: inserted ' + `wobj.data`
	return length, item
    elif pp[item].chtype == chunk_type[PLAIN]:
	#grab one char
	print 'WARNING: grabbing one char'
	if len(s(buf, pp[item].data)) > 1:
	    pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
	    item, length = item+1, length+1
	    pp[item].data = s(buf, pp[item].data)[1:]
	else:
	    item = item+1
	return length, item
    else:
	ch = pp[item]
	try:
	    str = `s(buf, ch.data)`
	except TypeError:
	    str = `ch.data`
	    if len(str) > 400:
		str = str[:400] + '...'
	print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
	return length, item


# this one is needed to find the end of LaTeX's optional argument, like
# item[...]
re_endopt = regex.compile(']')

# get a LaTeX-optional argument, you know, the square braces '[' and ']'
def getoptarg(length, buf, pp, item):

    wobj = Wobj()
    dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
    ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'

    if item >= length or \
	      pp[item].chtype != chunk_type[PLAIN] or \
	      s(buf, pp[item].data)[0] != '[':
	return length, item

    pp[item].data = s(buf, pp[item].data)[1:]
    if len(pp[item].data) == 0:
	del pp[item]
	length = length-1

    while 1:
	if item == length:
	    raise error, 'No end of optional arg found'
	if pp[item].chtype == chunk_type[PLAIN]:
	    text = s(buf, pp[item].data)
	    pos = re_endopt.search(text)
	    if pos >= 0:
		pp[item].data = text[:pos]
		if pos == 0:
		    del pp[item]
		    length = length-1
		else:
		    item=item+1
		text = text[pos+1:]

		while text and text[0] in ' \t':
		    text = text[1:]

		if text:
		    pp.insert(item, chunk(PLAIN, 0, text))
		    length = length + 1
		return length, item

	item = item+1


# Wobj collects output: every write request is appended to the ``data''
# attribute, which starts out as the empty string.
class Wobj:
    data = ''

    def write(self, data):
        collected = self.data
        self.data = collected + data

# ignore these commands (flattext deletes them without a warning)
ignoredcommands = ('bcode', 'ecode', 'hline', 'small', '/')
# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF', 'LaTeX', 'POSIX')
# \{ --> {,  \} --> }, etc
themselves = ('{', '}', ',', '.', '@', ' ', '\n') + wordsselves
# these ones also themselves (see argargs macro in myformat.sty);
# flattext only honours them while hist.inargs is set
inargsselves = (',', '[', ']', '(', ')')
# this is how *I* would show the difference between emph and strong
#  a tuple means: (text to put in front, text to put after) the argument
#  code 1 means: fold to uppercase
markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'),
	    'strong': ('*', '*')}

# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}

# transparent for these commands
for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp',
	    'file', 'r', 'i', 't')


# try to remove macros and return flat text
def flattext(buf, pp):
    pp = crcopy(pp)
    ##print '---> FLATTEXT ' + `pp`
    wobj = Wobj()

    i, length = 0, len(pp)
    while 1:
	if len(pp) != length:
	    raise 'FATAL', 'inconsistent length'
	if i >= length:
	    break
	ch = pp[i]
	i = i+1
	if ch.chtype == chunk_type[PLAIN]:
	    pass
	elif ch.chtype == chunk_type[CSNAME]:
	    s_buf_data = s(buf, ch.data)
	    if s_buf_data in themselves or hist.inargs and s_buf_data in inargsselves:
		ch.chtype = chunk_type[PLAIN]
	    elif s_buf_data == 'e':
		ch.chtype = chunk_type[PLAIN]
		ch.data = '\\'
	    elif len(s_buf_data) == 1 \
		      and s_buf_data in onlylatexspecial:
		ch.chtype = chunk_type[PLAIN]
		# if it is followed by an empty group,
		# remove that group, it was needed for
		# a true space
		if i < length \
			  and pp[i].chtype==chunk_type[GROUP] \
			  and len(pp[i].data) == 0:
		    del pp[i]
		    length = length-1

	    elif s_buf_data in markcmds.keys():
		length, newi = getnextarg(length, buf, pp, i)
		str = flattext(buf, pp[i:newi])
		del pp[i:newi]
		length = length - (newi - i)
		ch.chtype = chunk_type[PLAIN]
		markcmd = s_buf_data
		x = markcmds[markcmd]
		if type(x) == TupleType:
		    pre, after = x
		    str = pre+str+after
		elif x == 1:
		    str = string.upper(str)
		else:
		    raise 'FATAL', 'corrupt markcmds'
		ch.data = str
	    else:
		if s_buf_data not in ignoredcommands:
		    print 'WARNING: deleting command ' + s_buf_data
		    print 'PP' + `pp[i-1]`
		del pp[i-1]
		i, length = i-1, length-1
	elif ch.chtype == chunk_type[GROUP]:
	    length, newi = getnextarg(length, buf, pp, i-1)
	    i = i-1
##			str = flattext(buf, crcopy(pp[i-1:newi]))
##			del pp[i:newi]
##			length = length - (newi - i)
##			ch.chtype = chunk_type[PLAIN]
##			ch.data = str
	else:
	    pass

    dumpit(buf, wobj.write, pp)
    ##print 'FLATTEXT: RETURNING ' + `wobj.data`
    return wobj.data

# try to generate node names (a bit shorter than the chapter title)
# note that the \nodename command (see elsewhere) overules these efforts
def invent_node_names(text):
    words = string.split(text)

    ##print 'WORDS ' + `words`

    if len(words) == 2 \
       and string.lower(words[0]) == 'built-in' \
       and string.lower(words[1]) not in ('modules', 'functions'):
	return words[1]
    if len(words) == 3 and string.lower(words[1]) == 'module':
	return words[2]
    if len(words) == 3 and string.lower(words[1]) == 'object':
	return string.join(words[0:2])
    if len(words) > 4 \
       and (string.lower(string.join(words[-4:])) \
	    == 'methods and data attributes'):
	return string.join(words[:2])
    return text

# matches one of the characters , ` ' @ { }
re_commas_etc = regex.compile('[,`\'@{}]')

# matches a (possibly empty) run of blanks and tabs
re_whitespace = regex.compile('[ \t]*')


##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')

# look if the next non-white stuff is also a command, resulting in skipping
# double endlines (DENDLINE) too, and thus omitting \par's
# Sometimes this is too much, maybe consider DENDLINE's as stop
#
# Scans `pp' from index `i' on, skipping ENDLINE and DENDLINE chunks and
# PLAIN chunks made up of blanks and tabs only.  Returns the index just
# _after_ the command if the first other chunk is the control sequence
# `cmdname'.  Returns -1 in every other case, including when the end of
# the list is reached (which used to fall off the end and return None).
# `length' is not used.
def next_command_p(length, buf, pp, i, cmdname):

    while i < len(pp):
        ch = pp[i]
        i = i+1
        if ch.chtype == chunk_type[ENDLINE]:
            continue
        if ch.chtype == chunk_type[DENDLINE]:
            continue
        if ch.chtype == chunk_type[PLAIN]:
            text = s(buf, ch.data)
            if re_whitespace.search(text) == 0 and \
               re_whitespace.match(text) == len(text):
                # white space only: keep looking
                continue
            return -1
        if ch.chtype == chunk_type[CSNAME]:
            if s(buf, ch.data) == cmdname:
                return i # _after_ the command
            return -1
        return -1
    return -1


# things that are special to LaTeX, but not to texi..
onlylatexspecial = '_~^$#&%'

# an empty class; its instances are used as plain attribute holders
class Struct: pass

# module-level state records; their attributes are (re)initialised by
# startchange below
hist = Struct()
out = Struct()

# (Re)initialise the attributes of the module-level records `hist' and
# `out': empty lists and zeroed counters/flags.
def startchange():
    hist.inenv, hist.nodenames, hist.cindex = [], [], []
    hist.inargs = 0
    hist.enumeratenesting = 0
    hist.itemizenesting = 0

    out.doublenodes, out.doublecindeces = [], []


# ready-made one-chunk lists: a space, a comma plus space, and a `cindex'
# CSLINE chunk (presumably spliced into chunk lists by the conversion
# code further down -- their use is outside this part of the file)
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
enumeratesymbols = ['1', 'A', 'a']

##
## \begin{ {func,data,exc}desc }{name}...
##   the resulting texi-code is dependent on the contents of indexsubitem
##

# indexsubitem: `['XXX', 'function']`
# funcdesc:
#     deffn {`idxsi`} NAME (FUNCARGS)

# indexsubitem: `['XXX', 'method']`
# funcdesc:
#     defmethod {`idxsi[0]`} NAME (FUNCARGS)

# indexsubitem: `['in', 'module', 'MODNAME']`
# datadesc:
#     defcv data {`idxsi[1:]`} NAME
# excdesc:
#     defcv exception {`idxsi[1:]`} NAME
# funcdesc:
#     deffn {function of `idxsi[1:]`} NAME (FUNCARGS)

# indexsubitem: `['OBJECT', 'attribute']`
# datadesc
#     defcv attribute {`OBJECT`} NAME


## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
##   or \funcline{NAME}{ARGS}
##
def do_funcdesc(length, buf, pp, i, index=1):
    startpoint = i-1
    ch = pp[startpoint]
    wh = ch.where
    length, newi = getnextarg(length, buf, pp, i)
    funcname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)
    save = hist.inargs
    hist.inargs = 1
    length, newi = getnextarg(length, buf, pp, i)
    hist.inargs = save
    del save
    the_args = [chunk(PLAIN, wh, '()'[0])] + pp[i:newi] + \
	       [chunk(PLAIN, wh, '()'[1])]
    del pp[i:newi]
    length = length - (newi-i)

    idxsi = hist.indexsubitem	# words
    command = ''
    cat_class = ''
    if idxsi and idxsi[-1] in ('method', 'protocol', 'attribute'):
	command = 'defmethod'
	cat_class = string.join(idxsi[:-1])
    elif len(idxsi) == 2 and idxsi[1] == 'function':
	command = 'deffn'
	cat_class = string.join(idxsi)
    elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
	command = 'deffn'
	cat_class = 'function of ' + string.join(idxsi[1:])
    elif len(idxsi) > 3 and idxsi[:2] == ['in', 'modules']:
	command = 'deffn'
	cat_class = 'function of ' + string.join(idxsi[1:])

    if not command:
	raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`

    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
    cslinearg.append(chunk(PLAIN, wh, ' '))
    cslinearg.append(funcname)
    cslinearg.append(chunk(PLAIN, wh, ' '))
    l = len(cslinearg)
    cslinearg[l:l] = the_args

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    i, length = i+1, length+1
    hist.command = command
    return length, i


## this routine will be called on \begin{excdesc}{NAME}
## or \excline{NAME}
##
def do_excdesc(length, buf, pp, i):
    startpoint = i-1
    ch = pp[startpoint]
    wh = ch.where
    length, newi = getnextarg(length, buf, pp, i)
    excname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    idxsi = hist.indexsubitem	# words
    command = ''
    cat_class = ''
    class_class = ''
    if len(idxsi) == 2 and idxsi[1] == 'exception':
	command = 'defvr'
	cat_class = string.join(idxsi)
    elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
	command = 'defcv'
	cat_class = 'exception'
	class_class = string.join(idxsi[1:])
    elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
	command = 'defcv'
	cat_class = 'exception'
	class_class = string.join(idxsi[2:])
    elif idxsi == ['built-in', 'exception', 'base', 'class']:
	command = 'defvr'
	cat_class = 'exception base class'
    else:
	raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`

    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
    cslinearg.append(chunk(PLAIN, wh, ' '))
    if class_class:
	cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
	cslinearg.append(chunk(PLAIN, wh, ' '))
    cslinearg.append(excname)

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    i, length = i+1, length+1
    hist.command = command
    return length, i

## same for datadesc or dataline...
def do_datadesc(length, buf, pp, i, index=1):
    """Turn a data description header into a texinfo @defcv line.

    pp[i-1] becomes the 'defcv' command chunk; the NAME argument that
    follows it is removed from pp and replaced by one GROUP chunk holding
    ``{category} [{class}] NAME``.  Category and class are derived from
    hist.indexsubitem; hist.command is set to 'defcv'.

    Returns the updated (length, i) pair.  The index parameter is accepted
    but not used here.
    """
    startpoint = i-1
    ch = pp[startpoint]
    wh = ch.where
    length, newi = getnextarg(length, buf, pp, i)
    dataname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    idxsi = hist.indexsubitem   # words
    command = 'defcv'
    cat_class = 'data'
    class_class = ''
    # Guard against an empty word list (as do_funcdesc does): idxsi[-1]
    # would raise IndexError.  An empty list falls through to the final
    # branch and yields an empty class, which is simply omitted below.
    if idxsi and idxsi[-1] in ('attribute', 'option'):
        cat_class = idxsi[-1]
        class_class = string.join(idxsi[:-1])
    elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
        class_class = string.join(idxsi[1:])
    elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
        class_class = string.join(idxsi[2:])
    else:
        class_class = string.join(idxsi)

    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
    cslinearg.append(chunk(PLAIN, wh, ' '))
    if class_class:
        cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
        cslinearg.append(chunk(PLAIN, wh, ' '))
    cslinearg.append(dataname)

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    i, length = i+1, length+1
    hist.command = command
    return length, i


def do_opcodedesc(length, buf, pp, i):
    """Turn an opcode description header into a texinfo @deffn line.

    pp[i-1] becomes the 'deffn' command chunk.  The NAME argument that
    follows it is removed from pp, and the chunk then found at pp[i] is
    replaced by a GROUP that wraps the category text, NAME and that chunk.
    hist.command is set to 'deffn'.

    Returns (length, i) with i still pointing at the new GROUP, so the
    caller's loop visits it next.
    """
    startpoint = i-1
    ch = pp[startpoint]
    wh = ch.where
    length, newi = getnextarg(length, buf, pp, i)
    dataname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    # Use the chunk_type entry, as every other assignment to and comparison
    # against .chtype in this file does; this used to assign bare CSLINE.
    ch.chtype = chunk_type[CSLINE]
    ch.data = "deffn"

    # NOTE(review): the category text appears twice (once as PLAIN with
    # backslash-escaped spaces, once as a GROUP) -- confirm this is wanted.
    cslinearg = [chunk(PLAIN, wh, 'byte\ code\ instruction'),
                 chunk(GROUP, wh, [chunk(PLAIN, wh, "byte code instruction")]),
                 chunk(PLAIN, wh, ' '),
                 dataname,
                 chunk(PLAIN, wh, ' '),
                 pp[i],
                 ]

    # replaces pp[i] in place, so len(pp) -- and hence length -- is unchanged
    pp[i] = chunk(GROUP, wh, cslinearg)
    hist.command = ch.data
    return length, i


# regular indices: those that are not set in tt font by default....
# changeit() consults this tuple while handling \ttindex: when the chosen
# index command is listed here the index argument is wrapped in @t{...};
# for any other command the category text is wrapped in @r{...} instead.
regindices = ('cindex', )

# remove illegal characters from node names
def rm_commas_etc(text):
    result = ''
    changed = 0
    while 1:
	pos = re_commas_etc.search(text)
	if pos >= 0:
	    changed = 1
	    result = result + text[:pos]
	    text = text[pos+1:]
	else:
	    result = result + text
	    break
    if changed:
	print 'Warning: nodename changed to ' + `result`

    return result

# boolean flags
# Looked up by changeit() when it meets an IF chunk: the chunk names a flag
# (optionally negated) and its contents are kept only when the resulting
# value is true.  A flag name missing from this dictionary is an error.
flags = {'texi': 1}


# map of \label{} to node names
# changeit() stores the most recent node name under each \label it sees and
# reads the entry back when it rewrites a \ref.
label_nodes = {}


##
## changeit: the actual routine, that changes the contents of the parsed
##           chunks
##

def changeit(buf, pp):
    global onlylatexspecial, hist, out

    i, length = 0, len(pp)
    while 1:
	# sanity check: length should always equal len(pp)
	if len(pp) != length:
	    raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
	if i >= length:
	    break
	ch = pp[i]
	i = i + 1

	if type(ch) is StringType:
	    #normally, only chunks are present in pp,
	    # but in some cases, some extra info
	    # has been inserted, e.g., the \end{...} clauses
	    raise 'FATAL', 'got string, probably too many ' + `end`

	if ch.chtype == chunk_type[GROUP]:
	    # check for {\em ...} constructs
	    data = ch.data
	    if data and \
	       data[0].chtype == chunk_type[CSNAME] and \
	       fontchanges.has_key(s(buf, data[0].data)):
		k = s(buf, data[0].data)
		del data[0]
		pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
		length, i = length+1, i+1

	    elif data:
		if len(data) \
		   and data[0].chtype == chunk_type[GROUP] \
		   and len(data[0].data) \
		   and data[0].data[0].chtype == chunk_type[CSNAME] \
		   and s(buf, data[0].data[0].data) == 'e':
		    data[0] = data[0].data[0]
		    print "invoking \\e magic group transform..."
		else:
## 		    print "GROUP -- ch.data[0].data =", ch.data[0].data
		    k = s(buf, data[0].data)
		    if k == "fulllineitems":
			del data[0]
			pp[i-1:i] = data
			i = i - 1
			length = length + len(data) - 1
			continue

	    # recursively parse the contents of the group
	    changeit(buf, data)

	elif ch.chtype == chunk_type[IF]:
	    # \if...
	    flag, negate, data = ch.data
	    ##print 'IF: flag, negate = ' + `flag, negate`
	    if flag not in flags.keys():
		raise error, 'unknown flag ' + `flag`

	    value = flags[flag]
	    if negate:
		value = (not value)
	    del pp[i-1]
	    length, i = length-1, i-1
	    if value:
		pp[i:i] = data
		length = length + len(data)


	elif ch.chtype == chunk_type[ENV]:
	    # \begin{...} ....
	    envname, data = ch.data

	    #push this environment name on stack
	    hist.inenv.insert(0, envname)

	    #append an endenv chunk after grouped data
	    data.append(chunk(ENDENV, ch.where, envname))
	    ##[`data`]

	    #delete this object
	    del pp[i-1]
	    i, length = i-1, length-1

	    #insert found data
	    pp[i:i] = data
	    length = length + len(data)

	    if envname == 'verbatim':
		pp[i:i] = [chunk(CSLINE, ch.where, 'example'),
			  chunk(GROUP, ch.where, [])]
		length, i = length+2, i+2

	    elif envname in ('itemize', 'list', 'fulllineitems'):
		if hist.itemizenesting > len(itemizesymbols):
		    raise error, 'too deep itemize nesting'
		if envname == 'list':
		    del pp[i:i+2]
		    length = length - 2
		ingroupch = [chunk(CSNAME, ch.where,
				   itemizesymbols[hist.itemizenesting])]
		hist.itemizenesting = hist.itemizenesting + 1
		pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),
			   chunk(GROUP, ch.where, ingroupch)]
		length, i = length+2, i+2

	    elif envname == 'enumerate':
		if hist.enumeratenesting > len(enumeratesymbols):
		    raise error, 'too deep enumerate nesting'
		ingroupch = [chunk(PLAIN, ch.where,
				   enumeratesymbols[hist.enumeratenesting])]
		hist.enumeratenesting = hist.enumeratenesting + 1
		pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),
			   chunk(GROUP, ch.where, ingroupch)]
		length, i = length+2, i+2

	    elif envname == 'description':
		ingroupch = [chunk(CSNAME, ch.where, 'b')]
		pp[i:i] = [chunk(CSLINE, ch.where, 'table'),
			  chunk(GROUP, ch.where, ingroupch)]
		length, i = length+2, i+2

	    elif (envname == 'tableiii') or (envname == 'tableii'):
		if (envname == 'tableii'):
		    ltable = 2
		else:
		    ltable = 3
		wh = ch.where
		newcode = []

		#delete tabular format description
		# e.g., {|l|c|l|}
		length, newi = getnextarg(length, buf, pp, i)
		del pp[i:newi]
		length = length - (newi-i)

		newcode.append(chunk(CSLINE, wh, 'table'))
		ingroupch = [chunk(CSNAME, wh, 'asis')]
		newcode.append(chunk(GROUP, wh, ingroupch))
		newcode.append(chunk(CSLINE, wh, 'item'))

		#get the name of macro for @item
		# e.g., {code}
		length, newi = getnextarg(length, buf, pp, i)

		if newi-i != 1:
		    raise error, 'Sorry, expected 1 chunk argument'
		if pp[i].chtype != chunk_type[PLAIN]:
		    raise error, 'Sorry, expected plain text argument'
		hist.itemargmacro = s(buf, pp[i].data)
		del pp[i:newi]
		length = length - (newi-i)

		itembody = []
		for count in range(ltable):
		    length, newi = getnextarg(length, buf, pp, i)
		    emphgroup = [
			      chunk(CSNAME, wh, 'emph'),
			      chunk(GROUP, 0, pp[i:newi])]
		    del pp[i:newi]
		    length = length - (newi-i)
		    if count == 0:
			itemarg = emphgroup
		    elif count == ltable-1:
			itembody = itembody + \
				  [chunk(PLAIN, wh, '  ---  ')] + emphgroup
		    else:
			itembody = emphgroup
		newcode.append(chunk(GROUP, wh, itemarg))
		newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
		pp[i:i] = newcode
		l = len(newcode)
		length, i = length+l, i+l
		del newcode, l

		if length != len(pp):
		    raise 'STILL, SOMETHING wrong', `i`

	    elif envname in ('funcdesc', 'funcdescni', 'classdesc'):
		pp.insert(i, chunk(PLAIN, ch.where, ''))
		i, length = i+1, length+1
		length, i = do_funcdesc(length, buf, pp, i,
					envname[-2:] != "ni")

	    elif envname == 'excdesc':
		pp.insert(i, chunk(PLAIN, ch.where, ''))
		i, length = i+1, length+1
		length, i = do_excdesc(length, buf, pp, i)

	    elif envname in ('datadesc', 'datadescni'):
		pp.insert(i, chunk(PLAIN, ch.where, ''))
		i, length = i+1, length+1
		length, i = do_datadesc(length, buf, pp, i,
					envname[-2:] != "ni")

	    elif envname == 'opcodedesc':
		pp.insert(i, chunk(PLAIN, ch.where, ''))
		i, length = i+1, length+1
		length, i = do_opcodedesc(length, buf, pp, i)

	    elif envname == 'seealso':
		chunks = [chunk(ENDLINE, ch.where, "\n"),
			  chunk(CSNAME, ch.where, "b"),
			  chunk(GROUP, ch.where, [
			      chunk(PLAIN, ch.where, "See also: ")]),
			  chunk(ENDLINE, ch.where, "\n"),
			  chunk(ENDLINE, ch.where, "\n")]
		pp[i-1:i] = chunks
		length = length + len(chunks) - 1
		i = i + len(chunks) - 1

	    elif envname in ('sloppypar', 'flushleft'):
		pass

	    else:
		print 'WARNING: don\'t know what to do with env ' + `envname`

	elif ch.chtype == chunk_type[ENDENV]:
	    envname = ch.data
	    if envname != hist.inenv[0]:
		raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
	    del hist.inenv[0]
	    del pp[i-1]
	    i, length = i-1, length-1

	    if envname == 'verbatim':
		pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
			   chunk(GROUP, ch.where, [
			       chunk(PLAIN, ch.where, 'example')])]
		i, length = i+2, length+2
	    elif envname in ('itemize', 'list', 'fulllineitems'):
		hist.itemizenesting = hist.itemizenesting - 1
		pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
			   chunk(GROUP, ch.where, [
			       chunk(PLAIN, ch.where, 'itemize')])]
		i, length = i+2, length+2
	    elif envname == 'enumerate':
		hist.enumeratenesting = hist.enumeratenesting-1
		pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
			   chunk(GROUP, ch.where, [
			       chunk(PLAIN, ch.where, 'enumerate')])]
		i, length = i+2, length+2
	    elif envname == 'description':
		pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
			   chunk(GROUP, ch.where, [
			       chunk(PLAIN, ch.where, 'table')])]
		i, length = i+2, length+2
	    elif (envname == 'tableiii') or (envname == 'tableii'):
		pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
			   chunk(GROUP, ch.where, [
			       chunk(PLAIN, ch.where, 'table')])]
		i, length = i+2, length + 2
		pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
		i, length = i+1, length+1

	    elif envname in ('funcdesc', 'excdesc', 'datadesc', 'classdesc',
			     'funcdescni', 'datadescni'):
		pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
			   chunk(GROUP, ch.where, [
			       chunk(PLAIN, ch.where, hist.command)])]
		i, length = i+2, length+2

	    elif envname == 'opcodedesc':
		pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
			   chunk(GROUP, ch.where, [
			       chunk(PLAIN, ch.where, "deffn")])]
		i, length = i+2, length+2

	    elif envname in ('seealso', 'sloppypar', 'flushleft'):
		pass

	    else:
		print 'WARNING: ending env %s has no actions' % `envname`

	elif ch.chtype == chunk_type[CSNAME]:
	    # control name transformations
	    s_buf_data = s(buf, ch.data)
	    if s_buf_data == 'optional':
		pp[i-1].chtype = chunk_type[PLAIN]
		pp[i-1].data = '['
		if (i < length) and \
		   (pp[i].chtype == chunk_type[GROUP]):
		    cp=pp[i].data
		    pp[i:i+1]=cp + [
			chunk(PLAIN, ch.where, ']')]
		    length = length+len(cp)
	    elif s_buf_data in ignoredcommands:
		del pp[i-1]
		i, length = i-1, length-1
	    elif s_buf_data == '@' and \
		      i != length and \
		      pp[i].chtype == chunk_type[PLAIN] and \
		      s(buf, pp[i].data)[0] == '.':
		# \@. --> \. --> @.
		ch.data = '.'
		del pp[i]
		length = length-1
	    elif s_buf_data == '\\':
		# \\ --> \* --> @*
		ch.data = '*'
	    elif len(s_buf_data) == 1 and \
		      s_buf_data in onlylatexspecial:
		ch.chtype = chunk_type[PLAIN]
		# check if such a command is followed by
		# an empty group: e.g., `\%{}'.  If so, remove
		# this empty group too
		if i < length and \
			  pp[i].chtype == chunk_type[GROUP] \
			  and len(pp[i].data) == 0:
		    del pp[i]
		    length = length-1

	    elif hist.inargs and s_buf_data in inargsselves:
		# This is the special processing of the
		# arguments of the \begin{funcdesc}... or
		# \funcline... arguments
		# \, --> , \[ --> [, \] --> ]
		ch.chtype = chunk_type[PLAIN]

	    elif s_buf_data == 'setindexsubitem':
		stuff = pp[i].data
		if len(stuff) != 1:
		    raise error, "parameter to \\setindexsubitem{} too long"
		if pp[i].chtype != chunk_type[GROUP]:
		    raise error, "bad chunk type following \\setindexsubitem" \
			  "\nexpected GROUP, got " + str(ch.chtype)
		text = s(buf, stuff[0].data)
		if text[:1] != '(' or text[-1:] != ')':
		    raise error, \
			  'expected indexsubitem enclosed in parenteses'
		hist.indexsubitem = string.split(text[1:-1])
		del stuff, text
		del pp[i-1:i+1]
		i = i - 1
		length = length - 2

	    elif s_buf_data == 'newcommand':
		print "ignoring definition of \\" + s(buf, pp[i].data[0].data)
		del pp[i-1:i+2]
		i = i - 1
		length = length - 3

	    elif s_buf_data == 'mbox':
		stuff = pp[i].data
		pp[i-1:i+1] = stuff
		i = i - 1
		length = length + len(stuff) - 2

	    elif s_buf_data == 'version':
		ch.chtype = chunk_type[PLAIN]
		ch.data = release_version

	    elif s_buf_data == 'program':
		ch.data = "strong"

	    elif s_buf_data == 'item':
		ch.chtype = chunk_type[CSLINE]
		length, newi = getoptarg(length, buf, pp, i)
		ingroupch = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)
		changeit(buf, ingroupch) # catch stuff inside the optional arg
		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		i, length = i+1, length+1

	    elif s_buf_data == 'ttindex':
		idxsi = hist.indexsubitem

		cat_class = ''
		if len(idxsi) >= 2 and idxsi[1] in \
			  ('method', 'function', 'protocol'):
		    command = 'findex'
		elif len(idxsi) >= 2 and idxsi[1] in \
			  ('exception', 'object'):
		    command = 'vindex'
		elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
		    command = 'cindex'
		else:
		    print 'WARNING: can\'t categorize ' + `idxsi` \
			  + ' for \'ttindex\' command'
		    command = 'cindex'

		if not cat_class:
		    cat_class = '('+string.join(idxsi)+')'

		ch.chtype = chunk_type[CSLINE]
		ch.data = command

		length, newi = getnextarg(length, buf, pp, i)
		arg = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)

		cat_arg = [chunk(PLAIN, ch.where, cat_class)]

		# determine what should be set in roman, and
		# what in tt-font
		if command in regindices:

		    arg = [chunk(CSNAME, ch.where, 't'),
			      chunk(GROUP, ch.where, arg)]
		else:
		    cat_arg = [chunk(CSNAME, ch.where, 'r'),
			      chunk(GROUP, ch.where, cat_arg)]

		ingroupch = arg + \
			  [chunk(PLAIN, ch.where, ' ')] + \
			  cat_arg

		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		length, i = length+1, i+1

	    elif s_buf_data == 'ldots':
		# \ldots --> \dots{} --> @dots{}
		ch.data = 'dots'
		if i == length \
			  or pp[i].chtype != chunk_type[GROUP] \
			  or pp[i].data != []:
		    pp.insert(i, chunk(GROUP, ch.where, []))
		    i, length = i+1, length+1
	    elif s_buf_data in themselves:
		# \UNIX --> &UNIX;
		ch.chtype = chunk_type[PLAIN]
		if i != length \
			  and pp[i].chtype == chunk_type[GROUP] \
			  and pp[i].data == []:
		    del pp[i]
		    length = length-1
	    elif s_buf_data in for_texi:
		pass

	    elif s_buf_data == 'manpage':
		ch.data = 'emph'
		sect = s(buf, pp[i+1].data[0].data)
		pp[i+1].data = "(%s)" % sect
		pp[i+1].chtype = chunk_type[PLAIN]

	    elif s_buf_data == 'e':
		# "\e" --> "\"
		ch.data = '\\'
		ch.chtype = chunk_type[PLAIN]
	    elif s_buf_data in ('lineiii', 'lineii'):
		# This is the most tricky one
		# \lineiii{a1}{a2}[{a3}] -->
		# @item @<cts. of itemargmacro>{a1}
		#  a2 [ -- a3]
		#
		if not hist.inenv:
		    raise error, 'no environment for lineiii'
		if (hist.inenv[0] != 'tableiii') and \
		   (hist.inenv[0] != 'tableii'):
		    raise error, \
			  'wrong command (%s) in wrong environment (%s)' \
			  % (s_buf_data, `hist.inenv[0]`)
		ch.chtype = chunk_type[CSLINE]
		ch.data = 'item'
		length, newi = getnextarg(length, buf, pp, i)
		ingroupch = [chunk(CSNAME, 0, hist.itemargmacro),
			     chunk(GROUP, 0, pp[i:newi])]
		del pp[i:newi]
		length = length - (newi-i)
		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		grouppos = i
		i, length = i+1, length+1
		length, i = getnextarg(length, buf, pp, i)
		length, newi = getnextarg(length, buf, pp, i)
		if newi > i:
		    # we have a 3rd arg
		    pp.insert(i, chunk(PLAIN, ch.where, '  ---  '))
		    i = newi + 1
		    length = length + 1
		if length != len(pp):
		    raise 'IN LINEIII IS THE ERR', `i`

	    elif s_buf_data in ('chapter', 'section', 'subsection', 'subsubsection'):
		#\xxxsection{A} ---->
		# @node A, , ,
		# @xxxsection A
		## also: remove commas and quotes
		ch.chtype = chunk_type[CSLINE]
		length, newi = getnextarg(length, buf, pp, i)
		afternodenamecmd = next_command_p(length, buf,
						  pp, newi, 'nodename')
		if afternodenamecmd < 0:
		    cp1 = crcopy(pp[i:newi])
		    pp[i:newi] = [chunk(GROUP, ch.where, pp[i:newi])]
		    length, newi = length - (newi-i) + 1, i+1
		    text = flattext(buf, cp1)
		    text = invent_node_names(text)
		else:
		    length, endarg = getnextarg(length, buf,
						pp, afternodenamecmd)
		    cp1 = crcopy(pp[afternodenamecmd:endarg])
		    del pp[newi:endarg]
		    length = length - (endarg-newi)

		    pp[i:newi] = [chunk(GROUP, ch.where, pp[i:newi])]
		    length, newi = length - (newi-i) + 1, i + 1
		    text = flattext(buf, cp1)
		if text[-1] == '.':
		    text = text[:-1]
		if text in hist.nodenames:
		    print 'WARNING: node name ' + `text` + ' already used'
		    out.doublenodes.append(text)
		else:
		    hist.nodenames.append(text)
		text = rm_commas_etc(text)
		pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'node'),
			       chunk(GROUP, ch.where, [
				   chunk(PLAIN, ch.where, text+', , ,')
				   ])]
		i, length = newi+2, length+2

	    elif s_buf_data == 'funcline':
		# fold it to a very short environment
		pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
			       chunk(GROUP, ch.where, [
				   chunk(PLAIN, ch.where, hist.command)])]
		i, length = i+2, length+2
		length, i = do_funcdesc(length, buf, pp, i)

	    elif s_buf_data == 'dataline':
		pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
			       chunk(GROUP, ch.where, [
				   chunk(PLAIN, ch.where, hist.command)])]
		i, length = i+2, length+2
		length, i = do_datadesc(length, buf, pp, i)

	    elif s_buf_data == 'excline':
		pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
			       chunk(GROUP, ch.where, [
				   chunk(PLAIN, ch.where, hist.command)])]
		i, length = i+2, length+2
		length, i = do_excdesc(length, buf, pp, i)

	    elif s_buf_data == 'index':
		#\index{A} --->
		# @cindex A
		ch.chtype = chunk_type[CSLINE]
		ch.data = 'cindex'
		length, newi = getnextarg(length, buf, pp, i)

		ingroupch = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)
		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		length, i = length+1, i+1

	    elif s_buf_data == 'bifuncindex':
		ch.chtype = chunk_type[CSLINE]
		ch.data = 'findex'
		length, newi = getnextarg(length, buf, pp, i)
		ingroupch = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)

		ingroupch.append(chunk(PLAIN, ch.where, ' '))
		ingroupch.append(chunk(CSNAME, ch.where, 'r'))
		ingroupch.append(chunk(GROUP, ch.where, [
			  chunk(PLAIN, ch.where,
			  '(built-in function)')]))

		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		length, i = length+1, i+1

	    elif s_buf_data == 'obindex':
		ch.chtype = chunk_type[CSLINE]
		ch.data = 'findex'
		length, newi = getnextarg(length, buf, pp, i)
		ingroupch = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)

		ingroupch.append(chunk(PLAIN, ch.where, ' '))
		ingroupch.append(chunk(CSNAME, ch.where, 'r'))
		ingroupch.append(chunk(GROUP, ch.where, [
			  chunk(PLAIN, ch.where,
			  '(object)')]))

		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		length, i = length+1, i+1

	    elif s_buf_data == 'opindex':
		ch.chtype = chunk_type[CSLINE]
		ch.data = 'findex'
		length, newi = getnextarg(length, buf, pp, i)
		ingroupch = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)

		ingroupch.append(chunk(PLAIN, ch.where, ' '))
		ingroupch.append(chunk(CSNAME, ch.where, 'r'))
		ingroupch.append(chunk(GROUP, ch.where, [
			  chunk(PLAIN, ch.where,
			  '(operator)')]))

		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		length, i = length+1, i+1

	    elif s_buf_data in ('bimodindex', 'refbimodindex'):
		ch.chtype = chunk_type[CSLINE]
		ch.data = 'pindex'
		length, newi = getnextarg(length, buf, pp, i)
		ingroupch = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)

		ingroupch.append(chunk(PLAIN, ch.where, ' '))
		ingroupch.append(chunk(CSNAME, ch.where, 'r'))
		ingroupch.append(chunk(GROUP, ch.where, [
			  chunk(PLAIN, ch.where,
			  '(built-in)')]))

		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		length, i = length+1, i+1

	    elif s_buf_data == 'refmodindex':
		ch.chtype = chunk_type[CSLINE]
		ch.data = 'pindex'
		length, newi = getnextarg(length, buf, pp, i)
		ingroupch = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)

		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		length, i = length+1, i+1

	    elif s_buf_data == 'sectcode':
		ch.data = 'code'

	    elif s_buf_data in ('stmodindex', 'refstmodindex'):
		ch.chtype = chunk_type[CSLINE]
		# use the program index as module index
		ch.data = 'pindex'
		length, newi = getnextarg(length, buf, pp, i)
		ingroupch = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)

		ingroupch.append(chunk(PLAIN, ch.where, ' '))
		ingroupch.append(chunk(CSNAME, ch.where, 'r'))
		ingroupch.append(chunk(GROUP, ch.where, [
			  chunk(PLAIN, ch.where,
			  '(standard)')]))

		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		length, i = length+1, i+1

	    elif s_buf_data in ('stmodindex', 'refstmodindex'):
		ch.chtype = chunk_type[CSLINE]
		# use the program index as module index
		ch.data = 'pindex'
		length, newi = getnextarg(length, buf, pp, i)
		ingroupch = pp[i:newi]
		del pp[i:newi]
		length = length - (newi-i)

		ingroupch.append(chunk(PLAIN, ch.where, ' '))
		ingroupch.append(chunk(CSNAME, ch.where, 'r'))
		ingroupch.append(chunk(GROUP, ch.where, [
			  chunk(PLAIN, ch.where,
			  '(standard)')]))

		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		length, i = length+1, i+1

	    elif s_buf_data in ('stindex', 'kwindex'):
		# XXX must actually go to newindex st
		what = (s_buf_data[:2] == "st") and "statement" or "keyword"
		wh = ch.where
		ch.chtype = chunk_type[CSLINE]
		ch.data = 'cindex'
		length, newi = getnextarg(length, buf, pp, i)
		ingroupch = [chunk(CSNAME, wh, 'code'),
			     chunk(GROUP, wh, pp[i:newi])]

		del pp[i:newi]
		length = length - (newi-i)

		t = ingroupch[:]
		t.append(chunk(PLAIN, wh, ' ' + what))

		pp.insert(i, chunk(GROUP, wh, t))
		i, length = i+1, length+1

		pp.insert(i, chunk(CSLINE, wh, 'cindex'))
		i, length = i+1, length+1

		t = ingroupch[:]
		t.insert(0, chunk(PLAIN, wh, what + ', '))

		pp.insert(i, chunk(GROUP, wh, t))
		i, length = i+1, length+1

	    elif s_buf_data == 'indexii':
		#\indexii{A}{B} --->
		# @cindex A B
		# @cindex B, A
		length, newi = getnextarg(length, buf, pp, i)
		cp11 = pp[i:newi]
		cp21 = crcopy(pp[i:newi])
		del pp[i:newi]
		length = length - (newi-i)
		length, newi = getnextarg(length, buf, pp, i)
		cp12 = pp[i:newi]
		cp22 = crcopy(pp[i:newi])
		del pp[i:newi]
		length = length - (newi-i)

		ch.chtype = chunk_type[CSLINE]
		ch.data = 'cindex'
		pp.insert(i, chunk(GROUP, ch.where, cp11 + [
			  chunk(PLAIN, ch.where, ' ')] + cp12))
		i, length = i+1, length+1
		pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
			  chunk(GROUP, ch.where, cp22 + [
			  chunk(PLAIN, ch.where, ', ')]+ cp21)]
		i, length = i+2, length+2

	    elif s_buf_data == 'indexiii':
		length, newi = getnextarg(length, buf, pp, i)
		cp11 = pp[i:newi]
		cp21 = crcopy(pp[i:newi])
		cp31 = crcopy(pp[i:newi])
		del pp[i:newi]
		length = length - (newi-i)
		length, newi = getnextarg(length, buf, pp, i)
		cp12 = pp[i:newi]
		cp22 = crcopy(pp[i:newi])
		cp32 = crcopy(pp[i:newi])
		del pp[i:newi]
		length = length - (newi-i)
		length, newi = getnextarg(length, buf, pp, i)
		cp13 = pp[i:newi]
		cp23 = crcopy(pp[i:newi])
		cp33 = crcopy(pp[i:newi])
		del pp[i:newi]
		length = length - (newi-i)

		ch.chtype = chunk_type[CSLINE]
		ch.data = 'cindex'
		pp.insert(i, chunk(GROUP, ch.where, cp11 + [
			  chunk(PLAIN, ch.where, ' ')] + cp12
			  + [chunk(PLAIN, ch.where, ' ')]
			  + cp13))
		i, length = i+1, length+1
		pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
			  chunk(GROUP, ch.where, cp22 + [
			  chunk(PLAIN, ch.where, ' ')]+ cp23
			  + [chunk(PLAIN, ch.where, ', ')] +
			  cp21)]
		i, length = i+2, length+2
		pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
			  chunk(GROUP, ch.where, cp33 + [
			  chunk(PLAIN, ch.where, ', ')]+ cp31
			  + [chunk(PLAIN, ch.where, ' ')] +
			  cp32)]
		i, length = i+2, length+2

	    elif s_buf_data == 'indexiv':
		length, newi = getnextarg(length, buf, pp, i)
		cp11 = pp[i:newi]
		cp21 = crcopy(pp[i:newi])
		cp31 = crcopy(pp[i:newi])
		cp41 = crcopy(pp[i:newi])
		del pp[i:newi]
		length = length - (newi-i)
		length, newi = getnextarg(length, buf, pp, i)
		cp12 = pp[i:newi]
		cp22 = crcopy(pp[i:newi])
		cp32 = crcopy(pp[i:newi])
		cp42 = crcopy(pp[i:newi])
		del pp[i:newi]
		length = length - (newi-i)
		length, newi = getnextarg(length, buf, pp, i)
		cp13 = pp[i:newi]
		cp23 = crcopy(pp[i:newi])
		cp33 = crcopy(pp[i:newi])
		cp43 = crcopy(pp[i:newi])
		del pp[i:newi]
		length = length - (newi-i)
		length, newi = getnextarg(length, buf, pp, i)
		cp14 = pp[i:newi]
		cp24 = crcopy(pp[i:newi])
		cp34 = crcopy(pp[i:newi])
		cp44 = crcopy(pp[i:newi])
		del pp[i:newi]
		length = length - (newi-i)

		ch.chtype = chunk_type[CSLINE]
		ch.data = 'cindex'
		ingroupch = cp11 + \
			  spacech + cp12 + \
			  spacech + cp13 + \
			  spacech + cp14
		pp.insert(i, chunk(GROUP, ch.where, ingroupch))
		i, length = i+1, length+1
		ingroupch = cp22 + \
			  spacech + cp23 + \
			  spacech + cp24 + \
			  commach + cp21
		pp[i:i] = cindexch + [
			  chunk(GROUP, ch.where, ingroupch)]
		i, length = i+2, length+2
		ingroupch = cp33 + \
			  spacech + cp34 + \
			  commach + cp31 + \
			  spacech + cp32
		pp[i:i] = cindexch + [
			  chunk(GROUP, ch.where, ingroupch)]
		i, length = i+2, length+2
		ingroupch = cp44 + \
			  commach + cp41 + \
			  spacech + cp42 + \
			  spacech + cp43
		pp[i:i] = cindexch + [
			  chunk(GROUP, ch.where, ingroupch)]
		i, length = i+2, length+2

	    elif s_buf_data == 'seemodule':
		ch.data = "code"
		# this is needed for just one of the input files... -sigh-
		while pp[i+1].chtype == chunk_type[COMMENT]:
		    i = i + 1
		data = pp[i+1].data
		oparen = chunk(PLAIN, ch.where, " (")
		data.insert(0, oparen)
		data.append(chunk(PLAIN, ch.where, ")"))
		pp[i+1:i+2] = data
		length = length + len(data) - 1

	    elif s_buf_data == 'seetext':
		data = pp[i].data
		data.insert(0, chunk(ENDLINE, ch.where, "\n"))
		pp[i-1:i+1] = data
		i = i - 1
		length = length + len(data) - 2

	    elif s_buf_data == "quad":
		ch.chtype = PLAIN
		ch.data = "    "

	    elif s_buf_data in ('noindent', 'indexsubitem', 'footnote'):
		pass

	    elif s_buf_data in ('url', 'module', 'function', 'cfunction',
				'keyword', 'method', 'exception', 'constant',
				'email', 'class', 'member', 'cdata', 'ctype'):
		ch.data = "code"

	    elif s_buf_data == 'label':
		name = s(buf, pp[i].data[0].data)
		del pp[i-1:i+1]
		length = length - 2
		i = i - 1
		label_nodes[name] = hist.nodenames[-1]

	    elif s_buf_data == 'rfc':
		ch.chtype = chunk_type[PLAIN]
		ch.data = "RFC " + s(buf, pp[i].data[0].data)
		del pp[i]
		length = length - 1

	    elif s_buf_data == 'Large':
		del pp[i-1]
		i = i - 1
		length = length - 1

	    elif s_buf_data == 'ref':
		name = s(buf, pp[i].data[0].data)
		if label_nodes.has_key(name):
		    pp[i].data[0].data = label_nodes[name]
		else:
		    pp[i-1:i+1] = [
			chunk(PLAIN, ch.where,
			      "(unknown node reference: %s)" % name)]
		    length = length - 1
		    print "WARNING: unknown node label", `name`

	    else:
		print "don't know what to do with keyword " + s_buf_data


# Patterns used by dumpit().  re_atsign finds the characters (@, { and })
# that dumpit prefixes with '@' when writing plain text; re_newline finds
# the newlines that dumpit replaces by spaces inside a CSLINE argument.
re_atsign = regex.compile('[@{}]')
re_newline = regex.compile('\n')

def dumpit(buf, wm, pp):

    global out

    i, length = 0, len(pp)

    addspace = 0

    while 1:
	if len(pp) != length:
	    raise 'FATAL', 'inconsistent length'
	if i == length:
	    break
	ch = pp[i]
	i = i + 1

	dospace = addspace
	addspace = 0

	if ch.chtype == chunk_type[CSNAME]:
	    s_buf_data = s(buf, ch.data)
            if s_buf_data == 'e':
                wm('\\')
                continue
            if s_buf_data == '$':
                wm('$')
                continue
	    wm('@' + s_buf_data)
	    if s_buf_data == 'node' and \
		      pp[i].chtype == chunk_type[PLAIN] and \
		      s(buf, pp[i].data) in out.doublenodes:
		##XXX doesnt work yet??
		wm(' ZZZ-' + zfill(`i`, 4))
	    if s_buf_data[0] in string.letters:
		addspace = 1
	elif ch.chtype == chunk_type[PLAIN]:
	    if dospace and s(buf, ch.data) not in (' ', '\t'):
		wm(' ')
	    text = s(buf, ch.data)
	    while 1:
		pos = re_atsign.search(text)
		if pos < 0:
		    break
		wm(text[:pos] + '@' + text[pos])
		text = text[pos+1:]
	    wm(text)
	elif ch.chtype == chunk_type[GROUP]:
	    wm('{')
	    dumpit(buf, wm, ch.data)
	    wm('}')
	elif ch.chtype == chunk_type[DENDLINE]:
	    wm('\n\n')
	    while i != length and pp[i].chtype in \
		      (chunk_type[DENDLINE], chunk_type[ENDLINE]):
		i = i + 1
	elif ch.chtype == chunk_type[OTHER]:
	    wm(s(buf, ch.data))
	elif ch.chtype == chunk_type[ACTIVE]:
	    wm(s(buf, ch.data))
	elif ch.chtype == chunk_type[ENDLINE]:
	    wm('\n')
	elif ch.chtype == chunk_type[CSLINE]:
	    if i >= 2 and pp[i-2].chtype not in \
		      (chunk_type[ENDLINE], chunk_type[DENDLINE]) \
		      and (pp[i-2].chtype != chunk_type[PLAIN]
		      or s(buf, pp[i-2].data)[-1] != '\n'):

		wm('\n')
	    wm('@' + s(buf, ch.data))
	    if i == length:
		raise error, 'CSLINE expected another chunk'
	    if pp[i].chtype != chunk_type[GROUP]:
		raise error, 'CSLINE expected GROUP'
	    if type(pp[i].data) != ListType:
		raise error, 'GROUP chould contain []-data'

	    wobj = Wobj()
	    dumpit(buf, wobj.write, pp[i].data)
	    i = i + 1
	    text = wobj.data
	    del wobj
	    if text:
		wm(' ')
		while 1:
		    pos = re_newline.search(text)
		    if pos < 0:
			break
		    print 'WARNING: found newline in csline arg'
		    wm(text[:pos] + ' ')
		    text = text[pos+1:]
		wm(text)
	    if i >= length or \
		      pp[i].chtype not in (chunk_type[CSLINE],
		      chunk_type[ENDLINE], chunk_type[DENDLINE]) \
		      and (pp[i].chtype != chunk_type[PLAIN]
		      or s(buf, pp[i].data)[0] != '\n'):
		wm('\n')

	elif ch.chtype == chunk_type[COMMENT]:
## 	    print 'COMMENT: previous chunk =', pp[i-2]
## 	    if pp[i-2].chtype == chunk_type[PLAIN]:
## 		print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
	    if s(buf, ch.data) and \
		      regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
		if i >= 2 \
		   and pp[i-2].chtype not in (chunk_type[ENDLINE], chunk_type[DENDLINE]) \
		   and not (pp[i-2].chtype == chunk_type[PLAIN]
			    and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
		    wm('\n')
		wm('@c ' + s(buf, ch.data))
	elif ch.chtype == chunk_type[IGNORE]:
	    pass
	else:
	    try:
		str = `s(buf, ch.data)`
	    except TypeError:
		str = `ch.data`
	    if len(str) > 400:
		str = str[:400] + '...'
	    print 'warning:', ch.chtype, 'not handled, data ' + str


def main():
    global release_version
    outfile = None
    headerfile = 'texipre.dat'
    trailerfile = 'texipost.dat'

    try:
	opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:v:')
    except getopt.error:
	args = []

    if not args:
	print 'usage: partparse [-o outfile] [-h headerfile]',
	print '[-t trailerfile] file ...'
	sys.exit(2)

    for opt, arg in opts:
	if opt == '-o': outfile = arg
	if opt == '-h': headerfile = arg
	if opt == '-t': trailerfile = arg
	if opt == '-v': release_version = arg

    if not outfile:
	root, ext = os.path.splitext(args[0])
	outfile = root + '.texi'

    if outfile in args:
	print 'will not overwrite input file', outfile
	sys.exit(2)

    outf = open(outfile, 'w')
    outf.write(open(headerfile, 'r').read())

    for file in args:
	if len(args) > 1: print '='*20, file, '='*20
	buf = open(file, 'r').read()
	w, pp = parseit(buf)
	startchange()
	changeit(buf, pp)
	dumpit(buf, outf.write, pp)

    outf.write(open(trailerfile, 'r').read())

    outf.close()

if __name__ == "__main__":
    main()