cpython/Demo/ibrowse/ifile.py

# Tools for info file processing.

# XXX Need to be more careful with reading ahead searching for nodes.


import regexp
import string


# Exported exceptions.
# These are string exceptions.  They are raised together with a value:
# try_open() raises NoSuchFile with the file name it was given, and
# find_node() raises NoSuchNode with the (lower-cased) node name.
#
NoSuchFile = 'no such file'
NoSuchNode = 'no such node'


# The search path for info files; this is site-specific.
# Directory names should end in a partname delimiter,
# so they can simply be concatenated to a relative pathname.
# try_open() tries the entries in order and uses the first one that
# opens; the leading '' entry means the name is first tried as given.
#
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:']	# Mac
INFOPATH = ['', '/usr/local/emacs/info/']	# X11 on UNIX


# Tunable constants.
# FUZZ and CHUNKSIZE are defined as multiples of BLOCKSIZE, so changing
# BLOCKSIZE rescales all three.
#
BLOCKSIZE = 512			# Qty to align reads to, if possible
FUZZ = 2*BLOCKSIZE		# Qty to back-up before searching for a node
CHUNKSIZE = 4*BLOCKSIZE		# Qty to read at once when reading lots of data


# Regular expressions used.
# Note that it is essential that Python leaves unrecognized backslash
# escapes in a string so they can be seen by regexp.compile!
#
# Every name below is the bound match method of a compiled pattern.
# The code in this file calls them as matcher(string) or
# matcher(string, startoffset), tests the result for truth, and unpacks
# a successful result as a sequence of (start, end) index pairs: first
# the whole match, then one pair per parenthesized group.
# NOTE(review): callers rely on a match being found anywhere at or after
# the start offset (i.e. a search, not an anchored match) -- confirm
# against the regexp module.
#
# findheader:   ^_ ('\037'), an optional form feed ('\014'), a newline,
#               then group 1 = the complete following line (the header).
# findescape:   a single ^_ character.
# parseheader:  "Node:" or "node:" followed by the node name (group 1);
#               applied to a header line.
# findfirstline: the first line of a string, including its newline.
# findnode:     same pattern as parseheader.
# findprev:     "[ious]*" is a character class, so this accepts "Prev:"
#               as well as "Previous:" (and any other run of those letters).
# findnext, findup: the "Next:" and "Up:" links, name in group 1.
# findmenu:     the "* Menu:" line that starts a menu.
# findmenuitem: "* TOPIC:" followed by a reference; group 1 is the topic,
#               group 2 is ':' or "(FILE)NODE" or "NODE".
# findfootnote: "*Note TOPIC:" (or "*note") followed by a reference;
#               group 1 is the topic, group 2 is ':' or the reference.
# parsenoderef: "(FILE)NODE"; group 1 is FILE, group 2 is NODE.
#
findheader = regexp.compile('\037\014?\n(.*\n)').match
findescape = regexp.compile('\037').match
parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
findfirstline = regexp.compile('^.*\n').match
findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
findmenu = regexp.compile('^\* [mM]enu:').match
findmenuitem = regexp.compile( \
	'^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
findfootnote = regexp.compile( \
	'\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
parsenoderef = regexp.compile('^\((.*)\)(.*)$').match


# Get a node and all information pertaining to it.
# This doesn't work if there is an indirect tag table,
# and in general you are better off using icache.get_node() instead.
# Functions get_whole_file() and get_file_node() provide part
# functionality used by icache.
# Raise NoSuchFile or NoSuchNode as appropriate.
#
def get_node(curfile, ref):
	# Resolve the reference, then fetch either one node or, for the
	# special node name '*', the whole file as a single node.
	# No tags table is available here, so the offset hint is 0.
	file, node = parse_ref(curfile, ref)
	if node != '*':
		return get_file_node(file, 0, node)
	return get_whole_file(file)
#
def get_whole_file(file):
	# Return the entire file as one pseudo-node named '*'.
	# The result has the same shape as that of get_file_node():
	# (file, node, (prev, next, up), menu, footnotes, text),
	# with empty links, an empty menu and no footnotes.
	# Raises NoSuchFile if the file can't be opened.
	f = try_open(file) # May raise NoSuchFile
	try:
		text = f.read()
	finally:
		# Don't leave the file open until the object is collected.
		f.close()
	header, menu, footnotes = ('', '', ''), [], []
	return file, '*', header, menu, footnotes, text
#
def get_file_node(file, offset, node):
	# Fetch one node from the given file and analyze it.
	# offset is the position hint passed on to find_node()
	# (zero if there is no tags table).
	# Returns (file, node, (prev, next, up), menu, footnotes, text),
	# where node is the name as found in the node's own header.
	# Raises NoSuchFile or NoSuchNode as appropriate.
	f = try_open(file) # May raise NoSuchFile
	try:
		text = find_node(f, offset, node) # May raise NoSuchNode
	finally:
		# find_node() returns the node text as a string, so the file
		# is no longer needed -- close it even if the node is missing.
		f.close()
	node, header, menu, footnotes = analyze_node(text)
	return file, node, header, menu, footnotes, text


# Parse a node reference into a file (possibly default) and node name.
# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
# Default file is the curfile argument; default node is Top.
# A node value of '*' is a special case: the whole file should
# be interpreted (by the caller!) as a single node.
#
def parse_ref(curfile, ref):
	# Start from the "plain NODE" interpretation and override it
	# when the reference has the "(FILE)NODE" shape.
	file, node = curfile, ref
	spans = parsenoderef(ref)
	if spans:
		(a, b), (f1, f2), (n1, n2) = spans
		file = ref[f1:f2]
		node = ref[n1:n2]
	# Empty parts fall back to the defaults: the current file
	# and the node named Top.
	return file or curfile, node or 'Top'


# Extract node name, links, menu and footnotes from the node text.
#
def analyze_node(text):
	# Split the text of one node into its parts.
	# Returns (node, (prev, next, up), menu, footnotes) where node and
	# the three links are strings ('' when absent from the header) and
	# menu and footnotes are lists of (topic, ref) pairs.
	#
	# Get node name and links from the header line
	#
	# The header is the first line of the text.  read_node() strips
	# trailing newlines, so a node that consists of nothing but its
	# header contains no newline; the whole text is the header then.
	#
	match = findfirstline(text)
	if match:
		(a, b) = match[0]
		line = text[a:b]
	else:
		line = text
	#
	# Search the header line only: looking through the whole text
	# would let words like "Next:" or "Setup:" in the node body pass
	# for a link that the header doesn't have.
	#
	node = get_it(line, findnode)
	prevnode = get_it(line, findprev)
	nextnode = get_it(line, findnext)
	upnode = get_it(line, findup)
	#
	# Get the menu items, if there is a menu
	#
	menu = []
	match = findmenu(text)
	if match:
		(a, b) = match[0]
		# Menu items are looked for after the "* Menu:" marker
		menu = _collect_refs(text, findmenuitem, b)
	#
	# Get the footnotes
	#
	footnotes = _collect_refs(text, findfootnote, 0)
	#
	return node, (prevnode, nextnode, upnode), menu, footnotes
#
def _collect_refs(text, matcher, pos):
	# Helper for analyze_node(): apply matcher to text over and over,
	# starting at pos and resuming at the end of each match, and
	# return the list of (topic, ref) pairs found.
	# A ref of ':' is shorthand for "same as the topic".
	refs = []
	while 1:
		match = matcher(text, pos)
		if not match:
			break
		(a, pos), (a1, b1), (a2, b2) = match
		topic, ref = text[a1:b1], text[a2:b2]
		if ref == ':':
			ref = topic
		refs.append((topic, ref))
	return refs
#
def get_it(line, matcher):
	# Apply matcher to line and return the text of its first group,
	# or '' if the matcher finds nothing.
	spans = matcher(line)
	if spans:
		(a, b), (lo, hi) = spans
		return line[lo:hi]
	return ''


# Find a node in an open file.
# The offset (from the tags table) is a hint about the node's position.
# Pass zero if there is no tags table.
# Raise NoSuchNode if the node isn't found.
# NB: This seeks around in the file.
#
def find_node(f, offset, node):
	# Return the text of the named node (as produced by read_node()),
	# comparing node names case-insensitively.
	# The search only runs forward from the backed-up offset, so a
	# node that lies before that position is not found.
	node = string.lower(node) # Just to be sure
	#
	# Position a little before the given offset,
	# so we may find the node even if it has moved around
	# in the file a little.
	# The start is FUZZ bytes earlier, rounded down to a multiple
	# of BLOCKSIZE and never before the start of the file.
	#
	offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE)
	f.seek(offset)
	#
	# Loop, hunting for a matching node header.
	# The outer loop starts a fresh buffer per chunk; the inner loop
	# walks the headers in the buffer, with i the scan position.
	#
	while 1:
		buf = f.read(CHUNKSIZE)
		if not buf:
			break
		i = 0
		while 1:
			match = findheader(buf, i)
			if match:
				(a,b), (a1,b1) = match
				# Group 1 is the header line; remember where it
				# starts, since the node text begins there.
				start = a1
				line = buf[a1:b1]
				i = b
				match = parseheader(line)
				if match:
					(a,b), (a1,b1) = match
					key = string.lower(line[a1:b1])
					if key == node:
						# Got it!  Now read the rest.
						return read_node(f, buf[start:])
			elif findescape(buf, i):
				# There is a separator left but no complete header
				# after it in the buffer: extend the buffer
				# and look again from the same position.
				next = f.read(CHUNKSIZE)
				if not next:
					break
				buf = buf + next
			else:
				# Nothing more of interest here; get a new chunk.
				break
	#
	# If we get here, we didn't find it.  Too bad.
	# (The value raised is the lower-cased node name.)
	#
	raise NoSuchNode, node


# Finish off getting a node (subroutine for find_node()).
# The node begins at the start of buf and may end in buf;
# if it doesn't end there, read additional data from f.
#
def read_node(f, buf):
	# Grow buf chunk by chunk until it holds the separator that ends
	# the node, or until the file runs out.  Only the newly appended
	# part is searched on each round.
	scanfrom = 0
	while 1:
		hit = findescape(buf, scanfrom)
		if hit:
			# The node ends where the separator starts
			(end, b) = hit[0]
			break
		more = f.read(CHUNKSIZE)
		if not more:
			# End of file: the node runs to the end of the data
			end = len(buf)
			break
		scanfrom = len(buf)
		buf = buf + more
	# Drop the newlines (and nothing else) at the end of the node
	while end > 0 and buf[end-1] == '\n':
		end = end - 1
	return buf[:end]


# Read reverse starting at offset until the beginning of a node is found.
# Then return a buffer containing the beginning of the node,
# with f positioned just after the buffer.
# The buffer will contain at least the full header line of the node;
# the caller should finish off with read_node() if it is the right node.
# (It is also possible that the buffer extends beyond the node!)
# Return an empty string if there is no node before the given offset.
#
def backup_node(f, offset):
	# Scan windows of the file backwards, beginning with the window
	# that ends at offset, until one contains a node header.
	# The first window starts CHUNKSIZE before offset, rounded down
	# to a multiple of BLOCKSIZE and clipped at the start of the file.
	start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)
	end = offset
	while start < end:
		f.seek(start)
		buf = f.read(end-start)
		i = 0
		# hit is the buffer index of the last header line seen in
		# this window, or -1 if there is none (so far).
		hit = -1
		while 1:
			match = findheader(buf, i)
			if match:
				(a,b), (a1,b1) = match
				hit = a1
				i = b
			elif end < offset and findescape(buf, i):
				# A separator without a complete header after it:
				# extend the window forward by at most BLOCKSIZE,
				# but never beyond the original offset.
				# (In the first window end == offset, so this
				# branch is only taken in earlier windows.)
				next = f.read(min(offset-end, BLOCKSIZE))
				if not next:
					break
				buf = buf + next
				end = end + len(next)
			else:
				break
		if hit >= 0:
			# Return from the start of the last header onwards
			return buf[hit:]
		# No header here; move the window CHUNKSIZE further back
		end = start
		start = max(0, end - CHUNKSIZE)
	# Reached the start of the file without finding a header
	return ''


# Make a tag table for the given file by scanning the file.
# The file must be open for reading, and positioned at the beginning
# (or wherever the hunt for tags must begin; it is read till the end).
#
def make_tags(f):
	# Return a dictionary that maps each lower-cased node name to a
	# tuple ('', start, line): an empty string, the file offset of
	# the node's header line, and the header line itself.
	# For a duplicate node name a message is printed and the later
	# entry replaces the earlier one.
	tags = {}
	while 1:
		# Remember where this buffer starts in the file, so that
		# indexes into buf can be turned into file offsets.
		offset = f.tell()
		buf = f.read(CHUNKSIZE)
		if not buf:
			break
		i = 0
		while 1:
			match = findheader(buf, i)
			if match:
				(a,b), (a1,b1) = match
				# Group 1 is the header line
				start = offset+a1
				line = buf[a1:b1]
				i = b
				match = parseheader(line)
				if match:
					(a,b), (a1,b1) = match
					key = string.lower(line[a1:b1])
					if tags.has_key(key):
						print 'Duplicate node:',
						print key
					tags[key] = '', start, line
			elif findescape(buf, i):
				# A separator without a complete header after it:
				# extend the buffer and look again.
				next = f.read(CHUNKSIZE)
				if not next:
					break
				buf = buf + next
			else:
				# Done with this buffer; continue with the next chunk
				break
	return tags


# Try to open a file, return a file object if succeeds.
# Raise NoSuchFile if the file can't be opened.
# Should treat absolute pathnames special.
#
def try_open(file):
	for dir in INFOPATH:
		try:
			return open(dir + file, 'r')
		except IOError:
			pass
	raise NoSuchFile, file


# A little test for the speed of make_tags().
#
TESTFILE = 'texinfo-1'
def test_make_tags():
	import time
	f = try_open(TESTFILE)
	t1 = time.time()
	tags = make_tags(f)
	t2 = time.time()
	print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.'
commit -- why not 1995-04-10 11:47:11 +00:00			`# Tools for info file processing.`

			`# XXX Need to be more careful with reading ahead searching for nodes.`


			`import regexp`
			`import string`


			`# Exported exceptions.`
			`#`
			`NoSuchFile = 'no such file'`
			`NoSuchNode = 'no such node'`


			`# The search path for info files; this is site-specific.`
			`# Directory names should end in a partname delimiter,`
			`# so they can simply be concatenated to a relative pathname.`
			`#`
			`#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac`
			`INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX`


			`# Tunable constants.`
			`#`
			`BLOCKSIZE = 512 # Qty to align reads to, if possible`
			`FUZZ = 2*BLOCKSIZE # Qty to back-up before searching for a node`
			`CHUNKSIZE = 4*BLOCKSIZE # Qty to read at once when reading lots of data`


			`# Regular expressions used.`
			`# Note that it is essential that Python leaves unrecognized backslash`
			`# escapes in a string so they can be seen by regexp.compile!`
			`#`
			`findheader = regexp.compile('\037\014?\n(.*\n)').match`
			`findescape = regexp.compile('\037').match`
			`parseheader = regexp.compile('[nN]ode:[ \t]([^\t,\n])').match`
			`findfirstline = regexp.compile('^.*\n').match`
			`findnode = regexp.compile('[nN]ode:[ \t]([^\t,\n])').match`
			`findprev = regexp.compile('[pP]rev[ious]:[ \t]([^\t,\n]*)').match`
			`findnext = regexp.compile('[nN]ext:[ \t]([^\t,\n])').match`
			`findup = regexp.compile('[uU]p:[ \t]([^\t,\n])').match`
			`findmenu = regexp.compile('^\* [mM]enu:').match`
			`findmenuitem = regexp.compile( \`
			`'^\* ([^:]+):[ \t](:\|\([^\t]\)[^\t,\n.]\|[^:(][^\t,\n.])').match`
			`findfootnote = regexp.compile( \`
			`'\[nN]ote ([^:]+):[ \t](:\|[^:][^\t,\n.]*)').match`
			`parsenoderef = regexp.compile('^\((.)\)(.)$').match`


			`# Get a node and all information pertaining to it.`
			`# This doesn't work if there is an indirect tag table,`
			`# and in general you are better off using icache.get_node() instead.`
			`# Functions get_whole_file() and get_file_node() provide part`
			`# functionality used by icache.`
			`# Raise NoSuchFile or NoSuchNode as appropriate.`
			`#`
			`def get_node(curfile, ref):`
			`file, node = parse_ref(curfile, ref)`
			`if node == '*':`
			`return get_whole_file(file)`
			`else:`
			`return get_file_node(file, 0, node)`
			`#`
			`def get_whole_file(file):`
			`f = try_open(file) # May raise NoSuchFile`
			`text = f.read()`
			`header, menu, footnotes = ('', '', ''), [], []`
			`return file, '*', header, menu, footnotes, text`
			`#`
			`def get_file_node(file, offset, node):`
			`f = try_open(file) # May raise NoSuchFile`
			`text = find_node(f, offset, node) # May raise NoSuchNode`
			`node, header, menu, footnotes = analyze_node(text)`
			`return file, node, header, menu, footnotes, text`


			`# Parse a node reference into a file (possibly default) and node name.`
			`# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".`
			`# Default file is the curfile argument; default node is Top.`
			`# A node value of '*' is a special case: the whole file should`
			`# be interpreted (by the caller!) as a single node.`
			`#`
			`def parse_ref(curfile, ref):`
			`match = parsenoderef(ref)`
			`if not match:`
			`file, node = curfile, ref`
			`else:`
			`(a, b), (a1, b1), (a2, b2) = match`
			`file, node = ref[a1:b1], ref[a2:b2]`
			`if not file:`
			`file = curfile # (Is this necessary?)`
			`if not node:`
			`node = 'Top'`
			`return file, node`


			`# Extract node name, links, menu and footnotes from the node text.`
			`#`
			`def analyze_node(text):`
			`#`
			`# Get node name and links from the header line`
			`#`
			`match = findfirstline(text)`
			`if match:`
			`(a, b) = match[0]`
			`line = text[a:b]`
			`else:`
			`line = ''`
			`node = get_it(text, findnode)`
			`prev = get_it(text, findprev)`
			`next = get_it(text, findnext)`
			`up = get_it(text, findup)`
			`#`
			`# Get the menu items, if there is a menu`
			`#`
			`menu = []`
			`match = findmenu(text)`
			`if match:`
			`(a, b) = match[0]`
			`while 1:`
			`match = findmenuitem(text, b)`
			`if not match:`
			`break`
			`(a, b), (a1, b1), (a2, b2) = match`
			`topic, ref = text[a1:b1], text[a2:b2]`
			`if ref == ':':`
			`ref = topic`
Fix multi-arg list.append() calls. 1998-10-08 15:24:48 +00:00			`menu.append((topic, ref))`
commit -- why not 1995-04-10 11:47:11 +00:00			`#`
			`# Get the footnotes`
			`#`
			`footnotes = []`
			`b = 0`
			`while 1:`
			`match = findfootnote(text, b)`
			`if not match:`
			`break`
			`(a, b), (a1, b1), (a2, b2) = match`
			`topic, ref = text[a1:b1], text[a2:b2]`
			`if ref == ':':`
			`ref = topic`
Fix multi-arg list.append() calls. 1998-10-08 15:24:48 +00:00			`footnotes.append((topic, ref))`
commit -- why not 1995-04-10 11:47:11 +00:00			`#`
			`return node, (prev, next, up), menu, footnotes`
			`#`
			`def get_it(line, matcher):`
			`match = matcher(line)`
			`if not match:`
			`return ''`
			`else:`
			`(a, b), (a1, b1) = match`
			`return line[a1:b1]`


			`# Find a node in an open file.`
			`# The offset (from the tags table) is a hint about the node's position.`
			`# Pass zero if there is no tags table.`
			`# Raise NoSuchNode if the node isn't found.`
			`# NB: This seeks around in the file.`
			`#`
			`def find_node(f, offset, node):`
			`node = string.lower(node) # Just to be sure`
			`#`
			`# Position a little before the given offset,`
			`# so we may find the node even if it has moved around`
			`# in the file a little.`
			`#`
			`offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE)`
			`f.seek(offset)`
			`#`
			`# Loop, hunting for a matching node header.`
			`#`
			`while 1:`
			`buf = f.read(CHUNKSIZE)`
			`if not buf:`
			`break`
			`i = 0`
			`while 1:`
			`match = findheader(buf, i)`
			`if match:`
			`(a,b), (a1,b1) = match`
			`start = a1`
			`line = buf[a1:b1]`
			`i = b`
			`match = parseheader(line)`
			`if match:`
			`(a,b), (a1,b1) = match`
			`key = string.lower(line[a1:b1])`
			`if key == node:`
			`# Got it! Now read the rest.`
			`return read_node(f, buf[start:])`
			`elif findescape(buf, i):`
			`next = f.read(CHUNKSIZE)`
			`if not next:`
			`break`
			`buf = buf + next`
			`else:`
			`break`
			`#`
			`# If we get here, we didn't find it. Too bad.`
			`#`
			`raise NoSuchNode, node`


			`# Finish off getting a node (subroutine for find_node()).`
			`# The node begins at the start of buf and may end in buf;`
			`# if it doesn't end there, read additional data from f.`
			`#`
			`def read_node(f, buf):`
			`i = 0`
			`match = findescape(buf, i)`
			`while not match:`
			`next = f.read(CHUNKSIZE)`
			`if not next:`
			`end = len(buf)`
			`break`
			`i = len(buf)`
			`buf = buf + next`
			`match = findescape(buf, i)`
			`else:`
			`# Got a match`
			`(a, b) = match[0]`
			`end = a`
			`# Strip trailing newlines`
			`while end > 0 and buf[end-1] == '\n':`
			`end = end-1`
			`buf = buf[:end]`
			`return buf`


			`# Read reverse starting at offset until the beginning of a node is found.`
			`# Then return a buffer containing the beginning of the node,`
			`# with f positioned just after the buffer.`
			`# The buffer will contain at least the full header line of the node;`
			`# the caller should finish off with read_node() if it is the right node.`
			`# (It is also possible that the buffer extends beyond the node!)`
			`# Return an empty string if there is no node before the given offset.`
			`#`
			`def backup_node(f, offset):`
			`start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)`
			`end = offset`
			`while start < end:`
			`f.seek(start)`
			`buf = f.read(end-start)`
			`i = 0`
			`hit = -1`
			`while 1:`
			`match = findheader(buf, i)`
			`if match:`
			`(a,b), (a1,b1) = match`
			`hit = a1`
			`i = b`
			`elif end < offset and findescape(buf, i):`
			`next = f.read(min(offset-end, BLOCKSIZE))`
			`if not next:`
			`break`
			`buf = buf + next`
			`end = end + len(next)`
			`else:`
			`break`
			`if hit >= 0:`
			`return buf[hit:]`
			`end = start`
			`start = max(0, end - CHUNKSIZE)`
			`return ''`


			`# Make a tag table for the given file by scanning the file.`
			`# The file must be open for reading, and positioned at the beginning`
			`# (or wherever the hunt for tags must begin; it is read till the end).`
			`#`
			`def make_tags(f):`
			`tags = {}`
			`while 1:`
			`offset = f.tell()`
			`buf = f.read(CHUNKSIZE)`
			`if not buf:`
			`break`
			`i = 0`
			`while 1:`
			`match = findheader(buf, i)`
			`if match:`
			`(a,b), (a1,b1) = match`
			`start = offset+a1`
			`line = buf[a1:b1]`
			`i = b`
			`match = parseheader(line)`
			`if match:`
			`(a,b), (a1,b1) = match`
			`key = string.lower(line[a1:b1])`
			`if tags.has_key(key):`
			`print 'Duplicate node:',`
			`print key`
			`tags[key] = '', start, line`
			`elif findescape(buf, i):`
			`next = f.read(CHUNKSIZE)`
			`if not next:`
			`break`
			`buf = buf + next`
			`else:`
			`break`
			`return tags`


			`# Try to open a file, return a file object if succeeds.`
			`# Raise NoSuchFile if the file can't be opened.`
			`# Should treat absolute pathnames special.`
			`#`
			`def try_open(file):`
			`for dir in INFOPATH:`
			`try:`
			`return open(dir + file, 'r')`
			`except IOError:`
			`pass`
			`raise NoSuchFile, file`


			`# A little test for the speed of make_tags().`
			`#`
			`TESTFILE = 'texinfo-1'`
			`def test_make_tags():`
			`import time`
			`f = try_open(TESTFILE)`
			`t1 = time.time()`
			`tags = make_tags(f)`
			`t2 = time.time()`
			print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.'