mirror of https://github.com/python/cpython.git
Demos for Fred's parser module
This commit is contained in:
parent
6dbd190f5e
commit
16d27e3b14
|
@ -0,0 +1,6 @@
|
|||
Demo/parser/
|
||||
Doc/libparser.tex
|
||||
Lib/AST.py
|
||||
Lib/symbol.py
|
||||
Lib/token.py
|
||||
Modules/parsermodule.c
|
|
@ -0,0 +1,8 @@
|
|||
parser.dvi: parser.tex ../../Doc/libparser.tex
|
||||
TEXINPUTS=../../Doc:: $(LATEX) parser
|
||||
|
||||
# Use a new name for this; the included file uses 'clean' already....
|
||||
clean-parser:
|
||||
rm -f *.log *.aux *.dvi *.pyc
|
||||
|
||||
include ../../Doc/Makefile
|
|
@ -0,0 +1,15 @@
|
|||
These files are from the large example of using the `parser' module. Refer
|
||||
to the Python Library Reference for more information.
|
||||
|
||||
Files:
|
||||
------
|
||||
|
||||
example.py -- module that uses the `parser' module to extract
|
||||
information from the parse tree of Python source
|
||||
code.
|
||||
|
||||
source.py -- sample source code used to demonstrate ability to
|
||||
handle nested constructs easily using the functions
|
||||
and classes in example.py.
|
||||
|
||||
Enjoy!
|
|
@ -0,0 +1,2 @@
|
|||
"""Some documentation.
|
||||
"""
|
|
@ -0,0 +1,163 @@
|
|||
"""Simple code to extract class & function docstrings from a module.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
import symbol
|
||||
import token
|
||||
import types
|
||||
|
||||
|
||||
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the file's parse
    tree.  The module is named after the file's base name with the
    extension removed.
    """
    import os
    import parser

    # Close the file explicitly instead of leaving it to the garbage
    # collector, so the handle is released even if read() fails.
    f = open(fileName)
    try:
        source = f.read()
    finally:
        f.close()

    basename = os.path.basename(os.path.splitext(fileName)[0])
    ast = parser.suite(source)
    tup = parser.ast2tuple(ast)
    return ModuleInfo(tup, basename)
|
||||
|
||||
|
||||
class DefnInfo:
    """Base class holding the name and docstring of one definition."""

    # Class-level defaults, used until an instance stores its own value.
    _name = ''
    _docstring = ''

    def __init__(self, tree):
        # The identifier is the second item of the definition node's
        # third element.
        name_node = tree[2]
        self._name = name_node[1]

    def get_name(self):
        "Return the name recorded for this definition."
        return self._name

    def get_docstring(self):
        "Return the docstring recorded for this definition ('' if none)."
        return self._docstring
|
||||
|
||||
class SuiteInfoBase(DefnInfo):
    """Collect the classes and functions defined directly in a suite.

    Results are kept in two dictionaries keyed by definition name:
    _class_info (ClassInfo values) and _function_info (FunctionInfo
    values).
    """

    def __init__(self):
        self._function_info = {}
        self._class_info = {}

    def get_class_info(self, name):
        "Return the ClassInfo recorded under NAME."
        return self._class_info[name]

    def get_class_names(self):
        "Return the names of the classes found in the suite."
        return self._class_info.keys()

    def _extract_info(self, tree):
        """Record the docstring and inner definitions found in TREE.

        TREE is a tuple-form parse tree node.  Its fourth element, when
        present, is tested against DOCSTRING_STMT_PATTERN.
        """
        if len(tree) >= 4:
            found, bound = match(DOCSTRING_STMT_PATTERN, tree[3])
            if found:
                # The token text is the literal as written in the source;
                # eval() turns it into the string value.  NOTE: eval()
                # executes that text, so only feed this trusted source.
                self._docstring = eval(bound['docstring'])

        for node in tree[1:]:
            # Only compound statements can be class or function
            # definitions; skip everything else.
            if node[0] != symbol.stmt:
                continue
            if node[1][0] != symbol.compound_stmt:
                continue
            defn = node[1][1]
            kind = defn[0]
            if kind == symbol.funcdef:
                name = defn[2][1]
                self._function_info[name] = FunctionInfo(defn)
            elif kind == symbol.classdef:
                name = defn[2][1]
                self._class_info[name] = ClassInfo(defn)
|
||||
|
||||
|
||||
class SuiteInfo(SuiteInfoBase):
    """Suite information that also exposes the functions it contains."""

    def __init__(self, tree):
        # Create the empty tables first, then fill them from the tree.
        SuiteInfoBase.__init__(self)
        self._extract_info(tree)

    def get_function_info(self, name):
        "Return the FunctionInfo recorded under NAME."
        functions = self._function_info
        return functions[name]

    def get_function_names(self):
        "Return the names of the functions found in the suite."
        functions = self._function_info
        return functions.keys()
|
||||
|
||||
|
||||
class FunctionInfo(SuiteInfo):
    """Information about one function definition and what it contains.

    The name is taken from the definition node itself.  The docstring
    and any nested classes or functions come from the node's last
    element, which is the function's suite.
    """

    def __init__(self, tree):
        DefnInfo.__init__(self, tree)
        # SuiteInfo.__init__ creates the tables and runs
        # _extract_info(), which already performs the docstring match on
        # suite[3]; repeating that match here was redundant.
        SuiteInfo.__init__(self, tree[-1])
|
||||
|
||||
|
||||
class ClassInfo(SuiteInfoBase):
    """Information about one class definition and its methods.

    Functions found directly in the class's suite are reported as
    methods.
    """

    def __init__(self, tree):
        SuiteInfoBase.__init__(self)
        DefnInfo.__init__(self, tree)
        # The last element of the definition node is the class's suite.
        body = tree[-1]
        self._extract_info(body)

    def get_method_info(self, name):
        "Return the FunctionInfo recorded for the method NAME."
        methods = self._function_info
        return methods[name]

    def get_method_names(self):
        "Return the names of the methods defined in the class."
        methods = self._function_info
        return methods.keys()
|
||||
|
||||
|
||||
class ModuleInfo(SuiteInfo):
    """Information about a whole module.

    tree
        Tuple-form parse tree of the module.

    name
        Name to report for the module; defaults to "<string>".
    """

    def __init__(self, tree, name="<string>"):
        self._name = name
        SuiteInfo.__init__(self, tree)

        # _extract_info() tests tree[3] for a docstring, which is right
        # for an indented suite but is the *third statement* of a module.
        # Discard whatever it stored and look at the first statement.
        self._docstring = ''
        if len(tree) > 1:
            found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # Evaluate the token text so the module docstring is the
                # string value, as for classes and functions, not the
                # quoted literal.  NOTE: eval() executes that text, so
                # only feed this trusted source.
                self._docstring = eval(vars["docstring"])
|
||||
|
||||
|
||||
from types import ListType, TupleType
|
||||
|
||||
def match(pattern, data, vars=None):
    """Match DATA against PATTERN, collecting variable bindings.

    pattern
        A nested tuple.  A one-element list ['varname'] anywhere in it is
        a variable: it matches anything and binds that value to
        'varname'.  Any other non-tuple item must compare equal to the
        corresponding item of DATA.

    data
        The value to test, normally a tuple-form parse tree.

    vars
        Dictionary to add bindings to; a new one is created if omitted.

    Returns a pair (matched, vars).  MATCHED is true when DATA matches.
    VARS holds the bindings made up to the point where matching stopped.
    """
    if vars is None:
        vars = {}
    if type(pattern) is ListType:       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return 1, vars
    if type(pattern) is not TupleType:
        return (pattern == data), vars

    # A tuple pattern can only match something of the same length.  A
    # value with no length at all (e.g. an integer) is a mismatch, not an
    # error.
    try:
        size = len(data)
    except TypeError:
        return 0, vars
    if size != len(pattern):
        return 0, vars

    for i in range(size):
        same, vars = match(pattern[i], data[i], vars)
        if not same:
            return same, vars
    # Every item matched; this also covers two empty tuples.
    return 1, vars
|
||||
|
||||
|
||||
# This pattern will match a 'stmt' node which *might* represent a docstring;
|
||||
# docstrings require that the statement which provides the docstring be the
|
||||
# first statement in the class or function, which this pattern does not check.
|
||||
#
|
||||
# Build the pattern from the inside out.  The innermost node is the STRING
# token whose text is bound to the 'docstring' variable; each symbol
# listed below then wraps the result in one more single-child node, from
# 'atom' out to 'small_stmt'.
DOCSTRING_STMT_PATTERN = (token.STRING, ['docstring'])
for _sym in (symbol.atom, symbol.power, symbol.factor, symbol.term,
             symbol.arith_expr, symbol.shift_expr, symbol.and_expr,
             symbol.xor_expr, symbol.expr, symbol.comparison,
             symbol.not_test, symbol.and_test, symbol.test,
             symbol.testlist, symbol.expr_stmt, symbol.small_stmt):
    DOCSTRING_STMT_PATTERN = (_sym, DOCSTRING_STMT_PATTERN)
del _sym

# The statement must consist of that expression followed by a NEWLINE.
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     DOCSTRING_STMT_PATTERN,
     (token.NEWLINE, '')
     ))
|
||||
|
||||
#
|
||||
# end of file
|
|
@ -0,0 +1,77 @@
|
|||
\documentstyle[twoside,10pt,myformat]{report}
|
||||
|
||||
%% This manual does not supplement the chapter from the Python
|
||||
%% Library Reference, but only allows formatting of the parser module
|
||||
%% component of that document as a separate document, and was created
|
||||
%% primarily to ease review of the formatted document during authoring.
|
||||
|
||||
\title{Python Parser Module Reference}
|
||||
\author{
|
||||
Fred L. Drake, Jr. \\
|
||||
Corporation for National Research Initiatives (CNRI) \\
|
||||
1895 Preston White Drive, Reston, Va 20191, USA \\
|
||||
E-mail: {\tt fdrake@cnri.reston.va.us}, {\tt fdrake@intr.net}
|
||||
}
|
||||
|
||||
\date{August 20th, 1996 \\ Release 1.4}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\pagenumbering{roman}
|
||||
|
||||
\maketitle
|
||||
|
||||
Copyright \copyright{} 1995-1996 by Fred L. Drake, Jr. and Virginia
|
||||
Polytechnic Institute and State University, Blacksburg, Virginia, USA.
|
||||
Portions of the software copyright 1991-1995 by Stichting Mathematisch
|
||||
Centrum, Amsterdam, The Netherlands. Copying is permitted under the
|
||||
terms associated with the main Python distribution, with the
|
||||
additional restriction that this additional notice be included and
|
||||
maintained on all distributed copies.
|
||||
|
||||
\begin{center}
|
||||
All Rights Reserved
|
||||
\end{center}
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice appear in all copies and that
|
||||
both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the names of Fred L. Drake, Jr. and
|
||||
Virginia Polytechnic Institute and State University not be used in
|
||||
advertising or publicity pertaining to distribution of the software
|
||||
without specific, written prior permission.
|
||||
|
||||
FRED L. DRAKE, JR. AND VIRGINIA POLYTECHNIC INSTITUTE AND STATE
|
||||
UNIVERSITY DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
|
||||
EVENT SHALL FRED L. DRAKE, JR. OR VIRGINIA POLYTECHNIC INSTITUTE AND
|
||||
STATE UNIVERSITY BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
||||
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
\begin{abstract}
|
||||
|
||||
\noindent
|
||||
The \emph{Python Parser Module Reference} describes the interfaces
|
||||
published by the optional \code{parser} module and gives examples of
|
||||
how they may be used. It contains the same text as the chapter on the
|
||||
\code{parser} module in the \emph{Python Library Reference}, but is
|
||||
presented as a separate document.
|
||||
|
||||
This manual assumes basic knowledge about the Python language. For an
|
||||
informal introduction to Python, see the {\em Python Tutorial}; the
|
||||
Python Reference Manual remains the highest authority on syntactic and
|
||||
semantic questions.
|
||||
|
||||
\end{abstract}
|
||||
|
||||
\pagebreak
|
||||
\pagenumbering{arabic}
|
||||
|
||||
\chapter{Parser Module Reference}
|
||||
\input{libparser}
|
||||
|
||||
\end{document}
|
|
@ -0,0 +1,143 @@
|
|||
# pprint.py
|
||||
#
|
||||
# Author: Fred L. Drake, Jr.
|
||||
# fdrake@vt.edu
|
||||
#
|
||||
# This is a simple little module I wrote to make life easier. I didn't
|
||||
# see anything quite like it in the library, though I may have overlooked
|
||||
# something. I wrote this when I was trying to read some heavily nested
|
||||
# tuples with fairly non-descriptive content. This is modelled very much
|
||||
# after Lisp/Scheme - style pretty-printing of lists. If you find it
|
||||
# useful, thank small children who sleep at night.
|
||||
#
|
||||
|
||||
"""Support to pretty-print lists, tuples, & dictionaries recursively.
|
||||
Very simple, but at least somewhat useful, especially in debugging
|
||||
data structures.
|
||||
|
||||
INDENT_PER_LEVEL -- Amount of indentation to use for each new
|
||||
recursive level. The default is 1. This
|
||||
must be a non-negative integer, and may be
|
||||
set by the caller before calling pprint().
|
||||
|
||||
MAX_WIDTH -- Maximum width of the display. This is only
|
||||
used if the representation *can* be kept
|
||||
less than MAX_WIDTH characters wide. May
|
||||
be set by the user before calling pprint().
|
||||
|
||||
TAB_WIDTH -- The width represented by a single tab. This
|
||||
value is typically 8, but 4 is the default
|
||||
under MacOS. Can be changed by the user if
|
||||
desired, but is probably not a good idea.
|
||||
|
||||
pprint(seq [, stream]) -- The pretty-printer. This takes a Python
|
||||
object (presumably a sequence, but that
|
||||
doesn't matter) and an optional output
|
||||
stream. See the function documentation
|
||||
for details.
|
||||
"""
|
||||
|
||||
|
||||
# Columns added for each level of nesting.
INDENT_PER_LEVEL = 1

# Widest line produced when the representation can be kept that narrow.
MAX_WIDTH = 80

# Columns represented by one tab: 4 when os.name is 'mac', otherwise 8.
import os
if os.name == 'mac':
    TAB_WIDTH = 4
else:
    TAB_WIDTH = 8
del os
|
||||
|
||||
|
||||
|
||||
def _indentation(cols):
    "Create tabbed indentation string COLS columns wide."

    # This is used to reduce the byte-count for the output, allowing
    # files created using this module to use as little external storage
    # as possible.  This is primarily intended to minimize impact on
    # a user's quota when storing resource files, or for creating output
    # intended for transmission.

    # divmod() always yields a whole number of tabs, so the result does
    # not depend on how the '/' operator divides integers.
    tabs, spaces = divmod(cols, TAB_WIDTH)
    return (tabs * '\t') + (spaces * ' ')
|
||||
|
||||
|
||||
|
||||
def pprint(seq, stream = None, indent = 0, allowance = 0):
    """Pretty-print a list, tuple, or dictionary.

    pprint(seq [, stream]) ==> None

    If STREAM is provided, output is written to that stream, otherwise
    sys.stdout is used.  Indentation is done according to
    INDENT_PER_LEVEL, which may be set to any non-negative integer
    before calling this function.  The output written on the stream is
    a perfectly valid representation of the Python object passed in,
    with indentation to suit human-readable interpretation.  The
    output can be used as input without error, given readable
    representations of all sequence elements are available via repr().
    Output is restricted to MAX_WIDTH columns where possible.  The
    STREAM parameter must support the write() method with a single
    parameter, which will always be a string.  The output stream may be
    a StringIO.StringIO object if the result is needed as a string.

    INDENT is the column at which the representation starts, and
    ALLOWANCE is a number of columns held back from MAX_WIDTH; both
    default to 0.  A terminating newline is written only when INDENT
    is 0.
    """

    if stream is None:
        import sys
        stream = sys.stdout

    _pprint(seq, stream, indent, allowance)

    # Terminate the 'print'.  Nested values are written by _pprint(),
    # which never gets here, so no newline can appear in the middle of
    # the output even when INDENT_PER_LEVEL is 0.
    if not indent:
        stream.write('\n')


def _pprint(seq, stream, indent, allowance):
    "Write the representation of SEQ to STREAM, without a final newline."

    from types import DictType, ListType, TupleType

    rep = repr(seq)
    typ = type(seq)
    # Break the value over several lines only when its one-line form
    # does not fit in the space that is left.
    sepLines = len(rep) > (MAX_WIDTH - 1 - indent - allowance)

    if sepLines and (typ is ListType or typ is TupleType):
        # Pretty-print the sequence, one element per line.
        if typ is ListType:
            opener, closer = '[', ']'
        else:
            opener, closer = '(', ')'
        stream.write(opener)

        inner = indent + INDENT_PER_LEVEL
        first = 1
        for ent in seq:
            if not first:
                stream.write(',\n' + _indentation(inner))
            first = 0
            _pprint(ent, stream, inner, allowance + 1)

        # A one-element tuple needs its trailing comma; without it the
        # output reads back as the bare element.
        if typ is TupleType and len(seq) == 1:
            stream.write(',')

        stream.write(closer)

    elif sepLines and typ is DictType:
        # Pretty-print the dictionary, one sorted item per line.
        stream.write('{')

        inner = indent + INDENT_PER_LEVEL
        items = seq.items()
        items.sort()
        first = 1
        for key, ent in items:
            rep = repr(key) + ': '
            if first:
                stream.write(rep)
            else:
                stream.write(',\n' + _indentation(inner) + rep)
            first = 0
            # The value starts after the key, so it is indented further.
            _pprint(ent, stream, inner + len(rep), allowance + 1)

        stream.write('}')

    else:
        stream.write(rep)
|
||||
|
||||
|
||||
#
|
||||
# end of pprint.py
|
|
@ -0,0 +1,27 @@
|
|||
"""Example file to be parsed for the parsermodule example.
|
||||
|
||||
The classes and functions in this module exist only to exhibit the ability
|
||||
to extract information from nested definitions using parse
|
||||
trees. They shouldn't interest you otherwise!
|
||||
"""
|
||||
|
||||
# Fixture class for the parser example.  The docstrings below are the data
# being extracted, so they are left exactly as written.
# NOTE(review): the nesting shown here is reconstructed from the docstrings
# ("nested class", "Method of Nested class", ...) -- confirm against the
# original file.
class Simple:
    "This class does very little."

    def method(self):
        "This method does almost nothing."
        return 1

    # A class defined inside another class.
    class Nested:
        "This is a nested class."

        def nested_method(self):
            "Method of Nested class."
            # A function defined inside a method of a nested class.
            def nested_function():
                "Function in method of Nested class."
                pass
            return nested_function
|
||||
|
||||
# Fixture function for the parser example; its docstring is the data being
# extracted, so it is left exactly as written.
def function():
    "This function lives at the module level."
    return 0
|
|
@ -0,0 +1,50 @@
|
|||
#! /projects/python/Python-1.4b2/python
|
||||
# (Force the script to use the latest build.)
|
||||
#
|
||||
# test_parser.py
|
||||
|
||||
import parser, traceback
|
||||
|
||||
_numFailed = 0
|
||||
|
||||
def testChunk(t, fileName):
    """Round-trip the source text T through the parser module.

    T is parsed, converted to a tuple, and the tuple is converted back to
    an AST.  The check passes when that AST converts to an equal tuple.
    FILENAME is used only in the messages printed.  Each failure adds one
    to the module-level _numFailed counter.
    """
    global _numFailed
    # The trailing comma keeps the verdict on the same output line.
    print '----', fileName,
    try:
        ast = parser.suite(t)
        tup = parser.ast2tuple(ast)
        # this discards the first AST; a huge memory savings when running
        # against a large source file like Tkinter.py.
        ast = None
        new = parser.tuple2ast(tup)
    except parser.ParserError, err:
        # The parser module rejected the input, or the tuple built from it.
        print
        print 'parser module raised exception on input file', fileName + ':'
        traceback.print_exc()
        _numFailed = _numFailed + 1
    else:
        # Both conversions succeeded; the rebuilt AST must give back an
        # equal tuple.
        if tup != parser.ast2tuple(new):
            print
            print 'parser module failed on input file', fileName
            _numFailed = _numFailed + 1
        else:
            print 'o.k.'
|
||||
|
||||
def testFile(fileName):
    """Run testChunk() on the contents of the file named FILENAME."""
    # Close the file explicitly; many files may be tested in one run and
    # each handle should be released as soon as it has been read.
    f = open(fileName)
    try:
        t = f.read()
    finally:
        f.close()
    testChunk(t, fileName)
|
||||
|
||||
def test():
    """Test the files named on the command line, then exit.

    With no arguments, every "*.py" file in the current directory is
    tested.  The exit status is non-zero if any file failed.
    """
    import sys
    args = sys.argv[1:]
    if not args:
        import glob
        args = glob.glob("*.py")
    # A plain loop: the calls are made for their side effects only, so
    # there is no result list worth building.
    for fileName in args:
        testFile(fileName)
    sys.exit(_numFailed != 0)
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
||||
|
||||
#
|
||||
# end of file
|
Loading…
Reference in New Issue