From 16d27e3b141d26853effc6c70214412cebebbe9f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 21 Aug 1996 16:28:53 +0000 Subject: [PATCH] Demos for Fred's parser module --- Demo/parser/FILES | 6 ++ Demo/parser/Makefile | 8 ++ Demo/parser/README | 15 ++++ Demo/parser/docstring.py | 2 + Demo/parser/example.py | 163 +++++++++++++++++++++++++++++++++++++ Demo/parser/parser.tex | 77 ++++++++++++++++++ Demo/parser/pprint.py | 143 ++++++++++++++++++++++++++++++++ Demo/parser/source.py | 27 ++++++ Demo/parser/test_parser.py | 50 ++++++++++++ 9 files changed, 491 insertions(+) create mode 100644 Demo/parser/FILES create mode 100644 Demo/parser/Makefile create mode 100644 Demo/parser/README create mode 100644 Demo/parser/docstring.py create mode 100644 Demo/parser/example.py create mode 100644 Demo/parser/parser.tex create mode 100644 Demo/parser/pprint.py create mode 100644 Demo/parser/source.py create mode 100755 Demo/parser/test_parser.py diff --git a/Demo/parser/FILES b/Demo/parser/FILES new file mode 100644 index 00000000000..4505d3aa8a4 --- /dev/null +++ b/Demo/parser/FILES @@ -0,0 +1,6 @@ +Demo/parser/ +Doc/libparser.tex +Lib/AST.py +Lib/symbol.py +Lib/token.py +Modules/parsermodule.c diff --git a/Demo/parser/Makefile b/Demo/parser/Makefile new file mode 100644 index 00000000000..648bf6ed2ac --- /dev/null +++ b/Demo/parser/Makefile @@ -0,0 +1,8 @@ +parser.dvi: parser.tex ../../Doc/libparser.tex + TEXINPUTS=../../Doc:: $(LATEX) parser + +# Use a new name for this; the included file uses 'clean' already.... +clean-parser: + rm -f *.log *.aux *.dvi *.pyc + +include ../../Doc/Makefile diff --git a/Demo/parser/README b/Demo/parser/README new file mode 100644 index 00000000000..03696c3c644 --- /dev/null +++ b/Demo/parser/README @@ -0,0 +1,15 @@ +These files are from the large example of using the `parser' module. Refer +to the Python Library Reference for more information. 
+ +Files: +------ + + example.py -- module that uses the `parser' module to extract + information from the parse tree of Python source + code. + + source.py -- sample source code used to demonstrate ability to + handle nested constructs easily using the functions + and classes in example.py. + +Enjoy! diff --git a/Demo/parser/docstring.py b/Demo/parser/docstring.py new file mode 100644 index 00000000000..45a261b61c2 --- /dev/null +++ b/Demo/parser/docstring.py @@ -0,0 +1,2 @@ +"""Some documentation. +""" diff --git a/Demo/parser/example.py b/Demo/parser/example.py new file mode 100644 index 00000000000..c428aff1e82 --- /dev/null +++ b/Demo/parser/example.py @@ -0,0 +1,163 @@ +"""Simple code to extract class & function docstrings from a module. + + +""" + +import symbol +import token +import types + + +def get_docs(fileName): + """Retrieve information from the parse tree of a source file. + + fileName + Name of the file to read Python source code from. + """ + source = open(fileName).read() + import os + basename = os.path.basename(os.path.splitext(fileName)[0]) + import parser + ast = parser.suite(source) + tup = parser.ast2tuple(ast) + return ModuleInfo(tup, basename) + + +class DefnInfo: + _docstring = '' + _name = '' + + def __init__(self, tree): + self._name = tree[2][1] + + def get_docstring(self): + return self._docstring + + def get_name(self): + return self._name + +class SuiteInfoBase(DefnInfo): + def __init__(self): + self._class_info = {} + self._function_info = {} + + def get_class_names(self): + return self._class_info.keys() + + def get_class_info(self, name): + return self._class_info[name] + + def _extract_info(self, tree): + if len(tree) >= 4: + found, vars = match(DOCSTRING_STMT_PATTERN, tree[3]) + if found: + self._docstring = eval(vars['docstring']) + for node in tree[1:]: + if (node[0] == symbol.stmt + and node[1][0] == symbol.compound_stmt): + if node[1][1][0] == symbol.funcdef: + name = node[1][1][2][1] + self._function_info[name] = \ + 
FunctionInfo(node[1][1]) + elif node[1][1][0] == symbol.classdef: + name = node[1][1][2][1] + self._class_info[name] = ClassInfo(node[1][1]) + + +class SuiteInfo(SuiteInfoBase): + def __init__(self, tree): + SuiteInfoBase.__init__(self) + self._extract_info(tree) + + def get_function_names(self): + return self._function_info.keys() + + def get_function_info(self, name): + return self._function_info[name] + + +class FunctionInfo(SuiteInfo): + def __init__(self, tree): + DefnInfo.__init__(self, tree) + suite = tree[-1] + if len(suite) >= 4: + found, vars = match(DOCSTRING_STMT_PATTERN, suite[3]) + if found: + self._docstring = eval(vars['docstring']) + SuiteInfoBase.__init__(self) + self._extract_info(suite) + + +class ClassInfo(SuiteInfoBase): + def __init__(self, tree): + SuiteInfoBase.__init__(self) + DefnInfo.__init__(self, tree) + self._extract_info(tree[-1]) + + def get_method_names(self): + return self._function_info.keys() + + def get_method_info(self, name): + return self._function_info[name] + + +class ModuleInfo(SuiteInfo): + def __init__(self, tree, name=""): + self._name = name + SuiteInfo.__init__(self, tree) + found, vars = match(DOCSTRING_STMT_PATTERN, tree[1]) + if found: + self._docstring = vars["docstring"] + + +from types import ListType, TupleType + +def match(pattern, data, vars=None): + """ + """ + if vars is None: + vars = {} + if type(pattern) is ListType: # 'variables' are ['varname'] + vars[pattern[0]] = data + return 1, vars + if type(pattern) is not TupleType: + return (pattern == data), vars + if len(data) != len(pattern): + return 0, vars + for pattern, data in map(None, pattern, data): + same, vars = match(pattern, data, vars) + if not same: + break + return same, vars + + +# This pattern will match a 'stmt' node which *might* represent a docstring; +# docstrings require that the statement which provides the docstring be the +# first statement in the class or function, which this pattern does not check. 
+# +DOCSTRING_STMT_PATTERN = ( + symbol.stmt, + (symbol.simple_stmt, + (symbol.small_stmt, + (symbol.expr_stmt, + (symbol.testlist, + (symbol.test, + (symbol.and_test, + (symbol.not_test, + (symbol.comparison, + (symbol.expr, + (symbol.xor_expr, + (symbol.and_expr, + (symbol.shift_expr, + (symbol.arith_expr, + (symbol.term, + (symbol.factor, + (symbol.power, + (symbol.atom, + (token.STRING, ['docstring']) + )))))))))))))))), + (token.NEWLINE, '') + )) + +# +# end of file diff --git a/Demo/parser/parser.tex b/Demo/parser/parser.tex new file mode 100644 index 00000000000..170d9d73860 --- /dev/null +++ b/Demo/parser/parser.tex @@ -0,0 +1,77 @@ +\documentstyle[twoside,10pt,myformat]{report} + +%% This manual does not supplement the chapter from the Python +%% Library Reference, but only allows formatting of the parser module +%% component of that document as a separate document, and was created +%% primarily to ease review of the formatted document during authoring. + +\title{Python Parser Module Reference} +\author{ + Fred L. Drake, Jr. \\ + Corporation for National Research Initiatives (CNRI) \\ + 1895 Preston White Drive, Reston, Va 20191, USA \\ + E-mail: {\tt fdrake@cnri.reston.va.us}, {\tt fdrake@intr.net} +} + +\date{August 20th, 1996 \\ Release 1.4} + +\begin{document} + +\pagenumbering{roman} + +\maketitle + +Copyright \copyright{} 1995-1996 by Fred L. Drake, Jr. and Virginia +Polytechnic Institute and State University, Blacksburg, Virginia, USA. +Portions of the software copyright 1991-1995 by Stichting Mathematisch +Centrum, Amsterdam, The Netherlands. Copying is permitted under the +terms associated with the main Python distribution, with the +additional restriction that this additional notice be included and +maintained on all distributed copies. 
+ +\begin{center} +All Rights Reserved +\end{center} + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the names of Fred L. Drake, Jr. and +Virginia Polytechnic Institute and State University not be used in +advertising or publicity pertaining to distribution of the software +without specific, written prior permission. + +FRED L. DRAKE, JR. AND VIRGINIA POLYTECHNIC INSTITUTE AND STATE +UNIVERSITY DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO +EVENT SHALL FRED L. DRAKE, JR. OR VIRGINIA POLYTECHNIC INSTITUTE AND +STATE UNIVERSITY BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL +DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR +PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. + +\begin{abstract} + +\noindent +The \emph{Python Parser Module Reference} describes the interfaces +published by the optional \code{parser} module and gives examples of +how they may be used. It contains the same text as the chapter on the +\code{parser} module in the \emph{Python Library Reference}, but is +presented as a separate document. + +This manual assumes basic knowledge about the Python language. For an +informal introduction to Python, see the {\em Python Tutorial}; the +Python Reference Manual remains the highest authority on syntactic and +semantic questions. 
+ +\end{abstract} + +\pagebreak +\pagenumbering{arabic} + +\chapter{Parser Module Reference} +\input{libparser} + +\end{document} diff --git a/Demo/parser/pprint.py b/Demo/parser/pprint.py new file mode 100644 index 00000000000..c4b815800c1 --- /dev/null +++ b/Demo/parser/pprint.py @@ -0,0 +1,143 @@ +# pprint.py +# +# Author: Fred L. Drake, Jr. +# fdrake@vt.edu +# +# This is a simple little module I wrote to make life easier. I didn't +# see anything quite like it in the library, though I may have overlooked +# something. I wrote this when I was trying to read some heavily nested +# tuples with fairly non-descriptive content. This is modelled very much +# after Lisp/Scheme - style pretty-printing of lists. If you find it +# useful, thank small children who sleep at night. +# + +"""Support to pretty-print lists, tuples, & dictionaries recursively. +Very simple, but at least somewhat useful, especially in debugging +data structures. + +INDENT_PER_LEVEL -- Amount of indentation to use for each new + recursive level. The default is 1. This + must be a non-negative integer, and may be + set by the caller before calling pprint(). + +MAX_WIDTH -- Maximum width of the display. This is only + used if the representation *can* be kept + less than MAX_WIDTH characters wide. May + be set by the user before calling pprint(). + +TAB_WIDTH -- The width represented by a single tab. This + value is typically 8, but 4 is the default + under MacOS. Can be changed by the user if + desired, but is probably not a good idea. + +pprint(seq [, stream]) -- The pretty-printer. This takes a Python + object (presumably a sequence, but that + doesn't matter) and an optional output + stream. See the function documentation + for details. +""" + + +INDENT_PER_LEVEL = 1 + +MAX_WIDTH = 80 + +import os +TAB_WIDTH = (os.name == 'mac' and 4) or 8 +del os + + + +def _indentation(cols): + "Create tabbed indentation string COLS columns wide." 
+ + # This is used to reduce the byte-count for the output, allowing + # files created using this module to use as little external storage + # as possible. This is primarily intended to minimize impact on + # a user's quota when storing resource files, or for creating output + # intended for transmission. + + return ((cols / TAB_WIDTH) * '\t') + ((cols % TAB_WIDTH) * ' ') + + + +def pprint(seq, stream = None, indent = 0, allowance = 0): + """Pretty-print a list, tuple, or dictionary. + + pprint(seq [, stream]) ==> None + + If STREAM is provided, output is written to that stream, otherwise + sys.stdout is used. Indentation is done according to + INDENT_PER_LEVEL, which may be set to any non-negative integer + before calling this function. The output written on the stream is + a perfectly valid representation of the Python object passed in, + with indentation to suit human-readable interpretation. The + output can be used as input without error, given readable + representations of all sequence elements are available via repr(). + Output is restricted to MAX_WIDTH columns where possible. The + STREAM parameter must support the write() method with a single + parameter, which will always be a string. The output stream may be + a StringIO.StringIO object if the result is needed as a string. + """ + + if stream is None: + import sys + stream = sys.stdout + + from types import DictType, ListType, TupleType + + rep = `seq` + typ = type(seq) + sepLines = len(rep) > (MAX_WIDTH - 1 - indent - allowance) + + if sepLines and (typ is ListType or typ is TupleType): + # Pretty-print the sequence. 
+ stream.write(((typ is ListType) and '[') or '(') + + length = len(seq) + if length: + indent = indent + INDENT_PER_LEVEL + pprint(seq[0], stream, indent, allowance + 1) + + if len(seq) > 1: + for ent in seq[1:]: + stream.write(',\n' + _indentation(indent)) + pprint(ent, stream, indent, allowance + 1) + + indent = indent - INDENT_PER_LEVEL + + stream.write(((typ is ListType) and ']') or ')') + + elif typ is DictType and sepLines: + stream.write('{') + + length = len(seq) + if length: + indent = indent + INDENT_PER_LEVEL + items = seq.items() + items.sort() + key, ent = items[0] + rep = `key` + ': ' + stream.write(rep) + pprint(ent, stream, indent + len(rep), allowance + 1) + + if len(items) > 1: + for key, ent in items[1:]: + rep = `key` + ': ' + stream.write(',\n' + _indentation(indent) + rep) + pprint(ent, stream, indent + len(rep), allowance + 1) + + indent = indent - INDENT_PER_LEVEL + + stream.write('}') + + else: + stream.write(rep) + + # Terminate the 'print' if we're not a recursive invocation. + if not indent: + stream.write('\n') + + +# +# end of pprint.py diff --git a/Demo/parser/source.py b/Demo/parser/source.py new file mode 100644 index 00000000000..b1690a52670 --- /dev/null +++ b/Demo/parser/source.py @@ -0,0 +1,27 @@ +"""Example file to be parsed for the parsermodule example. + +The classes and functions in this module exist only to exhibit the ability +to handle information extraction from nested definitions using parse +trees. They shouldn't interest you otherwise! +""" + +class Simple: + "This class does very little." + + def method(self): + "This method does almost nothing." + return 1 + + class Nested: + "This is a nested class." + + def nested_method(self): + "Method of Nested class." + def nested_function(): + "Function in method of Nested class." + pass + return nested_function + +def function(): + "This function lives at the module level." 
+ return 0 diff --git a/Demo/parser/test_parser.py b/Demo/parser/test_parser.py new file mode 100755 index 00000000000..e114d7634fe --- /dev/null +++ b/Demo/parser/test_parser.py @@ -0,0 +1,50 @@ +#! /projects/python/Python-1.4b2/python +# (Force the script to use the latest build.) +# +# test_parser.py + +import parser, traceback + +_numFailed = 0 + +def testChunk(t, fileName): + global _numFailed + print '----', fileName, + try: + ast = parser.suite(t) + tup = parser.ast2tuple(ast) + # this discards the first AST; a huge memory savings when running + # against a large source file like Tkinter.py. + ast = None + new = parser.tuple2ast(tup) + except parser.ParserError, err: + print + print 'parser module raised exception on input file', fileName + ':' + traceback.print_exc() + _numFailed = _numFailed + 1 + else: + if tup != parser.ast2tuple(new): + print + print 'parser module failed on input file', fileName + _numFailed = _numFailed + 1 + else: + print 'o.k.' + +def testFile(fileName): + t = open(fileName).read() + testChunk(t, fileName) + +def test(): + import sys + args = sys.argv[1:] + if not args: + import glob + args = glob.glob("*.py") + map(testFile, args) + sys.exit(_numFailed != 0) + +if __name__ == '__main__': + test() + +# +# end of file