From df9f7a3e525ced6f8771ff651c536574b93c0af6 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Jun 1999 12:53:21 +0000 Subject: [PATCH] Tim Peters: Taught it more "real Python" rules without slowing it appreciably. Triple-quoted strings no longer confuse it, nor nested classes or defs, nor comments starting in column 1. Chews thru Tkinter.py in < 3 seconds for me; doctest.py no longer confuses it; no longer missing methods in PyShell.py; etc. Also captures defs starting in column 1 now, but ignores them; an interface should be added so that IDLE's class browser can show the top-level functions too. --- Lib/pyclbr.py | 100 ++++++++++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 47 deletions(-) diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py index ad20c99efea..709a07bf366 100644 --- a/Lib/pyclbr.py +++ b/Lib/pyclbr.py @@ -29,10 +29,14 @@ shouldn't happen often. BUGS -Continuation lines are not dealt with at all and strings may confuse -the hell out of the parser, but it usually works. -Nested classes are not recognized. -Nested defs may be mistaken for class methods.''' # ' <-- bow to font lock +Continuation lines are not dealt with at all. +While triple-quoted strings won't confuse it, lines that look like +def, class, import or "from ... import" stmts inside backslash-continued +single-quoted strings are treated like code. The expense of stopping +that isn't worth it. +Code that doesn't pass tabnanny or python -t will confuse it, unless +you set the module TABWIDTH vrbl (default 8) to the correct tab width +for the file.''' # ' <-- bow to font lock import os import sys @@ -40,39 +44,35 @@ import re import string +TABWIDTH = 8 + _getnext = re.compile(r""" -## String slows it down by more than a factor of 2 (not because the -## string regexp is slow, but because there are often a lot of strings, -## which means the regexp has to get called that many more times). -## (?P<String> -## " [^"\\\n]* (?: \\. 
[^"\\\n]* )* " -## -## | ' [^'\\\n]* (?: \\. [^'\\\n]* )* ' -## -## | \""" [^"\\]* (?: -## (?: \\. | "(?!"") ) -## [^"\\]* -## )* -## \""" -## -## | ''' [^'\\]* (?: -## (?: \\. | '(?!'') ) -## [^'\\]* -## )* -## ''' -## ) -## -##| (?P<Method> - (?P<Method> - # dicey trick: assume a def not at top level is a method - ^ [ \t]+ def [ \t]+ + (?P<String> + \""" [^"\\]* (?: + (?: \\. | "(?!"") ) + [^"\\]* + )* + \""" + + | ''' [^'\\]* (?: + (?: \\. | '(?!'') ) + [^'\\]* + )* + ''' + ) + +| (?P<Method> + ^ + (?P<MethodIndent> [ \t]* ) + def [ \t]+ (?P<MethodName> [a-zA-Z_] \w* ) [ \t]* \( ) | (?P<Class> - # lightly questionable: assume only top-level classes count - ^ class [ \t]+ + ^ + (?P<ClassIndent> [ \t]* ) + class [ \t]+ (?P<ClassName> [a-zA-Z_] \w* ) [ \t]* (?P<ClassSupers> \( [^)\n]* \) )? @@ -96,11 +96,6 @@ import [ \t]+ (?P<ImportFromList> [^#;\n]+ ) ) - -| (?P<AtTopLevel> - # cheap trick: anything other than ws in first column - ^ \S - ) """, re.VERBOSE | re.DOTALL | re.MULTILINE).search _modules = {} # cache of modules we've seen @@ -169,10 +164,10 @@ def readmodule(module, path=[], inpackage=0): _modules[module] = dict return dict - cur_class = None dict = {} _modules[module] = dict imports = [] + classstack = [] # stack of (class, indent) pairs src = f.read() f.close() @@ -191,26 +186,33 @@ def readmodule(module, path=[], inpackage=0): break start, i = m.span() - if m.start("AtTopLevel") >= 0: - # end of class definition - cur_class = None - -## elif m.start("String") >= 0: -## pass - - elif m.start("Method") >= 0: - # found a method definition - if cur_class: + if m.start("Method") >= 0: + # found a method definition or function + thisindent = _indent(m.group("MethodIndent")) + # close all classes indented at least as much + while classstack and \ + classstack[-1][1] >= thisindent: + del classstack[-1] + if classstack: # and we know the class it belongs to meth_name = m.group("MethodName") lineno = lineno + \ countnl(src, '\n', last_lineno_pos, start) last_lineno_pos = start + cur_class = classstack[-1][0] cur_class._addmethod(meth_name, lineno) + elif m.start("String") >= 0: + pass + 
elif m.start("Class") >= 0: # we found a class definition + thisindent = _indent(m.group("ClassIndent")) + # close all classes indented at least as much + while classstack and \ + classstack[-1][1] >= thisindent: + del classstack[-1] lineno = lineno + \ countnl(src, '\n', last_lineno_pos, start) last_lineno_pos = start @@ -245,6 +247,7 @@ def readmodule(module, path=[], inpackage=0): cur_class = Class(module, class_name, inherit, file, lineno) dict[class_name] = cur_class + classstack.append((cur_class, thisindent)) elif m.start("Import") >= 0: # import module @@ -287,3 +290,6 @@ def readmodule(module, path=[], inpackage=0): assert 0, "regexp _getnext found something unexpected" return dict + +def _indent(ws, _expandtabs=string.expandtabs): + return len(_expandtabs(ws, TABWIDTH))