Tim Peters: Taught it more "real Python" rules without slowing it
appreciably.  Triple-quoted strings no longer confuse it, nor nested
classes or defs, nor comments starting in column 1.  Chews thru
Tkinter.py in < 3 seconds for me; doctest.py no longer confuses it; no
longer missing methods in PyShell.py; etc.  Also captures defs
starting in column 1 now, but ignores them; an interface should be
added so that IDLE's class browser can show the top-level functions
too.
This commit is contained in:
Guido van Rossum 1999-06-08 12:53:21 +00:00
parent 9a33707be7
commit df9f7a3e52
1 changed files with 53 additions and 47 deletions

View File

@ -29,10 +29,14 @@
shouldn't happen often. shouldn't happen often.
BUGS BUGS
Continuation lines are not dealt with at all and strings may confuse Continuation lines are not dealt with at all.
the hell out of the parser, but it usually works. While triple-quoted strings won't confuse it, lines that look like
Nested classes are not recognized. def, class, import or "from ... import" stmts inside backslash-continued
Nested defs may be mistaken for class methods.''' # ' <-- bow to font lock single-quoted strings are treated like code. The expense of stopping
that isn't worth it.
Code that doesn't pass tabnanny or python -t will confuse it, unless
you set the module TABWIDTH variable (default 8) to the correct tab width
for the file.''' # ' <-- bow to font lock
import os import os
import sys import sys
@ -40,39 +44,35 @@
import re import re
import string import string
TABWIDTH = 8
_getnext = re.compile(r""" _getnext = re.compile(r"""
## String slows it down by more than a factor of 2 (not because the (?P<String>
## string regexp is slow, but because there are often a lot of strings, \""" [^"\\]* (?:
## which means the regexp has to get called that many more times). (?: \\. | "(?!"") )
## (?P<String> [^"\\]*
## " [^"\\\n]* (?: \\. [^"\\\n]* )* " )*
## \"""
## | ' [^'\\\n]* (?: \\. [^'\\\n]* )* '
## | ''' [^'\\]* (?:
## | \""" [^"\\]* (?: (?: \\. | '(?!'') )
## (?: \\. | "(?!"") ) [^'\\]*
## [^"\\]* )*
## )* '''
## \""" )
##
## | ''' [^'\\]* (?: | (?P<Method>
## (?: \\. | '(?!'') ) ^
## [^'\\]* (?P<MethodIndent> [ \t]* )
## )* def [ \t]+
## '''
## )
##
##| (?P<Method>
(?P<Method>
# dicey trick: assume a def not at top level is a method
^ [ \t]+ def [ \t]+
(?P<MethodName> [a-zA-Z_] \w* ) (?P<MethodName> [a-zA-Z_] \w* )
[ \t]* \( [ \t]* \(
) )
| (?P<Class> | (?P<Class>
# lightly questionable: assume only top-level classes count ^
^ class [ \t]+ (?P<ClassIndent> [ \t]* )
class [ \t]+
(?P<ClassName> [a-zA-Z_] \w* ) (?P<ClassName> [a-zA-Z_] \w* )
[ \t]* [ \t]*
(?P<ClassSupers> \( [^)\n]* \) )? (?P<ClassSupers> \( [^)\n]* \) )?
@ -96,11 +96,6 @@
import [ \t]+ import [ \t]+
(?P<ImportFromList> [^#;\n]+ ) (?P<ImportFromList> [^#;\n]+ )
) )
| (?P<AtTopLevel>
# cheap trick: anything other than ws in first column
^ \S
)
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search """, re.VERBOSE | re.DOTALL | re.MULTILINE).search
_modules = {} # cache of modules we've seen _modules = {} # cache of modules we've seen
@ -169,10 +164,10 @@ def readmodule(module, path=[], inpackage=0):
_modules[module] = dict _modules[module] = dict
return dict return dict
cur_class = None
dict = {} dict = {}
_modules[module] = dict _modules[module] = dict
imports = [] imports = []
classstack = [] # stack of (class, indent) pairs
src = f.read() src = f.read()
f.close() f.close()
@ -191,26 +186,33 @@ def readmodule(module, path=[], inpackage=0):
break break
start, i = m.span() start, i = m.span()
if m.start("AtTopLevel") >= 0: if m.start("Method") >= 0:
# end of class definition # found a method definition or function
cur_class = None thisindent = _indent(m.group("MethodIndent"))
# close all classes indented at least as much
## elif m.start("String") >= 0: while classstack and \
## pass classstack[-1][1] >= thisindent:
del classstack[-1]
elif m.start("Method") >= 0: if classstack:
# found a method definition
if cur_class:
# and we know the class it belongs to # and we know the class it belongs to
meth_name = m.group("MethodName") meth_name = m.group("MethodName")
lineno = lineno + \ lineno = lineno + \
countnl(src, '\n', countnl(src, '\n',
last_lineno_pos, start) last_lineno_pos, start)
last_lineno_pos = start last_lineno_pos = start
cur_class = classstack[-1][0]
cur_class._addmethod(meth_name, lineno) cur_class._addmethod(meth_name, lineno)
elif m.start("String") >= 0:
pass
elif m.start("Class") >= 0: elif m.start("Class") >= 0:
# we found a class definition # we found a class definition
thisindent = _indent(m.group("ClassIndent"))
# close all classes indented at least as much
while classstack and \
classstack[-1][1] >= thisindent:
del classstack[-1]
lineno = lineno + \ lineno = lineno + \
countnl(src, '\n', last_lineno_pos, start) countnl(src, '\n', last_lineno_pos, start)
last_lineno_pos = start last_lineno_pos = start
@ -245,6 +247,7 @@ def readmodule(module, path=[], inpackage=0):
cur_class = Class(module, class_name, inherit, cur_class = Class(module, class_name, inherit,
file, lineno) file, lineno)
dict[class_name] = cur_class dict[class_name] = cur_class
classstack.append((cur_class, thisindent))
elif m.start("Import") >= 0: elif m.start("Import") >= 0:
# import module # import module
@ -287,3 +290,6 @@ def readmodule(module, path=[], inpackage=0):
assert 0, "regexp _getnext found something unexpected" assert 0, "regexp _getnext found something unexpected"
return dict return dict
def _indent(ws, _expandtabs=string.expandtabs):
    """Return the length of ws after expanding its tabs.

    Tabs are expanded using the module-level TABWIDTH as the tab size.
    The _expandtabs default binds string.expandtabs at definition time.
    """
    expanded = _expandtabs(ws, TABWIDTH)
    return len(expanded)