granicus.if.org Git - python/commitdiff
Tim Peters: Taught it more "real Python" rules without slowing it
authorGuido van Rossum <guido@python.org>
Tue, 8 Jun 1999 12:53:21 +0000 (12:53 +0000)
committerGuido van Rossum <guido@python.org>
Tue, 8 Jun 1999 12:53:21 +0000 (12:53 +0000)
appreciably.  Triple-quoted strings no longer confuse it, nor nested
classes or defs, nor comments starting in column 1.  Chews thru
Tkinter.py in < 3 seconds for me; doctest.py no longer confuses it; no
longer missing methods in PyShell.py; etc.  Also captures defs
starting in column 1 now, but ignores them; an interface should be
added so that IDLE's class browser can show the top-level functions
too.

Lib/pyclbr.py

index ad20c99efea5e24fade1af9174f20c86b2db47c0..709a07bf3661cf3a2fdaf6c3e9993d662b28d05a 100644 (file)
@@ -29,10 +29,14 @@ are recognized and imported modules are scanned as well, this
 shouldn't happen often.
 
 BUGS
-Continuation lines are not dealt with at all and strings may confuse
-the hell out of the parser, but it usually works.
-Nested classes are not recognized.
-Nested defs may be mistaken for class methods.''' # ' <-- bow to font lock
+Continuation lines are not dealt with at all.
+While triple-quoted strings won't confuse it, lines that look like
+def, class, import or "from ... import" stmts inside backslash-continued
+single-quoted strings are treated like code.  The expense of stopping
+that isn't worth it.
+Code that doesn't pass tabnanny or python -t will confuse it, unless
+you set the module TABWIDTH vrbl (default 8) to the correct tab width
+for the file.''' # ' <-- bow to font lock
 
 import os
 import sys
@@ -40,39 +44,35 @@ import imp
 import re
 import string
 
+TABWIDTH = 8
+
 _getnext = re.compile(r"""
-## String slows it down by more than a factor of 2 (not because the
-## string regexp is slow, but because there are often a lot of strings,
-## which means the regexp has to get called that many more times).
-##    (?P<String>
-##     " [^"\\\n]* (?: \\. [^"\\\n]* )* "
-##
-##    |   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '
-##
-##    |  \""" [^"\\]* (?:
-##                     (?: \\. | "(?!"") )
-##                     [^"\\]*
-##                 )*
-##       \"""
-##
-##    |   ''' [^'\\]* (?:
-##                     (?: \\. | '(?!'') )
-##                     [^'\\]*
-##                 )*
-##     '''
-##    )
-##
-##|   (?P<Method>
-    (?P<Method>
-       # dicey trick:  assume a def not at top level is a method
-       ^ [ \t]+ def [ \t]+
+    (?P<String>
+       \""" [^"\\]* (?:
+                       (?: \\. | "(?!"") )
+                       [^"\\]*
+                   )*
+       \"""
+
+    |   ''' [^'\\]* (?:
+                       (?: \\. | '(?!'') )
+                       [^'\\]*
+                   )*
+       '''
+    )
+
+|   (?P<Method>
+       ^
+       (?P<MethodIndent> [ \t]* )
+       def [ \t]+
        (?P<MethodName> [a-zA-Z_] \w* )
        [ \t]* \(
     )
 
 |   (?P<Class>
-       # lightly questionable:  assume only top-level classes count
-       ^ class [ \t]+
+       ^
+       (?P<ClassIndent> [ \t]* )
+       class [ \t]+
        (?P<ClassName> [a-zA-Z_] \w* )
        [ \t]*
        (?P<ClassSupers> \( [^)\n]* \) )?
@@ -96,11 +96,6 @@ _getnext = re.compile(r"""
        import [ \t]+
        (?P<ImportFromList> [^#;\n]+ )
     )
-
-|   (?P<AtTopLevel>
-       # cheap trick: anything other than ws in first column
-       ^ \S
-    )
 """, re.VERBOSE | re.DOTALL | re.MULTILINE).search
 
 _modules = {}                           # cache of modules we've seen
@@ -169,10 +164,10 @@ def readmodule(module, path=[], inpackage=0):
                _modules[module] = dict
                return dict
 
-       cur_class = None
        dict = {}
        _modules[module] = dict
        imports = []
+       classstack = [] # stack of (class, indent) pairs
        src = f.read()
        f.close()
 
@@ -191,26 +186,33 @@ def readmodule(module, path=[], inpackage=0):
                        break
                start, i = m.span()
 
-               if m.start("AtTopLevel") >= 0:
-                       # end of class definition
-                       cur_class = None
-
-##             elif m.start("String") >= 0:
-##                     pass
-
-               elif m.start("Method") >= 0:
-                       # found a method definition
-                       if cur_class:
+               if m.start("Method") >= 0:
+                       # found a method definition or function
+                       thisindent = _indent(m.group("MethodIndent"))
+                       # close all classes indented at least as much
+                       while classstack and \
+                             classstack[-1][1] >= thisindent:
+                               del classstack[-1]
+                       if classstack:
                                # and we know the class it belongs to
                                meth_name = m.group("MethodName")
                                lineno = lineno + \
                                         countnl(src, '\n',
                                                 last_lineno_pos, start)
                                last_lineno_pos = start
+                               cur_class = classstack[-1][0]
                                cur_class._addmethod(meth_name, lineno)
 
+               elif m.start("String") >= 0:
+                       pass
+
                elif m.start("Class") >= 0:
                        # we found a class definition
+                       thisindent = _indent(m.group("ClassIndent"))
+                       # close all classes indented at least as much
+                       while classstack and \
+                             classstack[-1][1] >= thisindent:
+                               del classstack[-1]
                        lineno = lineno + \
                                 countnl(src, '\n', last_lineno_pos, start)
                        last_lineno_pos = start
@@ -245,6 +247,7 @@ def readmodule(module, path=[], inpackage=0):
                        cur_class = Class(module, class_name, inherit,
                                          file, lineno)
                        dict[class_name] = cur_class
+                       classstack.append((cur_class, thisindent))
 
                elif m.start("Import") >= 0:
                        # import module
@@ -287,3 +290,6 @@ def readmodule(module, path=[], inpackage=0):
                        assert 0, "regexp _getnext found something unexpected"
 
        return dict
+
+def _indent(ws, _expandtabs=string.expandtabs):
+       return len(_expandtabs(ws, TABWIDTH))