definitions and to find out the superclasses of a class.
The interface consists of a single function:
- readmodule(module, path)
+ readmodule_ex(module [, path[, inpackage]])
module is the name of a Python module, path is an optional list of
directories where the module is to be searched. If present, path is
-prepended to the system search path sys.path.
+prepended to the system search path sys.path. (inpackage is used
+internally to search for a submodule of a package.)
The return value is a dictionary. The keys of the dictionary are
the names of the classes defined in the module (including classes
that are defined via the from XXX import YYY construct). The values
are recognized and imported modules are scanned as well, this
shouldn't happen often.
+XXX describe the Function class.
+
BUGS
-- Continuation lines are not dealt with at all, except inside strings.
- Nested classes and functions can confuse it.
-- Code that doesn't pass tabnanny or python -t will confuse it, unless
- you set the module TABWIDTH vrbl (default 8) to the correct tab width
- for the file.
PACKAGE RELATED BUGS
- If you have a package and a module inside that or another package
import sys
import imp
-import re
-import string
+import tokenize # Python tokenizer
+from token import NAME
__all__ = ["readmodule"]
-TABWIDTH = 8
-
-_getnext = re.compile(r"""
- (?P<String>
- \""" [^"\\]* (?:
- (?: \\. | "(?!"") )
- [^"\\]*
- )*
- \"""
-
- | ''' [^'\\]* (?:
- (?: \\. | '(?!'') )
- [^'\\]*
- )*
- '''
-
- | " [^"\\\n]* (?: \\. [^"\\\n]*)* "
-
- | ' [^'\\\n]* (?: \\. [^'\\\n]*)* '
- )
-
-| (?P<Method>
- ^
- (?P<MethodIndent> [ \t]* )
- def [ \t]+
- (?P<MethodName> [a-zA-Z_] \w* )
- [ \t]* \(
- )
-
-| (?P<Class>
- ^
- (?P<ClassIndent> [ \t]* )
- class [ \t]+
- (?P<ClassName> [a-zA-Z_] \w* )
- [ \t]*
- (?P<ClassSupers> \( [^)\n]* \) )?
- [ \t]* :
- )
-
-| (?P<Import>
- ^ import [ \t]+
- (?P<ImportList> [^#;\n]+ )
- )
-
-| (?P<ImportFrom>
- ^ from [ \t]+
- (?P<ImportFromPath>
- [a-zA-Z_] \w*
- (?:
- [ \t]* \. [ \t]* [a-zA-Z_] \w*
- )*
- )
- [ \t]+
- import [ \t]+
- (?P<ImportFromList> [^#;\n]+ )
- )
-""", re.VERBOSE | re.DOTALL | re.MULTILINE).search
-
_modules = {} # cache of modules we've seen
# each Python class is represented by an instance of this class
def _addmethod(self, name, lineno):
assert 0, "Function._addmethod() shouldn't be called"
-def readmodule(module, path=[], inpackage=0):
+def readmodule(module, path=[], inpackage=False):
'''Backwards compatible interface.
Like readmodule_ex() but strips Function objects from the
res[key] = value
return res
-def readmodule_ex(module, path=[], inpackage=0):
+def readmodule_ex(module, path=[], inpackage=False):
'''Read a module file and return a dictionary of classes.
Search for MODULE in PATH and sys.path, read and parse the
package = module[:i].strip()
submodule = module[i+1:].strip()
parent = readmodule_ex(package, path, inpackage)
- child = readmodule_ex(submodule, parent['__path__'], 1)
+ child = readmodule_ex(submodule, parent['__path__'], True)
return child
if module in _modules:
_modules[module] = dict
classstack = [] # stack of (class, indent) pairs
- src = f.read()
- f.close()
-
- # To avoid having to stop the regexp at each newline, instead
- # when we need a line number we simply count the number of
- # newlines in the string since the last time we did this; i.e.,
- # lineno += src.count('\n', last_lineno_pos, here)
- # last_lineno_pos = here
- lineno, last_lineno_pos = 1, 0
- i = 0
- while 1:
- m = _getnext(src, i)
- if not m:
- break
- start, i = m.span()
-
- if m.start("Method") >= 0:
- # found a method definition or function
- thisindent = _indent(m.group("MethodIndent"))
- meth_name = m.group("MethodName")
- lineno += src.count('\n', last_lineno_pos, start)
- last_lineno_pos = start
- # close all classes indented at least as much
- while classstack and \
- classstack[-1][1] >= thisindent:
- del classstack[-1]
- if classstack:
- # it's a class method
- cur_class = classstack[-1][0]
- cur_class._addmethod(meth_name, lineno)
- else:
- # it's a function
- f = Function(module, meth_name,
- file, lineno)
- dict[meth_name] = f
-
- elif m.start("String") >= 0:
- pass
- elif m.start("Class") >= 0:
- # we found a class definition
- thisindent = _indent(m.group("ClassIndent"))
- # close all classes indented at least as much
- while classstack and \
- classstack[-1][1] >= thisindent:
- del classstack[-1]
- lineno += src.count('\n', last_lineno_pos, start)
- last_lineno_pos = start
- class_name = m.group("ClassName")
- inherit = m.group("ClassSupers")
- if inherit:
- # the class inherits from other classes
- inherit = inherit[1:-1].strip()
- names = []
- for n in inherit.split(','):
- n = n.strip()
- if n in dict:
- # we know this super class
- n = dict[n]
- else:
- c = n.split('.')
- if len(c) > 1:
- # super class
- # is of the
- # form module.class:
- # look in
- # module for class
- m = c[-2]
- c = c[-1]
- if m in _modules:
- d = _modules[m]
- if c in d:
- n = d[c]
- names.append(n)
- inherit = names
- # remember this class
- cur_class = Class(module, class_name, inherit,
- file, lineno)
- dict[class_name] = cur_class
- classstack.append((cur_class, thisindent))
-
- elif m.start("Import") >= 0:
- # import module
- for n in m.group("ImportList").split(','):
- n = n.strip()
+ g = tokenize.generate_tokens(f.readline)
+ try:
+ for tokentype, token, start, end, line in g:
+ if token == 'def':
+ lineno, thisindent = start
+ tokentype, meth_name, start, end, line = g.next()
+ if tokentype != NAME:
+ continue # Syntax error
+ # close all classes indented at least as much
+ while classstack and \
+ classstack[-1][1] >= thisindent:
+ del classstack[-1]
+ if classstack:
+ # it's a class method
+ cur_class = classstack[-1][0]
+ cur_class._addmethod(meth_name, lineno)
+ else:
+ # it's a function
+ dict[meth_name] = Function(module, meth_name, file, lineno)
+ elif token == 'class':
+ lineno, thisindent = start
+ tokentype, class_name, start, end, line = g.next()
+ if tokentype != NAME:
+ continue # Syntax error
+ # close all classes indented at least as much
+ while classstack and \
+ classstack[-1][1] >= thisindent:
+ del classstack[-1]
+ # parse what follows the class name
+ tokentype, token, start, end, line = g.next()
+ inherit = None
+ if token == '(':
+ names = [] # List of superclasses
+ # there's a list of superclasses
+ level = 1
+ super = [] # Tokens making up current superclass
+ while True:
+ tokentype, token, start, end, line = g.next()
+ if token in (')', ',') and level == 1:
+ n = "".join(super)
+ if n in dict:
+ # we know this super class
+ n = dict[n]
+ else:
+ c = n.split('.')
+ if len(c) > 1:
+ # super class is of the form
+ # module.class: look in module for
+ # class
+ m = c[-2]
+ c = c[-1]
+ if m in _modules:
+ d = _modules[m]
+ if c in d:
+ n = d[c]
+ names.append(n)
+ if token == '(':
+ level += 1
+ elif token == ')':
+ level -= 1
+ if level == 0:
+ break
+ elif token == ',' and level == 1:
+ pass
+ else:
+ super.append(token)
+ inherit = names
+ cur_class = Class(module, class_name, inherit, file, lineno)
+ dict[class_name] = cur_class
+ classstack.append((cur_class, thisindent))
+ elif token == 'import' and start[1] == 0:
+ modules = _getnamelist(g)
+ for mod, mod2 in modules:
+ readmodule_ex(mod, path, inpackage)
+ elif token == 'from' and start[1] == 0:
+ mod, token = _getname(g)
+ if not mod or token != "import":
+ continue
+ names = _getnamelist(g)
try:
# recursively read the imported module
- d = readmodule_ex(n, path, inpackage)
+ d = readmodule_ex(mod, path, inpackage)
except:
- ##print 'module', n, 'not found'
- pass
-
- elif m.start("ImportFrom") >= 0:
- # from module import stuff
- mod = m.group("ImportFromPath")
- names = m.group("ImportFromList").split(',')
- try:
- # recursively read the imported module
- d = readmodule_ex(mod, path, inpackage)
- except:
- ##print 'module', mod, 'not found'
- continue
- # add any classes that were defined in the
- # imported module to our name space if they
- # were mentioned in the list
- for n in names:
- n = n.strip()
- if n in d:
- dict[n] = d[n]
- elif n == '*':
- # only add a name if not
- # already there (to mimic what
- # Python does internally)
- # also don't add names that
- # start with _
- for n in d:
- if n[0] != '_' and \
- not n in dict:
- dict[n] = d[n]
- else:
- assert 0, "regexp _getnext found something unexpected"
+ continue
+ # add any classes that were defined in the imported module
+ # to our name space if they were mentioned in the list
+ for n, n2 in names:
+ if n in d:
+ dict[n2 or n] = d[n]
+ elif n == '*':
+ # only add a name if not already there (to mimic
+ # what Python does internally) also don't add
+ # names that start with _
+ for n in d:
+ if n[0] != '_' and not n in dict:
+ dict[n] = d[n]
+ except StopIteration:
+ pass
+ f.close()
return dict
-def _indent(ws, _expandtabs=string.expandtabs):
- return len(_expandtabs(ws, TABWIDTH))
+def _getnamelist(g):
+ # Helper to get a comma-separated list of dotted names plus 'as'
+ # clauses. Return a list of pairs (name, name2) where name2 is
+ # the 'as' name, or None if there is no 'as' clause.
+ names = []
+ while True:
+ name, token = _getname(g)
+ if not name:
+ break
+ if token == 'as':
+ name2, token = _getname(g)
+ else:
+ name2 = None
+ names.append((name, name2))
+ while token != "," and "\n" not in token:
+ tokentype, token, start, end, line = g.next()
+ if token != ",":
+ break
+ return names
+
+def _getname(g):
+ # Helper to get a dotted name, return a pair (name, token) where
+ # name is the dotted name, or None if there was no dotted name,
+ # and token is the next input token.
+ parts = []
+ tokentype, token, start, end, line = g.next()
+ if tokentype != NAME and token != '*':
+ return (None, token)
+ parts.append(token)
+ while True:
+ tokentype, token, start, end, line = g.next()
+ if token != '.':
+ break
+ tokentype, token, start, end, line = g.next()
+ if tokentype != NAME:
+ break
+ parts.append(token)
+ return (".".join(parts), token)