From 0ed7aa1e031f3194ebb2e7373c492201b8f43bc7 Mon Sep 17 00:00:00 2001
From: Guido van Rossum <guido@python.org>
Date: Mon, 2 Dec 2002 14:54:20 +0000
Subject: [PATCH] Moderately heavy reorganization of pyclbr to fix
 package-related bugs.

- The _modules cache now uses the full module name.

- The meaning of the (internal!!!) inpackage argument is changed: it
  now is the parent package name, or None.  readmodule() doesn't
  support this argument any more.

- The meaning of the path argument is changed: when inpackage is set,
  the module *must* be found in this path (as is the case for the real
  package search).

- Miscellaneous cleanup, e.g. fixed __all__, changed some comments and
  doc strings, etc.

- Adapted the unit tests to the new semantics (nothing much changed,
  really).  Added some debugging code to the unit tests that prints
  helpful extra info to stderr when a test fails (interpreting the
  test failures turned out to be hard without these).
---
 Lib/pyclbr.py           | 118 ++++++++++++++++++++++------------------
 Lib/test/test_pyclbr.py |  48 +++++++++++-----
 2 files changed, 99 insertions(+), 67 deletions(-)

diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py
index fe34208dd6..aa00f6f817 100644
--- a/Lib/pyclbr.py
+++ b/Lib/pyclbr.py
@@ -16,6 +16,7 @@ are class instances of the class Class defined here.
 
 A class is described by the class Class in this module.  Instances
 of this class have the following instance variables:
+        module -- the module name
         name -- the name of the class
         super -- a list of super classes (Class instances)
         methods -- a dictionary of methods
@@ -29,24 +30,21 @@ string giving the name of the super class.  Since import statements
 are recognized and imported modules are scanned as well, this
 shouldn't happen often.
 
-XXX describe the Function class.
+A function is described by the class Function in this module.
+Instances of this class have the following instance variables:
+        module -- the module name
+        name -- the name of the function
+        file -- the file in which the function was defined
+        lineno -- the line in the file on which the def statement occurred
+
 
 BUGS
 - Nested classes and functions can confuse it.
 
-PACKAGE RELATED BUGS
-- If you have a package and a module inside that or another package
-  with the same name, module caching doesn't work properly since the
-  key is the base name of the module/package.
-- The only entry that is returned when you readmodule a package is a
-  __path__ whose value is a list which confuses certain class browsers.
-- When code does:
-  from package import subpackage
-  class MyClass(subpackage.SuperClass):
-    ...
-  It can't locate the parent.  It probably needs to have the same
-  hairy logic that the import locator already does.  (This logic
-  exists coded in Python in the freeze package.)
+PACKAGE CAVEAT
+- When you call readmodule_ex for a package, dict['__path__'] is a
+  list, which may confuse older class browsers.  (readmodule filters
+  these out though.)
 """
 
 import sys
@@ -54,7 +52,7 @@ import imp
 import tokenize # Python tokenizer
 from token import NAME
 
-__all__ = ["readmodule"]
+__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
 
 _modules = {}                           # cache of modules we've seen
 
@@ -74,76 +72,84 @@ class Class:
     def _addmethod(self, name, lineno):
         self.methods[name] = lineno
 
-class Function(Class):
+class Function:
     '''Class to represent a top-level Python function'''
     def __init__(self, module, name, file, lineno):
-        Class.__init__(self, module, name, None, file, lineno)
-    def _addmethod(self, name, lineno):
-        assert 0, "Function._addmethod() shouldn't be called"
+        self.module = module
+        self.name = name
+        self.file = file
+        self.lineno = lineno
 
-def readmodule(module, path=[], inpackage=False):
+def readmodule(module, path=[]):
     '''Backwards compatible interface.
 
-    Like readmodule_ex() but strips Function objects from the
+    Call readmodule_ex() and then only keep Class objects from the
     resulting dictionary.'''
 
-    dict = readmodule_ex(module, path, inpackage)
+    dict = readmodule_ex(module, path)
     res = {}
     for key, value in dict.items():
-        if not isinstance(value, Function):
+        if isinstance(value, Class):
             res[key] = value
     return res
 
-def readmodule_ex(module, path=[], inpackage=False):
+def readmodule_ex(module, path=[], inpackage=None):
     '''Read a module file and return a dictionary of classes.
 
     Search for MODULE in PATH and sys.path, read and parse the
     module and return a dictionary with one entry for each class
-    found in the module.'''
+    found in the module.
+
+    If INPACKAGE is true, it must be the dotted name of the package in
+    which we are searching for a submodule, and then PATH must be the
+    package search path; otherwise, we are searching for a top-level
+    module, and PATH is combined with sys.path.
+    '''
+
+    # Compute the full module name (prepending inpackage if set)
+    if inpackage:
+        fullmodule = "%s.%s" % (inpackage, module)
+    else:
+        fullmodule = module
+
+    # Check in the cache
+    if fullmodule in _modules:
+        return _modules[fullmodule]
 
+    # Initialize the dict for this module's contents
     dict = {}
 
+    # Check if it is a built-in module; we don't do much for these
+    if module in sys.builtin_module_names and not inpackage:
+        _modules[module] = dict
+        return dict
+
+    # Check for a dotted module name
     i = module.rfind('.')
     if i >= 0:
-        # Dotted module name
-        package = module[:i].strip()
-        submodule = module[i+1:].strip()
+        package = module[:i]
+        submodule = module[i+1:]
         parent = readmodule_ex(package, path, inpackage)
-        child = readmodule_ex(submodule, parent['__path__'], True)
-        return child
+        if inpackage:
+            package = "%s.%s" % (inpackage, package)
+        return readmodule_ex(submodule, parent['__path__'], package)
 
-    if module in _modules:
-        # we've seen this module before...
-        return _modules[module]
-    if module in sys.builtin_module_names:
-        # this is a built-in module
-        _modules[module] = dict
-        return dict
-
-    # search the path for the module
+    # Search the path for the module
     f = None
     if inpackage:
-        try:
-            f, file, (suff, mode, type) = \
-                    imp.find_module(module, path)
-        except ImportError:
-            f = None
-    if f is None:
-        fullpath = list(path) + sys.path
-        f, file, (suff, mode, type) = imp.find_module(module, fullpath)
+        f, file, (suff, mode, type) = imp.find_module(module, path)
+    else:
+        f, file, (suff, mode, type) = imp.find_module(module, path + sys.path)
     if type == imp.PKG_DIRECTORY:
         dict['__path__'] = [file]
-        _modules[module] = dict
         path = [file] + path
-        f, file, (suff, mode, type) = \
-                        imp.find_module('__init__', [file])
+        f, file, (suff, mode, type) = imp.find_module('__init__', [file])
+    _modules[fullmodule] = dict
     if type != imp.PY_SOURCE:
         # not Python source, can't do anything with this module
         f.close()
-        _modules[module] = dict
         return dict
 
-    _modules[module] = dict
     classstack = [] # stack of (class, indent) pairs
 
     g = tokenize.generate_tokens(f.readline)
@@ -221,7 +227,13 @@ def readmodule_ex(module, path=[], inpackage=False):
                 for mod, mod2 in modules:
                     try:
                         # Recursively read the imported module
-                        readmodule_ex(mod, path, inpackage)
+                        if not inpackage:
+                            readmodule_ex(mod, path)
+                        else:
+                            try:
+                                readmodule_ex(mod, path, inpackage)
+                            except ImportError:
+                                readmodule_ex(mod)
                     except:
                         # If we can't find or parse the imported module,
                         # too bad -- don't die here.
diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py
index 61d59f77b4..03eb221cf1 100644
--- a/Lib/test/test_pyclbr.py
+++ b/Lib/test/test_pyclbr.py
@@ -6,6 +6,7 @@ from test.test_support import run_unittest
 import unittest, sys
 from types import ClassType, FunctionType, MethodType
 import pyclbr
+from unittest import TestCase
 
 # This next line triggers an error on old versions of pyclbr.
 
@@ -18,16 +19,19 @@ from commands import getstatus
 # is imperfect (as designed), testModule is called with a set of
 # members to ignore.
 
-class PyclbrTest(unittest.TestCase):
+class PyclbrTest(TestCase):
 
     def assertListEq(self, l1, l2, ignore):
         ''' succeed iff {l1} - {ignore} == {l2} - {ignore} '''
-        for p1, p2 in (l1, l2), (l2, l1):
-            for item in p1:
-                ok = (item in p2) or (item in ignore)
-                if not ok:
-                    self.fail("%r missing" % item)
-
+        try:
+            for p1, p2 in (l1, l2), (l2, l1):
+                for item in p1:
+                    ok = (item in p2) or (item in ignore)
+                    if not ok:
+                        self.fail("%r missing" % item)
+        except:
+            print >>sys.stderr, "l1=%r, l2=%r, ignore=%r" % (l1, l2, ignore)
+            raise
 
     def assertHasattr(self, obj, attr, ignore):
         ''' succeed iff hasattr(obj,attr) or attr in ignore. '''
@@ -40,7 +44,8 @@ class PyclbrTest(unittest.TestCase):
     def assertHaskey(self, obj, key, ignore):
         ''' succeed iff obj.has_key(key) or key in ignore. '''
         if key in ignore: return
-        if not obj.has_key(key): print "***",key
+        if not obj.has_key(key):
+            print >>sys.stderr, "***",key
         self.failUnless(obj.has_key(key))
 
     def assertEquals(self, a, b, ignore=None):
@@ -56,7 +61,9 @@ class PyclbrTest(unittest.TestCase):
             module is loaded with __import__.'''
 
         if module == None:
-            module = __import__(moduleName, globals(), {}, [])
+            # Import it.
+            # ('<silly>' is to work around an API silliness in __import__)
+            module = __import__(moduleName, globals(), {}, ['<silly>'])
 
         dict = pyclbr.readmodule_ex(moduleName)
 
@@ -74,7 +81,11 @@ class PyclbrTest(unittest.TestCase):
                 pyclbr_bases = [ getattr(base, 'name', base)
                                  for base in value.super ]
 
-                self.assertListEq(real_bases, pyclbr_bases, ignore)
+                try:
+                    self.assertListEq(real_bases, pyclbr_bases, ignore)
+                except:
+                    print >>sys.stderr, "class=%s" % py_item
+                    raise
 
                 actualMethods = []
                 for m in py_item.__dict__.keys():
@@ -94,10 +105,17 @@ class PyclbrTest(unittest.TestCase):
                 # can't check file or lineno
 
         # Now check for missing stuff.
+        def defined_in(item, module):
+            if isinstance(item, ClassType):
+                return item.__module__ == module.__name__
+            if isinstance(item, FunctionType):
+                return item.func_globals is module.__dict__
+            return False
         for name in dir(module):
             item = getattr(module, name)
-            if type(item) in (ClassType, FunctionType):
-                self.assertHaskey(dict, name, ignore)
+            if isinstance(item,  (ClassType, FunctionType)):
+                if defined_in(item, module):
+                    self.assertHaskey(dict, name, ignore)
 
     def test_easy(self):
         self.checkModule('pyclbr')
@@ -136,8 +154,10 @@ class PyclbrTest(unittest.TestCase):
                                 'makedict', 'dump' # from sre_constants
                                 ))
 
-        cm('test.test_pyclbr',
-           module=sys.modules[__name__])
+        # Tests for modules inside packages
+        cm('email.Parser')
+
+        cm('test.test_pyclbr', ignore=('defined_in',))
 
         # pydoc doesn't work because of string issues
         # cm('pydoc', pydoc)
-- 
2.40.0