]> granicus.if.org Git - python/commitdiff
Added support for packages.
authorGuido van Rossum <guido@python.org>
Thu, 5 Mar 1998 03:42:00 +0000 (03:42 +0000)
committerGuido van Rossum <guido@python.org>
Thu, 5 Mar 1998 03:42:00 +0000 (03:42 +0000)
We have a whole new module finder that uses the actual Python
parser and scans the bytecode for IMPORT_NAME and IMPORT_FROM.
This requires some support in import.c (that hasn't been checked in).
New command line options for this: -d, -q, -m.

Tools/freeze/findmodules.py [deleted file]
Tools/freeze/freeze.py
Tools/freeze/makefreeze.py
Tools/freeze/modulefinder.py [new file with mode: 0644]

diff --git a/Tools/freeze/findmodules.py b/Tools/freeze/findmodules.py
deleted file mode 100644 (file)
index 9e02f2b..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-# Determine the names and filenames of the modules imported by a
-# script, recursively.  This is done by scanning for lines containing
-# import statements.  (The scanning has only superficial knowledge of
-# Python syntax and no knowledge of semantics, so in theory the result
-# may be incorrect -- however this is quite unlikely if you don't
-# intentionally obscure your Python code.)
-
-import os
-import regex
-import string
-import sys
-
-
-# Top-level interface.
-# First argument is the main program (script).
-# Second optional argument is list of modules to be searched as well.
-
-def findmodules(scriptfile, modules = [], path = sys.path):
-       todo = {}
-       todo['__main__'] = scriptfile
-       for name in modules:
-               mod = os.path.basename(name)
-               if mod[-3:] == '.py': mod = mod[:-3]
-               elif mod[-4:] == '.pyc': mod = mod[:-4]
-               todo[mod] = name
-       done = closure(todo)
-       return done
-
-
-# Compute the closure of scanfile() and findmodule().
-# Return a dictionary mapping module names to filenames.
-# Writes to stderr if a file can't be or read.
-
-def closure(todo):
-       done = {}
-       while todo:
-               newtodo = {}
-               for modname in todo.keys():
-                       if not done.has_key(modname):
-                               filename = todo[modname]
-                               if filename is None:
-                                       filename = findmodule(modname)
-                               done[modname] = filename
-                               if filename in ('<builtin>', '<unknown>'):
-                                       continue
-                               try:
-                                       modules = scanfile(filename)
-                               except IOError, msg:
-                                       sys.stderr.write("%s: %s\n" %
-                                                        (filename, str(msg)))
-                                       continue
-                               for m in modules:
-                                       if not done.has_key(m):
-                                               newtodo[m] = None
-               todo = newtodo
-       return done
-
-
-# Scan a file looking for import statements.
-# Return list of module names.
-# Can raise IOError.
-
-importstr = '\(^\|:\)[ \t]*import[ \t]+\([a-zA-Z0-9_, \t]+\)'
-fromstr   = '\(^\|:\)[ \t]*from[ \t]+\([a-zA-Z0-9_]+\)[ \t]+import[ \t]+'
-isimport = regex.compile(importstr)
-isfrom = regex.compile(fromstr)
-
-def scanfile(filename):
-       allmodules = {}
-       f = open(filename, 'r')
-       try:
-               while 1:
-                       line = f.readline()
-                       if not line: break # EOF
-                       while line[-2:] == '\\\n': # Continuation line
-                               line = line[:-2] + ' '
-                               line = line + f.readline()
-                       if isimport.search(line) >= 0:
-                               rawmodules = isimport.group(2)
-                               modules = string.splitfields(rawmodules, ',')
-                               for i in range(len(modules)):
-                                       modules[i] = string.strip(modules[i])
-                       elif isfrom.search(line) >= 0:
-                               modules = [isfrom.group(2)]
-                       else:
-                               continue
-                       for mod in modules:
-                               allmodules[mod] = None
-       finally:
-               f.close()
-       return allmodules.keys()
-
-
-# Find the file containing a module, given its name.
-# Return filename, or '<builtin>', or '<unknown>'.
-
-builtins = sys.builtin_module_names
-tails = ['.py', '.pyc']
-
-def findmodule(modname, path = sys.path):
-       if modname in builtins: return '<builtin>'
-       for dirname in path:
-               for tail in tails:
-                       fullname = os.path.join(dirname, modname + tail)
-                       try:
-                               f = open(fullname, 'r')
-                       except IOError:
-                               continue
-                       f.close()
-                       return fullname
-       return '<unknown>'
-
-
-# Test the above functions.
-
-def test():
-       if not sys.argv[1:]:
-               print 'usage: python findmodules.py scriptfile [morefiles ...]'
-               sys.exit(2)
-       done = findmodules(sys.argv[1], sys.argv[2:])
-       items = done.items()
-       items.sort()
-       for mod, file in [('Module', 'File')] + items:
-               print "%-15s %s" % (mod, file)
-
-if __name__ == '__main__':
-       test()
index 181e84598b194ab2bdb4bae6829d7f6f3d0dd6a2..b6f2d4ada2a92818d626e5cf288d424bd60edbf1 100755 (executable)
@@ -26,6 +26,12 @@ Options:
 
 -o dir:       Directory where the output files are created; default '.'.
 
+-m:           Additional arguments are module names instead of filenames.
+
+-d:           Debugging mode for the module finder.
+
+-q:           Make the module finder totally quiet.
+
 -h:           Print this help message.
 
 -w:           Toggle Windows (NT or 95) behavior.
@@ -42,7 +48,8 @@ script.py:    The Python script to be executed by the resulting binary.
 
 module ...:   Additional Python modules (referenced by pathname)
               that will be included in the resulting binary.  These
-              may be .py or .pyc files.
+              may be .py or .pyc files.  If -m is specified, these are
+              module names that are searched for in the path instead.
 
 NOTES:
 
@@ -67,7 +74,7 @@ import addpack
 # Import the freeze-private modules
 
 import checkextensions
-import findmodules
+import modulefinder
 import makeconfig
 import makefreeze
 import makemakefile
@@ -82,6 +89,8 @@ def main():
     exec_prefix = None                  # settable with -P option
     extensions = []
     path = sys.path
+    modargs = 0
+    debug = 1
     odir = ''
     win = sys.platform[:3] == 'win'
 
@@ -97,7 +106,7 @@ def main():
 
     # parse command line
     try:
-        opts, args = getopt.getopt(sys.argv[1:], 'he:o:p:P:s:w')
+        opts, args = getopt.getopt(sys.argv[1:], 'de:hmo:p:P:qs:w')
     except getopt.error, msg:
         usage('getopt error: ' + str(msg))
 
@@ -106,14 +115,20 @@ def main():
         if o == '-h':
             print __doc__
             return
+        if o == '-d':
+            debug = debug + 1
         if o == '-e':
             extensions.append(a)
+        if o == '-m':
+            modargs = 1
         if o == '-o':
             odir = a
         if o == '-p':
             prefix = a
         if o == '-P':
             exec_prefix = a
+        if o == '-q':
+            debug = 0
         if o == '-w':
             win = not win
         if o == '-s':
@@ -220,18 +235,30 @@ def main():
         target = os.path.join(odir, target)
         makefile = os.path.join(odir, makefile)
 
-    for mod in implicits:
-        modules.append(findmodules.findmodule(mod))
-
     # Actual work starts here...
 
-    dict = findmodules.findmodules(scriptfile, modules, path)
-    names = dict.keys()
-    names.sort()
-    print "Modules being frozen:"
-    for name in names:
-        print '\t', name
-
+    # collect all modules of the program
+    mf = modulefinder.ModuleFinder(path, debug)
+    for mod in implicits:
+        mf.import_hook(mod)
+    for mod in modules:
+        if mod == '-m':
+            modargs = 1
+            continue
+        if modargs:
+            if mod[-2:] == '.*':
+                mf.import_hook(mod[:-2], None, ["*"])
+            else:
+                mf.import_hook(mod)
+        else:
+            mf.load_file(mod)
+    mf.run_script(scriptfile)
+    if debug > 0:
+        mf.report()
+        print
+    dict = mf.modules
+
+    # generate output for frozen modules
     backup = frozen_c + '~'
     try:
         os.rename(frozen_c, backup)
@@ -239,7 +266,7 @@ def main():
         backup = None
     outfp = open(frozen_c, 'w')
     try:
-        makefreeze.makefreeze(outfp, dict)
+        makefreeze.makefreeze(outfp, dict, debug)
         if win and subsystem == 'windows':
             import winmakemakefile
             outfp.write(winmakemakefile.WINMAINTEMPLATE)
@@ -251,6 +278,7 @@ def main():
                              frozen_c)
             os.rename(backup, frozen_c)
 
+    # windows gets different treatment
     if win:
         # Taking a shortcut here...
         import winmakemakefile
@@ -264,14 +292,17 @@ def main():
             outfp.close()
         return
 
+    # generate config.c and Makefile
     builtins = []
     unknown = []
     mods = dict.keys()
     mods.sort()
     for mod in mods:
-        if dict[mod] == '<builtin>':
+        if dict[mod].__code__:
+            continue
+        if not dict[mod].__file__:
             builtins.append(mod)
-        elif dict[mod] == '<unknown>':
+        else:
             unknown.append(mod)
 
     addfiles = []
index 5c6f371af555320519ce51bd4943caf432795523..97315b339128ca643ebc888afb997037a46e3270 100644 (file)
@@ -1,4 +1,5 @@
 import marshal
+import string
 
 
 # Write a file containing frozen code for the modules in the dictionary.
@@ -23,51 +24,31 @@ main(argc, argv)
 
 """
 
-def makefreeze(outfp, dict):
+def makefreeze(outfp, dict, debug=0):
        done = []
        mods = dict.keys()
        mods.sort()
        for mod in mods:
-               modfn = dict[mod]
-               try:
-                       str = makecode(modfn)
-               except IOError, msg:
-                       sys.stderr.write("%s: %s\n" % (modfn, str(msg)))
-                       continue
-               if str:
-                       done.append(mod, len(str))
-                       writecode(outfp, mod, str)
+               m = dict[mod]
+               mangled = string.join(string.split(mod, "."), "__")
+               if m.__code__:
+                       if debug:
+                               print "freezing", mod, "..."
+                       str = marshal.dumps(m.__code__)
+                       size = len(str)
+                       if m.__path__:
+                               # Indicate package by negative size
+                               size = -size
+                       done.append((mod, mangled, size))
+                       writecode(outfp, mangled, str)
+       if debug:
+               print "generating table of frozen modules"
        outfp.write(header)
-       for mod, size in done:
-               outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mod, size))
+       for mod, mangled, size in done:
+               outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mangled, size))
        outfp.write(trailer)
 
 
-# Return code string for a given module -- either a .py or a .pyc
-# file.  Return either a string or None (if it's not Python code).
-# May raise IOError.
-
-def makecode(filename):
-       if filename[-3:] == '.py':
-               f = open(filename, 'r')
-               try:
-                       text = f.read()
-                       code = compile(text, filename, 'exec')
-               finally:
-                       f.close()
-               return marshal.dumps(code)
-       if filename[-4:] == '.pyc':
-               f = open(filename, 'rb')
-               try:
-                       f.seek(8)
-                       str = f.read()
-               finally:
-                       f.close()
-               return str
-       # Can't generate code for this extension
-       return None
-
-
 # Write a C initializer for a module containing the frozen python code.
 # The array is called M_<mod>.
 
@@ -78,22 +59,3 @@ def writecode(outfp, mod, str):
                for c in str[i:i+16]:
                        outfp.write('%d,' % ord(c))
        outfp.write('\n};\n')
-
-
-# Test for the above functions.
-
-def test():
-       import os
-       import sys
-       if not sys.argv[1:]:
-               print 'usage: python freezepython.py file.py(c) ...'
-               sys.exit(2)
-       dict = {}
-       for arg in sys.argv[1:]:
-               base = os.path.basename(arg)
-               mod, ext = os.path.splitext(base)
-               dict[mod] = arg
-       makefreeze(sys.stdout, dict)
-
-if __name__ == '__main__':
-       test()
diff --git a/Tools/freeze/modulefinder.py b/Tools/freeze/modulefinder.py
new file mode 100644 (file)
index 0000000..79665b7
--- /dev/null
@@ -0,0 +1,373 @@
+"""Find modules used by a script, using introspection."""
+
+import dis
+import imp
+import marshal
+import os
+import re
+import string
+import sys
+
+
+# Opcode numbers of the two import-related bytecodes, looked up by name
+# in dis.opname; load_module() compares each scanned opcode against them.
+IMPORT_NAME = dis.opname.index('IMPORT_NAME')
+IMPORT_FROM = dis.opname.index('IMPORT_FROM')
+
+
+class Module:
+    """Record describing one module or package known to ModuleFinder.
+
+    Attributes set by the constructor: __name__ (the name passed in),
+    __file__ and __path__ (both default to None) and __code__ (always
+    starts out as None).
+    """
+
+    def __init__(self, name, file=None, path=None):
+       self.__name__ = name
+       self.__file__ = file
+       self.__path__ = path
+       self.__code__ = None
+
+    def __repr__(self):
+       # Always show the name; show file and path only when they are set.
+       s = "Module(%s" % `self.__name__`
+       if self.__file__ is not None:
+           s = s + ", %s" % `self.__file__`
+       if self.__path__ is not None:
+           s = s + ", %s" % `self.__path__`
+       s = s + ")"
+       return s
+
+
+class ModuleFinder:
+
+    def __init__(self, path=None, debug=0):
+       """Create an empty finder.
+
+       path is the list of directories to search (sys.path when None);
+       debug is the verbosity threshold that msg() compares levels against.
+       """
+       if path is None:
+           path = sys.path
+       self.path = path
+       self.modules = {}       # module name -> Module, filled by add_module()
+       self.badmodules = {}    # names whose import failed; values are None
+       self.debug = debug
+       self.indent = 0         # current nesting depth used by msg()
+
+    def msg(self, level, str, *args):
+       """Print a debugging line when level <= self.debug.
+
+       The line starts with one indentation step per self.indent, then
+       str, then the repr() of each extra argument.
+       """
+       if level <= self.debug:
+           for i in range(self.indent):
+               print "   ",
+           print str,
+           for arg in args:
+               print repr(arg),
+           print
+
+    def msgin(self, *args):
+       """Like msg(), but first increase the indentation by one step.
+
+       args[0] is the level; nothing happens (the indentation is left
+       alone too) when that level is above self.debug.
+       """
+       level = args[0]
+       if level <= self.debug:
+           self.indent = self.indent + 1
+           apply(self.msg, args)
+
+    def msgout(self, *args):
+       """Like msg(), but first decrease the indentation by one step.
+
+       args[0] is the level; nothing happens (the indentation is left
+       alone too) when that level is above self.debug.
+       """
+       level = args[0]
+       if level <= self.debug:
+           self.indent = self.indent - 1
+           apply(self.msg, args)
+
+    def run_script(self, pathname):
+       """Load the script at pathname as the module '__main__'.
+
+       The file is always compiled as Python source.
+       """
+       self.msg(2, "run_script", pathname)
+       fp = open(pathname)
+       stuff = ("", "r", imp.PY_SOURCE)
+       try:
+           self.load_module('__main__', fp, pathname, stuff)
+       finally:
+           # Release the file handle even when loading fails.
+           fp.close()
+
+    def load_file(self, pathname):
+       """Load the file at pathname as a top-level module.
+
+       The module name is the file's base name without its extension.
+       The file is always treated as Python source, whatever its
+       extension is.
+       """
+       name, ext = os.path.splitext(os.path.basename(pathname))
+       fp = open(pathname)
+       stuff = (ext, "r", imp.PY_SOURCE)
+       try:
+           self.load_module(name, fp, pathname, stuff)
+       finally:
+           # Release the file handle even when loading fails.
+           fp.close()
+
+    def import_hook(self, name, caller=None, fromlist=None):
+       """Record the import of the (possibly dotted) name made by caller.
+
+       caller is the importing Module, or None.  Without a fromlist the
+       module found for the first component of name is returned.  With a
+       fromlist, its entries are resolved through ensure_fromlist() when
+       the imported module is a package, and the result is None.
+       Raises ImportError when a component cannot be found.
+       """
+       self.msg(3, "import_hook", name, caller, fromlist)
+       parent = self.determine_parent(caller)
+       q, tail = self.find_head_package(parent, name)
+       m = self.load_tail(q, tail)
+       if not fromlist:
+           return q
+       if m.__path__:
+           self.ensure_fromlist(m, fromlist)
+
+    def determine_parent(self, caller):
+       """Return the package that caller's imports are first tried in.
+
+       This is caller itself when it has a __path__ (it is a package),
+       the module registered under caller's name up to the last dot when
+       the name is dotted, and None otherwise (also when caller is None).
+       """
+       self.msgin(4, "determine_parent", caller)
+       if not caller:
+           self.msgout(4, "determine_parent -> None")
+           return None
+       pname = caller.__name__
+       if caller.__path__:
+           parent = self.modules[pname]
+           assert caller is parent
+           self.msgout(4, "determine_parent ->", parent)
+           return parent
+       if '.' in pname:
+           # A submodule: its parent is everything before the last dot.
+           i = string.rfind(pname, '.')
+           pname = pname[:i]
+           parent = self.modules[pname]
+           assert parent.__name__ == pname
+           self.msgout(4, "determine_parent ->", parent)
+           return parent
+       self.msgout(4, "determine_parent -> None")
+       return None
+
+    def find_head_package(self, parent, name):
+       """Import the first dotted component of name.
+
+       The component is first tried as a submodule of parent (when parent
+       is given) and then as a top-level module.  Returns (module, tail)
+       where tail is the part of name after the first dot ("" if none).
+       Raises ImportError when neither attempt finds a module.
+       """
+       self.msgin(4, "find_head_package", parent, name)
+       if '.' in name:
+           i = string.find(name, '.')
+           head = name[:i]
+           tail = name[i+1:]
+       else:
+           head = name
+           tail = ""
+       if parent:
+           qname = "%s.%s" % (parent.__name__, head)
+       else:
+           qname = head
+       q = self.import_module(head, qname, parent)
+       if q:
+           self.msgout(4, "find_head_package ->", (q, tail))
+           return q, tail
+       if parent:
+           # The relative attempt failed; retry as a top-level module.
+           qname = head
+           parent = None
+           q = self.import_module(head, qname, parent)
+           if q:
+               self.msgout(4, "find_head_package ->", (q, tail))
+               return q, tail
+       self.msgout(4, "raise ImportError: No module named", qname)
+       raise ImportError, "No module named " + qname
+
+    def load_tail(self, q, tail):
+       """Import the remaining dotted components in tail, starting at q.
+
+       Each component is imported as a submodule of the module found for
+       the previous one.  Returns the last module (q itself when tail is
+       empty).  Raises ImportError when a component cannot be imported.
+       """
+       self.msgin(4, "load_tail", q, tail)
+       m = q
+       while tail:
+           # Split off the next component; with no dot left, take it all.
+           i = string.find(tail, '.')
+           if i < 0: i = len(tail)
+           head, tail = tail[:i], tail[i+1:]
+           mname = "%s.%s" % (m.__name__, head)
+           m = self.import_module(head, mname, m)
+           if not m:
+               self.msgout(4, "raise ImportError: No module named", mname)
+               raise ImportError, "No module named " + mname
+       self.msgout(4, "load_tail ->", m)
+       return m
+
+    def ensure_fromlist(self, m, fromlist, recursive=0):
+       """Import the entries of fromlist that are submodules of m.
+
+       "*" is expanded once (not when recursive is true) to the names
+       returned by find_all_submodules().  Any other entry that is not
+       already an attribute of m is imported as a submodule.
+       Raises ImportError when such a submodule cannot be imported.
+       """
+       self.msg(4, "ensure_fromlist", m, fromlist, recursive)
+       for sub in fromlist:
+           if sub == "*":
+               if not recursive:
+                   all = self.find_all_submodules(m)
+                   if all:
+                       self.ensure_fromlist(m, all, 1)
+           elif not hasattr(m, sub):
+               subname = "%s.%s" % (m.__name__, sub)
+               submod = self.import_module(sub, subname, m)
+               if not submod:
+                   raise ImportError, "No module named " + subname
+
+    def find_all_submodules(self, m):
+       """Return the names of the modules found in m's __path__ directories.
+
+       A name is any directory entry ending in .py, .pyc or .pyo with
+       that suffix removed, except "__init__"; duplicates are merged.
+       Directories that cannot be listed are skipped.  Returns None when
+       m has no __path__.
+       """
+       if not m.__path__:
+           return
+       modules = {}
+       suffixes = [".py", ".pyc", ".pyo"]
+       for dir in m.__path__:
+           try:
+               names = os.listdir(dir)
+           except os.error:
+               self.msg(2, "can't list directory", dir)
+               continue
+           for name in names:
+               mod = None
+               for suff in suffixes:
+                   n = len(suff)
+                   if name[-n:] == suff:
+                       mod = name[:-n]
+                       break
+               if mod and mod != "__init__":
+                   # The dictionary is used as a set to drop duplicates.
+                   modules[mod] = mod
+       return modules.keys()
+
+    def import_module(self, partname, fqname, parent):
+       """Find and load the module fqname, whose last component is partname.
+
+       parent is the Module the lookup is relative to, or None for a
+       top-level lookup.  Returns the Module (already known or newly
+       loaded), or None when it cannot be found: the name is recorded in
+       badmodules, parent is not a package, or find_module() raises
+       ImportError.  On success the module is also set as attribute
+       partname of parent.
+       """
+       self.msgin(3, "import_module", partname, fqname, parent)
+       try:
+           m = self.modules[fqname]
+       except KeyError:
+           pass
+       else:
+           self.msgout(3, "import_module ->", m)
+           return m
+       if self.badmodules.has_key(fqname):
+           self.msgout(3, "import_module -> None")
+           return None
+       if parent and parent.__path__ is None:
+           # A plain module has no submodules.  Without this check a None
+           # path would make find_module() search the global path and
+           # register an unrelated top-level module under a dotted name.
+           self.msgout(3, "import_module -> None")
+           return None
+       try:
+           fp, pathname, stuff = self.find_module(partname,
+                                                  parent and parent.__path__)
+       except ImportError:
+           self.msgout(3, "import_module ->", None)
+           return None
+       try:
+           m = self.load_module(fqname, fp, pathname, stuff)
+       finally:
+           if fp: fp.close()
+       if parent:
+           setattr(parent, partname, m)
+       self.msgout(3, "import_module ->", m)
+       return m
+
+    def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
+       """Load one module and follow the imports found in its bytecode.
+
+       fp is an open file (or None), pathname its location and the tuple
+       a description as returned by find_module().  Packages are handed
+       to load_package(); source files are compiled; compiled files are
+       unmarshalled after their magic number is checked; any other type
+       is registered without code.  Returns the Module object.
+       Raises ImportError for a compiled file with a bad magic number.
+       """
+       self.msgin(2, "load_module", fqname, fp and "fp", pathname)
+       if type == imp.PKG_DIRECTORY:
+           m = self.load_package(fqname, pathname)
+           self.msgout(2, "load_module ->", m)
+           return m
+       if type == imp.PY_SOURCE:
+           co = compile(fp.read(), pathname, 'exec')
+       elif type == imp.PY_COMPILED:
+           if fp.read(4) != imp.get_magic():
+               self.msgout(2, "raise ImportError: Bad magic number", pathname)
+               # Format the message; a third raise argument would be
+               # taken as a traceback object.
+               raise ImportError, "Bad magic number in %s" % pathname
+           # Skip the next 4 header bytes before the marshalled code.
+           fp.read(4)
+           co = marshal.load(fp)
+       else:
+           co = None
+       m = self.add_module(fqname)
+       if co:
+           m.__file__ = pathname
+           m.__code__ = co
+           # Scan the bytecode of this code object (only co.co_code, not
+           # the code objects nested in it) for import opcodes.
+           code = co.co_code
+           n = len(code)
+           i = 0
+           lastname = None     # module named by the preceding IMPORT_NAME
+           while i < n:
+               c = code[i]
+               i = i+1
+               op = ord(c)
+               if op >= dis.HAVE_ARGUMENT:
+                   # Two-byte argument, low byte first.
+                   oparg = ord(code[i]) + ord(code[i+1])*256
+                   i = i+2
+               if op == IMPORT_NAME:
+                   name = lastname = co.co_names[oparg]
+                   if not self.badmodules.has_key(lastname):
+                       try:
+                           self.import_hook(name, m)
+                       except ImportError, msg:
+                           self.msg(2, "ImportError:", str(msg))
+                           self.badmodules[name] = None
+               elif op == IMPORT_FROM:
+                   name = co.co_names[oparg]
+                   assert lastname is not None
+                   if not self.badmodules.has_key(lastname):
+                       try:
+                           self.import_hook(lastname, m, [name])
+                       except ImportError, msg:
+                           self.msg(2, "ImportError:", str(msg))
+                           fullname = lastname + "." + name
+                           self.badmodules[fullname] = None
+               else:
+                   # Any other opcode ends the current import statement.
+                   lastname = None
+       self.msgout(2, "load_module ->", m)
+       return m
+
+    def load_package(self, fqname, pathname):
+       """Register the package fqname located in directory pathname.
+
+       The package's __file__ is set to pathname and its __path__ to
+       [pathname]; its "__init__" module is then located in that
+       directory and loaded under the package's own name.  Returns the
+       package's Module.  An ImportError from find_module() (no
+       "__init__" found) propagates to the caller.
+       """
+       self.msgin(2, "load_package", fqname, pathname)
+       m = self.add_module(fqname)
+       m.__file__ = pathname
+       m.__path__ = [pathname]
+       fp, buf, stuff = self.find_module("__init__", m.__path__)
+       try:
+           self.load_module(fqname, fp, buf, stuff)
+       finally:
+           # Release the __init__ file even when loading fails.
+           if fp: fp.close()
+       self.msgout(2, "load_package ->", m)
+       return m
+
+    def add_module(self, fqname):
+       """Return the Module registered as fqname, creating it if needed."""
+       if self.modules.has_key(fqname):
+           return self.modules[fqname]
+       self.modules[fqname] = m = Module(fqname)
+       return m
+
+    def find_module(self, name, path):
+       """Locate module name and return what imp.find_module() returns.
+
+       With path None, a name listed in sys.builtin_module_names yields
+       (None, None, ("", "", imp.C_BUILTIN)) without any file search;
+       otherwise self.path is searched.  A non-None path is passed
+       straight on to imp.find_module().
+       """
+       if path is None:
+           if name in sys.builtin_module_names:
+               return (None, None, ("", "", imp.C_BUILTIN))
+           path = self.path
+       return imp.find_module(name, path)
+
+    def report(self):
+       """Print a table of the modules found and the modules missing.
+
+       Found modules are listed in sorted order with their file, marked
+       "P" when they have a __path__ and "m" otherwise; the names in
+       badmodules follow, marked "?".
+       """
+       print
+       print "  %-25s %s" % ("Name", "File")
+       print "  %-25s %s" % ("----", "----")
+       # Print modules found
+       keys = self.modules.keys()
+       keys.sort()
+       for key in keys:
+           m = self.modules[key]
+           if m.__path__:
+               print "P",
+           else:
+               print "m",
+           print "%-25s" % key, m.__file__ or ""
+
+       # Print missing modules
+       keys = self.badmodules.keys()
+       keys.sort()
+       for key in keys:
+           print "?", key
+
+
+def test():
+    """Command-line driver: find the modules of a script and report them.
+
+    Options: -d raises the debug level, -q sets it to 0, -m makes the
+    extra arguments module names instead of filenames, and -p adds
+    os.pathsep-separated directories in front of the search path.  The
+    first argument is the script (default "hello.py"); the remaining
+    arguments are extra modules to load before the script is scanned.
+    """
+    # Parse command line
+    import getopt
+    try:
+       opts, args = getopt.getopt(sys.argv[1:], "dmp:q")
+    except getopt.error, msg:
+       print msg
+       return
+
+    # Process options
+    debug = 1
+    domods = 0
+    addpath = []
+    for o, a in opts:
+       if o == '-d':
+           debug = debug + 1
+       if o == '-m':
+           domods = 1
+       if o == '-p':
+           addpath = addpath + string.split(a, os.pathsep)
+       if o == '-q':
+           debug = 0
+
+    # Provide default arguments
+    if not args:
+       script = "hello.py"
+    else:
+       script = args[0]
+
+    # Set the path based on sys.path and the script directory
+    path = sys.path[:]
+    path[0] = os.path.dirname(script)
+    path = addpath + path
+    if debug > 1:
+       print "path:"
+       for item in path:
+           print "   ", `item`
+
+    # Create the module finder and turn its crank
+    mf = ModuleFinder(path, debug)
+    for arg in args[1:]:
+       # A literal '-m' among the arguments switches the rest to names.
+       if arg == '-m':
+           domods = 1
+           continue
+       if domods:
+           if arg[-2:] == '.*':
+               mf.import_hook(arg[:-2], None, ["*"])
+           else:
+               mf.import_hook(arg)
+       else:
+           mf.load_file(arg)
+    mf.run_script(script)
+    mf.report()
+
+
+# Run the test driver when executed as a script; a keyboard interrupt
+# ends the run with a short message instead of a traceback.
+if __name__ == '__main__':
+    try:
+       test()
+    except KeyboardInterrupt:
+       print "\n[interrupt]"