]> granicus.if.org Git - python/commitdiff
Fix long-standing bugs with MANIFEST.in parsing on Windows (#6884).
authorÉric Araujo <merwok@netwok.org>
Sat, 25 Feb 2012 15:13:53 +0000 (16:13 +0100)
committerÉric Araujo <merwok@netwok.org>
Sat, 25 Feb 2012 15:13:53 +0000 (16:13 +0100)
These regex changes fix a number of issues for distutils on Windows:
- #6884: impossible to include a file starting with 'build'
- #9691 and #14004: sdist includes too many files
- #13193: test_filelist failures

This commit replaces the incorrect changes done in 557a973709de,
c566a3447ba1 and 3925081a7ca0 to fix #13193; we were too eager to fix
the test failures and I did not study the code enough before
greenlighting patches.  This time we have unit tests from the problems
reported by users to be sure we have the right fix.

Thanks to Nadeem Vawda for his help.

Lib/distutils/filelist.py
Lib/distutils/tests/test_filelist.py
Lib/distutils/tests/test_sdist.py
Misc/NEWS

index 5540b39a380ece0f7df39c8904dae626dd29e306..2f1c457ea0bec53348c5e3c88b4fb407339c4b3e 100644 (file)
@@ -210,6 +210,7 @@ class FileList:
 
         Return 1 if files are found.
         """
+        # XXX docstring lying about what the special chars are?
         files_found = 0
         pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
         self.debug_print("include_pattern: applying regex r'%s'" %
@@ -297,11 +298,14 @@ def glob_to_re(pattern):
     # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
     # and by extension they shouldn't match such "special characters" under
     # any OS.  So change all non-escaped dots in the RE to match any
-    # character except the special characters.
-    # XXX currently the "special characters" are just slash -- i.e. this is
-    # Unix-only.
-    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re)
-
+    # character except the special characters (currently: just os.sep).
+    sep = os.sep
+    if os.sep == '\\':
+        # we're using a regex to manipulate a regex, so we need
+        # to escape the backslash twice
+        sep = r'\\\\'
+    escaped = r'\1[^%s]' % sep
+    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
     return pattern_re
 
 
@@ -328,8 +332,10 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
         # ditch end of pattern character
         empty_pattern = glob_to_re('')
         prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
-        # paths should always use / in manifest templates
-        pattern_re = "^%s/.*%s" % (prefix_re, pattern_re)
+        sep = os.sep
+        if os.sep == '\\':
+            sep = r'\\'
+        pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
     else:                               # no prefix -- respect anchor flag
         if anchor:
             pattern_re = "^" + pattern_re
index d86ea44491a0376cc69fa83c2b97f997570c8ee5..69b88f2df2a613fa1af0f81f871d18ce425bd563 100644 (file)
@@ -1,4 +1,5 @@
 """Tests for distutils.filelist."""
+import os
 import re
 import unittest
 from distutils import debug
@@ -14,6 +15,7 @@ include ok
 include xo
 exclude xo
 include foo.tmp
+include buildout.cfg
 global-include *.x
 global-include *.txt
 global-exclude *.tmp
@@ -24,6 +26,11 @@ prune dir3
 """
 
 
+def make_local_path(s):
+    """Converts '/' in a string to os.sep"""
+    return s.replace('/', os.sep)
+
+
 class FileListTestCase(support.LoggingSilencer,
                        unittest.TestCase):
 
@@ -36,41 +43,60 @@ class FileListTestCase(support.LoggingSilencer,
         self.clear_logs()
 
     def test_glob_to_re(self):
-        # simple cases
-        self.assertEqual(glob_to_re('foo*'), 'foo[^/]*\\Z(?ms)')
-        self.assertEqual(glob_to_re('foo?'), 'foo[^/]\\Z(?ms)')
-        self.assertEqual(glob_to_re('foo??'), 'foo[^/][^/]\\Z(?ms)')
-
-        # special cases
-        self.assertEqual(glob_to_re(r'foo\\*'), r'foo\\\\[^/]*\Z(?ms)')
-        self.assertEqual(glob_to_re(r'foo\\\*'), r'foo\\\\\\[^/]*\Z(?ms)')
-        self.assertEqual(glob_to_re('foo????'), r'foo[^/][^/][^/][^/]\Z(?ms)')
-        self.assertEqual(glob_to_re(r'foo\\??'), r'foo\\\\[^/][^/]\Z(?ms)')
+        sep = os.sep
+        if os.sep == '\\':
+            sep = re.escape(os.sep)
+
+        for glob, regex in (
+            # simple cases
+            ('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
+            ('foo?', r'foo[^%(sep)s]\Z(?ms)'),
+            ('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
+            # special cases
+            (r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
+            (r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
+            ('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
+            (r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
+            regex = regex % {'sep': sep}
+            self.assertEqual(glob_to_re(glob), regex)
 
     def test_process_template_line(self):
         # testing  all MANIFEST.in template patterns
         file_list = FileList()
+        l = make_local_path
 
         # simulated file list
         file_list.allfiles = ['foo.tmp', 'ok', 'xo', 'four.txt',
-                              'global/one.txt',
-                              'global/two.txt',
-                              'global/files.x',
-                              'global/here.tmp',
-                              'f/o/f.oo',
-                              'dir/graft-one',
-                              'dir/dir2/graft2',
-                              'dir3/ok',
-                              'dir3/sub/ok.txt',
-                              ]
+                              'buildout.cfg',
+                              # filelist does not filter out VCS directories,
+                              # it's sdist that does
+                              l('.hg/last-message.txt'),
+                              l('global/one.txt'),
+                              l('global/two.txt'),
+                              l('global/files.x'),
+                              l('global/here.tmp'),
+                              l('f/o/f.oo'),
+                              l('dir/graft-one'),
+                              l('dir/dir2/graft2'),
+                              l('dir3/ok'),
+                              l('dir3/sub/ok.txt'),
+                             ]
 
         for line in MANIFEST_IN.split('\n'):
             if line.strip() == '':
                 continue
             file_list.process_template_line(line)
 
-        wanted = ['ok', 'four.txt', 'global/one.txt', 'global/two.txt',
-                  'f/o/f.oo', 'dir/graft-one', 'dir/dir2/graft2']
+        wanted = ['ok',
+                  'buildout.cfg',
+                  'four.txt',
+                  l('.hg/last-message.txt'),
+                  l('global/one.txt'),
+                  l('global/two.txt'),
+                  l('f/o/f.oo'),
+                  l('dir/graft-one'),
+                  l('dir/dir2/graft2'),
+                 ]
 
         self.assertEqual(file_list.files, wanted)
 
@@ -158,6 +184,7 @@ class FileListTestCase(support.LoggingSilencer,
         self.assertEqual(file_list.allfiles, ['a.py', 'b.txt'])
 
     def test_process_template(self):
+        l = make_local_path
         # invalid lines
         file_list = FileList()
         for action in ('include', 'exclude', 'global-include',
@@ -168,7 +195,7 @@ class FileListTestCase(support.LoggingSilencer,
 
         # include
         file_list = FileList()
-        file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py'])
+        file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
 
         file_list.process_template_line('include *.py')
         self.assertEqual(file_list.files, ['a.py'])
@@ -180,31 +207,31 @@ class FileListTestCase(support.LoggingSilencer,
 
         # exclude
         file_list = FileList()
-        file_list.files = ['a.py', 'b.txt', 'd/c.py']
+        file_list.files = ['a.py', 'b.txt', l('d/c.py')]
 
         file_list.process_template_line('exclude *.py')
-        self.assertEqual(file_list.files, ['b.txt', 'd/c.py'])
+        self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('exclude *.rb')
-        self.assertEqual(file_list.files, ['b.txt', 'd/c.py'])
+        self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
         self.assertWarnings()
 
         # global-include
         file_list = FileList()
-        file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py'])
+        file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
 
         file_list.process_template_line('global-include *.py')
-        self.assertEqual(file_list.files, ['a.py', 'd/c.py'])
+        self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('global-include *.rb')
-        self.assertEqual(file_list.files, ['a.py', 'd/c.py'])
+        self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
         self.assertWarnings()
 
         # global-exclude
         file_list = FileList()
-        file_list.files = ['a.py', 'b.txt', 'd/c.py']
+        file_list.files = ['a.py', 'b.txt', l('d/c.py')]
 
         file_list.process_template_line('global-exclude *.py')
         self.assertEqual(file_list.files, ['b.txt'])
@@ -216,50 +243,52 @@ class FileListTestCase(support.LoggingSilencer,
 
         # recursive-include
         file_list = FileList()
-        file_list.set_allfiles(['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py'])
+        file_list.set_allfiles(['a.py', l('d/b.py'), l('d/c.txt'),
+                                l('d/d/e.py')])
 
         file_list.process_template_line('recursive-include d *.py')
-        self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+        self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('recursive-include e *.py')
-        self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+        self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
         self.assertWarnings()
 
         # recursive-exclude
         file_list = FileList()
-        file_list.files = ['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py']
+        file_list.files = ['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')]
 
         file_list.process_template_line('recursive-exclude d *.py')
-        self.assertEqual(file_list.files, ['a.py', 'd/c.txt'])
+        self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
         self.assertNoWarnings()
 
         file_list.process_template_line('recursive-exclude e *.py')
-        self.assertEqual(file_list.files, ['a.py', 'd/c.txt'])
+        self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
         self.assertWarnings()
 
         # graft
         file_list = FileList()
-        file_list.set_allfiles(['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py'])
+        file_list.set_allfiles(['a.py', l('d/b.py'), l('d/d/e.py'),
+                                l('f/f.py')])
 
         file_list.process_template_line('graft d')
-        self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+        self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('graft e')
-        self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+        self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
         self.assertWarnings()
 
         # prune
         file_list = FileList()
-        file_list.files = ['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py']
+        file_list.files = ['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')]
 
         file_list.process_template_line('prune d')
-        self.assertEqual(file_list.files, ['a.py', 'f/f.py'])
+        self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('prune e')
-        self.assertEqual(file_list.files, ['a.py', 'f/f.py'])
+        self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
         self.assertWarnings()
 
 
index cb7beb7b2c3dbdb2d9ef1e7f558c4e9fd5b02975..9e422fca173d596c92542b9be1e977001236d228 100644 (file)
@@ -42,6 +42,7 @@ setup(name='fake')
 MANIFEST = """\
 # file GENERATED by distutils, do NOT edit
 README
+buildout.cfg
 inroot.txt
 setup.py
 data%(sep)sdata.dt
@@ -150,7 +151,7 @@ class SDistTestCase(PyPIRCCommandTestCase):
         dist_folder = join(self.tmp_dir, 'dist')
         result = os.listdir(dist_folder)
         result.sort()
-        self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'] )
+        self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'])
 
         os.remove(join(dist_folder, 'fake-1.0.tar'))
         os.remove(join(dist_folder, 'fake-1.0.tar.gz'))
@@ -209,11 +210,18 @@ class SDistTestCase(PyPIRCCommandTestCase):
         self.write_file((data_dir, 'data.dt'), '#')
         some_dir = join(self.tmp_dir, 'some')
         os.mkdir(some_dir)
+        # make sure VCS directories are pruned (#14004)
+        hg_dir = join(self.tmp_dir, '.hg')
+        os.mkdir(hg_dir)
+        self.write_file((hg_dir, 'last-message.txt'), '#')
+        # a buggy regex used to prevent this from working on windows (#6884)
+        self.write_file((self.tmp_dir, 'buildout.cfg'), '#')
         self.write_file((self.tmp_dir, 'inroot.txt'), '#')
         self.write_file((some_dir, 'file.txt'), '#')
         self.write_file((some_dir, 'other_file.txt'), '#')
 
         dist.data_files = [('data', ['data/data.dt',
+                                     'buildout.cfg',
                                      'inroot.txt',
                                      'notexisting']),
                            'some/file.txt',
@@ -243,15 +251,15 @@ class SDistTestCase(PyPIRCCommandTestCase):
             zip_file.close()
 
         # making sure everything was added
-        self.assertEqual(len(content), 11)
+        self.assertEqual(len(content), 12)
 
         # checking the MANIFEST
         f = open(join(self.tmp_dir, 'MANIFEST'))
         try:
             manifest = f.read()
-            self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
         finally:
             f.close()
+        self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
 
     @unittest.skipUnless(zlib, "requires zlib")
     def test_metadata_check_option(self):
index 1e35133b52a3b66f1881926951ee33e5fa1c1f37..0ab75146fbdcfdd287d8ec5f2de7222dd1c2e140 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -98,6 +98,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #6884: Fix long-standing bugs with MANIFEST.in parsing in distutils
+  on Windows.
+
 - Issue #8033: sqlite3: Fix 64-bit integer handling in user functions
   on 32-bit architectures. Initial patch by Philippe Devalkeneer.
 
@@ -250,8 +253,6 @@ Library
 - Issues #1745761, #755670, #13357, #12629, #1200313: HTMLParser now correctly
   handles non-valid attributes, including adjacent and unquoted attributes.
 
-- Issue #13193: Fix distutils.filelist.FileList under Windows.
-
 - Issue #13373: multiprocessing.Queue.get() could sometimes block indefinitely
   when called with a timeout.  Patch by Arnaud Ysmal.