From: Martin v. Löwis
Date: Fri, 7 May 2004 07:18:13 +0000 (+0000)
Subject: Fix _sre.CODESIZE on 64-bit machines in UCS-4 mode. Fixes #931848.
X-Git-Tag: v2.4a1~434
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7d9c6c7e8c1e381de7e96989c1332cf98d766f3a;p=python

Fix _sre.CODESIZE on 64-bit machines in UCS-4 mode. Fixes #931848.
Backported to 2.3.
---

diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 7ddc09779a..fa21d95f56 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -333,14 +333,16 @@ def _optimize_unicode(charset, fixup):
             block = block + 1
             data = data + _mk_bitmap(chunk)
     header = [block]
-    if MAXCODE == 65535:
+    if _sre.CODESIZE == 2:
         code = 'H'
     else:
-        code = 'L'
+        code = 'I'
     # Convert block indices to byte array of 256 bytes
     mapping = array.array('b', mapping).tostring()
     # Convert byte array to word array
-    header = header + array.array(code, mapping).tolist()
+    mapping = array.array(code, mapping)
+    assert mapping.itemsize == _sre.CODESIZE
+    header = header + mapping.tolist()
     data[0:0] = header
     return [(BIGCHARSET, data)]
 
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 9edca6eb97..2363ce5ee6 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -497,6 +497,15 @@ class ReTests(unittest.TestCase):
         self.assert_(re.compile('bug_926075') is not
                      re.compile(eval("u'bug_926075'")))
 
+    def test_bug_931848(self):
+        try:
+            unicode
+        except NameError:
+            pass
+        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
+        self.assertEqual(re.compile(pattern).split("a.b.c"),
+                         ['a','b','c'])
+
 def run_re_tests():
     from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
diff --git a/Modules/sre.h b/Modules/sre.h
index ba8500b9c4..4502802466 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -16,7 +16,7 @@
 /* size of a code word (must be unsigned short or larger, and
    large enough to hold a Py_UNICODE character) */
 #ifdef Py_UNICODE_WIDE
-#define SRE_CODE unsigned long
+#define SRE_CODE Py_UCS4
 #else
 #define SRE_CODE unsigned short
 #endif