From: Serhiy Storchaka
Date: Thu, 24 Oct 2013 19:02:42 +0000 (+0300)
Subject: Issue #19327: Fixed the working of regular expressions with too big charset.
X-Git-Tag: v2.7.6rc1~10
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=22fb0dec30af4168c81744782a8bcc2453ac8055;p=python

Issue #19327: Fixed the working of regular expressions with too big charset.
---

diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 97c1663b0b..bd4070549a 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -343,7 +343,7 @@ def _optimize_unicode(charset, fixup):
     else:
         code = 'I'
     # Convert block indices to byte array of 256 bytes
-    mapping = array.array('b', mapping).tostring()
+    mapping = array.array('B', mapping).tostring()
     # Convert byte array to word array
     mapping = array.array(code, mapping)
     assert mapping.itemsize == _sre.CODESIZE
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 8b277cfc02..d879bac5f6 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -427,6 +427,8 @@ class ReTests(unittest.TestCase):
                                   u"\u2222").group(1), u"\u2222")
         self.assertEqual(re.match(u"([\u2222\u2223])",
                                   u"\u2222", re.UNICODE).group(1), u"\u2222")
+        r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
+        self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
 
     def test_big_codesize(self):
         # Issue #1160
diff --git a/Misc/NEWS b/Misc/NEWS
index 4e8dab8fc2..5dc2870042 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -40,6 +40,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #19327: Fixed the working of regular expressions with too big charset.
+
 - Issue #19350: Increasing the test coverage of macurl2path. Patch by Colin
   Williams.
 