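Issue #6561: '\d' regular expression should not match characters of Unicode category 'No' (Number, Other); it should match only those of category 'Nd' (Number, Decimal Digit).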

author Mark Dickinson <dickinsm@gmail.com>

Tue, 28 Jul 2009 20:35:03 +0000 (20:35 +0000)

committer Mark Dickinson <dickinsm@gmail.com>

Tue, 28 Jul 2009 20:35:03 +0000 (20:35 +0000)
author Mark Dickinson <dickinsm@gmail.com>
Tue, 28 Jul 2009 20:35:03 +0000 (20:35 +0000)
committer Mark Dickinson <dickinsm@gmail.com>
Tue, 28 Jul 2009 20:35:03 +0000 (20:35 +0000)
diff --git a/Doc/library/re.rst b/Doc/library/re.rst

index 2d5e1956ce3660ec017e1e32018bcb3ec4e9971c..df63f9bec2809d7ffd2d703fa197a6102ef2f6c0 100644 (file)
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -332,7 +332,8 @@ the second character.  For example, ``\$`` matches the character ``'$'``.
  ``\d``
     When the :const:`UNICODE` flag is not specified, matches any decimal digit; this
     is equivalent to the set ``[0-9]``.  With :const:`UNICODE`, it will match
-   whatever is classified as a digit in the Unicode character properties database.
+   whatever is classified as a decimal digit in the Unicode character properties
+   database.
  
  ``\D``
     When the :const:`UNICODE` flag is not specified, matches any non-digit
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py

index 4f543d93dd9e525ffbbc3a213b59e0fcaf955be4..c4cc8208ede0156a8fb3687add8c0f34a9c912f7 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -636,6 +636,27 @@ class ReTests(unittest.TestCase):
          self.assertEqual(iter.next().span(), (4, 4))
          self.assertRaises(StopIteration, iter.next)
  
+    def test_bug_6561(self):
+        # '\d' should match characters in Unicode category 'Nd'
+        # (Number, Decimal Digit), but not those in 'Nl' (Number,
+        # Letter) or 'No' (Number, Other).
+        decimal_digits = [
+            u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
+            u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
+            u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+            ]
+        for x in decimal_digits:
+            self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
+
+        not_decimal_digits = [
+            u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+            u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+            u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
+            u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+            ]
+        for x in not_decimal_digits:
+            self.assertIsNone(re.match('^\d$', x, re.UNICODE))
+
      def test_empty_array(self):
          # SF buf 1647541
          import array
diff --git a/Misc/NEWS b/Misc/NEWS

index 25bbabb72cd3192e537a2429bfe4df5f12d2a44b..6731fb93e16e00669c3ef109daed8cee5e36e4bc 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -1205,6 +1205,10 @@ C-API
  Extension Modules
  -----------------
  
+- Issue #6561: '\d' in a regex now matches only characters with
+  Unicode category 'Nd' (Number, Decimal Digit).  Previously it also
+  matched characters with category 'No'.
+
  - Issue #1523: Remove deprecated overflow wrapping for struct.pack
    with an integer format code ('bBhHiIlLqQ').  Packing an out-of-range
    integer now consistently raises struct.error.
diff --git a/Modules/_sre.c b/Modules/_sre.c

index 1aea53bf94e6ce4e78204d5fa360c5138ef1cb5d..0d9ee24eae7147013c69b0f445b990a6007e2b92 100644 (file)
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -172,7 +172,7 @@ static unsigned int sre_lower_locale(unsigned int ch)
  
  #if defined(HAVE_UNICODE)
  
-#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
+#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
  #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
  #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
  #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
author	Mark Dickinson <dickinsm@gmail.com>
	Tue, 28 Jul 2009 20:35:03 +0000 (20:35 +0000)
committer	Mark Dickinson <dickinsm@gmail.com>
	Tue, 28 Jul 2009 20:35:03 +0000 (20:35 +0000)
Doc/library/re.rst		patch \| blob \| history
Lib/test/test_re.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Modules/_sre.c		patch \| blob \| history