Issue #10254: Fixed a crash and a regression introduced by the implementation of PRI 29.

author Alexander Belopolsky <alexander.belopolsky@gmail.com>

Thu, 23 Dec 2010 02:27:37 +0000 (02:27 +0000)

committer Alexander Belopolsky <alexander.belopolsky@gmail.com>

Thu, 23 Dec 2010 02:27:37 +0000 (02:27 +0000)
author Alexander Belopolsky <alexander.belopolsky@gmail.com>
Thu, 23 Dec 2010 02:27:37 +0000 (02:27 +0000)
committer Alexander Belopolsky <alexander.belopolsky@gmail.com>
Thu, 23 Dec 2010 02:27:37 +0000 (02:27 +0000)
diff --git a/Lib/test/test_normalization.py b/Lib/test/test_normalization.py

index fa9611ea58ae0c4a3302c1707370eecf7558cccc..e3e25603564185b2cc6c34fc860b8df03175070f 100644 (file)
--- a/Lib/test/test_normalization.py
+++ b/Lib/test/test_normalization.py
@@ -55,9 +55,6 @@ class NormalizationTest(unittest.TestCase):
              if line.startswith("@Part"):
                  part = line.split()[0]
                  continue
-            if part == "@Part3":
-                # XXX we don't support PRI #29 yet, so skip these tests for now
-                continue
              try:
                  c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
              except RangeError:
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py

index 1cb36fe2c8a8f711b584293595aaed3853b722e5..97442564e7a4b8183b1f8a6638358cb1828d3c1a 100644 (file)
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -188,9 +188,22 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
  
      def test_pr29(self):
          # http://www.unicode.org/review/pr-29.html
-        for text in ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161"):
+        # See issues #1054943 and #10254.
+        composed = ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161",
+                    'Li\u030dt-s\u1e73\u0301',
+                    '\u092e\u093e\u0930\u094d\u0915 \u091c\u093c'
+                    + '\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917',
+                    '\u0915\u093f\u0930\u094d\u0917\u093f\u091c\u093c'
+                    + '\u0938\u094d\u0924\u093e\u0928')
+        for text in composed:
              self.assertEqual(self.db.normalize('NFC', text), text)
  
+    def test_issue10254(self):
+        # Crash reported in #10254
+        a = 'C\u0338' * 20  + 'C\u0327'
+        b = 'C\u0338' * 20  + '\xC7'
+        self.assertEqual(self.db.normalize('NFC', a), b)
+
      def test_east_asian_width(self):
          eaw = self.db.east_asian_width
          self.assertRaises(TypeError, eaw, b'a')
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c

index 233f8e08ba8c7eeda2e5b93e6e9d8781d2439a6f..bd96e3643f92dc3b4064be2d630ebf893b753457 100644 (file)
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -684,10 +684,14 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
        comb = 0;
        while (i1 < end) {
            int comb1 = _getrecord_ex(*i1)->combining;
-          if (comb && (comb1 == 0 || comb == comb1)) {
-              /* Character is blocked. */
-              i1++;
-              continue;
+          if (comb) {
+              if (comb1 == 0)
+                  break;
+              if (comb >= comb1) {
+                  /* Character is blocked. */
+                  i1++;
+                  continue;
+              }
            }
            l = find_nfc_index(self, nfc_last, *i1);
            /* *i1 cannot be combined with *i. If *i1
@@ -711,6 +715,7 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
            /* Replace the original character. */
            *i = code;
            /* Mark the second character unused. */
+          assert(cskipped < 20);
            skipped[cskipped++] = i1;
            i1++;
            f = find_nfc_index(self, nfc_first, *i);
author	Alexander Belopolsky <alexander.belopolsky@gmail.com>
	Thu, 23 Dec 2010 02:27:37 +0000 (02:27 +0000)
committer	Alexander Belopolsky <alexander.belopolsky@gmail.com>
	Thu, 23 Dec 2010 02:27:37 +0000 (02:27 +0000)
Lib/test/test_normalization.py		patch | blob | history
Lib/test/test_unicodedata.py		patch | blob | history
Modules/unicodedata.c		patch | blob | history