bpo-38043: Move unicodedata.normalize tests into test_unicodedata. (GH-15712)

author Greg Price <gnprice@gmail.com>
Tue, 10 Sep 2019 09:29:26 +0000 (02:29 -0700)
committer Benjamin Peterson <benjamin@python.org>
Tue, 10 Sep 2019 09:29:26 +0000 (10:29 +0100)
diff --git a/Lib/test/test_normalization.py b/Lib/test/test_normalization.py

deleted file mode 100644 (file)

index ba877e7..0000000
--- a/Lib/test/test_normalization.py
+++ /dev/null
@@ -1,117 +0,0 @@
-from test.support import open_urlresource
-import unittest
-
-from http.client import HTTPException
-import sys
-from unicodedata import normalize, is_normalized, unidata_version
-
-TESTDATAFILE = "NormalizationTest.txt"
-TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE
-
-def check_version(testfile):
-    hdr = testfile.readline()
-    return unidata_version in hdr
-
-class RangeError(Exception):
-    pass
-
-def NFC(str):
-    return normalize("NFC", str)
-
-def NFKC(str):
-    return normalize("NFKC", str)
-
-def NFD(str):
-    return normalize("NFD", str)
-
-def NFKD(str):
-    return normalize("NFKD", str)
-
-def unistr(data):
-    data = [int(x, 16) for x in data.split(" ")]
-    for x in data:
-        if x > sys.maxunicode:
-            raise RangeError
-    return "".join([chr(x) for x in data])
-
-class NormalizationTest(unittest.TestCase):
-    def test_main(self):
-        # Hit the exception early
-        try:
-            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
-                                        check=check_version)
-        except PermissionError:
-            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
-                          f"into the test data directory")
-        except (OSError, HTTPException):
-            self.fail(f"Could not retrieve {TESTDATAURL}")
-
-        with testdata:
-            self.run_normalization_tests(testdata)
-
-    def run_normalization_tests(self, testdata):
-        part = None
-        part1_data = {}
-
-        for line in testdata:
-            if '#' in line:
-                line = line.split('#')[0]
-            line = line.strip()
-            if not line:
-                continue
-            if line.startswith("@Part"):
-                part = line.split()[0]
-                continue
-            try:
-                c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
-            except RangeError:
-                # Skip unsupported characters;
-                # try at least adding c1 if we are in part1
-                if part == "@Part1":
-                    try:
-                        c1 = unistr(line.split(';')[0])
-                    except RangeError:
-                        pass
-                    else:
-                        part1_data[c1] = 1
-                continue
-
-            # Perform tests
-            self.assertTrue(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line)
-            self.assertTrue(c4 ==  NFC(c4) ==  NFC(c5), line)
-            self.assertTrue(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line)
-            self.assertTrue(c5 ==  NFD(c4) ==  NFD(c5), line)
-            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \
-                            NFKC(c3) == NFKC(c4) == NFKC(c5),
-                            line)
-            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \
-                            NFKD(c3) == NFKD(c4) == NFKD(c5),
-                            line)
-
-            self.assertTrue(is_normalized("NFC", c2))
-            self.assertTrue(is_normalized("NFC", c4))
-
-            self.assertTrue(is_normalized("NFD", c3))
-            self.assertTrue(is_normalized("NFD", c5))
-
-            self.assertTrue(is_normalized("NFKC", c4))
-            self.assertTrue(is_normalized("NFKD", c5))
-
-            # Record part 1 data
-            if part == "@Part1":
-                part1_data[c1] = 1
-
-        # Perform tests for all other data
-        for c in range(sys.maxunicode+1):
-            X = chr(c)
-            if X in part1_data:
-                continue
-            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
-
-    def test_bug_834676(self):
-        # Check for bug 834676
-        normalize('NFC', '\ud55c\uae00')
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py

index 8febf0af8627b41f6f752e4e791291da1f08d107..503df6a51d5c62b94f2412c75f920bfb613176d1 100644 (file)
--- a/Lib/test/test_ucn.py
+++ b/Lib/test/test_ucn.py
@@ -12,7 +12,6 @@ import unicodedata
  
  from test import support
  from http.client import HTTPException
-from test.test_normalization import check_version
  
  try:
      from _testcapi import INT_MAX, PY_SSIZE_T_MAX, UINT_MAX
@@ -172,6 +171,9 @@ class UnicodeNamesTest(unittest.TestCase):
  
      def test_named_sequences_full(self):
          # Check all the named sequences
+        def check_version(testfile):
+            hdr = testfile.readline()
+            return unicodedata.unidata_version in hdr
          url = ("http://www.pythontest.net/unicode/%s/NamedSequences.txt" %
                 unicodedata.unidata_version)
          try:
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py

index 9ec2f11497268a96c83362ee92b4674fe05b67be..40c38c1c42621ed766019eb3cd5099764917a8fb 100644 (file)
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -7,10 +7,11 @@
  """
  
  import hashlib
+from http.client import HTTPException
  import sys
  import unicodedata
  import unittest
-from test.support import script_helper
+from test.support import open_urlresource, script_helper
  
  
  class UnicodeMethodsTest(unittest.TestCase):
@@ -171,13 +172,6 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
          self.assertRaises(TypeError, self.db.combining)
          self.assertRaises(TypeError, self.db.combining, 'xx')
  
-    def test_normalize(self):
-        self.assertRaises(TypeError, self.db.normalize)
-        self.assertRaises(ValueError, self.db.normalize, 'unknown', 'xx')
-        self.assertEqual(self.db.normalize('NFKC', ''), '')
-        # The rest can be found in test_normalization.py
-        # which requires an external file.
-
      def test_pr29(self):
          # http://www.unicode.org/review/pr-29.html
          # See issues #1054943 and #10254.
@@ -208,9 +202,6 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
          self.assertEqual(self.db.normalize('NFC', u11a7_str_a), u11a7_str_b)
          self.assertEqual(self.db.normalize('NFC', u11c3_str_a), u11c3_str_b)
  
-    # For tests of unicodedata.is_normalized / self.db.is_normalized ,
-    # see test_normalization.py .
-
      def test_east_asian_width(self):
          eaw = self.db.east_asian_width
          self.assertRaises(TypeError, eaw, b'a')
@@ -315,5 +306,102 @@ class UnicodeMiscTest(UnicodeDatabaseTest):
                  self.assertEqual(len(lines), 1,
                                   r"\u%.4x should not be a linebreak" % i)
  
+class NormalizationTest(unittest.TestCase):
+    @staticmethod
+    def check_version(testfile):
+        hdr = testfile.readline()
+        return unicodedata.unidata_version in hdr
+
+    @staticmethod
+    def unistr(data):
+        data = [int(x, 16) for x in data.split(" ")]
+        return "".join([chr(x) for x in data])
+
+    def test_normalization(self):
+        TESTDATAFILE = "NormalizationTest.txt"
+        TESTDATAURL = f"http://www.pythontest.net/unicode/{unicodedata.unidata_version}/{TESTDATAFILE}"
+
+        # Hit the exception early
+        try:
+            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
+                                        check=self.check_version)
+        except PermissionError:
+            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
+                          f"into the test data directory")
+        except (OSError, HTTPException):
+            self.fail(f"Could not retrieve {TESTDATAURL}")
+
+        with testdata:
+            self.run_normalization_tests(testdata)
+
+    def run_normalization_tests(self, testdata):
+        part = None
+        part1_data = {}
+
+        def NFC(str):
+            return unicodedata.normalize("NFC", str)
+
+        def NFKC(str):
+            return unicodedata.normalize("NFKC", str)
+
+        def NFD(str):
+            return unicodedata.normalize("NFD", str)
+
+        def NFKD(str):
+            return unicodedata.normalize("NFKD", str)
+
+        for line in testdata:
+            if '#' in line:
+                line = line.split('#')[0]
+            line = line.strip()
+            if not line:
+                continue
+            if line.startswith("@Part"):
+                part = line.split()[0]
+                continue
+            c1,c2,c3,c4,c5 = [self.unistr(x) for x in line.split(';')[:-1]]
+
+            # Perform tests
+            self.assertTrue(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line)
+            self.assertTrue(c4 ==  NFC(c4) ==  NFC(c5), line)
+            self.assertTrue(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line)
+            self.assertTrue(c5 ==  NFD(c4) ==  NFD(c5), line)
+            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \
+                            NFKC(c3) == NFKC(c4) == NFKC(c5),
+                            line)
+            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \
+                            NFKD(c3) == NFKD(c4) == NFKD(c5),
+                            line)
+
+            self.assertTrue(unicodedata.is_normalized("NFC", c2))
+            self.assertTrue(unicodedata.is_normalized("NFC", c4))
+
+            self.assertTrue(unicodedata.is_normalized("NFD", c3))
+            self.assertTrue(unicodedata.is_normalized("NFD", c5))
+
+            self.assertTrue(unicodedata.is_normalized("NFKC", c4))
+            self.assertTrue(unicodedata.is_normalized("NFKD", c5))
+
+            # Record part 1 data
+            if part == "@Part1":
+                part1_data[c1] = 1
+
+        # Perform tests for all other data
+        for c in range(sys.maxunicode+1):
+            X = chr(c)
+            if X in part1_data:
+                continue
+            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
+
+    def test_edge_cases(self):
+        self.assertRaises(TypeError, unicodedata.normalize)
+        self.assertRaises(ValueError, unicodedata.normalize, 'unknown', 'xx')
+        self.assertEqual(unicodedata.normalize('NFKC', ''), '')
+
+    def test_bug_834676(self):
+        # Check for bug 834676
+        unicodedata.normalize('NFC', '\ud55c\uae00')
+
+
  if __name__ == "__main__":
      unittest.main()
diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj

index 0ddeef3eaa3b1264cfcfea3212b9b3a7ed5b9ac3..401e207ae57e8008d789068dab00216a324b353e 100644 (file)
--- a/PCbuild/lib.pyproj
+++ b/PCbuild/lib.pyproj
@@ -1191,7 +1191,6 @@
      <Compile Include="test\test_netrc.py" />
      <Compile Include="test\test_nis.py" />
      <Compile Include="test\test_nntplib.py" />
-    <Compile Include="test\test_normalization.py" />
      <Compile Include="test\test_ntpath.py" />
      <Compile Include="test\test_numeric_tower.py" />
      <Compile Include="test\test_opcodes.py" />
author	Greg Price <gnprice@gmail.com>
	Tue, 10 Sep 2019 09:29:26 +0000 (02:29 -0700)
committer	Benjamin Peterson <benjamin@python.org>
	Tue, 10 Sep 2019 09:29:26 +0000 (10:29 +0100)
Lib/test/test_normalization.py	[deleted file]	patch | blob | history
Lib/test/test_ucn.py		patch | blob | history
Lib/test/test_unicodedata.py		patch | blob | history
PCbuild/lib.pyproj		patch | blob | history