granicus.if.org Git - icu/commitdiff
ICU-13186 stop prepending UTF-8 BOM to some Unicode files
author Markus Scherer <markus.icu@gmail.com>
Fri, 2 Jun 2017 22:52:19 +0000 (22:52 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Fri, 2 Jun 2017 22:52:19 +0000 (22:52 +0000)
X-SVN-Rev: 40149

icu4c/source/data/unidata/changes.txt
icu4c/source/test/testdata/GraphemeBreakTest.txt
icu4c/source/test/testdata/LineBreakTest.txt
icu4c/source/test/testdata/SentenceBreakTest.txt
icu4c/source/test/testdata/WordBreakTest.txt
tools/unicode/py/preparseucd.py

index cedc98577e961e1e440644e484a37b8448b7ced2..c80e8353ca497fef71930ac49410a183d1360e13 100644 (file)
@@ -52,10 +52,10 @@ http://www.unicode.org/reports/tr44/tr44-19.html
 * Command-line environment setup
 
 UNICODE_DATA=~/unidata/uni10/20170503
-CLDR_SRC=~/svn.cldr/uni10
-ICU_ROOT=~/svn.icu/uni10
+CLDR_SRC=~/svn.cldr/trunk
+ICU_ROOT=~/svn.icu/trunk
 ICU_SRC=$ICU_ROOT/src
-ICUDT=icudt59b
+ICUDT=icudt60b
 ICU4C_DATA_IN=$ICU_SRC/icu4c/source/data/in
 ICU4C_UNIDATA=$ICU_SRC/icu4c/source/data/unidata
 export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib
index e320e2a2e40a842a60b7a38788a67bf7d2c897fd..d7d8f90de07a59484c0d9ed7bf17bd1cd65ecdb4 100644 (file)
@@ -1,4 +1,4 @@
-# GraphemeBreakTest-10.0.0.txt
+# GraphemeBreakTest-10.0.0.txt
 # Date: 2017-04-14, 05:40:29 GMT
 # © 2017 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
index 7fad40773fc91a94a4d85e5f69d3050f5b45a7c6..6715446aba245a37cfc710356c9de740b745984c 100644 (file)
@@ -1,4 +1,4 @@
-# LineBreakTest-10.0.0.txt
+# LineBreakTest-10.0.0.txt
 # Date: 2017-04-14, 05:40:30 GMT
 # © 2017 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
index 9eecc0c0bec69ee7d94ced4a4a547c932b6f6b0d..2985b84cf8007d54e3d1b7fce33312bcd5bd3405 100644 (file)
@@ -1,4 +1,4 @@
-# SentenceBreakTest-10.0.0.txt
+# SentenceBreakTest-10.0.0.txt
 # Date: 2017-04-14, 05:40:43 GMT
 # © 2017 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
index b04bedd5f4707f6c8da638bb0c5ad4ab8241dbce..63761026ce1bcd1908ef6a6d4aedd97cdb282378 100644 (file)
@@ -1,4 +1,4 @@
-# WordBreakTest-10.0.0.txt
+# WordBreakTest-10.0.0.txt
 # Date: 2017-04-14, 05:40:44 GMT
 # © 2017 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
index 635d528ce746b769750745ebd88d7fde9ce4b227..2bb08357ed3d94d0d5a4be8546d1ca9b007c39ce 100755 (executable)
@@ -1568,13 +1568,6 @@ def CopyAndStripAndMerge(s, t):
   return CopyAndStripWithOptionalMerge(s, t, True)
 
 
-def PrependBOM(s, t):
-  with open(s, "r") as in_file, open(t, "w") as out_file:
-    out_file.write("\xef\xbb\xbf")  # UTF-8 BOM for ICU svn
-    shutil.copyfileobj(in_file, out_file)
-  return t
-
-
 def CopyOnly(s, t):
   shutil.copy(s, t)
   return t
@@ -1608,12 +1601,12 @@ _files = {
   "EastAsianWidth.txt": (DontCopy, ParseEastAsianWidth),
   "emoji-data.txt": (DontCopy, ParseNamedProperties),
   "GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
-  "GraphemeBreakTest.txt": (PrependBOM, "testdata"),
+  "GraphemeBreakTest.txt": (CopyOnly, "testdata"),
   "IdnaTest.txt": (CopyOnly, "testdata"),
   "IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),
   "IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory),
   "LineBreak.txt": (DontCopy, ParseLineBreak),
-  "LineBreakTest.txt": (PrependBOM, "testdata"),
+  "LineBreakTest.txt": (CopyOnly, "testdata"),
   "NameAliases.txt": (DontCopy, ParseNameAliases),
   "NamesList.txt": (DontCopy, ParseNamesList),
   "NormalizationCorrections.txt": (CopyOnly,),  # Only used in gensprep.
@@ -1622,14 +1615,14 @@ _files = {
   "PropertyValueAliases.txt": (DontCopy, ParsePropertyValueAliases, 1),
   "PropList.txt": (DontCopy, ParseNamedProperties),
   "SentenceBreakProperty.txt": (DontCopy, ParseSentenceBreak),
-  "SentenceBreakTest.txt": (PrependBOM, "testdata"),
+  "SentenceBreakTest.txt": (CopyOnly, "testdata"),
   "Scripts.txt": (DontCopy, ParseScripts),
   "ScriptExtensions.txt": (DontCopy, ParseScriptExtensions),
   "SpecialCasing.txt": (CopyOnly, ParseSpecialCasing),
   "UnicodeData.txt": (CopyOnly, ParseUnicodeData, 2),
   "VerticalOrientation.txt": (DontCopy, ParseVerticalOrientation),
   "WordBreakProperty.txt": (DontCopy, ParseWordBreak),
-  "WordBreakTest.txt": (PrependBOM, "testdata"),
+  "WordBreakTest.txt": (CopyOnly, "testdata"),
   # From www.unicode.org/Public/idna/<version>/
   "IdnaMappingTable.txt": (IdnaToUTS46TextFile, "norm2")
 }