_ucd_version = "?"
_copyright = ""
_terms_of_use = ""
+_current_year = datetime.date.today().strftime("%Y")
# ISO 15924 script codes --------------------------------------------------- ***
Strips comments, ignores empty and all-comment lines.
Returns a tuple (type, line, ...).
"""
+ global _copyright, _terms_of_use
for line in in_file:
line = line.strip()
if not line: continue
yield ("missing", line, fields)
continue
if want_comments: yield ("comment", line)
+ if line.startswith("# Copyright"):
+ if not _copyright and _current_year in line:
+ _copyright = line
+ elif "terms of use" in line and not _terms_of_use:
+ _terms_of_use = line
continue
comment_start = line.find("#") # inline comment
if comment_start >= 0:
"\\.txt")
def ParsePropertyAliases(in_file):
- global _copyright, _terms_of_use, _ucd_version
+ global _ucd_version
prop_type_nulls = {
"Binary": False,
"Catalog": "??", # Must be specified, e.g., in @missing line.
match = _ucd_version_re.match(line)
if match:
_ucd_version = match.group(1)
- elif line.startswith("# Copyright"):
- _copyright = line
- elif "terms of use" in line:
- _terms_of_use = line
else:
words = line[1:].lstrip().split()
if len(words) == 2 and words[1] == "Properties":
out_file.write("\n")
def EscapeNonASCII(s):
    """Return s with every non-ASCII character replaced by an escape.

    Characters up to U+007F are copied unchanged. A character up to U+FFFF
    becomes a backslash, the letter "u" and four uppercase hex digits; any
    higher character becomes a backslash, the letter "U" and eight uppercase
    hex digits.

    Backslashes already present in s are copied as-is (not doubled), so the
    output cannot always be converted back unambiguously.
    """
    pieces = []
    for ch in s:
        c = ord(ch)
        if c <= 0x7f:
            pieces.append(ch)
        elif c <= 0xffff:
            pieces.append(u"\\u%04X" % c)
        else:
            pieces.append(u"\\U%08X" % c)
    # Join once instead of re-slicing the whole string for each escaped
    # character, which made the work quadratic in the length of s.
    # s[:0] is an empty string of the same type as s, so an all-ASCII
    # input keeps its string type.
    return s[:0].join(pieces)
+
+
def WritePreparsedUCD(out_file):
+ global _copyright, _terms_of_use
out_file.write("# Preparsed UCD generated by ICU preparseucd.py\n");
- if _copyright: out_file.write(_copyright + "\n")
+ if not _copyright:
+ _copyright = "# Copyright (c) 1991-" + _current_year + " Unicode, Inc."
+ out_file.write(_copyright + "\n")
if _terms_of_use: out_file.write(_terms_of_use + "\n")
out_file.write("ucd;%s\n\n" % _ucd_version)
# Sort property names (props keys) by their normalized forms
# NamesList h1 heading (for [most of] a block).
if i_h1 < len(_h1) and start == _h1[i_h1][0]:
h = _h1[i_h1]
- out_file.write("# %04lX..%04lX %s\n" % (h[0], h[1], h[2]))
+ out_file.write("# %04lX..%04lX %s\n" % (h[0], h[1], EscapeNonASCII(h[2])))
i_h1 += 1
# Algorithmic-names range.
if i_alg < len(_alg_names_ranges) and start == _alg_names_ranges[i_alg][0]:
i_alg += 1
# NamesList h2 heading.
if i_h2 < len(_h2) and start == _h2[i_h2][0]:
- out_file.write("# %s\n" % (_h2[i_h2][1]))
+ out_file.write("# %s\n" % EscapeNonASCII(_h2[i_h2][1]))
i_h2 += 1
# Code point/range data.
props = _props[i]
def WriteNorm2NFCTextFile(path):
- year = datetime.date.today().strftime("%Y")
+ global _current_year
with open(os.path.join(path, "nfc.txt"), "w") as out_file:
out_file.write(
- """# Copyright (C) 1999-""" + year +
+ """# Copyright (C) 1999-""" + _current_year +
""", International Business Machines
# Corporation and others. All Rights Reserved.
#
def WriteNorm2NFKCTextFile(path):
- year = datetime.date.today().strftime("%Y")
+ global _current_year
with open(os.path.join(path, "nfkc.txt"), "w") as out_file:
out_file.write(
- """# Copyright (C) 1999-""" + year +
+ """# Copyright (C) 1999-""" + _current_year +
""", International Business Machines
# Corporation and others. All Rights Reserved.
#
def WriteNorm2NFKC_CFTextFile(path):
- year = datetime.date.today().strftime("%Y")
+ global _current_year
with open(os.path.join(path, "nfkc_cf.txt"), "w") as out_file:
out_file.write(
"""# Unicode Character Database
-# Copyright (c) 1991-""" + year + """ Unicode, Inc.
+# Copyright (c) 1991-""" + _current_year + """ Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
def WritePNamesDataHeader(out_path):
- year = datetime.date.today().strftime("%Y")
+ global _current_year
with open(out_path, "w") as out_file:
out_file.write("""/**
- * Copyright (C) 2002-""" + year +
+ * Copyright (C) 2002-""" + _current_year +
""", International Business Machines Corporation and
* others. All Rights Reserved.
*
# Optimize block vs. cp properties.
CompactBlocks()
# Write the ppucd.txt output file.
+ # Use US-ASCII so that ICU tests can parse it in the platform charset,
+ # which may be EBCDIC.
+ # Fix up non-ASCII data (NamesList.txt headings) to fit.
out_path = os.path.join(unidata_path, "ppucd.txt")
- with codecs.open(out_path, "w", "UTF-8") as out_file:
+ with codecs.open(out_path, "w", "US-ASCII") as out_file:
WritePreparsedUCD(out_file)
out_file.flush()