From dbebd188e78f8a3264ef236c29d84e699a074320 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 4 May 2016 23:54:37 +0000 Subject: [PATCH] ICU-12526 initial Unicode 9 data X-SVN-Rev: 38698 --- tools/unicode/c/genprops/pnames_data.h | 53 +++++++++++++++++++------- tools/unicode/py/preparseucd.py | 10 +++-- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/tools/unicode/c/genprops/pnames_data.h b/tools/unicode/c/genprops/pnames_data.h index da651fdccd6..9fda40b2ffb 100644 --- a/tools/unicode/c/genprops/pnames_data.h +++ b/tools/unicode/c/genprops/pnames_data.h @@ -5,7 +5,7 @@ * machine-generated by: icu/tools/unicode/py/preparseucd.py */ -#define UNICODE_VERSION { 8, 0, 0, 0 } +#define UNICODE_VERSION { 9, 0, 0, 0 } static const Value VALUES_binprop[2] = { Value(0, "N No F False"), @@ -38,7 +38,7 @@ static const Value VALUES_bc[23] = { Value(U_POP_DIRECTIONAL_ISOLATE, "PDI Pop_Directional_Isolate"), }; -static const Value VALUES_blk[263] = { +static const Value VALUES_blk[274] = { Value(UBLOCK_NO_BLOCK, "NB No_Block"), Value(UBLOCK_BASIC_LATIN, "ASCII Basic_Latin"), Value(UBLOCK_LATIN_1_SUPPLEMENT, "Latin_1_Sup Latin_1_Supplement Latin_1"), @@ -302,6 +302,17 @@ static const Value VALUES_blk[263] = { Value(UBLOCK_OLD_HUNGARIAN, "Old_Hungarian Old_Hungarian"), Value(UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, "Sup_Symbols_And_Pictographs Supplemental_Symbols_And_Pictographs"), Value(UBLOCK_SUTTON_SIGNWRITING, "Sutton_SignWriting Sutton_SignWriting"), + Value(UBLOCK_ADLAM, "Adlam Adlam"), + Value(UBLOCK_BHAIKSUKI, "Bhaiksuki Bhaiksuki"), + Value(UBLOCK_CYRILLIC_EXTENDED_C, "Cyrillic_Ext_C Cyrillic_Extended_C"), + Value(UBLOCK_GLAGOLITIC_SUPPLEMENT, "Glagolitic_Sup Glagolitic_Supplement"), + Value(UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, "Ideographic_Symbols Ideographic_Symbols_And_Punctuation"), + Value(UBLOCK_MARCHEN, "Marchen Marchen"), + Value(UBLOCK_MONGOLIAN_SUPPLEMENT, "Mongolian_Sup Mongolian_Supplement"), + Value(UBLOCK_NEWA, "Newa Newa"), + Value(UBLOCK_OSAGE, "Osage Osage"), + Value(UBLOCK_TANGUT, "Tangut Tangut"), + Value(UBLOCK_TANGUT_COMPONENTS, "Tangut_Components Tangut_Components"), }; static const Value VALUES_ccc[57] = { @@ -427,7 +438,7 @@ static const Value VALUES_gc[30] = { Value(U_FINAL_PUNCTUATION, "Pf Final_Punctuation"), }; -static const Value VALUES_jg[86] = { +static const Value VALUES_jg[89] = { Value(U_JG_NO_JOINING_GROUP, "No_Joining_Group No_Joining_Group"), Value(U_JG_AIN, "Ain Ain"), Value(U_JG_ALAPH, "Alaph Alaph"), @@ -514,6 +525,9 @@ static const Value VALUES_jg[86] = { Value(U_JG_MANICHAEAN_YODH, "Manichaean_Yodh Manichaean_Yodh"), Value(U_JG_MANICHAEAN_ZAYIN, "Manichaean_Zayin Manichaean_Zayin"), Value(U_JG_STRAIGHT_WAW, "Straight_Waw Straight_Waw"), + Value(U_JG_AFRICAN_FEH, "African_Feh African_Feh"), + Value(U_JG_AFRICAN_NOON, "African_Noon African_Noon"), + Value(U_JG_AFRICAN_QAF, "African_Qaf African_Qaf"), }; static const Value VALUES_jt[6] = { @@ -525,7 +539,7 @@ static const Value VALUES_jt[6] = { Value(U_JT_TRANSPARENT, "T Transparent"), }; -static const Value VALUES_lb[40] = { +static const Value VALUES_lb[43] = { Value(U_LB_UNKNOWN, "XX Unknown"), Value(U_LB_AMBIGUOUS, "AI Ambiguous"), Value(U_LB_ALPHABETIC, "AL Alphabetic"), @@ -566,6 +580,9 @@ static const Value VALUES_lb[40] = { Value(U_LB_CONDITIONAL_JAPANESE_STARTER, "CJ Conditional_Japanese_Starter"), Value(U_LB_HEBREW_LETTER, "HL Hebrew_Letter"), Value(U_LB_REGIONAL_INDICATOR, "RI Regional_Indicator"), + Value(U_LB_E_BASE, "EB E_Base"), + Value(U_LB_E_MODIFIER, "EM E_Modifier"), + Value(U_LB_ZWJ, "ZWJ ZWJ"), }; static const Value VALUES_nt[4] = { @@ -730,7 +747,7 @@ static const Value VALUES_sc[175] = { Value(USCRIPT_SHARADA, "Shrd Sharada"), Value(USCRIPT_SORA_SOMPENG, "Sora Sora_Sompeng"), Value(USCRIPT_TAKRI, "Takr Takri"), - Value(USCRIPT_TANGUT, "Tang Tang"), + Value(USCRIPT_TANGUT, "Tang Tangut"), Value(USCRIPT_WOLEAI, "Wole Wole"), Value(USCRIPT_ANATOLIAN_HIEROGLYPHS, "Hluw Anatolian_Hieroglyphs"), Value(USCRIPT_KHOJKI, "Khoj Khojki"), @@ -904,7 +921,7 @@ static const Value VALUES_tccc[57] = { Value(240, "IS Iota_Subscript"), }; -static const Value VALUES_GCB[13] = { +static const Value VALUES_GCB[18] = { Value(U_GCB_OTHER, "XX Other"), Value(U_GCB_CONTROL, "CN Control"), Value(U_GCB_CR, "CR CR"), @@ -918,6 +935,11 @@ static const Value VALUES_GCB[13] = { Value(U_GCB_SPACING_MARK, "SM SpacingMark"), Value(U_GCB_PREPEND, "PP Prepend"), Value(U_GCB_REGIONAL_INDICATOR, "RI Regional_Indicator"), + Value(U_GCB_E_BASE, "EB E_Base"), + Value(U_GCB_E_BASE_GAZ, "EBG E_Base_GAZ"), + Value(U_GCB_E_MODIFIER, "EM E_Modifier"), + Value(U_GCB_GLUE_AFTER_ZWJ, "GAZ Glue_After_Zwj"), + Value(U_GCB_ZWJ, "ZWJ ZWJ"), }; static const Value VALUES_SB[15] = { @@ -938,7 +960,7 @@ static const Value VALUES_SB[15] = { Value(U_SB_SCONTINUE, "SC SContinue"), }; -static const Value VALUES_WB[17] = { +static const Value VALUES_WB[22] = { Value(U_WB_OTHER, "XX Other"), Value(U_WB_ALETTER, "LE ALetter"), Value(U_WB_FORMAT, "FO Format"), @@ -956,6 +978,11 @@ static const Value VALUES_WB[17] = { Value(U_WB_HEBREW_LETTER, "HL Hebrew_Letter"), Value(U_WB_SINGLE_QUOTE, "SQ Single_Quote"), Value(U_WB_DOUBLE_QUOTE, "DQ Double_Quote"), + Value(U_WB_E_BASE, "EB E_Base"), + Value(U_WB_E_BASE_GAZ, "EBG E_Base_GAZ"), + Value(U_WB_E_MODIFIER, "EM E_Modifier"), + Value(U_WB_GLUE_AFTER_ZWJ, "GAZ Glue_After_Zwj"), + Value(U_WB_ZWJ, "ZWJ ZWJ"), }; static const Value VALUES_bpt[3] = { @@ -1041,7 +1068,7 @@ static const Property PROPERTIES[100] = { Property(UCHAR_XID_CONTINUE, "XIDC XID_Continue"), Property(UCHAR_XID_START, "XIDS XID_Start"), Property(UCHAR_CASE_SENSITIVE, "Sensitive Case_Sensitive"), - Property(UCHAR_S_TERM, "STerm STerm"), + Property(UCHAR_S_TERM, "STerm Sentence_Terminal"), Property(UCHAR_VARIATION_SELECTOR, "VS Variation_Selector"), Property(UCHAR_NFD_INERT, "nfdinert NFD_Inert"), Property(UCHAR_NFKD_INERT, "nfkdinert NFKD_Inert"), @@ -1068,14 +1095,14 @@ static const Property PROPERTIES[100] = { Property(UCHAR_EMOJI_MODIFIER, "Emoji_Modifier Emoji_Modifier"), Property(UCHAR_EMOJI_MODIFIER_BASE, "Emoji_Modifier_Base Emoji_Modifier_Base"), Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 23), - Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 263), + Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 274), Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 57), Property(UCHAR_DECOMPOSITION_TYPE, "dt Decomposition_Type", VALUES_dt, 18), Property(UCHAR_EAST_ASIAN_WIDTH, "ea East_Asian_Width", VALUES_ea, 6), Property(UCHAR_GENERAL_CATEGORY, "gc General_Category", VALUES_gc, 30), - Property(UCHAR_JOINING_GROUP, "jg Joining_Group", VALUES_jg, 86), + Property(UCHAR_JOINING_GROUP, "jg Joining_Group", VALUES_jg, 89), Property(UCHAR_JOINING_TYPE, "jt Joining_Type", VALUES_jt, 6), - Property(UCHAR_LINE_BREAK, "lb Line_Break", VALUES_lb, 40), + Property(UCHAR_LINE_BREAK, "lb Line_Break", VALUES_lb, 43), Property(UCHAR_NUMERIC_TYPE, "nt Numeric_Type", VALUES_nt, 4), Property(UCHAR_SCRIPT, "sc Script", VALUES_sc, 175), Property(UCHAR_HANGUL_SYLLABLE_TYPE, "hst Hangul_Syllable_Type", VALUES_hst, 6), @@ -1085,9 +1112,9 @@ static const Property PROPERTIES[100] = { Property(UCHAR_NFKC_QUICK_CHECK, "NFKC_QC NFKC_Quick_Check", VALUES_NFKC_QC, 3), Property(UCHAR_LEAD_CANONICAL_COMBINING_CLASS, "lccc Lead_Canonical_Combining_Class", VALUES_lccc, 57), Property(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, "tccc Trail_Canonical_Combining_Class", VALUES_tccc, 57), - Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 13), + Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 18), Property(UCHAR_SENTENCE_BREAK, "SB Sentence_Break", VALUES_SB, 15), - Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 17), + Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 22), Property(UCHAR_BIDI_PAIRED_BRACKET_TYPE, "bpt Bidi_Paired_Bracket_Type", VALUES_bpt, 3), Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, 38), Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"), diff --git a/tools/unicode/py/preparseucd.py b/tools/unicode/py/preparseucd.py index 00c63ba9239..50e044f13f3 100755 --- a/tools/unicode/py/preparseucd.py +++ b/tools/unicode/py/preparseucd.py @@ -49,11 +49,11 @@ _current_year = datetime.date.today().strftime("%Y") _scripts_only_in_iso15924 = ( "Afak", "Blis", "Cirt", "Cyrs", "Egyd", "Egyh", "Geok", - "Hans", "Hant", - "Inds", "Jpan", "Jurc", "Kore", "Kpel", "Latf", "Latg", "Loma", + "Hanb", "Hans", "Hant", + "Inds", "Jamo", "Jpan", "Jurc", "Kore", "Kpel", "Latf", "Latg", "Loma", "Maya", "Moon", "Nkgb", "Nshu", "Phlv", "Roro", "Sara", "Syre", "Syrj", "Syrn", - "Tang", "Teng", "Visp", "Wole", "Zmth", "Zsym", "Zxxx" + "Teng", "Visp", "Wole", "Zmth", "Zsye", "Zsym", "Zxxx" ) # Properties --------------------------------------------------------------- *** @@ -654,7 +654,9 @@ def ParseUnicodeData(in_file): range_first = -1 # Remember algorithmic name ranges. if "Ideograph" in name: - _alg_names_ranges.append([c, end, "han", "CJK UNIFIED IDEOGRAPH-"]) + prefix = "CJK UNIFIED IDEOGRAPH-" + if c == 0x17000: prefix = "TANGUT IDEOGRAPH-" + _alg_names_ranges.append([c, end, "han", prefix]) elif name == "Hangul Syllable": _alg_names_ranges.append([c, end, "hangul"]) name = "" -- 2.40.0