From: Peter Edberg Date: Sat, 2 Dec 2017 00:02:56 +0000 (+0000) Subject: ICU-13474 Integrate cldr/tags/release-32-0-1.beta1 into ICU, update tests X-Git-Tag: release-61-rc~174 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6d58ccae2ab26f0532e08af2f2c9b5a539646106;p=icu ICU-13474 Integrate cldr/tags/release-32-0-1.beta1 into ICU, update tests X-SVN-Rev: 40685 --- diff --git a/icu4c/source/data/brkitr/brkfiles.mk b/icu4c/source/data/brkitr/brkfiles.mk index 68deedff8e4..8f5c1db2ab1 100644 --- a/icu4c/source/data/brkitr/brkfiles.mk +++ b/icu4c/source/data/brkitr/brkfiles.mk @@ -1,6 +1,6 @@ # © 2016 and later: Unicode, Inc. and others. # License & terms of use: http://www.unicode.org/copyright.html#License -BRK_RES_CLDR_VERSION = 32 +BRK_RES_CLDR_VERSION = 32.0.1 # A list of txt's to build # Note: # diff --git a/icu4c/source/data/coll/af.txt b/icu4c/source/data/coll/af.txt index ce3cfe1d13c..4176f09ea04 100644 --- a/icu4c/source/data/coll/af.txt +++ b/icu4c/source/data/coll/af.txt @@ -5,7 +5,7 @@ af{ collations{ standard{ Sequence{"&N<<<ʼn"} - Version{"32"} + Version{"32.0.1"} } } } diff --git a/icu4c/source/data/coll/am.txt b/icu4c/source/data/coll/am.txt index f3b029f42fe..aad76c05e74 100644 --- a/icu4c/source/data/coll/am.txt +++ b/icu4c/source/data/coll/am.txt @@ -5,7 +5,7 @@ am{ collations{ standard{ Sequence{"[reorder Ethi]"} - Version{"32"} + Version{"32.0.1"} } } } diff --git a/icu4c/source/data/coll/ar.txt b/icu4c/source/data/coll/ar.txt index 90dc71eee88..23dac0b14da 100644 --- a/icu4c/source/data/coll/ar.txt +++ b/icu4c/source/data/coll/ar.txt @@ -9,7 +9,7 @@ ar{ "&ت<<ة<<<ﺔ<<<ﺓ" "&ي<<ى<<<ﯨ<<<ﯩ<<<ﻰ<<<ﻯ<<<ﲐ<<<ﱝ" } - Version{"32"} + Version{"32.0.1"} } standard{ Sequence{ @@ -397,7 +397,7 @@ ar{ "‎&ۓ‎=ﮰ‎=ﮱ" "‎&ۀ‎=ﮤ‎=ﮥ" } - Version{"32"} + Version{"32.0.1"} } } } diff --git a/icu4c/source/data/coll/as.txt b/icu4c/source/data/coll/as.txt index 132987a8f1b..b3ac592b113 100644 --- a/icu4c/source/data/coll/as.txt +++ 
b/icu4c/source/data/coll/as.txt @@ -11,7 +11,7 @@ as{ "&[before 1]ত<ৎ=ত্\u200D" "&হ<ক্ষ" } - Version{"32"} + Version{"32.0.1"} } } } diff --git a/icu4c/source/data/coll/az.txt b/icu4c/source/data/coll/az.txt index 7a115ed8e57..22a6a2b05cd 100644 --- a/icu4c/source/data/coll/az.txt +++ b/icu4c/source/data/coll/az.txt @@ -9,7 +9,7 @@ az{ "[import az-u-co-standard]" "[reorder others]" } - Version{"32"} + Version{"32.0.1"} } standard{ Sequence{ @@ -26,7 +26,7 @@ az{ "&H +######################################################################## +# MINIMAL FILTER: Amharic-Latin +:: [ሀ-᎙] ; +:: NFD (NFC) ; +$ejective = ’; +$glottal = ’; +$pharyngeal = ‘; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/trac/ticket/2034 +$wordBoundary = [^[:L:][:M:][:N:]] ; +######################################################################## +# Start of Syllabic Transformations +######################################################################## +ሀ → hā ; # ETHIOPIC SYLLABLE HA +ሁ → hu ; # ETHIOPIC SYLLABLE HU +ሂ → hī ; # ETHIOPIC SYLLABLE HI +ሃ → ha ; # ETHIOPIC SYLLABLE HAA +ሄ → hē ; # ETHIOPIC SYLLABLE HEE +ህ → hi ; # ETHIOPIC SYLLABLE HE +ሆ → ho ; # ETHIOPIC SYLLABLE HO +ለ → le ; # ETHIOPIC SYLLABLE LA +ሉ → lu ; # ETHIOPIC SYLLABLE LU +ሊ → lī ; # ETHIOPIC SYLLABLE LI +ላ → la ; # ETHIOPIC SYLLABLE LAA +ሌ → lē ; # ETHIOPIC SYLLABLE LEE +ል → li ; # ETHIOPIC SYLLABLE LE +ሎ → lo ; # ETHIOPIC SYLLABLE LO +ሏ → lwa ; # ETHIOPIC SYLLABLE LWA +######################################################################## +# BGN Page 3 Rule 2: +# +# For documentation purposes the characters romanized with h in rows +# 1, 3, 13 and 18 may be romanized with h, h\u0323, h\u032E, and h\u0331, respectively. 
+######################################################################## +ሐ → h\u0323ā ; # ETHIOPIC SYLLABLE HHA +ሑ → h\u0323u ; # ETHIOPIC SYLLABLE HHU +ሒ → h\u0323ī ; # ETHIOPIC SYLLABLE HHI +ሓ → h\u0323a ; # ETHIOPIC SYLLABLE HHAA +ሔ → h\u0323ē ; # ETHIOPIC SYLLABLE HHEE +ሕ → h\u0323i ; # ETHIOPIC SYLLABLE HHE +ሖ → h\u0323o ; # ETHIOPIC SYLLABLE HHO +######################################################################## +# End of Rule 2 +######################################################################## +መ → me ; # ETHIOPIC SYLLABLE MA +ሙ → mu ; # ETHIOPIC SYLLABLE MU +ሚ → mī ; # ETHIOPIC SYLLABLE MI +ማ → ma ; # ETHIOPIC SYLLABLE MAA +ሜ → mē ; # ETHIOPIC SYLLABLE MEE +ም → mi ; # ETHIOPIC SYLLABLE ME +ሞ → mo ; # ETHIOPIC SYLLABLE MO +ሟ → mwa ; # ETHIOPIC SYLLABLE MWA +######################################################################## +# BGN Page 3 Rule 2: +# +# The characters romanized with s in rows 5 and 7 may, instead, be +# romanized with š and s, respectively; and the characters romanized +# with ts’ in rows 30 and 31 may, instead, be romanized with ts’ and +# t\u035Fs’ respectively. 
+######################################################################## +ሠ → še ; # ETHIOPIC SYLLABLE SZA +ሡ → šu ; # ETHIOPIC SYLLABLE SZU +ሢ → šī ; # ETHIOPIC SYLLABLE SZI +ሣ → ša ; # ETHIOPIC SYLLABLE SZAA +ሤ → šē ; # ETHIOPIC SYLLABLE SZEE +ሥ → ši ; # ETHIOPIC SYLLABLE SZE +ሦ → šo ; # ETHIOPIC SYLLABLE SZO +######################################################################## +# End of Rule 2 +######################################################################## +ረ → re ; # ETHIOPIC SYLLABLE RA +ሩ → ru ; # ETHIOPIC SYLLABLE RU +ሪ → rī ; # ETHIOPIC SYLLABLE RI +ራ → ra ; # ETHIOPIC SYLLABLE RAA +ሬ → rē ; # ETHIOPIC SYLLABLE REE +ር → ri ; # ETHIOPIC SYLLABLE RE +ሮ → ro ; # ETHIOPIC SYLLABLE RO +ሯ → rwa ; # ETHIOPIC SYLLABLE RWA not in BGN +ሰ → se ; # ETHIOPIC SYLLABLE SA +ሱ → su ; # ETHIOPIC SYLLABLE SU +ሲ → sī ; # ETHIOPIC SYLLABLE SI +ሳ → sa ; # ETHIOPIC SYLLABLE SAA +ሴ → sē ; # ETHIOPIC SYLLABLE SEE +ስ → si ; # ETHIOPIC SYLLABLE SE +ሶ → so ; # ETHIOPIC SYLLABLE SO +ሷ → swa ; # ETHIOPIC SYLLABLE SWA +ሸ → she ; # ETHIOPIC SYLLABLE SHA +ሹ → shu ; # ETHIOPIC SYLLABLE SHU +ሺ → shī ; # ETHIOPIC SYLLABLE SHI +ሻ → sha ; # ETHIOPIC SYLLABLE SHAA +ሼ → shē ; # ETHIOPIC SYLLABLE SHEE +ሽ → shi ; # ETHIOPIC SYLLABLE SHE +ሾ → sho ; # ETHIOPIC SYLLABLE SHO +ሿ → shwa ; # ETHIOPIC SYLLABLE SHWA +ቀ → k $ejective e ; # ETHIOPIC SYLLABLE QA +ቁ → k $ejective u ; # ETHIOPIC SYLLABLE QU +ቂ → k $ejective ī ; # ETHIOPIC SYLLABLE QI +ቃ → k $ejective a ; # ETHIOPIC SYLLABLE QAA +ቄ → k $ejective ē ; # ETHIOPIC SYLLABLE QEE +ቅ → k $ejective i ; # ETHIOPIC SYLLABLE QE +ቆ → k $ejective o ; # ETHIOPIC SYLLABLE QO +# +# No rule yet for ቇ U+1247 ETHIOPIC SYLLABLE QOA +ቈ → k $ejective o ; # ETHIOPIC SYLLABLE QWA +ቍ → k $ejective wi ; # ETHIOPIC SYLLABLE QWE +ቋ → k $ejective wa ; # ETHIOPIC SYLLABLE QWAA +ቌ → k $ejective wē ; # ETHIOPIC SYLLABLE QWEE +ቊ → k $ejective wī ; # ETHIOPIC SYLLABLE QWI +######################################################################## +# BGN Page 3 Rule 3: 
+# +# The character ቐ which occurs only in the writing system of the Tigre +# and # Tigrinya languages, should be romanized with k’ in geographic +# names but may be romanized with k\u0331 in documentation. +######################################################################## +ቐ → k\u0331 $ejective e ; # ETHIOPIC SYLLABLE QHA +ቑ → k\u0331 $ejective u ; # ETHIOPIC SYLLABLE QHU +ቒ → k\u0331 $ejective ī ; # ETHIOPIC SYLLABLE QHI +ቓ → k\u0331 $ejective a ; # ETHIOPIC SYLLABLE QHAA +ቔ → k\u0331 $ejective ē ; # ETHIOPIC SYLLABLE QHEE +ቕ → k\u0331 $ejective i ; # ETHIOPIC SYLLABLE QHE +ቖ → k\u0331 $ejective o ; # ETHIOPIC SYLLABLE QHO +ቘ → k\u0331 $ejective wo ; # ETHIOPIC SYLLABLE QHWA +ቚ → k\u0331 $ejective wī ; # ETHIOPIC SYLLABLE QHWI +ቛ → k\u0331 $ejective wa ; # ETHIOPIC SYLLABLE QHWAA +ቜ → k\u0331 $ejective wē ; # ETHIOPIC SYLLABLE QHWEE +ቝ → k\u0331 $ejective wi ; # ETHIOPIC SYLLABLE QHWE +######################################################################## +# End of Rule 3 +######################################################################## +በ → be ; # ETHIOPIC SYLLABLE BA +ቡ → bu ; # ETHIOPIC SYLLABLE BU +ቢ → bī ; # ETHIOPIC SYLLABLE BI +ባ → ba ; # ETHIOPIC SYLLABLE BAA +ቤ → bē ; # ETHIOPIC SYLLABLE BEE +ብ → bi ; # ETHIOPIC SYLLABLE BE +ቦ → bo ; # ETHIOPIC SYLLABLE BO +ቧ → bwa ; # ETHIOPIC SYLLABLE BWA +ተ → te ; # ETHIOPIC SYLLABLE TA +ቱ → tu ; # ETHIOPIC SYLLABLE TU +ቲ → tī ; # ETHIOPIC SYLLABLE TI +ታ → ta ; # ETHIOPIC SYLLABLE TAA +ቴ → tē ; # ETHIOPIC SYLLABLE TEE +ት → ti ; # ETHIOPIC SYLLABLE TE +ቶ → to ; # ETHIOPIC SYLLABLE TO +ቷ → twa ; # ETHIOPIC SYLLABLE TWA +ቸ → che ; # ETHIOPIC SYLLABLE CA +ቹ → chu ; # ETHIOPIC SYLLABLE CU +ቺ → chī ; # ETHIOPIC SYLLABLE CI +ቻ → cha ; # ETHIOPIC SYLLABLE CAA +ቼ → chē ; # ETHIOPIC SYLLABLE CEE +ች → chi ; # ETHIOPIC SYLLABLE CE +ቾ → cho ; # ETHIOPIC SYLLABLE CO +ቿ → chwa ; # ETHIOPIC SYLLABLE CWA +######################################################################## +# BGN Page 3 Rule 2: +# +# For 
documentation purposes the characters romanized with h in rows +# 1, 3, 13 and 18 may be romanized with h, h\u0323, h\u032E, and h\u0331, respectively. +######################################################################## +ኀ → h\u032Eā ; # ETHIOPIC SYLLABLE XA +ኁ → h\u032Eu ; # ETHIOPIC SYLLABLE XU +ኂ → h\u032Eī ; # ETHIOPIC SYLLABLE XI +ኃ → h\u032Ea ; # ETHIOPIC SYLLABLE XAA +ኄ → h\u032Eē ; # ETHIOPIC SYLLABLE XEE +ኅ → h\u032Ei ; # ETHIOPIC SYLLABLE XE +ኆ → h\u032Eo ; # ETHIOPIC SYLLABLE XO +# No rule yet for ኇ U+1287 ETHIOPIC SYLLABLE XOA +ኈ → h\u032Eo; # ETHIOPIC SYLLABLE XWA +ኊ → h\u032Ewī ; # ETHIOPIC SYLLABLE XWI +ኋ → h\u032Ewa ; # ETHIOPIC SYLLABLE XWAA +ኌ → h\u032Ewē ; # ETHIOPIC SYLLABLE XWEE +ኍ → h\u032Ewi ; # ETHIOPIC SYLLABLE XWE +######################################################################## +# End of Rule 2 +######################################################################## +ነ → ne ; # ETHIOPIC SYLLABLE NA +ኑ → nu ; # ETHIOPIC SYLLABLE NU +ኒ → nī ; # ETHIOPIC SYLLABLE NI +ና → na ; # ETHIOPIC SYLLABLE NAA +ኔ → nē ; # ETHIOPIC SYLLABLE NEE +ን → ni ; # ETHIOPIC SYLLABLE NE +ኖ → no ; # ETHIOPIC SYLLABLE NO +ኗ → nwa ; # ETHIOPIC SYLLABLE NWA +ኘ → nye ; # ETHIOPIC SYLLABLE NYA +ኙ → nyu ; # ETHIOPIC SYLLABLE NYU +ኚ → nyī ; # ETHIOPIC SYLLABLE NYI +ኛ → nya ; # ETHIOPIC SYLLABLE NYAA +ኜ → nyē ; # ETHIOPIC SYLLABLE NYEE +ኝ → nyi ; # ETHIOPIC SYLLABLE NYE +ኞ → nyo ; # ETHIOPIC SYLLABLE NYO +ኟ → nywa ; # ETHIOPIC SYLLABLE NYWA +######################################################################## +# BGN Page 3 Rule 5: +# +# The vowel characters in row 16 should be Romanized ā, u, ī, a, ē, i, +# and o initially and ’ā, ’u, ’ī, ’a, ’ē, ’i, and ’o in all other +# positions. 
+######################################################################## +$wordBoundary{አ → ā ; # ETHIOPIC SYLLABLE GLOTTAL A +$wordBoundary{ኡ → u ; # ETHIOPIC SYLLABLE GLOTTAL U +$wordBoundary{ኢ → ī ; # ETHIOPIC SYLLABLE GLOTTAL I +$wordBoundary{ኣ → a ; # ETHIOPIC SYLLABLE GLOTTAL AA +$wordBoundary{ኤ → ē ; # ETHIOPIC SYLLABLE GLOTTAL EE +$wordBoundary{እ → i ; # ETHIOPIC SYLLABLE GLOTTAL E +$wordBoundary{ኦ → o ; # ETHIOPIC SYLLABLE GLOTTAL O +$wordBoundary{ኧ → e ; # ETHIOPIC SYLLABLE GLOTTAL WA +አ → $glottal ā ; # ETHIOPIC SYLLABLE GLOTTAL A +ኡ → $glottal u ; # ETHIOPIC SYLLABLE GLOTTAL U +ኢ → $glottal ī ; # ETHIOPIC SYLLABLE GLOTTAL I +ኣ → $glottal a ; # ETHIOPIC SYLLABLE GLOTTAL AA +ኤ → $glottal ē ; # ETHIOPIC SYLLABLE GLOTTAL EE +እ → $glottal i ; # ETHIOPIC SYLLABLE GLOTTAL E +ኦ → $glottal o ; # ETHIOPIC SYLLABLE GLOTTAL O +ኧ → $glottal e ; # ETHIOPIC SYLLABLE GLOTTAL WA +######################################################################## +# End of Rule 5 +######################################################################## +ከ → ke ; # ETHIOPIC SYLLABLE KA +ኩ → ku ; # ETHIOPIC SYLLABLE KU +ኪ → kī ; # ETHIOPIC SYLLABLE KI +ካ → ka ; # ETHIOPIC SYLLABLE KAA +ኬ → kē ; # ETHIOPIC SYLLABLE KEE +ክ → ki ; # ETHIOPIC SYLLABLE KE +ኮ → ko ; # ETHIOPIC SYLLABLE KO +# No rule yet for ኯ U+12AF ETHIOPIC SYLLABLE KOA +ኰ → ko ; # ETHIOPIC SYLLABLE KWA +ኲ → kwī ; # ETHIOPIC SYLLABLE KWI +ኳ → kwa ; # ETHIOPIC SYLLABLE KWAA +ኴ → kwē ; # ETHIOPIC SYLLABLE KWEE +ኵ → kwi ; # ETHIOPIC SYLLABLE KWE +######################################################################## +# BGN Page 3 Rule 2: +# +# For documentation purposes the characters romanized with h in rows +# 1, 3, 13 and 18 may be romanized with h, h\u0323, h\u032E, and h\u0331, respectively. 
+######################################################################## +ኸ → h\u0331e ; # ETHIOPIC SYLLABLE KXA +ኹ → h\u0331u ; # ETHIOPIC SYLLABLE KXU +ኺ → h\u0331ī ; # ETHIOPIC SYLLABLE KXI +ኻ → h\u0331a ; # ETHIOPIC SYLLABLE KXAA +ኼ → h\u0331ē ; # ETHIOPIC SYLLABLE KXEE +ኽ → h\u0331i ; # ETHIOPIC SYLLABLE KXE +ኾ → h\u0331o ; # ETHIOPIC SYLLABLE KXO +# No rule yet for ዀ U+12C0 ETHIOPIC SYLLABLE KXWA +# No rule yet for ዂ U+12C2 ETHIOPIC SYLLABLE KXWI +# No rule yet for ዃ U+12C3 ETHIOPIC SYLLABLE KXWAA +# No rule yet for ዄ U+12C4 ETHIOPIC SYLLABLE KXWEE +# No rule yet for ዅ U+12C5 ETHIOPIC SYLLABLE KXWE +######################################################################## +# End of Rule 2 +######################################################################## +ወ → we ; # ETHIOPIC SYLLABLE WA +ዉ → wu ; # ETHIOPIC SYLLABLE WU +ዊ → wī ; # ETHIOPIC SYLLABLE WI +ዋ → wa ; # ETHIOPIC SYLLABLE WAA +ዌ → wē ; # ETHIOPIC SYLLABLE WEE +ው → wi ; # ETHIOPIC SYLLABLE WE +ዎ → wo ; # ETHIOPIC SYLLABLE WO +# No rule yet for ዏ U+12CF ETHIOPIC SYLLABLE WOA +ዐ → $pharyngeal ā ; # ETHIOPIC SYLLABLE PHARYNGEAL A +ዑ → $pharyngeal u ; # ETHIOPIC SYLLABLE PHARYNGEAL U +ዒ → $pharyngeal ī ; # ETHIOPIC SYLLABLE PHARYNGEAL I +ዓ → $pharyngeal a ; # ETHIOPIC SYLLABLE PHARYNGEAL AA +ዔ → $pharyngeal ē ; # ETHIOPIC SYLLABLE PHARYNGEAL EE +ዕ → $pharyngeal i ; # ETHIOPIC SYLLABLE PHARYNGEAL E +ዖ → $pharyngeal o ; # ETHIOPIC SYLLABLE PHARYNGEAL O +ዘ → ze ; # ETHIOPIC SYLLABLE ZA +ዙ → zu ; # ETHIOPIC SYLLABLE ZU +ዚ → zī ; # ETHIOPIC SYLLABLE ZI +ዛ → za ; # ETHIOPIC SYLLABLE ZAA +ዜ → zē ; # ETHIOPIC SYLLABLE ZEE +ዝ → zi ; # ETHIOPIC SYLLABLE ZE +ዞ → zo ; # ETHIOPIC SYLLABLE ZO +ዟ → zwa ; # ETHIOPIC SYLLABLE ZWA +ዠ → zhe ; # ETHIOPIC SYLLABLE ZHA +ዡ → zhu ; # ETHIOPIC SYLLABLE ZHU +ዢ → zhī ; # ETHIOPIC SYLLABLE ZHI +ዣ → zha ; # ETHIOPIC SYLLABLE ZHAA +ዤ → zhē ; # ETHIOPIC SYLLABLE ZHEE +ዥ → zhi ; # ETHIOPIC SYLLABLE ZHE +ዦ → zho ; # ETHIOPIC SYLLABLE ZHO +ዧ → zhwa ; # ETHIOPIC SYLLABLE ZHWA +የ → 
ye ; # ETHIOPIC SYLLABLE YA +ዩ → yu ; # ETHIOPIC SYLLABLE YU +ዪ → yī ; # ETHIOPIC SYLLABLE YI +ያ → ya ; # ETHIOPIC SYLLABLE YAA +ዬ → yē ; # ETHIOPIC SYLLABLE YEE +ይ → yi ; # ETHIOPIC SYLLABLE YE +ዮ → yo ; # ETHIOPIC SYLLABLE YO +ደ → de ; # ETHIOPIC SYLLABLE DA +ዱ → du ; # ETHIOPIC SYLLABLE DU +ዲ → dī ; # ETHIOPIC SYLLABLE DI +ዳ → da ; # ETHIOPIC SYLLABLE DAA +ዴ → dē ; # ETHIOPIC SYLLABLE DEE +ድ → di ; # ETHIOPIC SYLLABLE DE +ዶ → do ; # ETHIOPIC SYLLABLE DO +ዷ → dwa ; # ETHIOPIC SYLLABLE DWA +# No rule yet for ዸ U+12F8 ETHIOPIC SYLLABLE DDA ... +ጀ → je ; # ETHIOPIC SYLLABLE JA +ጁ → ju ; # ETHIOPIC SYLLABLE JU +ጂ → jī ; # ETHIOPIC SYLLABLE JI +ጃ → ja ; # ETHIOPIC SYLLABLE JAA +ጄ → jē ; # ETHIOPIC SYLLABLE JEE +ጅ → ji ; # ETHIOPIC SYLLABLE JE +ጆ → jo ; # ETHIOPIC SYLLABLE JO +ጇ → jwa ; # ETHIOPIC SYLLABLE JWA +ገ → ge ; # ETHIOPIC SYLLABLE GA +ጉ → gu ; # ETHIOPIC SYLLABLE GU +ጊ → gī ; # ETHIOPIC SYLLABLE GI +ጋ → ga ; # ETHIOPIC SYLLABLE GAA +ጌ → gē ; # ETHIOPIC SYLLABLE GEE +ግ → gi ; # ETHIOPIC SYLLABLE GE +ጎ → go ; # ETHIOPIC SYLLABLE GO +# No rule yet for ጏ U+130F ETHIOPIC SYLLABLE GOA +ጐ → go ; # ETHIOPIC SYLLABLE GWA +ጒ → gwī ; # ETHIOPIC SYLLABLE GWI +ጓ → gwa ; # ETHIOPIC SYLLABLE GWAA +ጔ → gwē ; # ETHIOPIC SYLLABLE GWEE +ጕ → gwi ; # ETHIOPIC SYLLABLE GWE +# No rule yet for ጘ U+1318 ETHIOPIC SYLLABLE GGA +# ...ጙጚጛጜጝጞ... 
+# No rule yet for ጟ U+131F ETHIOPIC SYLLABLE GGWAA +ጠ → t $ejective e ; # ETHIOPIC SYLLABLE THA +ጡ → t $ejective u ; # ETHIOPIC SYLLABLE THU +ጢ → t $ejective ī ; # ETHIOPIC SYLLABLE THI +ጣ → t $ejective a ; # ETHIOPIC SYLLABLE THAA +ጤ → t $ejective ē ; # ETHIOPIC SYLLABLE THEE +ጥ → t $ejective i ; # ETHIOPIC SYLLABLE THE +ጦ → t $ejective o ; # ETHIOPIC SYLLABLE THO +ጧ → t $ejective wa ; # ETHIOPIC SYLLABLE THWA +ጨ → ch $ejective e ; # ETHIOPIC SYLLABLE CHA +ጩ → ch $ejective u ; # ETHIOPIC SYLLABLE CHU +ጪ → ch $ejective ī ; # ETHIOPIC SYLLABLE CHI +ጫ → ch $ejective a ; # ETHIOPIC SYLLABLE CHAA +ጬ → ch $ejective ē ; # ETHIOPIC SYLLABLE CHEE +ጭ → ch $ejective i ; # ETHIOPIC SYLLABLE CHE +ጮ → ch $ejective o ; # ETHIOPIC SYLLABLE CHO +ጯ → ch $ejective wa ; # ETHIOPIC SYLLABLE CHWA +ጰ → p $ejective e ; # ETHIOPIC SYLLABLE PHA +ጱ → p $ejective u ; # ETHIOPIC SYLLABLE PHU +ጲ → p $ejective ī ; # ETHIOPIC SYLLABLE PHI +ጳ → p $ejective a ; # ETHIOPIC SYLLABLE PHAA +ጴ → p $ejective ē ; # ETHIOPIC SYLLABLE PHEE +ጵ → p $ejective i ; # ETHIOPIC SYLLABLE PHE +ጶ → p $ejective o ; # ETHIOPIC SYLLABLE PHO +ጷ → p $ejective wa ; # ETHIOPIC SYLLABLE PHWA +######################################################################## +# BGN Page 3 Rule 2: +# +# The characters romanized with s in rows 5 and 7 may, instead, be +# romanized with š and s, respectively; and the characters romanized +# with ts’ in rows 30 and 31 may, instead, be romanized with ts’ and +# t\u035Fs’ respectively. 
+######################################################################## +ጸ → ts $ejective e ; # ETHIOPIC SYLLABLE TSA +ጹ → ts $ejective u ; # ETHIOPIC SYLLABLE TSU +ጺ → ts $ejective ī ; # ETHIOPIC SYLLABLE TSI +ጻ → ts $ejective a ; # ETHIOPIC SYLLABLE TSAA +ጼ → ts $ejective ē ; # ETHIOPIC SYLLABLE TSEE +ጽ → ts $ejective i ; # ETHIOPIC SYLLABLE TSE +ጾ → ts $ejective o ; # ETHIOPIC SYLLABLE TSO +ጿ → ts $ejective wa ; # ETHIOPIC SYLLABLE TSWA +ፀ → t\u035Fs $ejective e ; # ETHIOPIC SYLLABLE TZA +ፁ → t\u035Fs $ejective u ; # ETHIOPIC SYLLABLE TZU +ፂ → t\u035Fs $ejective ī ; # ETHIOPIC SYLLABLE TZI +ፃ → t\u035Fs $ejective a ; # ETHIOPIC SYLLABLE TZAA +ፄ → t\u035Fs $ejective ē ; # ETHIOPIC SYLLABLE TZEE +ፅ → t\u035Fs $ejective i ; # ETHIOPIC SYLLABLE TZE +ፆ → t\u035Fs $ejective o ; # ETHIOPIC SYLLABLE TZO +# No rule yet for ፇ U+1347 ETHIOPIC SYLLABLE TZOA +######################################################################## +# End of Rule 2 +######################################################################## +ፈ → fe ; # ETHIOPIC SYLLABLE FA +ፉ → fu ; # ETHIOPIC SYLLABLE FU +ፊ → fī ; # ETHIOPIC SYLLABLE FI +ፋ → fa ; # ETHIOPIC SYLLABLE FAA +ፌ → fē ; # ETHIOPIC SYLLABLE FEE +ፍ → fi ; # ETHIOPIC SYLLABLE FE +ፎ → fo ; # ETHIOPIC SYLLABLE FO +ፏ → fwa ; # ETHIOPIC SYLLABLE FWA +ፐ → pe ; # ETHIOPIC SYLLABLE PA +ፑ → pu ; # ETHIOPIC SYLLABLE PU +ፒ → pī ; # ETHIOPIC SYLLABLE PI +ፓ → pa ; # ETHIOPIC SYLLABLE PAA +ፔ → pē ; # ETHIOPIC SYLLABLE PEE +ፕ → pi ; # ETHIOPIC SYLLABLE PE +ፖ → po ; # ETHIOPIC SYLLABLE PO +ፗ → pwa ; # ETHIOPIC SYLLABLE PWA +ፘ → rya ; # ETHIOPIC SYLLABLE RYA +ፙ → mya ; # ETHIOPIC SYLLABLE MYA +ፚ → fya ; # ETHIOPIC SYLLABLE FYA +# No rule yet for ፚ U+135A ETHIOPIC SYLLABLE FYA +ቨ → ve ; # ETHIOPIC SYLLABLE VA +ቩ → vu ; # ETHIOPIC SYLLABLE VU +ቪ → vī ; # ETHIOPIC SYLLABLE VI +ቫ → va ; # ETHIOPIC SYLLABLE VAA +ቬ → vē ; # ETHIOPIC SYLLABLE VEE +ቭ → vi ; # ETHIOPIC SYLLABLE VE +ቮ → vo ; # ETHIOPIC SYLLABLE VO +ቯ → vwa ; # ETHIOPIC SYLLABLE VWA 
+######################################################################## +# Start of Numeric Transformations +# +# The BGN table on page 3 does not include ፼. +######################################################################## +፩ → 1 ; # ETHIOPIC DIGIT ONE +፪ → 2 ; # ETHIOPIC DIGIT TWO +፫ → 3 ; # ETHIOPIC DIGIT THREE +፬ → 4 ; # ETHIOPIC DIGIT FOUR +፭ → 5 ; # ETHIOPIC DIGIT FIVE +፮ → 6 ; # ETHIOPIC DIGIT SIX +፯ → 7 ; # ETHIOPIC DIGIT SEVEN +፰ → 8 ; # ETHIOPIC DIGIT EIGHT +፱ → 9 ; # ETHIOPIC DIGIT NINE +፲ → 10 ; # ETHIOPIC NUMBER TEN +፳ → 20 ; # ETHIOPIC NUMBER TWENTY +፴ → 30 ; # ETHIOPIC NUMBER THIRTY +፵ → 40 ; # ETHIOPIC NUMBER FORTY +፶ → 50 ; # ETHIOPIC NUMBER FIFTY +፷ → 60 ; # ETHIOPIC NUMBER SIXTY +፸ → 70 ; # ETHIOPIC NUMBER SEVENTY +፹ → 80 ; # ETHIOPIC NUMBER EIGHTY +፺ → 90 ; # ETHIOPIC NUMBER NINETY +፻ → 100 ; # ETHIOPIC NUMBER HUNDRED + diff --git a/icu4c/source/data/translit/ar_ar_Latn_BGN.txt b/icu4c/source/data/translit/ar_ar_Latn_BGN.txt new file mode 100644 index 00000000000..ecd9363c7ea --- /dev/null +++ b/icu4c/source/data/translit/ar_ar_Latn_BGN.txt @@ -0,0 +1,211 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ar_ar_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1956 System +# +# This system was adopted by the BGN in 1946 and by the PCGN +# in 1956 and has been applied in the systematic romanization +# of geographic names in Bahrain, Egypt, Iraq, Jordan, +# Kuwait, Lebanon, Libya, Oman, Qatar, Saudi Arabia, Sudan, +# Syria, Tunisia, the United Arab Emirates, and Yemen, all +# of which have been covered by published BGN gazetteers. 
+# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Arabic-Latin +# +:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩ٱ]] ; +:: NFKD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$alef = ’; +$ayin = ‘; +$disambig = \u0331 ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# non-letters +[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR +[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR +٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR +٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR +# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate +، ↔ ',' ; # ARABIC COMMA +؛ ↔ ';' ; # ARABIC SEMICOLON +؟ ↔ '?' 
; # ARABIC QUESTION MARK +٪ ↔ '%' ; # ARABIC PERCENT SIGN +۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO +۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE +۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO +۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE +۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR +۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE +۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX +۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN +۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT +۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE +٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO +١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE +٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO +٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE +٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR +٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE +٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX +٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN +٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT +٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +######################################################################## +# +# BGN Page 8 Rule 5 +# +# The character sequences ته , كه , ده , and سه may be romanized t·h, k·h, +# d·h, and s·h in order to differentiate those romanizations from the +# digraphs th, kh, dh, and sh. 
+# +######################################################################## +# +ته → t·h ; # ARABIC LETTER TEH + HEH +كه → k·h ; # ARABIC LETTER KAF + HEH +ده → d·h ; # ARABIC LETTER DAL + HEH +سه → s·h ; # ARABIC LETTER SEEN + HEH +# +# +######################################################################## +# +# End Rule 5 +# +######################################################################## +######################################################################## +# +# +# BGN Page 8 Rule 9 +# +# Doubled consonant sounds are represented in Arabic script by placing +# a shaddah ( \u0651 ) over a consonant character. In romanization the letter +# should be doubled. [The remainder of this rule deals with the definite +# article and is lexical.] +# +######################################################################## +# +ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA +ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA +ث\u0651 → thth ; # ARABIC LETTER THEH + SHADDA +ج\u0651 → jj ; # ARABIC LETTER JEEM + SHADDA +ح\u0651 → ḥḥ ; # ARABIC LETTER HAH + SHADDA +خ\u0651 → khkh ; # ARABIC LETTER KHAH + SHADDA +د\u0651 → dd ; # ARABIC LETTER DAL + SHADDA +ذ\u0651 → dhdh ; # ARABIC LETTER THAL + SHADDA +ر\u0651 → rr ; # ARABIC LETTER REH + SHADDA +ز\u0651 → zz ; # ARABIC LETTER ZAIN + SHADDA +س\u0651 → ss ; # ARABIC LETTER SEEN + SHADDA +ش\u0651 → shsh ; # ARABIC LETTER SHEEN + SHADDA +ص\u0651 → ṣṣ ; # ARABIC LETTER SAD + SHADDA +ض\u0651 → ḍḍ ; # ARABIC LETTER DAD + SHADDA +ط\u0651 → ṭṭ ; # ARABIC LETTER TAH + SHADDA +ظ\u0651 → ẓẓ ; # ARABIC LETTER ZAH + SHADDA +ع\u0651 → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA +غ\u0651 → ghgh ; # ARABIC LETTER GHAIN + SHADDA +ف\u0651 → ff ; # ARABIC LETTER FEH + SHADDA +ق\u0651 → qq ; # ARABIC LETTER QAF + SHADDA +ك\u0651 → kk ; # ARABIC LETTER KAF + SHADDA +ل\u0651 → ll ; # ARABIC LETTER LAM + SHADDA +م\u0651 → mm ; # ARABIC LETTER MEEM + SHADDA +ن\u0651 → nn ; # ARABIC LETTER NOON + SHADDA +ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA 
+و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA +ى\u0651 → yy ; # ARABIC LETTER YEH + SHADDA +# +# +######################################################################## +# +# End Rule 9 +# +######################################################################## +# +######################################################################## +# +# Start of Transformations +# +######################################################################## +# +$wordBoundary{ء → ; # ARABIC LETTER HAMZA +ء → $alef ; # ARABIC LETTER HAMZA +$wordBoundary{ا → ; # ARABIC LETTER ALEF +ٱ → $alef ; # ARABIC LETTER ALEF WASLA +$wordBoundary{آ → ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE +آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE +ب → b ; # ARABIC LETTER BEH +ت → t ; # ARABIC LETTER TEH +ة → h ; # ARABIC LETTER TEH MARBUTA +ث → th ; # ARABIC LETTER THEH +ج → j ; # ARABIC LETTER JEEM +ح → ḩ ; # ARABIC LETTER HAH +خ → kh ; # ARABIC LETTER KHAH +د → d ; # ARABIC LETTER DAL +ذ → dh ; # ARABIC LETTER THAL +ر → r ; # ARABIC LETTER REH +ز → z ; # ARABIC LETTER ZAIN +س → s ; # ARABIC LETTER SEEN +ش → sh ; # ARABIC LETTER SHEEN +ص → ş ; # ARABIC LETTER SAD +ض → ḑ ; # ARABIC LETTER DAD +ط → ţ ; # ARABIC LETTER TAH +ظ → z\u0327 ; # ARABIC LETTER ZAH +ع → $ayin ; # ARABIC LETTER AIN +غ → gh ; # ARABIC LETTER GHAIN +ف → f ; # ARABIC LETTER FEH +ق → q ; # ARABIC LETTER QAF +ک ↔ k $disambig ; # ARABIC LETTER KEHEH +ك ↔ k ; # ARABIC LETTER KAF +ل → l ; # ARABIC LETTER LAM +م → m ; # ARABIC LETTER MEEM +ن → n ; # ARABIC LETTER NOON +ه → h ; # ARABIC LETTER HEH +و → w ; # ARABIC LETTER WAW +ى → y ; # ARABIC LETTER YEH +\u064Eا → ā ; # ARABIC FATHA + ALEF +\u064Eى → á ; # ARABIC FATHA + ALEF MAKSURA +\u064Eي\u0652 → ay ; # ARABIC FATHA + YEH + SUKUN +\u064Eو\u0652 → aw ; # ARABIC FATHA + WAW + SUKUN +\u064E → a ; # ARABIC FATHA +\u0650ي → ī ; # ARABIC KASRA + YEH +\u0650 → i ; # ARABIC KASRA +\u064Fو → ū ; # ARABIC DAMMA + WAW +\u064F → u ; # ARABIC DAMMA +\u0652 → ; # ARABIC SUKUN +\u064B → aⁿ ; 
# ARABIC FATHATAN +\u064D → iⁿ ; # ARABIC KASRATAN +\u064C → uⁿ ; # ARABIC DAMMATAN +::NFC (NFD) ; +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/az_Cyrl_az_BGN.txt b/icu4c/source/data/translit/az_Cyrl_az_BGN.txt new file mode 100644 index 00000000000..d8e6d3c8a88 --- /dev/null +++ b/icu4c/source/data/translit/az_Cyrl_az_BGN.txt @@ -0,0 +1,189 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: az_Cyrl_az_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1993 Agreement +# +# Azerbaijani is the official language of Azerbaijan. In 1991, the +# Azerbaijani government adopted a Roman alphabet to replace the +# existing Cyrillic alphabet. The Azerbaijani Cyrillic alphabet +# contains nine letters not present in the Russian alphabet: +# Ғғ, Әә, Јј, Ҝҝ, Өө, Үү, Һһ, Ҹҹ, and ’. Four obsolete letters +# Йй, Ээ, Юю and Яя are also given. 
+# +# The Azerbaijani Alphabet as defined by the BGN (Page 13): +# +# АБВГҒДЕӘЖЗИЫЈКҜЛМНОӨПРСТУҮФХҺЧҸШЙЭЮЯ +# абвгғдеәжзиыјкҝлмноөпрстуүфхһчҹш’йэюя +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: AzerbaijaniCyrl-Latin +# +:: [АБВГҒДЕӘЖЗИЫЈКҜЛМНОӨПРСТУҮФХҺЧҸШЙЭЮЯабвгғдеәжзиыјкҝлмноөпрстуүфхһчҹш’йэюя] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ’ ; +$wordBoundary = [^[:L:][:M:][:N:]] ; +$upperConsonants = [БВГҒДЖЗЈКҜЛМНПРСТФХҺЧҸШЙ] ; +$lowerConsonants = [бвгғджзјкҝлмнпрстфхһчҹш’й] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕӘИЫОӨУҮЭЮЯ] ; +$lowerVowels = [аеәиыоөуүэюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → Q ; # CYRILLIC CAPITAL LETTER GHE +г → q ; # CYRILLIC SMALL LETTER GHE +Ғ → Ğ ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +ғ → ğ ; # CYRILLIC SMALL LETTER GHE WITH STROKE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # CYRILLIC CAPITAL LETTER IE 
+е → e ; # CYRILLIC SMALL LETTER IE +# +######################################################################## +# +# BGN Page 14 Note 1 +# +# The special letter Ə ə, known as schwa, should be reproduced in that +# form whenever encountered. In those instances when it cannot be +# reproduced, however, the letter Ä ä may be substituted for it. +# +######################################################################## +# +Ә → Ə; # CYRILLIC CAPITAL LETTER SCHWA +ә → ə; # CYRILLIC SMALL LETTER SCHWA +# +# +# Alternative rule when schwa is not available. To apply uncomment the +# following by removing the '#' mark at the start of the line and insert +# before the two rule lines above. +# +# Ә → Ä; # CYRILLIC CAPITAL LETTER SCHWA +# ә → ä; # CYRILLIC SMALL LETTER SCHWA +# +######################################################################## +# +# End BGN Page 14 Note 1 +# +######################################################################## +Ж → J ; # CYRILLIC CAPITAL LETTER ZHE +ж → j ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +И → İ ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Ы → I ; # CYRILLIC CAPITAL LETTER YERU +ы → ı ; # CYRILLIC SMALL LETTER YERU +Ј → Y ; # CYRILLIC CAPITAL LETTER JE +ј → y ; # CYRILLIC SMALL LETTER JE +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Ҝ → G ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +ҝ → g ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC 
CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ү → Ü ; # CYRILLIC CAPITAL LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х → X ; # CYRILLIC CAPITAL LETTER HA +х → x ; # CYRILLIC SMALL LETTER HA +Һ → H ; # CYRILLIC CAPITAL LETTER SHHA +һ → h ; # CYRILLIC SMALL LETTER SHHA +Ч → Ç ; # CYRILLIC CAPITAL LETTER CHE +ч → ç ; # CYRILLIC SMALL LETTER CHE +Ҹ → C ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +ҹ → c ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +Ш → Ş ; # CYRILLIC CAPITAL LETTER SHA +ш → ş ; # CYRILLIC SMALL LETTER SHA +# +######################################################################## +# +# BGN Page 13 Rule 33, maps the symbol onto itself and +# is ignored here for computational efficiency. +# +# $prime → $prime ; # RIGHT SINGLE QUOTATION MARK +# +######################################################################## +# +######################################################################## +# +# BGN Page 14 Note 2: +# +# The obsolete characters й, э, ю, and я should be romanized ẏ, ė, +# yu\u0307, and yȧ. 
+# +######################################################################## +# +Й → Ẏ ; # CYRILLIC CAPITAL LETTER SHORT I +й → ẏ ; # CYRILLIC SMALL LETTER SHORT I +Э → Ė ; # CYRILLIC CAPITAL LETTER E +э → ė ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu\u0307 ; # CYRILLIC CAPITAL LETTER YU +Ю → YU\u0307 ; # CYRILLIC CAPITAL LETTER YU +ю → yu\u0307 ; # CYRILLIC SMALL LETTER YU +Я} $lower → Yȧ ; # CYRILLIC CAPITAL LETTER YA +Я → YȦ ; # CYRILLIC CAPITAL LETTER YA +я → yȧ ; # CYRILLIC SMALL LETTER YA +# +# +######################################################################## +# +# End BGN Page 14 Note 2. +# +######################################################################## + diff --git a/icu4c/source/data/translit/be_be_Latn_BGN.txt b/icu4c/source/data/translit/be_be_Latn_BGN.txt new file mode 100644 index 00000000000..83cc37c06fe --- /dev/null +++ b/icu4c/source/data/translit/be_be_Latn_BGN.txt @@ -0,0 +1,200 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: be_be_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Belarusian (formerly Byelorussian) was +# designed for use in romanizing names written in the Belarusian +# Cyrillic alphabet. The Belarusian alphabet contains three +# letters not present in the Russian alphabet: Іі, Ўў, ’. +# One obsolete letter Ґґ is included.
+# +# The Belarusian Alphabet as defined by the BGN (Page 23): +# +# АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯҐ +# абвгдеёжзійклмнопрстуўфхцчшыьэюя’ґ +# +# Originally prepared by Michael Everson +######################################################################## +::[АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯҐабвгдеёжзійклмнопрстуўфхцчшщъыьэюя’ґ] ; +::NFC; +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГДЖЗЙКЛМНПРСТЎФХЦЧШЬҐ] ; +$lowerConsonants = [бвгджзйклмнпрстўфхцчшь’ґ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁІОУЫЭЮЯ] ; +$lowerVowels = [аеёіоуыэюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +$wordBoundary = [^[:L:][:M:][:N:]] ; +# Start of Alphabetic Transformations +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → H ; # CYRILLIC CAPITAL LETTER GHE +г → h ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е} $lower → Ye ; # CYRILLIC CAPITAL LETTER IE +Е → YE ; # CYRILLIC CAPITAL LETTER IE +е → ye ; # CYRILLIC SMALL LETTER IE +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +Ш} $lower → Sh; +Ш → SH; +ш → sh; +######################################################################## +# +# BGN Page 23 Note 1 +# +# The character sequences зг, кг, сг, тс, and цг may be romanized z·h, +# k·h, s·h, t·s and ts·h in order to differentiate those romanizations +# from the digraphs zh, kh, sh, ts, and the letter sequence
tsh, which +# are used to render characters ж, х, ш, ц, and the character sequence тш. +# +######################################################################## +ЗГ → Z·H ; # CYRILLIC CAPITAL LETTER ZE +Зг → Z·h ; # CYRILLIC CAPITAL LETTER ZE +зг → z·h ; # CYRILLIC SMALL LETTER ZE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +######################################################################## +# +# End Rule 1 +# +######################################################################## +І → I ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +і → i ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +######################################################################## +# +# BGN Page 23 Rule 1 +# +# кг becomes k·h +# +######################################################################## +КГ → K·H ; # CYRILLIC CAPITAL LETTER KA +Кг → K·h ; # CYRILLIC CAPITAL LETTER KA +кг → k·h ; # CYRILLIC SMALL LETTER KA +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +######################################################################## +# +# End Rule 1 +# +######################################################################## +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +######################################################################## +# +# BGN Page 23 Rule 1 +# +# сг becomes s·h +# +######################################################################## +СГ → S·H ; # CYRILLIC CAPITAL LETTER ES +Сг → S·h ; # CYRILLIC CAPITAL LETTER ES 
+сг → s·h ; # CYRILLIC SMALL LETTER ES +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +######################################################################## +# +# BGN Page 23 Rule 1 +# +# тс becomes t·s +# +######################################################################## +ТС → T·S ; # CYRILLIC CAPITAL LETTER TE +Тс → T·s ; # CYRILLIC CAPITAL LETTER TE +тс → t·s ; # CYRILLIC SMALL LETTER TE +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +######################################################################## +# +# End Rule 1 +# +######################################################################## +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ў → W ; # CYRILLIC CAPITAL LETTER SHORT U +ў → w ; # CYRILLIC SMALL LETTER SHORT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +######################################################################## +# +# BGN Page 23 Rule 1 +# +# цг becomes ts·h +# +######################################################################## +ЦГ → TS·H ; # CYRILLIC CAPITAL LETTER TSE +Цг → Ts·h ; # CYRILLIC CAPITAL LETTER TSE +цг → ts·h ; # CYRILLIC SMALL LETTER TSE +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +######################################################################## +# +# End Rule 1 +# +######################################################################## +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +Ы → Y ; # CYRILLIC CAPITAL LETTER YERU +ы → y ; # 
CYRILLIC SMALL LETTER YERU +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +’ → $doublePrime ; # RIGHT SINGLE QUOTATION MARK +######################################################################## +# +# BGN Page 23 Note 2 +# +# The obsolete character ґ should be romanized g. +# +######################################################################## +Ґ → G ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +ґ → g ; # CYRILLIC SMALL LETTER GHE WITH UPTURN + diff --git a/icu4c/source/data/translit/bg_bg_Latn_BGN.txt b/icu4c/source/data/translit/bg_bg_Latn_BGN.txt new file mode 100644 index 00000000000..d8351ce9bb4 --- /dev/null +++ b/icu4c/source/data/translit/bg_bg_Latn_BGN.txt @@ -0,0 +1,243 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: bg_bg_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1952 System +# +# This system was adopted by the BGN in 1949 and by the PCGN in 1952. +# It reflects the much simplified Bulgarian orthography as officially +# revised in February 1945. The Bulgarian alphabet contains all of +# the characters present in the Russian alphabet with the exception +# of Ёё, Ыы, and Ээ. Two obsolete letters Ѫѫ and Ѣѣ are also given.
+# +# The Bulgarian Alphabet as defined by the BGN (Page 15): +# +# АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯѪѢ +# абвгдежзийклмнопрстуфхцчшщъьюяѫѣ +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Bulgarian-Latin +# +:: [АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯѪѢабвгдежзийклмнопрстуфхцчшщъьюяѫѣ] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$upperConsonants = [БВГДЖЗЙКЛМНПРСТФХЦЧШЩЬ] ; +$lowerConsonants = [бвгджзйклмнпрстфхцчшщь] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕИОУЪЮЯѪѢ] ; +$lowerVowels = [аеиоуъюяѫѣ] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +$bulgarian = [ $lower $upperConsonants $upperVowels ] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # CYRILLIC CAPITAL LETTER DE +е → e ; # CYRILLIC SMALL LETTER DE +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → 
ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# BGN Page 16 Note 4 +# +# тс becomes t·s +# +######################################################################## +# +ТС → T·S ; # CYRILLIC CAPITAL LETTER TE +Тс → T·s ; # CYRILLIC CAPITAL LETTER TE +тс → t·s ; # CYRILLIC SMALL LETTER TE +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +# +# +######################################################################## +# +# End Note 4 +# +######################################################################## +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +# 
+######################################################################## +# +# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). +# +# шт becomes sh·t +# +######################################################################## +# +ШТ → SH·T ; # CYRILLIC CAPITAL LETTER SHA +Шт → Sh·t ; # CYRILLIC CAPITAL LETTER SHA +шт → sh·t ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Sht ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHT ; # CYRILLIC CAPITAL LETTER SHCHA +щ → sht ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Implied rule +# +######################################################################## +Ъ → Ŭ ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → ŭ ; # CYRILLIC SMALL LETTER HARD SIGN +# +######################################################################## +# +# BGN Page 16 Note 1 +# +# In modern Bulgarian orthography, the character ъ does not occur in +# word-final position. It should be omitted in romanization when found +# on older sources. +# +# The following rule removes all Ъъ at the end of a word. It is assumed +# that when the condition is met, the text must be from an older source. +# Comment out with a '#' at the start of a line to disable. 
+# +# +######################################################################## +# +$bulgarian { [Ъъ] } $wordBoundary > ; +# +# +######################################################################## +# +# End BGN Page 16 Note 1 +# +######################################################################## +Ь → ’ ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → ’ ; # CYRILLIC SMALL LETTER SOFT SIGN +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +# +######################################################################## +# +# BGN Page 16 Note 2 +# +# The obsolete character Ѫ, which was replaced by Ъ in 1945, should be +# romanized Ŭ. +# +######################################################################## +# +Ѫ → Ŭ ; # CYRILLIC CAPITAL LETTER BIG YUS +ѫ → ŭ ; # CYRILLIC SMALL LETTER BIG YUS +# +# +######################################################################## +# +# End BGN Page 16 Note 2 +# +######################################################################## +# +######################################################################## +# +# BGN Page 16 Note 3 +# +# The obsolete character Ѣ, replaced in 1945 by Я or Е according to local +# pronunciation, should be romanized as e or ya, accordingly, if the +# pronunciation is known; otherwise as ye. +# +######################################################################## +# +Ѣ} $lower → Ye ; # CYRILLIC CAPITAL LETTER YAT +Ѣ → YE ; # CYRILLIC CAPITAL LETTER YAT +ѣ → ye ; # CYRILLIC SMALL LETTER YAT +# +# +# Alternative rule where appropriate for local pronunciation. To apply +# uncomment the following by removing the '#' mark at the start of the +# line and insert before the three rule lines above.
+# +# Ѣ} $lower → E ; # CYRILLIC CAPITAL LETTER YAT +# Ѣ → E ; # CYRILLIC CAPITAL LETTER YAT +# ѣ → e ; # CYRILLIC SMALL LETTER YAT +# +######################################################################## +# +# End BGN Page 16 Note 3 +# +######################################################################## + diff --git a/icu4c/source/data/translit/dv_dv_Latn_BGN.txt b/icu4c/source/data/translit/dv_dv_Latn_BGN.txt new file mode 100644 index 00000000000..45f0d12bae6 --- /dev/null +++ b/icu4c/source/data/translit/dv_dv_Latn_BGN.txt @@ -0,0 +1,177 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: dv_dv_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1988 Agreement, with modifications 2009 +# +# This romanization system supersedes the one which was approved by +# the BGN and the PCGN in 1972. This official system was submitted +# to the PCGN by the Maldivian government in 1987 and approved by BGN +# and PCGN in 1988. The system presented here reflects the 1988 Agreement +# with minor modifications introduced by the government of the Maldives +# in 2009. +# +# In our rules, we also convert Arabic punctuation characters to Latin. +# These appear to be used in Maldivian text, for example in the Universal +# Declaration of Human Rights.
+::[[:block=thaana:][،؛؟٪٫٬]\uFDF2] ; +::NFD; +$wordBoundary = [^[:L:][:M:][:N:]] ; +$vowel = [\u07A6-\u07AF] ; +$sukun = \u07B0 ; +$sign = [$sukun $vowel] ; +$rule4 = [އށ] $sukun ; # see note 4 +### Consonants +# HAA +$rule4 ހ → hh ; +ހ → h ; +# NOONU +# See note 5: "romanized n’ when appearing without any vowel or auxiliary sign" +$rule4 ނ } $sign → nn ; +$rule4 ނ → nn\' ; +ނ } $sign → n ; +ނ → n\' ; +# RAA +$rule4 ރ → rr ; +ރ → r ; +# BAA +$rule4 ބ → bb ; +ބ → b ; +# LHAVIYANI +$rule4 ޅ → hlh ; +ޅ → lh; +# KAAFU +$rule4 ކ → kk ; +ކ → k ; +# VAAVU +$rule4 ވ → vv ; +ވ → v ; +# MEEMU +$rule4 މ → mm ; +މ → m ; +# FAAFU +$rule4 ފ → ff ; +ފ → f; +# DHAALU +$rule4 ދ → hdh ; +ދ → dh; +# THAA +# See note 6: "romanized iy when appearing in combination with a supercircle" +$rule4 ތ $sukun → hiy ; +$rule4 ތ → hth ; +\u07A8 ތ $sukun → iy ; +ތ $sukun → iy ; +ތ → th ; +# LAAMU +$rule4 ލ → ll ; +ލ → l ; +# GAAFU +$rule4 ގ → gg ; +ގ → g ; +# GNAVIYANI +$rule4 ޏ → hgn ; +ޏ → gn ; +# SEENU +$rule4 ސ → ss ; +ސ → s ; +# DAVIYANI +$rule4 ޑ → dd ; +ޑ → d ; +# ZAVIYANI +$rule4 ޒ → zz ; +ޒ → z ; +# TAVIYANI +$rule4 ޓ → tt ; +ޓ → t ; +# YAA +$rule4 ޔ → yy ; +ޔ → y ; +# PAVIYANI +$rule4 ޕ → pp ; +ޕ → p ; +# JAVIYANI +$rule4 ޖ → jj ; +ޖ → j ; +# CHAVIYANI +$rule4 ޗ → hch ; +ޗ → ch ; +### Borrowed Consonants (See Rule 7) +# SAADHU +$rule4 ޞ → şş ; +ޞ → ş ; +# SHEENU +$rule4 ޝ → hsh ; +ޝ → sh ; +# ZAA +$rule4 ޜ → zz ; +ޜ → z; +# KHAA +$rule4 ޚ → hkh ; +ޚ → kh; +# HHAA +$rule4 ޙ → ḩḩ ; +ޙ → ḩ ; +# THAALU +$rule4 ޛ → hdh ; +ޛ → dh ; +# TTAA +$rule4 ޘ → hth ; +ޘ → th ; +# WAAVU +$rule4 ޥ → ww ; +ޥ → w ; +# QAAFU +$rule4 ޤ → qq ; +ޤ → q ; +# GHAINU +$rule4 ޣ → hgh ; +ޣ → gh ; +# AINU +$rule4 ޢ → \'\' ; +ޢ → \' ; +# ZO +$rule4 ޡ → z\u0327z\u0327 ; +ޡ → z\u0327 ; +# TO +$rule4 ޠ → ţţ ; +ޠ → ţ ; +# DAADHU +$rule4 ޟ → ḑḑ ; +ޟ → ḑ ; +# NOTE: not in Maldivian BGN system, but for completeness of Thaana block +# NAA +$rule4 ޱ → n\u0332n\u0332 ; +ޱ → n\u0332 ; +# Rule 4 in word-final position +$rule4 } 
$wordBoundary → h; +# SHAVIYANI (placed last to avoid masking) +$rule4 ށ → hsh; +ށ → sh; +# Otherwise, these signs are not romanized elsewhere +$rule4 → ; +\u07B0 → ; +އ → ; +# NOTE: not in Maldivian BGN system, but common in names (e.g. Abdullah) +($vowel) \uFDF2 → | $1 llāh ; +\uFDF2 → allāh; +### Vowels +\u07A6 → a; # ABAFILI +\u07A7 → aa; # AABAAFILI +\u07AC → e; # EBEFILI +\u07AD → ey; # EYBEYFILI +\u07A8 → i; # IBIFILI +\u07A9 → ee; # EEBEEFILI +\u07AE → o; # OBOFILI +\u07AF → oa; # OABOAFILI +\u07AA → u; # UBUFILI +\u07AB → oo; # OOBOOFILI +، → ','; # U+060C ARABIC COMMA +؛ → ';'; # U+061B ARABIC SEMICOLON +؟ → '?'; # U+061F ARABIC QUESTION MARK +٪ → '%'; # U+066A ARABIC PERCENT SIGN +٫ → '.'; # U+066B ARABIC DECIMAL SEPARATOR +٬ → ','; # U+066C ARABIC THOUSANDS SEPARATOR + diff --git a/icu4c/source/data/translit/el_el_Latn_BGN.txt b/icu4c/source/data/translit/el_el_Latn_BGN.txt new file mode 100644 index 00000000000..6d7fbaf5d35 --- /dev/null +++ b/icu4c/source/data/translit/el_el_Latn_BGN.txt @@ -0,0 +1,425 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: el_el_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1962 System +# +# This system is a simplified version of the system devised by the PCGN +# in 1941 and later adopted by the BGN. In 1962 the two organizations +# agreed to joint adoption of certain changes in the original system, +# specifically the omission of special rules for the treatment of Greek +# geographic names of Albanian, Bulgarian, Italian, Macedonian, and +# Turkish origin. That revision eliminated the need to consider the +# origin of names and removed ambiguity from the romanization of Greek +# expressions of possible non-Greek origin. This system is based on +# the pronunciation of modern Greek and is not intended for use in +# the romanization of classical Greek. 
+# +# The Greek Alphabet as defined by the BGN (Pages 29-31): +# +# ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ +# αβγδεζηθικλμνξοπρσςτυφχψω +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Greek-Latin +# +:: [ΆΈΉΊΌΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώἀἁἂἃἄἅἆἇἈἉἊἋἌἍἎἏἐἑἒἓἔἕἘἙἚἛἜἝἠἡἢἣἤἥἦἧἨἩἪἫἬἭἮἯἰἱἲἳἴἵἶἷἸἹἺἻἼἽἾἿὀὁὂὃὄὅὈὉὊὋὌὍὐὑὒὓὔὕὖὗὙὛὝὟὠὡὢὣὤὥὦὧὨὩὫὬὭὮὯὰάὲέὴήὶίὸόὺύὼώᾀᾁᾂᾃᾄᾅᾆᾇᾈᾉᾊᾋᾌᾍᾎᾏᾐᾑᾒᾓᾔᾕᾖᾗᾘᾙᾚᾛᾜᾝᾞᾟᾠᾡᾢᾣᾤᾥᾦᾧᾨᾩᾪᾫᾬᾭᾮᾯᾲᾳᾴᾶᾷᾺΆᾼῂῃῄῆῇῈΈῊΉῌῖῚΊῤῥῦῪΎῲῳῴῶῷῸΌῺΏῼ῾] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$upperConsonants = [ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨ] ; +$lowerConsonants = [βγδζθκλμνξπρσςτφχψ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [ΑΕΗΙΟΥΩ] ; +$lowerVowels = [αεηιουω] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +######################################################################## +# +# BGN Page 32 Rule 1: +# +# The apostrophe and reversed apostrophe, on or the other of which is +# written in Greek in front of all initial uppercase vowel characters, +# above all initial lowercase vowel characters, and above the second +# character of all initial two-vowel character sequences, should 
not +# be romanized, e.g., Ἀθῆναι → Athínai, Ἠράκλειον → Iráklion, +# Οἰνόφυτα → Oinófita. These apostrophes must be distinguished from +# accent marks hen they occur together, e.g. Ἄβατον → Ávaton, +# Ἤλια → Ília, Οἴτη → Oíti. The reversed apostrophe is sometimes found +# also with ρ and should, likewise, not be romanized: ῥέμα → réma. +# +# BGN Page 32 Rule 2a: +# +# Stress is shown in Greek by the use of the tilde or circumflex, +# the acute accent, or the grave accent; all of those marks should +# be represented in romanization by an acute accent, e.g., +# Ἀθῆναι → Athínai, Νδία → Día, Ζεμενὸν → Zemenón. +# +# BGN Page 32 Rule 4: +# +# The character ι (ióta) is sometimes found written under, or, +# in uppercase, to the right of the vowel characters α, η, and ω. +# This "subscript iota" should not be romanized, e.g., +# Μυρτῷον Πέλαγος or ΜΥΡΤῼΟΝ ΠΕΛΑΓΟΣ [but not ΜΥΡΤΩΙΟΝ ΠΕΛΑΓΟΣ] +# → Mirtóön Pélagos. +# +######################################################################## +# +[ἈἉᾼᾈᾉ] → Α ; # GREEK CAPITAL LETTER ALPHA +[ἀἁᾳᾀᾁ] → α ; # GREEK SMALL LETTER ALPHA +[ἊἋἌἍἎἏᾊᾋᾌᾍᾎᾏᾺΆ] → Ά ; # GREEK CAPITAL LETTER ALPHA WITH TONOS +[ἂἃἄἅἆἇὰάᾂᾃᾄᾅᾆᾇᾲᾴᾶᾷ] → ά ; # GREEK SMALL LETTER ALPHA WITH TONOS +[ἘἙ] → Ε ; # GREEK CAPITAL LETTER EPSILON +[ἐἑὲέ] → ε ; # GREEK SMALL LETTER EPSILON +[ἚἛἜἝῈΈ] → Έ ; # GREEK CAPITAL LETTER EPSILON WITH TONOS +[ἒἓἔἕ] → έ ; # GREEK SMALL LETTER EPSILON WITH TONOS +[ἨἩᾘᾙῌ] → Η ; # GREEK CAPITAL LETTER ETA +[ἠἡᾐᾑῃ] → η ; # GREEK SMALL LETTER ETA +[ἪἫἬἭἮἯᾚᾛᾜᾝᾞᾟῊΉ] → Ή ; # GREEK CAPITAL LETTER ETA WITH TONOS +[ἢἣἤἥἦἧὴήᾒᾓᾔᾕᾖᾗῂῄῆῇ] → ή ; # GREEK SMALL LETTER ETA WITH TONOS +[ἸἹ] → Ι ; # GREEK CAPITAL LETTER IOTA +[ἰἱ] → ι ; # GREEK SMALL LETTER IOTA +[ἺἻἼἽἾἿῚΊ] → Ί ; # GREEK CAPITAL LETTER IOTA WITH TONOS +[ἲἳἴἵἶἷὶίῖ] → ί ; # GREEK SMALL LETTER IOTA WITH TONOS +[ὈὉ] → Ο ; # GREEK CAPITAL LETTER OMICRON +[ὀὁ] → ο ; # GREEK SMALL LETTER OMICRON +[ὊὋὌὍῸΌ] → Ό ; # GREEK CAPITAL LETTER OMICRON WITH TONOS +[ὂὃὄὅὸό] → ό ; # GREEK SMALL LETTER OMICRON 
WITH TONOS +Ὑ → Υ ; # GREEK CAPITAL LETTER UPSILON +[ὐὑ] → υ ; # GREEK SMALL LETTER UPSILON +[ὛὝὟῪΎ] → Ύ ; # GREEK CAPITAL LETTER UPSILON WITH TONOS +[ὒὓὔὕὖὗὺύῦ] → ύ ; # GREEK SMALL LETTER UPSILON WITH TONOS +[ὨὩᾨᾩῼ] → Ω ; # GREEK CAPITAL LETTER OMEGA +[ὠὡᾠᾡῳ] → ω ; # GREEK SMALL LETTER OMEGA +[ὬὫὬὭὮὯᾪᾫᾬᾭᾮᾯῺΏ] → Ώ ; # GREEK CAPITAL LETTER OMEGA WITH TONOS +[ὢὣὤὥὦὧὼώᾢᾣᾤᾥᾦᾧῲῴῶῷ] → ώ ; # GREEK SMALL LETTER OMEGA WITH TONOS +Ῥ → Ρ ; # GREEK CAPITAL LETTER RHO +[ῤῥ] → ρ ; # GREEK SMALL LETTER RHO +# +# +######################################################################## +# +# End of Rules 1, 2a, and 4 +# +######################################################################## +# +######################################################################## +# +# BGN Page 32 Rules 2b and 2c: +# +# If the stressed vowel is written as a sequence of two vowel characters +# in Greek, the # second vowel character should carry the accent; +# similarly, in Romanization the acute accent should be placed over the +# second vowel letter, e.g., Οἰνοῦσαι → Oinoúsai, Οἴτη → Oíti, +# Θεσπιαὶ → Thespiaí. +# +# Where a syllable containing on the combinations αυ, ευ, or ηυ +# carries the stress, this is marked in Greek on the character υ. +# In romanization it should be shown on the preceding vowel +# letter, e.g., Πειραιεύς → Piraiévs, Αὔρα → Ávra. 
+# +Αί → Aí ; +αί → aí ; +Οί → Oí ; +οί → oí ; +Ού → Oú ; +ού → oú ; +Αύ → Άυ ; +αύ → άυ ; +Εύ → Έυ ; +εύ → έυ ; +Ηύ → Ήυ ; +ηύ → ήυ ; +# +# +######################################################################## +# +# End of Rules 2b and 2c +# +######################################################################## +# +######################################################################## +# +# BGN Page 32 Rule 3: +# +# The dieresis should be shown in romanization where it occurs in Greek, +# e.g., Μαρινέϊκα → Marinéïka, Ἀχαΐα → Akhaï\u0301a; and over the second vowel +# letter in romanization of the following combinations of Greek vowel +# characters: αε, e.g., Ἀετὸς → Aëtos; αη, e.g., Ἀηδὼν → Aïdhon; οη, +# e.g. Οἰνόη → Oinóï; ωο, e.g., Ἠρῶον → Iróön. +# +[ΪΫ] → Ï ; +[ϊϋ] → ï ; +[ΐΰ] → ï\u0301 ; +Αε → Aë ; +αε → aë ; +Αη → Aï ; +αη → aï ; +Οη → Oï ; +οη → oï ; +Ωο → Oö ; +ωο → oö ; +Άε → Áë ; +άε → áë ; +Άη → Áï ; +άη → áï ; +Όη → Óï ; +όη → óï ; +Ώο → Óö ; +ώο → óö ; +# +# +######################################################################## +# +# End of Rule 3 +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +ΑΙ → AI ; # GREEK CAPITAL LETTER ALPHA + CAPITAL IOTA +Αι → Ai ; # GREEK CAPITAL LETTER ALPHA + SMALL IOTA +αι → ai ; # GREEK SMALL LETTER ALPHA + SMALL IOTA +ΑΥ → AV ; # GREEK CAPITAL LETTER ALPHA + CAPITAL UPSILON +Αυ → Av ; # GREEK CAPITAL LETTER ALPHA + SMALL UPSILON +αυ → av ; # GREEK SMALL LETTER ALPHA + SMALL UPSILON +Α → A ; # GREEK CAPITAL LETTER ALPHA +α → a ; # GREEK SMALL LETTER ALPHA +Ά → Á ; # GREEK CAPITAL LETTER ALPHA WITH TONOS +ά → á ; # GREEK SMALL LETTER ALPHA WITH TONOS +Β → V ; # GREEK CAPITAL LETTER BETA +β → v ; # GREEK SMALL LETTER BETA +ΓΓ → NG ; # GREEK CAPITAL LETTER GAMMA + CAPITAL GAMMA +Γγ → Ng
; # GREEK CAPITAL LETTER GAMMA + SMALL GAMMA +γγ → ng ; # GREEK SMALL LETTER GAMMA + SMALL GAMMA +$wordBoundary{ΓΚ → G ; # GREEK CAPITAL LETTER GAMMA + CAPITAL KAPPA +$wordBoundary{Γκ → G ; # GREEK CAPITAL LETTER GAMMA + SMALL KAPPA +$wordBoundary{γκ → g ; # GREEK SMALL LETTER GAMMA + SMALL KAPPA +ΓΚ → NG ; # GREEK CAPITAL LETTER GAMMA + CAPITAL KAPPA +Γκ → Ng ; # GREEK CAPITAL LETTER GAMMA + SMALL KAPPA +γκ → ng ; # GREEK SMALL LETTER GAMMA + SMALL KAPPA +# +# +######################################################################## +# +# BGN Page 29 Rule 3a: +# +# The character γ should be romanized g before α, ο, ου, ω, and +# consonants other than γ, ξ, and χ. +# +######################################################################## +# +Γ}[ΑΟΩ [$upperConsonants - [ΓΞΧ]]] → G ; # GREEK CAPITAL LETTER GAMMA +Γ}[αοω [$lowerConsonants - [γξχ]]] → G ; # GREEK CAPITAL LETTER GAMMA +Γ}ΟΥ → G ; # GREEK CAPITAL LETTER GAMMA +Γ}ου → G ; # GREEK CAPITAL LETTER GAMMA +γ}[αοω [$lowerConsonants - [γξχ]]] → g ; # GREEK SMALL LETTER GAMMA +γ}ου → g ; # GREEK SMALL LETTER GAMMA +# +# +######################################################################## +# +# End of Rule 3a +# +######################################################################## +# +######################################################################## +# +# BGN Page 29 Rule 3b: +# +# The character γ should be romanized y before αι, ε, ει, η, ι, οι, υ, +# and υι. 
+# +######################################################################## +# +Γ}[ΑΕΟΥ]Ι → Y ; # GREEK CAPITAL LETTER GAMMA +Γ}[ΕΗΙΥ] → Y ; # GREEK CAPITAL LETTER GAMMA +Γ}[αεου]ι → Y ; # GREEK CAPITAL LETTER GAMMA +Γ}[εηιυ] → Y ; # GREEK CAPITAL LETTER GAMMA +γ}[αεου]ι → y ; # GREEK SMALL LETTER GAMMA +γ}[εηιυ] → y ; # GREEK SMALL LETTER GAMMA +# +# +######################################################################## +# +# End of Rule 3b +# +######################################################################## +# +######################################################################## +# +# BGN Page 29 Rule 3c: +# +# The character γ should be romanized n before ξ and χ. +# +######################################################################## +# +Γ}[ΞΧ] → N ; # GREEK CAPITAL LETTER GAMMA +Γ}[ξχ] → N ; # GREEK CAPITAL LETTER GAMMA +γ}[ξχ] → n ; # GREEK SMALL LETTER GAMMA +# +# +######################################################################## +# +# End of Rule 3c +# +######################################################################## +# +Γ → G ; # GREEK CAPITAL LETTER GAMMA +γ → g ; # GREEK SMALL LETTER GAMMA +# +# +######################################################################## +# +# BGN Page 29 Rule 4a: +# +# The character δ should be romanized d when between ν and ρ. 
+# +######################################################################## +# +Ν{Δ}Ρ → D ; # GREEK CAPITAL LETTER DELTA +ν{δ}ρ → d ; # GREEK SMALL LETTER DELTA +# +# +######################################################################## +# +# End of Rule 4a +# +######################################################################## +# +Δ} $lower → Dh ; # GREEK CAPITAL LETTER DELTA +Δ → DH ; # GREEK CAPITAL LETTER DELTA +δ → dh ; # GREEK SMALL LETTER DELTA +ΕΙ → I ; # GREEK CAPITAL LETTER EPSILON + CAPITAL IOTA +Ει → I ; # GREEK CAPITAL LETTER EPSILON + SMALL IOTA +ει → i ; # GREEK SMALL LETTER EPSILON + SMALL IOTA +ΕΪ → EÏ ; # GREEK CAPITAL LETTER EPSILON + CAPITAL IOTA DIAERESIS +Εϊ → Eï ; # GREEK CAPITAL LETTER EPSILON + SMALL IOTA DIAERESIS +εϊ → eï ; # GREEK SMALL LETTER EPSILON + SMALL IOTA DIAERESIS +ΕΥ → EV ; # GREEK CAPITAL LETTER EPSILON + CAPITAL UPSILON +Ευ → Ev ; # GREEK CAPITAL LETTER EPSILON + SMALL UPSILON +ευ → ev ; # GREEK SMALL LETTER EPSILON + SMALL UPSILON +Ε → E ; # GREEK CAPITAL LETTER EPSILON +ε → e ; # GREEK SMALL LETTER EPSILON +Έ → É ; # GREEK CAPITAL LETTER EPSILON WITH TONOS +έ → é ; # GREEK SMALL LETTER EPSILON WITH TONOS +Ζ → Z ; # GREEK CAPITAL LETTER ZETA +ζ → z ; # GREEK SMALL LETTER ZETA +ΗΥ → IV ; # GREEK CAPITAL LETTER ETA + CAPITAL UPSILON +Ηυ → Iv ; # GREEK CAPITAL LETTER ETA + SMALL UPSILON +ηυ → iv ; # GREEK SMALL LETTER ETA + SMALL UPSILON +Η → I ; # GREEK CAPITAL LETTER ETA +η → i ; # GREEK SMALL LETTER ETA +Ή → Í ; # GREEK CAPITAL LETTER ETA WITH TONOS +ή → í ; # GREEK SMALL LETTER ETA WITH TONOS +Θ} $lower → Th ; # GREEK CAPITAL LETTER THETA +Θ → TH ; # GREEK CAPITAL LETTER THETA +θ → th ; # GREEK SMALL LETTER THETA +Ι → I ; # GREEK CAPITAL LETTER IOTA +ι → i ; # GREEK SMALL LETTER IOTA +Ί → Í ; # GREEK CAPITAL LETTER IOTA WITH TONOS +ί → í ; # GREEK SMALL LETTER IOTA WITH TONOS +Κ → K ; # GREEK CAPITAL LETTER KAPPA +κ → k ; # GREEK SMALL LETTER KAPPA +Λ → L ; # GREEK CAPITAL LETTER LAMDA +λ → l ; # GREEK SMALL 
LETTER LAMDA +$wordBoundary{ΜΠ → B ; # GREEK CAPITAL LETTER MU + CAPITAL PI +$wordBoundary{Μπ → B ; # GREEK CAPITAL LETTER MU + SMALL PI +$wordBoundary{μπ → b ; # GREEK SMALL LETTER MU + SMALL PI +ΜΠ → MB ; # GREEK CAPITAL LETTER MU + CAPITAL PI +Μπ → Mb ; # GREEK CAPITAL LETTER MU + SMALL PI +μπ → mb ; # GREEK SMALL LETTER MU + SMALL PI +Μ → M ; # GREEK CAPITAL LETTER MU +μ → m ; # GREEK SMALL LETTER MU +$wordBoundary{ΝΤ → D ; # GREEK CAPITAL LETTER NU + CAPITAL TAU +$wordBoundary{Ντ → D ; # GREEK CAPITAL LETTER NU + SMALL TAU +$wordBoundary{ντ → d ; # GREEK SMALL LETTER NU + SMALL TAU +ΝΤ → ND ; # GREEK CAPITAL LETTER NU + CAPITAL TAU +Ντ → Nd ; # GREEK CAPITAL LETTER NU + SMALL TAU +ντ → nd ; # GREEK SMALL LETTER NU + SMALL TAU +Ν → N ; # GREEK CAPITAL LETTER NU +ν → n ; # GREEK SMALL LETTER NU +Ξ → X ; # GREEK CAPITAL LETTER KSI +ξ → x ; # GREEK SMALL LETTER KSI +ΟΙ → OI ; # GREEK CAPITAL LETTER OMICRON + CAPITAL IOTA +Οι → Oi ; # GREEK CAPITAL LETTER OMICRON + SMALL IOTA +οι → oi ; # GREEK SMALL LETTER OMICRON + SMALL IOTA +ΟΥ → OU ; # GREEK CAPITAL LETTER OMICRON + CAPITAL UPSILON +Ου → Ou ; # GREEK CAPITAL LETTER OMICRON + SMALL UPSILON +ου → ou ; # GREEK SMALL LETTER OMICRON + SMALL UPSILON +Ο → O ; # GREEK CAPITAL LETTER OMICRON +ο → o ; # GREEK SMALL LETTER OMICRON +Ό → Ó ; # GREEK CAPITAL LETTER OMICRON WITH TONOS +ό → ó ; # GREEK SMALL LETTER OMICRON WITH TONOS +Π → P ; # GREEK CAPITAL LETTER PI +π → p ; # GREEK SMALL LETTER PI +Ρ → R ; # GREEK CAPITAL LETTER RHO +ρ → r ; # GREEK SMALL LETTER RHO +Σ → S ; # GREEK CAPITAL LETTER SIGMA +σ → s ; # GREEK SMALL LETTER SIGMA +ς → s ; # GREEK SMALL LETTER FINAL SIGMA +Τ → T ; # GREEK CAPITAL LETTER TAU +τ → t ; # GREEK SMALL LETTER TAU +# +# +######################################################################## +# +# End Rule 3.5 +# +######################################################################## +# +Υ → I ; # GREEK CAPITAL LETTER UPSILON +υ → i ; # GREEK SMALL LETTER UPSILON +Ύ → Í ; # GREEK 
CAPITAL LETTER UPSILON WITH TONOS +ύ → í ; # GREEK SMALL LETTER UPSILON WITH TONOS +Φ → F ; # GREEK CAPITAL LETTER PHI +φ → f ; # GREEK SMALL LETTER PHI +Χ} $lower → Kh ; # GREEK CAPITAL LETTER CHI +Χ → KH ; # GREEK CAPITAL LETTER CHI +χ → kh ; # GREEK SMALL LETTER CHI +Ψ} $lower → Ps ; # GREEK CAPITAL LETTER PSI +Ψ → PS ; # GREEK CAPITAL LETTER PSI +ψ → ps ; # GREEK SMALL LETTER PSI +Ω → O ; # GREEK CAPITAL LETTER OMEGA +ω → o ; # GREEK SMALL LETTER OMEGA +Ώ → Ó ; # GREEK CAPITAL LETTER OMEGA WITH TONOS +ώ → ó ; # GREEK SMALL LETTER OMEGA WITH TONOS +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/fa_fa_Latn_BGN.txt b/icu4c/source/data/translit/fa_fa_Latn_BGN.txt new file mode 100644 index 00000000000..0fd2d1181ea --- /dev/null +++ b/icu4c/source/data/translit/fa_fa_Latn_BGN.txt @@ -0,0 +1,209 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: fa_fa_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1956 System +# +# This system was adopted by the BGN in 1946 and by the PCGN in 1958. +# It is used for the romanization of geographic names in Iran and +# for Persian-language names in Afghanistan. 
+# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Persian-Latin +# +:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویي\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩پچژگی]] ; +:: NFKD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$alef = ’; +$ayin = ‘; +$disambig = \u0331 ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# non-letters +[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR +[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR +٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR +٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR +# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate +، ↔ ',' ; # ARABIC COMMA +؛ ↔ ';' ; # ARABIC SEMICOLON +؟ ↔ '?' 
; # ARABIC QUESTION MARK +٪ ↔ '%' ; # ARABIC PERCENT SIGN +٠ ↔ 0 $disambig ; # ARABIC-INDIC DIGIT ZERO +١ ↔ 1 $disambig ; # ARABIC-INDIC DIGIT ONE +٢ ↔ 2 $disambig ; # ARABIC-INDIC DIGIT TWO +٣ ↔ 3 $disambig ; # ARABIC-INDIC DIGIT THREE +٤ ↔ 4 $disambig ; # ARABIC-INDIC DIGIT FOUR +٥ ↔ 5 $disambig ; # ARABIC-INDIC DIGIT FIVE +٦ ↔ 6 $disambig ; # ARABIC-INDIC DIGIT SIX +٧ ↔ 7 $disambig ; # ARABIC-INDIC DIGIT SEVEN +٨ ↔ 8 $disambig ; # ARABIC-INDIC DIGIT EIGHT +٩ ↔ 9 $disambig ; # ARABIC-INDIC DIGIT NINE +۰ ↔ 0 ; # EXTENDED ARABIC-INDIC DIGIT ZERO +۱ ↔ 1 ; # EXTENDED ARABIC-INDIC DIGIT ONE +۲ ↔ 2 ; # EXTENDED ARABIC-INDIC DIGIT TWO +۳ ↔ 3 ; # EXTENDED ARABIC-INDIC DIGIT THREE +۴ ↔ 4 ; # EXTENDED ARABIC-INDIC DIGIT FOUR +۵ ↔ 5 ; # EXTENDED ARABIC-INDIC DIGIT FIVE +۶ ↔ 6 ; # EXTENDED ARABIC-INDIC DIGIT SIX +۷ ↔ 7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN +۸ ↔ 8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT +۹ ↔ 9 ; # EXTENDED ARABIC-INDIC DIGIT NINE +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +######################################################################## +# +# BGN Page 89 Rule 4 +# +# The character sequences كه , زه , سه , and گه may be romanized k·h, z·h, +# s·h, and g·h in order to differentiate those romanizations from the +# digraphs kh, zh, sh, and gh. 
+# +######################################################################## +# +كه → k·h ; # ARABIC LETTER KAF + HEH +زه → z·h ; # ARABIC LETTER ZAIN + HEH +سه → s·h ; # ARABIC LETTER SEEN + HEH +گه → g·h ; # ARABIC LETTER GAF + HEH +# +# +######################################################################## +# +# End Rule 4 +# +######################################################################## +# +######################################################################## +# +# BGN Page 91 Rule 7 +# +# Doubled consonant sounds are represented in Arabic script by +# placing a shaddah ( \u0651 ) over a consonant character. In romanization +# the letter should be doubled. [The remainder of this rule deals with +# the definite article and is lexical.] +# +######################################################################## +# +ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA +پ\u0651 → pp ; # ARABIC LETTER PEH + SHADDA +ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA +ث\u0651 → s\u0304s\u0304 ; # ARABIC LETTER THEH + SHADDA +ج\u0651 → jj ; # ARABIC LETTER JEEM + SHADDA +چ\u0651 → chch ; # ARABIC LETTER TCHEH + SHADDA +ح\u0651 → ḥḥ ; # ARABIC LETTER HAH + SHADDA +خ\u0651 → khkh ; # ARABIC LETTER KHAH + SHADDA +د\u0651 → dd ; # ARABIC LETTER DAL + SHADDA +ذ\u0651 → z\u0304z\u0304 ; # ARABIC LETTER THAL + SHADDA +ر\u0651 → rr ; # ARABIC LETTER REH + SHADDA +ز\u0651 → zz ; # ARABIC LETTER ZAIN + SHADDA +ژ\u0651 → zhzh ; # ARABIC LETTER JEH + SHADDA +س\u0651 → ss ; # ARABIC LETTER SEEN + SHADDA +ش\u0651 → shsh ; # ARABIC LETTER SHEEN + SHADDA +ص\u0651 → ṣṣ ; # ARABIC LETTER SAD + SHADDA +ض\u0651 → ẕẕ ; # ARABIC LETTER DAD + SHADDA +ط\u0651 → ṭṭ ; # ARABIC LETTER TAH + SHADDA +ظ\u0651 → ẓẓ ; # ARABIC LETTER ZAH + SHADDA +ع\u0651 → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA +غ\u0651 → ghgh ; # ARABIC LETTER GHAIN + SHADDA +ف\u0651 → ff ; # ARABIC LETTER FEH + SHADDA +ق\u0651 → qq ; # ARABIC LETTER QAF + SHADDA +ك\u0651 → kk ; # ARABIC LETTER KAF + SHADDA +ل\u0651 → ll ; # 
ARABIC LETTER LAM + SHADDA +م\u0651 → mm ; # ARABIC LETTER MEEM + SHADDA +ن\u0651 → nn ; # ARABIC LETTER NOON + SHADDA +ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA +و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA +ی\u0651 → yy ; # ARABIC LETTER FARSI YEH + SHADDA +# +# +######################################################################## +# +# End Rule 7 +# +######################################################################## +# +######################################################################## +# +# Start of Transformations +# +######################################################################## +# +$wordBoundary{ء → ; # ARABIC LETTER HAMZA +ء → $alef ; # ARABIC LETTER HAMZA +$wordBoundary{ا → ; # ARABIC LETTER ALEF +آ → $alef ā ; # ARABIC FATHA ALEF WITH MADDA ABOVE +ب → b ; # ARABIC LETTER BEH +پ → p ; # ARABIC LETTER PEH +ت → t ; # ARABIC LETTER TEH +ة → h ; # ARABIC LETTER TEH MARBUTA +ث → s\u0304 ; # ARABIC LETTER THEH +ج → j ; # ARABIC LETTER JEEM +چ → ch ; # ARABIC LETTER TCHEH +ح → ḥ ; # ARABIC LETTER HAH +خ → kh ; # ARABIC LETTER KHAH +د → d ; # ARABIC LETTER DAL +ذ → z\u0304 ; # ARABIC LETTER THAL +ر → r ; # ARABIC LETTER REH +ز → z ; # ARABIC LETTER ZAIN +ژ → zh ; # ARABIC LETTER JEH +س → s ; # ARABIC LETTER SEEN +ش → sh ; # ARABIC LETTER SHEEN +ص → ṣ ; # ARABIC LETTER SAD +ض → ẕ ; # ARABIC LETTER DAD +ط → ṭ ; # ARABIC LETTER TAH +ظ → ẓ ; # ARABIC LETTER ZAH +ع → $ayin ; # ARABIC LETTER AIN +غ → gh ; # ARABIC LETTER GHAIN +ف → f ; # ARABIC LETTER FEH +ق → q ; # ARABIC LETTER QAF +ک ↔ k ; # ARABIC LETTER KEHEH +ك ↔ k $disambig ; # ARABIC LETTER KAF +گ → g ; # ARABIC LETTER GAF +ل → l ; # ARABIC LETTER LAM +م → m ; # ARABIC LETTER MEEM +ن → n ; # ARABIC LETTER NOON +ه → h ; # ARABIC LETTER HEH +و → v ; # ARABIC LETTER WAW +ی → y ; # ARABIC LETTER FARSI YEH +\u064Eا → ā ; # ARABIC FATHA + ALEF +\u064Eی → á ; # ARABIC FATHA + FARSI YEH +\u064Eو\u0652 → ow ; # ARABIC FATHA + WAW + SUKUN +\u064E → a ; # ARABIC FATHA +\u0650ي → ī ; # ARABIC 
KASRA + YEH +\u0650 → e ; # ARABIC KASRA +\u064Fو → ū ; # ARABIC DAMMA + WAW +\u064F → o ; # ARABIC DAMMA +\u0652 → ; # ARABIC SUKUN +::NFC (NFD) ; +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/he_he_Latn_BGN.txt b/icu4c/source/data/translit/he_he_Latn_BGN.txt new file mode 100644 index 00000000000..fc971dbf5c1 --- /dev/null +++ b/icu4c/source/data/translit/he_he_Latn_BGN.txt @@ -0,0 +1,119 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: he_he_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1981 System +# +# The BGN/PCGN system for Hebrew was designed for use in romanizing +# names written in the Hebrew alphabet. The Roman letters and letter +# combinations shown as equivalents to the Hebrew characters reflect +# the modern variety of Hebrew, i.e., the language spoken in +# the State of Israel. 
+# +# The Hebrew Alphabet as defined by the BGN (Page 33-35): +# +# אבגדהוזחטיכךלמםנןסעפףצץקרששת +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Hebrew-Latin +# +:: [ \u05B0\u05B1\u05B2\u05B3\u05B4\u05B5\u05B6\u05B7\u05B8\u05B9\u05BB\u05BC\u05C1\u05C2אבגדהוזחטיךכלםמןנסעףפץצקרשת׳] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$alef = ’; +$ayin = ‘; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +ב\u05BC → b ; # HEBREW LETTER BET + DAGESH +פ\u05BC → p ; # HEBREW LETTER PE + DAGESH +ג\u05BC → g ; # HEBREW LETTER GIMEL + DAGESH +ג׳ → ǧ ; # HEBREW LETTER GIMEL + GERESH +ו\u05BC → u ; # HEBREW LETTER VAV + POINT DAGESH +ו\u05B9 → o ; # HEBREW LETTER VAV + POINT HOLAM +צ׳ → č ; # HEBREW LETTER TSADI + GERESH +ז׳ → ž ; # HEBREW LETTER ZAYIN + GERESH +ד\u05BC → d ; # HEBREW LETTER DALET + DAGESH +ה\u05BC → h ; # HEBREW LETTER HE + DAGESH +ך\u05BC → k ; # HEBREW LETTER FINAL KAF + DAGESH +כ\u05BC → k ; # HEBREW LETTER KAF + DAGESH +ך\u05B0 → kh ; # HEBREW LETTER FINAL KAF + SHEVA +ת\u05BC → t ; # HEBREW LETTER TAV + DAGESH +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic 
Transformations +# +######################################################################## +# +א → $alef ; # HEBREW LETTER ALEF +ב → v ; # HEBREW LETTER BET +ג → g ; # HEBREW LETTER GIMEL +ד → d ; # HEBREW LETTER DALET +ה → h ; # HEBREW LETTER HE +ח → h\u0331 ; # HEBREW LETTER HET +ו → w ; # HEBREW LETTER VAV +ז → z ; # HEBREW LETTER ZAYIN +[טת] → t ; # HEBREW LETTER TET +י → y ; # HEBREW LETTER YOD +[כך] → kh ; # HEBREW LETTER KAF and FINAL KAF +ל → l ; # HEBREW LETTER LAMED +[מם] → m ; # HEBREW LETTER MEM and FINAL MEM +[נן] → n ; # HEBREW LETTER NUN and FINAL NUN +ס → s ; # HEBREW LETTER SAMEKH +ע → $ayin ; # HEBREW LETTER AYIN +[פף] → f ; # HEBREW LETTER PE and FINAL PE +[צץ] → z\u0331 ; # HEBREW LETTER TSADI and FINAL TSADI +ק → q ; # HEBREW LETTER QOF +ר → r ; # HEBREW LETTER RESH +ש\u05C1 → sh ; # HEBREW LETTER SHIN +ש\u05C2 → s ; # HEBREW LETTER SHIN +\u05B7 → a ; # HEBREW POINT PATAH +\u05B2 → a ; # HEBREW POINT HATAF PATAH +\u05B8 → o ; # HEBREW POINT QAMATS +\u05B6 → e ; # HEBREW POINT SEGOL +\u05B1 → e ; # HEBREW POINT HATAF SEGOL +\u05B5י → e ; # HEBREW POINT TSERE + LETTER YOD +\u05B5 → e ; # HEBREW POINT TSERE +\u05B0 → e ; # HEBREW POINT SHEVA +\u05B4י → i ; # HEBREW POINT HIRIQ + LETTER YOD +\u05B4 → i ; # HEBREW POINT HIRIQ +\u05B3 → o ; # HEBREW LETTER HATAF QAMATS +\u05B9 → o ; # HEBREW POINT HOLAM +\u05BB → u ; # HEBREW POINT QUBUTS +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/hy_hy_Latn_BGN.txt b/icu4c/source/data/translit/hy_hy_Latn_BGN.txt new file mode 100644 index 00000000000..3538737147c --- /dev/null +++ b/icu4c/source/data/translit/hy_hy_Latn_BGN.txt @@ -0,0 +1,171 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: hy_hy_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1981 System +# +# The BGN/PCGN system for Armenian was designed for use in romanizing +# names written in the Armenian alphabet. The Roman letters and letter +# combinations shown as equivalents to the Armenian characters reflect +# the eastern variety of Armenian, i.e., the language spoken in +# the Republic of Armenia. +# +# The Armenian Alphabet as defined by the BGN (Page 11): +# +# ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՐՑՓՔՕՖ +# աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցփքևօֆ +# +# Originally prepared by Michael Everson +# +# https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/614615/ROMANIZATION_SYSTEM_FOR_ARMENIAN.PDF +::[ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև։]; +::NFC; +$upperConsonants = [ԲԳԴԶԹԺԼԽԾԿՀՁՂՃՄՅՆՇՉՊՋՌՍՎՐՑՓՔՖ] ; +$lowerConsonants = [բգդզթժլխծկհձղճմյնշչպջռսվտրցփքֆ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [ԱԵԷԸԻՈՕՒ] ; +$lowerVowels = [աեէըիոևօւ] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +$aspirate = ’ ; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +$wordBoundary = [^[:L:][:M:][:N:]] ; +# BGN/PCGN 1981, Note 5: “The characters ԵՎ , եւ and և should be romanized +# yev initially and after the vowel characters ա, ե, է, ի, ո, ու, and օ. 
+# In all other instances these characters should be romanized ev.” +# +# BGN/PCGN 1981, Note 3: “In Soviet-era sources this upper-case digraph +# character is found as Ե ի.” +$YEV = [{ԵՎ} {ԵՒ} {ԵԻ}]; +$Yev = [{Եվ} {Եւ} {Եի}]; +$yev = [{եվ} {եւ} և {եի}]; +$yev_vowels = [ա ե է ի ո {ու} օ Ա Ե Է Ի Ո {ՈՒ} {Ու} Օ]; +[$wordBoundary $yev_vowels] {$YEV} → YEV; +[$wordBoundary $yev_vowels] {$Yev} → Yev; +[$wordBoundary $yev_vowels] {$yev} → yev; +$YEV → EV; +$Yev → Ev; +$yev → ev; +::null; +# BGN Page 12 Rule 1: The character ե should be romanized ye initially, +# after the vowel characters ա, ե, է, ը, ի, ո, ւ, and օ. +# In all other instances, it should be romanized e. +$upperVowels {Ե → YE ; # ARMENIAN CAPITAL LETTER ECH +$lowerVowels {Ե → Ye ; # ARMENIAN CAPITAL LETTER ECH +$wordBoundary {Ե → Ye ; # ARMENIAN CAPITAL LETTER ECH +Ե → E ; # ARMENIAN CAPITAL LETTER ECH +$vowels {ե → ye ; # ARMENIAN SMALL LETTER ECH +$wordBoundary {ե → ye ; # ARMENIAN SMALL LETTER ECH +ե → e ; # ARMENIAN SMALL LETTER ECH +::null; +Ա → A ; # ARMENIAN CAPITAL LETTER AYB +ա → a ; # ARMENIAN SMALL LETTER AYB +Բ → B ; # ARMENIAN CAPITAL LETTER BEN +բ → b ; # ARMENIAN SMALL LETTER BEN +Գ → G ; # ARMENIAN CAPITAL LETTER GIM +գ → g ; # ARMENIAN SMALL LETTER GIM +Դ → D ; # ARMENIAN CAPITAL LETTER DA +դ → d ; # ARMENIAN SMALL LETTER DA +Զ → Z ; # ARMENIAN CAPITAL LETTER ZA +զ → z ; # ARMENIAN SMALL LETTER ZA +Է → E ; # ARMENIAN CAPITAL LETTER EH +է → e ; # ARMENIAN SMALL LETTER EH +Ը → Y ; # ARMENIAN CAPITAL LETTER ET +ը → y ; # ARMENIAN SMALL LETTER ET +Թ → T $aspirate ; # ARMENIAN CAPITAL LETTER TO +թ → t $aspirate ; # ARMENIAN SMALL LETTER TO +Ժ} $lower → Zh ; # ARMENIAN CAPITAL LETTER ZHE +Ժ → ZH ; # ARMENIAN CAPITAL LETTER ZHE +ժ → zh ; # ARMENIAN SMALL LETTER ZHE +Ի → I ; # ARMENIAN CAPITAL LETTER INI +ի → i ; # ARMENIAN SMALL LETTER INI +Լ → L ; # ARMENIAN CAPITAL LETTER LIWN +լ → l ; # ARMENIAN SMALL LETTER LIWN +Խ} $lower → Kh ; # ARMENIAN CAPITAL LETTER XEH +Խ → KH ; # ARMENIAN CAPITAL LETTER 
XEH +խ → kh ; # ARMENIAN SMALL LETTER XEH +Ծ} $lower → Ts ; # ARMENIAN CAPITAL LETTER CA +Ծ → TS ; # ARMENIAN CAPITAL LETTER CA +ծ → ts ; # ARMENIAN SMALL LETTER CA +Կ → K ; # ARMENIAN CAPITAL LETTER KEN +կ → k ; # ARMENIAN SMALL LETTER KEN +Հ → H ; # ARMENIAN CAPITAL LETTER HO +հ → h ; # ARMENIAN SMALL LETTER HO +Ձ} $lower → Dz ; # ARMENIAN CAPITAL LETTER JA +Ձ → DZ ; # ARMENIAN CAPITAL LETTER JA +ձ → dz ; # ARMENIAN SMALL LETTER JA +Ղ} $lower → Gh ; # ARMENIAN CAPITAL LETTER GHAD +Ղ → GH ; # ARMENIAN CAPITAL LETTER GHAD +ղ → gh ; # ARMENIAN SMALL LETTER GHAD +Ճ} $lower → Ch ; # ARMENIAN CAPITAL LETTER CHEH +Ճ → CH ; # ARMENIAN CAPITAL LETTER CHEH +ճ → ch ; # ARMENIAN SMALL LETTER CHEH +Մ → M ; # ARMENIAN CAPITAL LETTER MEN +մ → m ; # ARMENIAN SMALL LETTER MEN +Յ → Y ; # ARMENIAN CAPITAL LETTER YI +յ → y ; # ARMENIAN SMALL LETTER YI +Ն → N ; # ARMENIAN CAPITAL LETTER NOW +ն → n ; # ARMENIAN SMALL LETTER NOW +Շ} $lower → Sh ; # ARMENIAN CAPITAL LETTER SHA +Շ → SH ; # ARMENIAN CAPITAL LETTER SHA +շ → sh ; # ARMENIAN SMALL LETTER SHA +# Transliteration Case 34: +Ու} $lower → U ; # ARMENIAN CAPITAL LETTER VO + SMALL YIWN +ՈՒ → U ; # ARMENIAN CAPITAL LETTER VO + CAPITAL YIWN +ու → u ; # ARMENIAN SMALL LETTER VO + SMALL YIWN +# BGN Page 12 Rule 2: +# +# The character ո should be romanized vo initially except in the +# word ով, which should be romanized ov. In all other instances, it +# should be romanized o. 
+ՈՎ → OV ; +Ով → Ov ; +ով → ov ; +$wordBoundary{Ո}$lower → Vo ; # ARMENIAN CAPITAL LETTER VO +$wordBoundary{Ո → VO ; # ARMENIAN CAPITAL LETTER VO +Ո → O ; # ARMENIAN CAPITAL LETTER VO +$wordBoundary{ո → vo ; # ARMENIAN SMALL LETTER VO +ո → o ; # ARMENIAN SMALL LETTER VO +Չ} $lower → Ch $aspirate ; # ARMENIAN CAPITAL LETTER CHA +Չ → CH $aspirate ; # ARMENIAN CAPITAL LETTER CHA +չ → ch $aspirate ; # ARMENIAN SMALL LETTER CHA +Պ → P ; # ARMENIAN CAPITAL LETTER PEH +պ → p ; # ARMENIAN SMALL LETTER PEH +Ջ → J ; # ARMENIAN CAPITAL LETTER JHEH +ջ → j ; # ARMENIAN SMALL LETTER JHEH +Ռ} $lower → Rr ; # ARMENIAN CAPITAL LETTER RA +Ռ → RR ; # ARMENIAN CAPITAL LETTER RA +ռ → rr ; # ARMENIAN SMALL LETTER RA +Ս → S ; # ARMENIAN CAPITAL LETTER SEH +ս → s ; # ARMENIAN SMALL LETTER SEH +Վ → V ; # ARMENIAN CAPITAL LETTER VEW +վ → v ; # ARMENIAN SMALL LETTER VEW +Տ → T ; # ARMENIAN CAPITAL LETTER TIWN +տ → t ; # ARMENIAN SMALL LETTER TIWN +Ր → R ; # ARMENIAN CAPITAL LETTER REH +ր → r ; # ARMENIAN SMALL LETTER REH +Ց} $lower → Ts $aspirate ; # ARMENIAN CAPITAL LETTER CO +Ց → TS $aspirate ; # ARMENIAN CAPITAL LETTER CO +ց → ts $aspirate ; # ARMENIAN SMALL LETTER CO +######################################################################## +# +# The BGN does not show YIWN on its own. +# +#Ւ → W ; # ARMENIAN CAPITAL LETTER YIWN +#ւ → w ; # ARMENIAN SMALL LETTER YIWN +# +######################################################################## +Փ → P $aspirate ; # ARMENIAN CAPITAL LETTER PIWR +փ → p $aspirate ; # ARMENIAN SMALL LETTER PIWR +Ք → K $aspirate ; # ARMENIAN CAPITAL LETTER KEH +ք → k $aspirate ; # ARMENIAN SMALL LETTER KEH +Օ → O ; # ARMENIAN CAPITAL LETTER OH +օ → o ; # ARMENIAN SMALL LETTER OH +Ֆ → F ; # ARMENIAN CAPITAL LETTER FEH +ֆ → f ; # ARMENIAN SMALL LETTER FEH +։ → \. 
; # ARMENIAN FULL STOP + diff --git a/icu4c/source/data/translit/ja_Hrkt_ja_Latn_BGN.txt b/icu4c/source/data/translit/ja_Hrkt_ja_Latn_BGN.txt new file mode 100644 index 00000000000..04040d09e35 --- /dev/null +++ b/icu4c/source/data/translit/ja_Hrkt_ja_Latn_BGN.txt @@ -0,0 +1,393 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ja_Hrkt_ja_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN Agreement +# +# The modified Hepburn system for the romanization of Japanese has been +# in use by the U.S. Board on Geographic Names since about 1930 and has +# been used extensively in the romanization of Japanese geographic names. +# The system is well adapted to the general needs of speakers of English +# and is the most widely used system for the romanization of Japanese. +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Japanese-Latin +# +:: [あいうえおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろわゐゑをんゔアイウエオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワヰヱヲンヴ] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +$apostrophe = ’; +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# 
+######################################################################## +# +# BGN Page 45 Rule 2: +# +# A small-script tsu form (ッ or っ) is inserted between kana symbols +# to indicate a double consonant and is romanized as k before k; +# as s before s or sh; as t before t, ts, or ch; and as p before p. +# +######################################################################## +# +ッ}[カキクケコ] → k ; # KATAKANA LETTER SMALL TU +っ}[かきくけこ] → k ; # HIRAGANA LETTER SMALL TU +ッ}[サシスセソ] → s ; # KATAKANA LETTER SMALL TU +っ}[さしすせそ] → s ; # HIRAGANA LETTER SMALL TU +ッ}[タチツテト] → t ; # KATAKANA LETTER SMALL TU +っ}[たちつてと] → t ; # HIRAGANA LETTER SMALL TU +ッ}[パピプペポ] → p ; # KATAKANA LETTER SMALL TU +っ}[ぱぴぷぺぽ] → p ; # HIRAGANA LETTER SMALL TU +# +# +######################################################################## +# +# End of Rule 2 +# +######################################################################## +# +######################################################################## +# +# Start of Syllabic Transformations +# +######################################################################## +# +ア → a ; # KATAKANA LETTER A +イ → i ; # KATAKANA LETTER I +ウ → u ; # KATAKANA LETTER U +エ → e ; # KATAKANA LETTER E +オウ → ō ; # KATAKANA LETTER O + U +オ → o ; # KATAKANA LETTER O +カ → ka ; # KATAKANA LETTER KA +キョウ → kyō ; # KATAKANA LETTER KI + SMALL YO + U +キュウ → kyū ; # KATAKANA LETTER KI + SMALL YU + U +キャ → kya ; # KATAKANA LETTER KI + SMALL YA +キョ → kyo ; # KATAKANA LETTER KI + SMALL YO +キュ → kyu ; # KATAKANA LETTER KI + SMALL YU +キ → ki ; # KATAKANA LETTER KI +ク → ku ; # KATAKANA LETTER KU +ケ → ke ; # KATAKANA LETTER KE +コウ → kō ; # KATAKANA LETTER KO + U +コ → ko ; # KATAKANA LETTER KO +サ → sa ; # KATAKANA LETTER SA +ショウ → shō ; # KATAKANA LETTER SI + SMALL YO + U +シュウ → shū ; # KATAKANA LETTER SI + SMALL YU + U +シャ → sha ; # KATAKANA LETTER SI + SMALL YA +ショ → sho ; # KATAKANA LETTER SI + SMALL YO +シュ → shu ; # KATAKANA LETTER SI + SMALL YU +シ → shi ; # KATAKANA 
LETTER SI +ス → su ; # KATAKANA LETTER SU +セ → se ; # KATAKANA LETTER SE +ソウ → sō ; # KATAKANA LETTER SO + U +ソ → so ; # KATAKANA LETTER SO +タ → ta ; # KATAKANA LETTER TA +チョウ → chō ; # KATAKANA LETTER TI + SMALL YO + U +チュウ → chū ; # KATAKANA LETTER TI + SMALL YU + U +チャ → cha ; # KATAKANA LETTER TI + SMALL YA +チョ → cho ; # KATAKANA LETTER TI + SMALL YO +チュ → chu ; # KATAKANA LETTER TI + SMALL YU +チ → chi ; # KATAKANA LETTER TI +ツ → tsu ; # KATAKANA LETTER TU +テ → te ; # KATAKANA LETTER TE +トウ → tō ; # KATAKANA LETTER TO + U +ト → to ; # KATAKANA LETTER TO +ナ → na ; # KATAKANA LETTER NA +ニョウ → nyō ; # KATAKANA LETTER NI + SMALL YO + U +ニュウ → nyū ; # KATAKANA LETTER NI + SMALL YU + U +ニャ → nya ; # KATAKANA LETTER NI + SMALL YA +ニョ → nyo ; # KATAKANA LETTER NI + SMALL YO +ニュ → nyu ; # KATAKANA LETTER NI + SMALL YU +ニ → ni ; # KATAKANA LETTER NI +ヌ → nu ; # KATAKANA LETTER NU +ネ → ne ; # KATAKANA LETTER NE +ノウ → nō ; # KATAKANA LETTER NO + U +ノ → no ; # KATAKANA LETTER NO +ハ → ha ; # KATAKANA LETTER HA +ヒョウ → hyō ; # KATAKANA LETTER HI + SMALL YO + U +ヒュウ → hyū ; # KATAKANA LETTER HI + SMALL YU + U +ヒャ → hya ; # KATAKANA LETTER HI + SMALL YA +ヒョ → hyo ; # KATAKANA LETTER HI + SMALL YO +ヒュ → hyu ; # KATAKANA LETTER HI + SMALL YU +ヒ → hi ; # KATAKANA LETTER HI +フ → fu ; # KATAKANA LETTER HU +ヘ → he ; # KATAKANA LETTER HE +ホウ → hō ; # KATAKANA LETTER HO + U +ホ → ho ; # KATAKANA LETTER HO +マ → ma ; # KATAKANA LETTER MA +ミョウ → myō ; # KATAKANA LETTER MI + SMALL YO + U +ミュウ → myū ; # KATAKANA LETTER MI + SMALL YU + U +ミャ → mya ; # KATAKANA LETTER MI + SMALL YA +ミョ → myo ; # KATAKANA LETTER MI + SMALL YO +ミュ → myu ; # KATAKANA LETTER MI + SMALL YU +ミ → mi ; # KATAKANA LETTER MI +ム → mu ; # KATAKANA LETTER MU +メ → me ; # KATAKANA LETTER ME +モウ → mō ; # KATAKANA LETTER MO + U +モ → mo ; # KATAKANA LETTER MO +ヤ → ya ; # KATAKANA LETTER YA +ユ → yu ; # KATAKANA LETTER YU +ヨウ → yō ; # KATAKANA LETTER YO + U +ヨ → yo ; # KATAKANA LETTER YO +ラ → ra ; # KATAKANA LETTER RA +リョウ → ryō ; # 
KATAKANA LETTER RI + SMALL YO + U +リュウ → ryū ; # KATAKANA LETTER RI + SMALL YU + U +リャ → rya ; # KATAKANA LETTER RI + SMALL YA +リョ → ryo ; # KATAKANA LETTER RI + SMALL YO +リュ → ryu ; # KATAKANA LETTER RI + SMALL YU +リ → ri ; # KATAKANA LETTER RI +ル → ru ; # KATAKANA LETTER RU +レ → re ; # KATAKANA LETTER RE +ロウ → rō ; # KATAKANA LETTER RO + U +ロ → ro ; # KATAKANA LETTER RO +ワ → wa ; # KATAKANA LETTER WA +ヰ → i ; # KATAKANA LETTER WI +ヱ → e ; # KATAKANA LETTER WE +ヲ → o ; # KATAKANA LETTER WO +# +# +######################################################################## +# +# BGN Page 45 Rule 3: +# +# The character ン should be romanized m before b, p, or m. +# The character ん should be romanized m before b, p, or m. +# The character ン should be romanized n’ before y or a vowel letter. +# The character ん should be romanized n’ before y or a vowel letter. +# +######################################################################## +# +ン}[バビブベボパピプペポマミムメモ] → m ; # KATAKANA LETTER N +ん}[ばびぶべぼぱぴぷぺぽまみむめも] → m ; # HIRAGANA LETTER N +ン}[ヤユヨアイウエオ] → n $apostrophe ; # KATAKANA LETTER N +ん}[やゆよあいうえお] → n $apostrophe ; # HIRAGANA LETTER N +# +# +######################################################################## +# +# End of Rule 3 +# +######################################################################## +# +ン → n ; # KATAKANA LETTER N +ガ → ga ; # KATAKANA LETTER GA +ギョウ → gyō ; # KATAKANA LETTER GI + SMALL YO + U +ギュウ → gyū ; # KATAKANA LETTER GI + SMALL YU + U +ギャ → gya ; # KATAKANA LETTER GI + SMALL YA +ギョ → gyo ; # KATAKANA LETTER GI + SMALL YO +ギュ → gyu ; # KATAKANA LETTER GI + SMALL YU +ギ → gi ; # KATAKANA LETTER GI +グ → gu ; # KATAKANA LETTER GU +ゲ → ge ; # KATAKANA LETTER GE +ゴウ → gō ; # KATAKANA LETTER GO + U +ゴ → go ; # KATAKANA LETTER GO +ザ → za ; # KATAKANA LETTER ZA +ジョウ → jō ; # KATAKANA LETTER ZI + SMALL YO + U +ジュウ → jū ; # KATAKANA LETTER ZI + SMALL YU + U +ジャ → ja ; # KATAKANA LETTER ZI + SMALL YA +ジョ → jo ; # KATAKANA LETTER ZI + SMALL YO +ジュ → ju ; # 
KATAKANA LETTER ZI + SMALL YU +ジ → ji ; # KATAKANA LETTER ZI +ズ → zu ; # KATAKANA LETTER ZU +ゼ → ze ; # KATAKANA LETTER ZE +ゾウ → zō ; # KATAKANA LETTER ZO + U +ゾ → zo ; # KATAKANA LETTER ZO +ダ → da ; # KATAKANA LETTER DA +ヂ → ji ; # KATAKANA LETTER DI +ヅ → zu ; # KATAKANA LETTER DU +デ → de ; # KATAKANA LETTER DE +ドウ → dō ; # KATAKANA LETTER DO + U +ド → do ; # KATAKANA LETTER DO +バ → ba ; # KATAKANA LETTER BA +ビョウ → byō ; # KATAKANA LETTER BI + SMALL YO + U +ビュウ → byū ; # KATAKANA LETTER BI + SMALL YU + U +ビャ → bya ; # KATAKANA LETTER BI + SMALL YA +ビョ → byo ; # KATAKANA LETTER BI + SMALL YO +ビュ → byu ; # KATAKANA LETTER BI + SMALL YU +ビ → bi ; # KATAKANA LETTER BI +ブ → bu ; # KATAKANA LETTER BU +ベ → be ; # KATAKANA LETTER BE +ボウ → bō ; # KATAKANA LETTER BO + U +ボ → bo ; # KATAKANA LETTER BO +パ → pa ; # KATAKANA LETTER PA +ピョウ → pyō ; # KATAKANA LETTER PI + SMALL YO + U +ピュウ → pyū ; # KATAKANA LETTER PI + SMALL YU + U +ピャ → pya ; # KATAKANA LETTER PI + SMALL YA +ピョ → pyo ; # KATAKANA LETTER PI + SMALL YO +ピュ → pyu ; # KATAKANA LETTER PI + SMALL YU +ピ → pi ; # KATAKANA LETTER PI +プ → pu ; # KATAKANA LETTER PU +ペ → pe ; # KATAKANA LETTER PE +ポウ → pō ; # KATAKANA LETTER PO + U +ポ → po ; # KATAKANA LETTER PO +ヴ → v ; # KATAKANA LETTER VU +あ → a ; # HIRAGANA LETTER A +い → i ; # HIRAGANA LETTER I +う → u ; # HIRAGANA LETTER U +え → e ; # HIRAGANA LETTER E +おう → ō ; # HIRAGANA LETTER O + U +お → o ; # HIRAGANA LETTER O +か → ka ; # HIRAGANA LETTER KA +きょう → kyō ; # HIRAGANA LETTER KI + SMALL YO + U +きゅう → kyū ; # HIRAGANA LETTER KI + SMALL YU + U +きゃ → kya ; # HIRAGANA LETTER KI + SMALL YA +きょ → kyo ; # HIRAGANA LETTER KI + SMALL YO +きゅ → kyu ; # HIRAGANA LETTER KI + SMALL YU +き → ki ; # HIRAGANA LETTER KI +く → ku ; # HIRAGANA LETTER KU +け → ke ; # HIRAGANA LETTER KE +こう → kō ; # HIRAGANA LETTER KO + U +こ → ko ; # HIRAGANA LETTER KO +さ → sa ; # HIRAGANA LETTER SA +しょう → shō ; # HIRAGANA LETTER SI + SMALL YO + U +しゅう → shū ; # HIRAGANA LETTER SI + SMALL YU + U +しゃ → sha ; # 
HIRAGANA LETTER SI + SMALL YA +しょ → sho ; # HIRAGANA LETTER SI + SMALL YO +しゅ → shu ; # HIRAGANA LETTER SI + SMALL YU +し → shi ; # HIRAGANA LETTER SI +す → su ; # HIRAGANA LETTER SU +せ → se ; # HIRAGANA LETTER SE +そう → sō ; # HIRAGANA LETTER SO + U +そ → so ; # HIRAGANA LETTER SO +た → ta ; # HIRAGANA LETTER TA +ちょう → chō ; # HIRAGANA LETTER TI + SMALL YO + U +ちゅう → chū ; # HIRAGANA LETTER TI + SMALL YU + U +ちゃ → cha ; # HIRAGANA LETTER TI + SMALL YA +ちょ → cho ; # HIRAGANA LETTER TI + SMALL YO +ちゅ → chu ; # HIRAGANA LETTER TI + SMALL YU +ち → chi ; # HIRAGANA LETTER TI +つ → tsu ; # HIRAGANA LETTER TU +て → te ; # HIRAGANA LETTER TE +とう → tō ; # HIRAGANA LETTER TO + U +と → to ; # HIRAGANA LETTER TO +な → na ; # HIRAGANA LETTER NA +にょう → nyō ; # HIRAGANA LETTER NI + SMALL YO + U +にゅう → nyū ; # HIRAGANA LETTER NI + SMALL YU + U +にゃ → nya ; # HIRAGANA LETTER NI + SMALL YA +にょ → nyo ; # HIRAGANA LETTER NI + SMALL YO +にゅ → nyu ; # HIRAGANA LETTER NI + SMALL YU +に → ni ; # HIRAGANA LETTER NI +ぬ → nu ; # HIRAGANA LETTER NU +ね → ne ; # HIRAGANA LETTER NE +のう → nō ; # HIRAGANA LETTER NO + U +の → no ; # HIRAGANA LETTER NO +は → ha ; # HIRAGANA LETTER HA +ひょう → hyō ; # HIRAGANA LETTER HI + SMALL YO + U +ひゅう → hyū ; # HIRAGANA LETTER HI + SMALL YU + U +ひゃ → hya ; # HIRAGANA LETTER HI + SMALL YA +ひょ → hyo ; # HIRAGANA LETTER HI + SMALL YO +ひゅ → hyu ; # HIRAGANA LETTER HI + SMALL YU +ひ → hi ; # HIRAGANA LETTER HI +ふ → fu ; # HIRAGANA LETTER HU +へ → he ; # HIRAGANA LETTER HE +ほう → hō ; # HIRAGANA LETTER HO + U +ほ → ho ; # HIRAGANA LETTER HO +ま → ma ; # HIRAGANA LETTER MA +みょう → myō ; # HIRAGANA LETTER MI + SMALL YO + U +みゅう → myū ; # HIRAGANA LETTER MI + SMALL YU + U +みゃ → mya ; # HIRAGANA LETTER MI + SMALL YA +みょ → myo ; # HIRAGANA LETTER MI + SMALL YO +みゅ → myu ; # HIRAGANA LETTER MI + SMALL YU +み → mi ; # HIRAGANA LETTER MI +む → mu ; # HIRAGANA LETTER MU +め → me ; # HIRAGANA LETTER ME +もう → mō ; # HIRAGANA LETTER MO + U +も → mo ; # HIRAGANA LETTER MO +や → ya ; # HIRAGANA LETTER YA +ゆ 
→ yu ; # HIRAGANA LETTER YU +よう → yō ; # HIRAGANA LETTER YO + U +よ → yo ; # HIRAGANA LETTER YO +ら → ra ; # HIRAGANA LETTER RA +りょう → ryō ; # HIRAGANA LETTER RI + SMALL YO + U +りゅう → ryū ; # HIRAGANA LETTER RI + SMALL YU + U +りゃ → rya ; # HIRAGANA LETTER RI + SMALL YA +りょ → ryo ; # HIRAGANA LETTER RI + SMALL YO +りゅ → ryu ; # HIRAGANA LETTER RI + SMALL YU +り → ri ; # HIRAGANA LETTER RI +る → ru ; # HIRAGANA LETTER RU +れ → re ; # HIRAGANA LETTER RE +ろう → rō ; # HIRAGANA LETTER RO + U +ろ → ro ; # HIRAGANA LETTER RO +わ → wa ; # HIRAGANA LETTER WA +ゐ → i ; # HIRAGANA LETTER WI +ゑ → e ; # HIRAGANA LETTER WE +を → o ; # HIRAGANA LETTER WO +ん → n ; # HIRAGANA LETTER N +が → ga ; # HIRAGANA LETTER GA +ぎょう → gyō ; # HIRAGANA LETTER GI + SMALL YO + U +ぎゅう → gyū ; # HIRAGANA LETTER GI + SMALL YU + U +ぎゃ → gya ; # HIRAGANA LETTER GI + SMALL YA +ぎょ → gyo ; # HIRAGANA LETTER GI + SMALL YO +ぎゅ → gyu ; # HIRAGANA LETTER GI + SMALL YU +ぎ → gi ; # HIRAGANA LETTER GI +ぐ → gu ; # HIRAGANA LETTER GU +げ → ge ; # HIRAGANA LETTER GE +ごう → gō ; # HIRAGANA LETTER GO + U +ご → go ; # HIRAGANA LETTER GO +ざ → za ; # HIRAGANA LETTER ZA +じょう → jō ; # HIRAGANA LETTER ZI + SMALL YO + U +じゅう → jū ; # HIRAGANA LETTER ZI + SMALL YU + U +じゃ → ja ; # HIRAGANA LETTER ZI + SMALL YA +じょ → jo ; # HIRAGANA LETTER ZI + SMALL YO +じゅ → ju ; # HIRAGANA LETTER ZI + SMALL YU +じ → ji ; # HIRAGANA LETTER ZI +ず → zu ; # HIRAGANA LETTER ZU +ぜ → ze ; # HIRAGANA LETTER ZE +ぞう → zō ; # HIRAGANA LETTER ZO + U +ぞ → zo ; # HIRAGANA LETTER ZO +だ → da ; # HIRAGANA LETTER DA +ぢ → ji ; # HIRAGANA LETTER DI +づ → zu ; # HIRAGANA LETTER DU +で → de ; # HIRAGANA LETTER DE +どう → dō ; # HIRAGANA LETTER DO + U +ど → do ; # HIRAGANA LETTER DO +ば → ba ; # HIRAGANA LETTER BA +びょう → byō ; # HIRAGANA LETTER BI + SMALL YO + U +びゅう → byū ; # HIRAGANA LETTER BI + SMALL YU + U +びゃ → bya ; # HIRAGANA LETTER BI + SMALL YA +びょ → byo ; # HIRAGANA LETTER BI + SMALL YO +びゅ → byu ; # HIRAGANA LETTER BI + SMALL YU +び → bi ; # HIRAGANA LETTER BI +ぶ → bu ; # 
HIRAGANA LETTER BU +べ → be ; # HIRAGANA LETTER BE +ぼう → bō ; # HIRAGANA LETTER BO + U +ぼ → bo ; # HIRAGANA LETTER BO +ぱ → pa ; # HIRAGANA LETTER PA +ぴょう → pyō ; # HIRAGANA LETTER PI + SMALL YO + U +ぴゅう → pyū ; # HIRAGANA LETTER PI + SMALL YU + U +ぴゃ → pya ; # HIRAGANA LETTER PI + SMALL YA +ぴょ → pyo ; # HIRAGANA LETTER PI + SMALL YO +ぴゅ → pyu ; # HIRAGANA LETTER PI + SMALL YU +ぴ → pi ; # HIRAGANA LETTER PI +ぷ → pu ; # HIRAGANA LETTER PU +ぺ → pe ; # HIRAGANA LETTER PE +ぽう → pō ; # HIRAGANA LETTER PO + U +ぽ → po ; # HIRAGANA LETTER PO +ゔ → v ; # HIRAGANA LETTER VU +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/ka_ka_Latn_BGN.txt b/icu4c/source/data/translit/ka_ka_Latn_BGN.txt new file mode 100644 index 00000000000..39d352fad87 --- /dev/null +++ b/icu4c/source/data/translit/ka_ka_Latn_BGN.txt @@ -0,0 +1,49 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ka_ka_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 2009 System +# +# The BGN/PCGN system for Georgian was designed for use in romanizing +# Georgian-language names written in the Mkhedruli alphabet. +# +# https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/499646/ROMANIZATION_SYSTEM_FOR_GEORGIAN.PDF +::[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ]; +::NFC; +ა → a; +ბ → b; +გ → g; +დ → d; +ე → e; +ვ → v; +ზ → z; +თ → t; +ი → i; +კ → k’; +ლ → l; +მ → m; +ნ → n; +ო → o; +პ → p’; +ჟ → zh; +რ → r; +ს → s; +ტ → t’; +უ → u; +ფ → p; +ქ → k; +ღ → gh; +ყ → q’; +შ → sh; +ჩ → ch; +ც → ts; +ძ → dz; +წ → ts’; +ჭ → ch’; +ხ → kh; +ჯ → j; +ჰ → h; + diff --git a/icu4c/source/data/translit/ka_ka_Latn_BGN_1981.txt b/icu4c/source/data/translit/ka_ka_Latn_BGN_1981.txt new file mode 100644 index 00000000000..9b805bc7bd0 --- /dev/null +++ b/icu4c/source/data/translit/ka_ka_Latn_BGN_1981.txt @@ -0,0 +1,60 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ka_ka_Latn_BGN_1981.txt +# Generated from CLDR +# + +######################################################################## +# BGN/PCGN 1981 System +# +# The BGN/PCGN system for Georgian was designed for use in romanizing +# names written in the Georgian alphabet. The alphabet shown here is +# known as the Mkhedruli alphabet and is the alphabet presently +# used in the Republic of Georgia. +# +# The Georgian Alphabet as defined by the BGN (Page 27): +# +# აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ +# +# Originally prepared by Michael Everson +######################################################################## +:: [აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ] ; +:: NFD (NFC) ; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +$wordBoundary = [^[:L:][:M:][:N:]] ; +ა → a ; # GEORGIAN LETTER AN +ბ → b ; # GEORGIAN LETTER BAN +გ → g ; # GEORGIAN LETTER GAN +დ → d ; # GEORGIAN LETTER DON +ე → e ; # GEORGIAN LETTER EN +ვ → v ; # GEORGIAN LETTER VIN +ზ → z ; # GEORGIAN LETTER ZEN +თ → t’ ; # GEORGIAN LETTER TAN +ი → i ; # GEORGIAN LETTER IN +კ → k ; # GEORGIAN LETTER KAN +ლ → l ; # GEORGIAN LETTER LAS +მ → m ; # GEORGIAN LETTER MAN +ნ → n ; # GEORGIAN LETTER NAR +ო → o ; # GEORGIAN LETTER ON +პ → p ; # GEORGIAN LETTER PAR +ჟ → zh ; # GEORGIAN LETTER ZHAR +რ → r ; # GEORGIAN LETTER RAE +ს → s ; # GEORGIAN LETTER SAN +ტ → t ; # GEORGIAN LETTER TAR +უ → u ; # GEORGIAN LETTER UN +ფ → p’ ; # GEORGIAN LETTER PHAR +ქ → k’ ; # GEORGIAN LETTER KHAR +ღ → gh ; # GEORGIAN LETTER GHAN +ყ → q ; # GEORGIAN LETTER QAR +შ → sh ; # GEORGIAN LETTER SHIN +ჩ → ch’ ; # GEORGIAN LETTER CHIN +ც → ts’ ; # GEORGIAN LETTER CAN +ძ → dz ; # GEORGIAN LETTER JIL +წ → ts ; # GEORGIAN LETTER CIL +ჭ → ch ; # GEORGIAN LETTER CHAR +ხ → kh ; # GEORGIAN LETTER XAN +ჯ → j ; # GEORGIAN LETTER JHAN +ჰ → h ; # GEORGIAN LETTER HAE + 
diff --git a/icu4c/source/data/translit/kk_kk_Latn_BGN.txt b/icu4c/source/data/translit/kk_kk_Latn_BGN.txt new file mode 100644 index 00000000000..d9ef30e974c --- /dev/null +++ b/icu4c/source/data/translit/kk_kk_Latn_BGN.txt @@ -0,0 +1,338 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: kk_kk_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Kazakh Cyrillic was designed for use in +# romanizing names written in the Kazakh Cyrillic alphabet. +# The Kazakh Cyrillic alphabet contains nine letters not present +# in the Russian alphabet: Әә, Ғғ, Ққ, Ңң, Өө, Ұұ, Үү, Һһ, and Іі. +# +# The Kazakh Cyrillic Alphabet as defined by the BGN (Page 47): +# +# АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ +# аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: KazakhCyrl-Latin +# +:: [АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯаәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГҒДЖЗЙКҚЛМНҢПРСТФХҺЦЧШЩЪЬ] ; +$lowerConsonants = [бвгғджзйкқлмнңпрстфхһцчшщъь] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АӘЕЁИОӨУҰҮЫІЭЮЯ] ; +$lowerVowels = [аәеёиоөуұүыіэюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# 
http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Ә → Ä ; # CYRILLIC CAPITAL LETTER SCHWA +ә → ä ; # CYRILLIC SMALL LETTER SCHWA +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# The character sequences гһ, зһ, кһ, нг, сһ and цһ may be romanized +# g·h, z·h, k·h, n·g, s·h and ts·h in order to differentiate those +# romanizations from the digraphs gh, zh, kh, ng, sh, and the letter +# sequence tsh, which are used to render the characters ғ, ж, х, ң, ш, +# and the character sequence тш. 
+# +######################################################################## +# +ГҺ → G·H ; # CYRILLIC CAPITAL LETTER GHE +Гһ → G·h ; # CYRILLIC CAPITAL LETTER GHE +гһ → g·h ; # CYRILLIC SMALL LETTER GHE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ғ} $lower → Gh ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +Ғ → GH ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +ғ → gh ; # CYRILLIC SMALL LETTER GHE WITH STROKE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # CYRILLIC CAPITAL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# зһ becomes z·h +# +######################################################################## +# +ЗҺ → Z·H ; # CYRILLIC CAPITAL LETTER ZE +Зһ → Z·h ; # CYRILLIC CAPITAL LETTER ZE +зһ → z·h ; # CYRILLIC SMALL LETTER ZE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +И → Ī ; # CYRILLIC CAPITAL LETTER I +и → ī ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# кһ becomes k·h +# +######################################################################## +# +КҺ → K·H ; # CYRILLIC CAPITAL LETTER KA +Кһ → K·h ; # 
CYRILLIC CAPITAL LETTER KA +кһ → k·h ; # CYRILLIC SMALL LETTER KA +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Қ → Q ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +қ → q ; # CYRILLIC SMALL LETTER KA WITH DESCENDER +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# нг becomes n·g +# +######################################################################## +# +НГ → N·G ; # CYRILLIC CAPITAL LETTER EN +Нг → N·g ; # CYRILLIC CAPITAL LETTER EN +нг → n·g ; # CYRILLIC SMALL LETTER EN +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ң} $lower → Ng ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +Ң → NG ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +ң → ng ; # CYRILLIC SMALL LETTER EN WITH DESCENDER +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# сһ becomes s·h +# +######################################################################## +# +СҺ → S·H ; # CYRILLIC CAPITAL LETTER ES +Сһ → S·h ; # CYRILLIC CAPITAL LETTER ES +сһ → s·h ; # CYRILLIC SMALL LETTER ES +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # 
CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → Ū ; # CYRILLIC CAPITAL LETTER U +у → ū ; # CYRILLIC SMALL LETTER U +Ұ → U ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +ұ → u ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +Ү → Ü ; # CYRILLIC CAPITAL LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +Һ → H ; # CYRILLIC CAPITAL LETTER SHHA +һ → h ; # CYRILLIC SMALL LETTER SHHA +# +# +######################################################################## +# +# BGN Page 48 Rule 1 +# +# цһ becomes ts·h +# +######################################################################## +# +ЦҺ → TS·H ; # CYRILLIC CAPITAL LETTER TSE +Цһ → Ts·h ; # CYRILLIC CAPITAL LETTER TSE +цһ → ts·h ; # CYRILLIC SMALL LETTER TSE +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +# +# +######################################################################## +# +# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). 
+# +# шч becomes sh·ch +# +######################################################################## +# +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Implied rule +# +######################################################################## +# +Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN +# +# +######################################################################## +# +# BGN Page 48 Note 2 +# +# The character Ыы may be romanized Ɨɨ instead of Yy, if so desired. +# +######################################################################## +# +Ы → Y ; # CYRILLIC CAPITAL LETTER YERU +ы → y ; # CYRILLIC SMALL LETTER YERU +# +# +# Alternative rule to implement the option described here. To apply +# uncomment the following by removing the '#' mark at the start of the +# line and insert before the two rule lines above. 
+# +#Ы → Ɨ ; # CYRILLIC CAPITAL LETTER YERU +#ы → ɨ ; # CYRILLIC SMALL LETTER YERU +# +######################################################################## +# +# End BGN Page 48 Note 2 +# +######################################################################## +# +І → I ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +і → i ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → Ė ; # CYRILLIC CAPITAL LETTER E +э → ė ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/ko_ko_Latn_BGN.txt b/icu4c/source/data/translit/ko_ko_Latn_BGN.txt new file mode 100644 index 00000000000..ff54254be1b --- /dev/null +++ b/icu4c/source/data/translit/ko_ko_Latn_BGN.txt @@ -0,0 +1,351 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ko_ko_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN Agreement +# +# This system was devised by G. M. McCune and E. O. Reischauer, and +# was originally published in the Transactions of the Korea Branch of +# the Royal Asiatic Society, Volume XXIX, 1939. It has been used by +# the BGN since 1943, and was later adopted for use by the PCGN. A +# main characteristic of this system is the attempt to represent +# approximate Korean pronunciation, while systematically converting +# the Hangul characters to corresponding Roman-script letters. 
Since +# Korean pronunciation is often inconsistently represented in Hangul, +# the McCune-Reischauer conversion tables are rather elaborate, and +# reverse conversion (from Roman script back to Hangul) is not possible. +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Korean-Latin +# +:: [ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑᄒᄭᄯᄲᄶᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆫᆮᆯᆰᆱᆲᆷᆸᆺᆼᆽᆾᆿᇀᇁ] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$aspirate = ’; +$apostrophe = ’; +$vowels = [ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +######################################################################## +# +# BGN Page 60 Rule 1: +# +# Romanization of Hangul consonants and consonant clusters within words. 
+# +######################################################################## +# +ᆨᄀ → kk ; # HANGUL JONGSEONG KIYEOK + CHOSEONG KIYEOK +ᆨᄂ → ngn ; # HANGUL JONGSEONG KIYEOK + CHOSEONG NIEUN +ᆨᄃ → kt ; # HANGUL JONGSEONG KIYEOK + CHOSEONG TIKEUT +ᆨᄅ → ngn ; # HANGUL JONGSEONG KIYEOK + CHOSEONG RIEUL +ᆨᄆ → ngm ; # HANGUL JONGSEONG KIYEOK + CHOSEONG MIEUM +ᆨᄇ → kp ; # HANGUL JONGSEONG KIYEOK + CHOSEONG PIEUP +ᆨᄉ → ks ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SIOS +ᆨᄋ → g ; # HANGUL JONGSEONG KIYEOK + CHOSEONG IEUNG +ᆨᄌ → kch ; # HANGUL JONGSEONG KIYEOK + CHOSEONG CIEUC +ᆨᄎ → kch $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG CHIEUCH +ᆨᄏ → kk $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG KHIEUKH +ᆨᄐ → kt $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG THIEUTH +ᆨᄑ → kp $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG PHIEUPH +ᆨᄒ → kh ; # HANGUL JONGSEONG KIYEOK + CHOSEONG HIEUH +ᆨᄁ → kk ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGKIYEOK +ᆨᄄ → ktt ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGTIKEUT +ᆨᄈ → kpp ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGPIEUP +ᆨᄊ → kss ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGSIOS +ᆨᄍ → ktch ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGCIEUC +ᆫᄀ → n $apostrophe g ; # HANGUL JONGSEONG NIEUN + CHOSEONG KIEUK +ᆫᄂ → nn ; # HANGUL JONGSEONG NIEUN + CHOSEONG NIEUN +ᆫᄃ → nd ; # HANGUL JONGSEONG NIEUN + CHOSEONG TIKEUT +ᆫᄅ → ll ; # HANGUL JONGSEONG NIEUN + CHOSEONG RIEUL +ᆫᄆ → nm ; # HANGUL JONGSEONG NIEUN + CHOSEONG MIEUM +ᆫᄇ → nb ; # HANGUL JONGSEONG NIEUN + CHOSEONG PIEUP +ᆫᄉ → ns ; # HANGUL JONGSEONG NIEUN + CHOSEONG SIOS +ᆫᄋ → n ; # HANGUL JONGSEONG NIEUN + CHOSEONG IEUNG +ᆫᄌ → nj ; # HANGUL JONGSEONG NIEUN + CHOSEONG CIEUC +ᆫᄎ → nch $aspirate ; # HANGUL JONGSEONG NIEUN + CHOSEONG CHIEUCH +ᆫᄏ → nk $aspirate ; # HANGUL JONGSEONG NIEUN + CHOSEONG KHIEUKH +ᆫᄐ → nt $aspirate ; # HANGUL JONGSEONG NIEUN + CHOSEONG THIEUTH +ᆫᄑ → np $aspirate ; # HANGUL JONGSEONG NIEUN + CHOSEONG PHIEUPH +ᆫᄒ → nh ; # HANGUL JONGSEONG NIEUN + CHOSEONG HIEUH +ᆫᄁ → 
nkk ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGKIYEOK +ᆫᄄ → ntt ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGTIKEUT +ᆫᄈ → npp ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGPIEUP +ᆫᄊ → nss ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGSIOS +ᆫᄍ → ntch ; # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGCIEUC +ᆯᄀ → lg ; # HANGUL JONGSEONG RIEUL + CHOSEONG KIYEOK +ᆯᄂ → ll ; # HANGUL JONGSEONG RIEUL + CHOSEONG NIEUN +ᆯᄃ → lt ; # HANGUL JONGSEONG RIEUL + CHOSEONG TIKEUT +ᆯᄅ → ll ; # HANGUL JONGSEONG RIEUL + CHOSEONG RIEUL +ᆯᄆ → lm ; # HANGUL JONGSEONG RIEUL + CHOSEONG MIEUM +ᆯᄇ → lb ; # HANGUL JONGSEONG RIEUL + CHOSEONG PIEUP +ᆯᄉ → ls ; # HANGUL JONGSEONG RIEUL + CHOSEONG SIOS +ᆯᄋ → r ; # HANGUL JONGSEONG RIEUL + CHOSEONG IEUNG +ᆯᄌ → lch ; # HANGUL JONGSEONG RIEUL + CHOSEONG CIEUC +ᆯᄎ → lch $aspirate ; # HANGUL JONGSEONG RIEUL + CHOSEONG CHIEUCH +ᆯᄏ → lk $aspirate ; # HANGUL JONGSEONG RIEUL + CHOSEONG KHIEUKH +ᆯᄐ → lt $aspirate ; # HANGUL JONGSEONG RIEUL + CHOSEONG THIEUTH +ᆯᄑ → lp $aspirate ; # HANGUL JONGSEONG RIEUL + CHOSEONG PHIEUPH +ᆯᄒ → rh ; # HANGUL JONGSEONG RIEUL + CHOSEONG HIEUH +ᆯᄁ → lkk ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGKIYEOK +ᆯᄄ → ltt ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGTIKEUT +ᆯᄈ → lpp ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGPIEUP +ᆯᄊ → lss ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGSIOS +ᆯᄍ → ltch ; # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGCIEUC +ᆷᄀ → mg ; # HANGUL JONGSEONG MIEUM + CHOSEONG KIYEOK +ᆷᄂ → mn ; # HANGUL JONGSEONG MIEUM + CHOSEONG NIEUN +ᆷᄃ → md ; # HANGUL JONGSEONG MIEUM + CHOSEONG TIKEUT +ᆷᄅ → mn ; # HANGUL JONGSEONG MIEUM + CHOSEONG RIEUL +ᆷᄆ → mm ; # HANGUL JONGSEONG MIEUM + CHOSEONG MIEUM +ᆷᄇ → mb ; # HANGUL JONGSEONG MIEUM + CHOSEONG PIEUP +ᆷᄉ → ms ; # HANGUL JONGSEONG MIEUM + CHOSEONG SIOS +ᆷᄋ → m ; # HANGUL JONGSEONG MIEUM + CHOSEONG IEUNG +ᆷᄌ → mj ; # HANGUL JONGSEONG MIEUM + CHOSEONG CIEUC +ᆷᄎ → mch $aspirate ; # HANGUL JONGSEONG MIEUM + CHOSEONG CHIEUCH +ᆷᄏ → mk $aspirate ; # HANGUL JONGSEONG MIEUM + CHOSEONG KHIEUKH +ᆷᄐ → mt 
$aspirate ; # HANGUL JONGSEONG MIEUM + CHOSEONG THIEUTH +ᆷᄑ → mp $aspirate ; # HANGUL JONGSEONG MIEUM + CHOSEONG PHIEUPH +ᆷᄒ → mh ; # HANGUL JONGSEONG MIEUM + CHOSEONG HIEUH +ᆷᄁ → mkk ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGKIYEOK +ᆷᄄ → mtt ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGTIKEUT +ᆷᄈ → mpp ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGPIEUP +ᆷᄊ → mss ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGSIOS +ᆷᄍ → mtch ; # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGCIEUC +ᆸᄀ → pk ; # HANGUL JONGSEONG PIEUP + CHOSEONG KIYEOK +ᆸᄂ → mn ; # HANGUL JONGSEONG PIEUP + CHOSEONG NIEUN +ᆸᄃ → pt ; # HANGUL JONGSEONG PIEUP + CHOSEONG TIKEUT +ᆸᄅ → mn ; # HANGUL JONGSEONG PIEUP + CHOSEONG RIEUL +ᆸᄆ → mm ; # HANGUL JONGSEONG PIEUP + CHOSEONG MIEUM +ᆸᄇ → pp ; # HANGUL JONGSEONG PIEUP + CHOSEONG PIEUP +ᆸᄉ → ps ; # HANGUL JONGSEONG PIEUP + CHOSEONG SIOS +ᆸᄋ → p ; # HANGUL JONGSEONG PIEUP + CHOSEONG IEUNG +ᆸᄌ → pch ; # HANGUL JONGSEONG PIEUP + CHOSEONG CIEUC +ᆸᄎ → pch $aspirate ; # HANGUL JONGSEONG PIEUP + CHOSEONG CHIEUCH +ᆸᄏ → pk $aspirate ; # HANGUL JONGSEONG PIEUP + CHOSEONG KHIEUKH +ᆸᄐ → pt $aspirate ; # HANGUL JONGSEONG PIEUP + CHOSEONG THIEUTH +ᆸᄑ → pp $aspirate ; # HANGUL JONGSEONG PIEUP + CHOSEONG PHIEUPH +ᆸᄒ → ph ; # HANGUL JONGSEONG PIEUP + CHOSEONG HIEUH +ᆸᄁ → pkk ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGKIYEOK +ᆸᄄ → ptt ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGTIKEUT +ᆸᄈ → pp ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGPIEUP +ᆸᄊ → pss ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGSIOS +ᆸᄍ → ptch ; # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGCIEUC +ᆺᄀ → kk ; # HANGUL JONGSEONG SIOS + CHOSEONG KIYEOK +ᆺᄂ → nn ; # HANGUL JONGSEONG SIOS + CHOSEONG NIEUN +ᆺᄃ → tt ; # HANGUL JONGSEONG SIOS + CHOSEONG TIKEUT +ᆺᄅ → nn ; # HANGUL JONGSEONG SIOS + CHOSEONG RIEUL +ᆺᄆ → nm ; # HANGUL JONGSEONG SIOS + CHOSEONG MIEUM +ᆺᄇ → pp ; # HANGUL JONGSEONG SIOS + CHOSEONG PIEUP +ᆺᄉ → ss ; # HANGUL JONGSEONG SIOS + CHOSEONG SIOS +ᆺᄋ → d ; # HANGUL JONGSEONG SIOS + CHOSEONG IEUNG +ᆺᄌ → tch ; # HANGUL 
JONGSEONG SIOS + CHOSEONG CIEUC +ᆺᄎ → tch $aspirate ; # HANGUL JONGSEONG SIOS + CHOSEONG CHIEUCH +ᆺᄏ → tk $aspirate ; # HANGUL JONGSEONG SIOS + CHOSEONG KHIEUKH +ᆺᄐ → tt $aspirate ; # HANGUL JONGSEONG SIOS + CHOSEONG THIEUTH +ᆺᄑ → tp $aspirate ; # HANGUL JONGSEONG SIOS + CHOSEONG PHIEUPH +ᆺᄒ → th ; # HANGUL JONGSEONG SIOS + CHOSEONG HIEUH +ᆺᄁ → tkk ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGKIYEOK +ᆺᄄ → tt ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGTIKEUT +ᆺᄈ → tpp ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGPIEUP +ᆺᄊ → tss ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGSIOS +ᆺᄍ → tch ; # HANGUL JONGSEONG SIOS + CHOSEONG SSANGCIEUC +ᆼᄀ → ngg ; # HANGUL JONGSEONG IEUNG + CHOSEONG KIYEOK +ᆼᄂ → ngn ; # HANGUL JONGSEONG IEUNG + CHOSEONG NIEUN +ᆼᄃ → ngd ; # HANGUL JONGSEONG IEUNG + CHOSEONG TIKEUT +ᆼᄅ → ngn ; # HANGUL JONGSEONG IEUNG + CHOSEONG RIEUL +ᆼᄆ → ngm ; # HANGUL JONGSEONG IEUNG + CHOSEONG MIEUM +ᆼᄇ → ngb ; # HANGUL JONGSEONG IEUNG + CHOSEONG PIEUP +ᆼᄉ → ngs ; # HANGUL JONGSEONG IEUNG + CHOSEONG SIOS +ᆼᄋ → ng ; # HANGUL JONGSEONG IEUNG + CHOSEONG IEUNG +ᆼᄌ → ngj ; # HANGUL JONGSEONG IEUNG + CHOSEONG CIEUC +ᆼᄎ → ngch $aspirate ; # HANGUL JONGSEONG IEUNG + CHOSEONG CHIEUCH +ᆼᄏ → ngk $aspirate ; # HANGUL JONGSEONG IEUNG + CHOSEONG KHIEUKH +ᆼᄐ → ngt $aspirate ; # HANGUL JONGSEONG IEUNG + CHOSEONG THIEUTH +ᆼᄑ → ngp $aspirate ; # HANGUL JONGSEONG IEUNG + CHOSEONG PHIEUPH +ᆼᄒ → ngh ; # HANGUL JONGSEONG IEUNG + CHOSEONG HIEUH +ᆼᄁ → ngkk ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGKIYEOK +ᆼᄄ → ngtt ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGTIKEUT +ᆼᄈ → ngpp ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGPIEUP +ᆼᄊ → ngss ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGSIOS +ᆼᄍ → ngtch ; # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGCIEUC +[$vowels]ᄀ → g ; # HANGUL JONGSEONG KIYEOK + CHOSEONG KIYEOK +[$vowels]ᄂ → n ; # HANGUL JONGSEONG KIYEOK + CHOSEONG NIEUN +[$vowels]ᄃ → d ; # HANGUL JONGSEONG KIYEOK + CHOSEONG TIKEUT +[$vowels]ᄅ → r ; # HANGUL JONGSEONG KIYEOK + CHOSEONG RIEUL +[$vowels]ᄆ → m 
; # HANGUL JONGSEONG KIYEOK + CHOSEONG MIEUM +[$vowels]ᄇ → b ; # HANGUL JONGSEONG KIYEOK + CHOSEONG PIEUP +[$vowels]ᄉ → s ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SIOS +[$vowels]ᄋ → ; # HANGUL JONGSEONG KIYEOK + CHOSEONG IEUNG +[$vowels]ᄌ → j ; # HANGUL JONGSEONG KIYEOK + CHOSEONG CIEUC +[$vowels]ᄎ → ch $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG CHIEUCH +[$vowels]ᄏ → k $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG KHIEUKH +[$vowels]ᄐ → t $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG THIEUTH +[$vowels]ᄑ → p $aspirate ; # HANGUL JONGSEONG KIYEOK + CHOSEONG PHIEUPH +[$vowels]ᄒ → h ; # HANGUL JONGSEONG KIYEOK + CHOSEONG HIEUH +[$vowels]ᄁ → kk ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGKIYEOK +[$vowels]ᄄ → tt ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGTIKEUT +[$vowels]ᄈ → pp ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGPIEUP +[$vowels]ᄊ → ss ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGSIOS +[$vowels]ᄍ → tch ; # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGCIEUC +ᆰᄀ → lg ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KIYEOK +ᆰᄂ → ngn ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG NIEUN +ᆰᄃ → kt ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG TIKEUT +ᆰᄅ → ngl ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG RIEUL +ᆰᄆ → ngm ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG MIEUM +ᆰᄇ → kp ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PIEUP +ᆰᄉ → ks ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SIOS +ᆰᄋ → lg ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG IEUNG +ᆰᄌ → kch ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CIEUC +ᆰᄎ → kch $aspirate ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CHIEUCH +ᆰᄏ → lk $aspirate ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KHIEUKH +ᆰᄐ → kt $aspirate ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG THIEUTH +ᆰᄑ → kp $aspirate ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PHIEUPH +ᆰᄒ → lkh ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG HIEUH +ᆰᄁ → lkk ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGKIYEOK +ᆰᄄ → ktt ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG 
SSANGTIKEUT +ᆰᄈ → kpp ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGPIEUP +ᆰᄊ → kss ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGSIOS +ᆰᄍ → ktch ; # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGCIEUC +ᆱᄀ → mg ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KIYEOK +ᆱᄂ → mn ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG NIEUN +ᆱᄃ → md ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG TIKEUT +ᆱᄅ → ml ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG RIEUL +ᆱᄆ → lm ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG MIEUM +ᆱᄇ → mb ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PIEUP +ᆱᄉ → ms ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SIOS +ᆱᄋ → lm ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG IEUNG +ᆱᄌ → mj ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CIEUC +ᆱᄎ → mch $aspirate ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CHIEUCH +ᆱᄏ → mk $aspirate ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KHIEUKH +ᆱᄐ → mt $aspirate ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG THIEUTH +ᆱᄑ → mp $aspirate ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PHIEUPH +ᆱᄒ → mh ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG HIEUH +ᆱᄁ → mkk ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGKIYEOK +ᆱᄄ → mtt ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGTIKEUT +ᆱᄈ → mpp ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGPIEUP +ᆱᄊ → mss ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGSIOS +ᆱᄍ → mtch ; # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGCIEUC +ᆲᄀ → pk ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KIYEOK +ᆲᄂ → mn ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG NIEUN +ᆲᄃ → pt ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG TIKEUT +ᆲᄅ → ml ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG RIEUL +ᆲᄆ → mm ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG MIEUM +ᆲᄇ → lb ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PIEUP +ᆲᄉ → ps ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SIOS +ᆲᄋ → lb ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG IEUNG +ᆲᄌ → pch ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CIEUC +ᆲᄎ → pch $aspirate ; # HANGUL JONGSEONG RIEUL-PIEUP 
+ CHOSEONG CHIEUCH +ᆲᄏ → pk $aspirate ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KHIEUKH +ᆲᄐ → pt $aspirate ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG THIEUTH +ᆲᄑ → lp $aspirate ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PHIEUPH +ᆲᄒ → lph ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG HIEUH +ᆲᄁ → pkk ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGKIYEOK +ᆲᄄ → ptt ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGTIKEUT +ᆲᄈ → lpp ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGPIEUP +ᆲᄊ → pss ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGSIOS +ᆲᄍ → ptch ; # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGCIEUC +# +# +######################################################################## +# +# End of Rule 1 +# +######################################################################## +# +######################################################################## +# +# Start of Transformations +# +######################################################################## +$wordBoundary{ᄀ → k ; # HANGUL CHOSEONG KIYEOK +$wordBoundary{ᄂ → n ; # HANGUL CHOSEONG NIEUN +$wordBoundary{ᄃ → t ; # HANGUL CHOSEONG TIKEUT +$wordBoundary{ᄅ → n ; # HANGUL CHOSEONG RIEUL +$wordBoundary{ᄆ → m ; # HANGUL CHOSEONG MIEUM +$wordBoundary{ᄇ → p ; # HANGUL CHOSEONG PIEUP +$wordBoundary{ᄉ → s ; # HANGUL CHOSEONG SIOS +$wordBoundary{ᄋ → ; # HANGUL CHOSEONG IEUNG +$wordBoundary{ᄌ → ch ; # HANGUL CHOSEONG CIEUC +$wordBoundary{ᄎ → ch $aspirate ; # HANGUL CHOSEONG CHIEUCH +$wordBoundary{ᄏ → k $aspirate ; # HANGUL CHOSEONG KHIEUKH +$wordBoundary{ᄐ → t $aspirate ; # HANGUL CHOSEONG THIEUTH +$wordBoundary{ᄑ → p $aspirate ; # HANGUL CHOSEONG PHIEUPH +$wordBoundary{ᄒ → h ; # HANGUL CHOSEONG HIEUH +$wordBoundary{ᄁ → kk ; # HANGUL CHOSEONG SSANGKIYEOK +$wordBoundary{ᄭ → kk ; # HANGUL CHOSEONG SIOS-KIYEOK +$wordBoundary{ᄄ → tt ; # HANGUL CHOSEONG SSANGTIKEUT +$wordBoundary{ᄯ → tt ; # HANGUL CHOSEONG SIOS-TIKEUT +$wordBoundary{ᄈ → pp ; # HANGUL CHOSEONG SSANGPIEUP +$wordBoundary{ᄲ → pp ; # HANGUL CHOSEONG 
SIOS-PIEUP +$wordBoundary{ᄊ → ss ; # HANGUL CHOSEONG SSANGSIOS +$wordBoundary{ᄍ → tch ; # HANGUL CHOSEONG SSANGCIEUC +$wordBoundary{ᄶ → tch ; # HANGUL CHOSEONG SIOS-CIEUC +ᅡ → a ; # HANGUL JUNGSEONG A +ᅣ → ya ; # HANGUL JUNGSEONG YA +ᅥ → ŏ ; # HANGUL JUNGSEONG EO +ᅧ → yŏ ; # HANGUL JUNGSEONG YEO +ᅩ → o ; # HANGUL JUNGSEONG O +ᅭ → yo ; # HANGUL JUNGSEONG YO +ᅮ → u ; # HANGUL JUNGSEONG U +ᅲ → yu ; # HANGUL JUNGSEONG YU +ᅳ → ŭ ; # HANGUL JUNGSEONG EU +ᅵ → i ; # HANGUL JUNGSEONG I +ᅢ → ae ; # HANGUL JUNGSEONG AE +ᅤ → yae ; # HANGUL JUNGSEONG YAE +ᅦ → e ; # HANGUL JUNGSEONG E +ᅨ → ye ; # HANGUL JUNGSEONG YE +ᅬ → oe ; # HANGUL JUNGSEONG OE +ᅱ → wi ; # HANGUL JUNGSEONG WI +ᅴ → ŭi ; # HANGUL JUNGSEONG YI +ᅪ → wa ; # HANGUL JUNGSEONG WA +ᅯ → wŏ ; # HANGUL JUNGSEONG WEO +ᅫ → wae ; # HANGUL JUNGSEONG WAE +ᅰ → we ; # HANGUL JUNGSEONG WE +ᆨ}$wordBoundary → k ; # HANGUL JONGSEONG KIYEOK +ᆫ}$wordBoundary → n ; # HANGUL JONGSEONG NIEUN +ᆮ}$wordBoundary → t ; # HANGUL JONGSEONG TIKEUT +ᆯ}$wordBoundary → l ; # HANGUL JONGSEONG RIEUL +ᆷ}$wordBoundary → m ; # HANGUL JONGSEONG MIEUM +ᆸ}$wordBoundary → p ; # HANGUL JONGSEONG PIEUP +ᆺ}$wordBoundary → t ; # HANGUL JONGSEONG SIOS +ᆼ}$wordBoundary → ng ; # HANGUL JONGSEONG IEUNG +ᆽ}$wordBoundary → t ; # HANGUL JONGSEONG CIEUC +ᆾ}$wordBoundary → t ; # HANGUL JONGSEONG CHIEUCH +ᆿ}$wordBoundary → k ; # HANGUL JONGSEONG KHIEUKH +ᇀ}$wordBoundary → t ; # HANGUL JONGSEONG THIEUTH +ᇁ}$wordBoundary → p ; # HANGUL JONGSEONG PHIEUPH +ᆰ}$wordBoundary → k ; # HANGUL JONGSEONG RIEUL-KIYEOK +ᆲ}$wordBoundary → p ; # HANGUL JONGSEONG RIEUL-PIEUP +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/ky_ky_Latn_BGN.txt b/icu4c/source/data/translit/ky_ky_Latn_BGN.txt new file mode 100644 index 00000000000..0f14c66466e --- /dev/null +++ b/icu4c/source/data/translit/ky_ky_Latn_BGN.txt @@ -0,0 +1,217 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ky_ky_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Kirghiz Cyrillic was designed for use in +# romanizing names written in the Kirghiz Cyrillic alphabet. +# The Kirghiz Cyrillic alphabet contains three letters not present +# in the Russian alphabet: Ңң, Өө, and Үү. +# +# The Kirghiz Cyrillic Alphabet as defined by the BGN (Page 55): +# +# АБВГДЕЁЖЗИЙКЛМНҢОӨПРСТУҮФХЦЧШЩЪЫЬЭЮЯ +# абвгдеёжзийклмнңоөпрстуүфхцчшщъыьэюя +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: KirghizCyrl-Latin +# +:: [АБВГДЕЁЖЗИЙКЛМНҢОӨПРСТУҮФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнңоөпрстуүфхцчшщъыьэюя] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГДЖЗЙКЛМНҢПРСТФХЦЧШЩЪЬ] ; +$lowerConsonants = [бвгджзйклмнңпрстфхцчшщъь] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁИОӨУҮЫЭЮЯ] ; +$lowerVowels = [аеёиоөуүыэюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # 
CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # CYRILLIC CAPITAL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж → J ; # CYRILLIC CAPITAL LETTER ZHE +ж → j ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +# +# +######################################################################## +# +# BGN Page 56 Rule 1 +# +# The character sequence нг may be romanized n·g in order to differentiate +# that romanizations from the digraph ng, which is used to render the +# character ң. 
+# +######################################################################## +# +НГ → N·G ; # CYRILLIC CAPITAL LETTER EN +Нг → N·g ; # CYRILLIC CAPITAL LETTER EN +нг → n·g ; # CYRILLIC SMALL LETTER EN +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ң} $lower → Ng ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +Ң → NG ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +ң → ng ; # CYRILLIC SMALL LETTER EN WITH DESCENDER +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ү → Ü ; # CYRILLIC CAPITAL LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +# +# +######################################################################## +# +# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). 
+# +# шч becomes sh·ch +# +######################################################################## +# +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Rule 3.6 +# +######################################################################## +# +Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN +# +# +######################################################################## +# +# BGN Page 56 Note 2 +# +# The character Ыы may be romanized Ɨɨ instead of Yy, if so desired. +# +######################################################################## +# +Ы → Y ; # CYRILLIC CAPITAL LETTER YERU +ы → y ; # CYRILLIC SMALL LETTER YERU +# +# +# Alternative rule to implement the option described here. To apply +# uncomment the following by removing the '#' mark at the start of the +# line and insert before the two rule lines above. 
+# +#Ы → Ɨ ; # CYRILLIC CAPITAL LETTER YERU +#ы → ɨ ; # CYRILLIC SMALL LETTER YERU +# +######################################################################## +# +# End BGN Page 56 Note 2 +# +######################################################################## +# +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/mk_mk_Latn_BGN.txt b/icu4c/source/data/translit/mk_mk_Latn_BGN.txt new file mode 100644 index 00000000000..4dbd9999f86 --- /dev/null +++ b/icu4c/source/data/translit/mk_mk_Latn_BGN.txt @@ -0,0 +1,182 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: mk_mk_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1981 System +# +# Macedonian was official established as a literary language in +# Yugoslavia during World War II and is now the official language +# of Macedonia. Its alphabet is identical to Serbian, except +# that the letters Ђђ and Ћћ are replaced by Ѓѓ and Ќќ, and +# the letter Ѕѕ and the apostrophe are added. 
+# +# The Macedonian Alphabet as defined by the BGN (Page 69): +# +# АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ +# абвгдѓежзѕијклљмнњопрстќуфхцчџш’ +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Macedonian-Latin +# +:: [АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШабвгдѓежзѕијклљмнњопрстќуфхцчџш’] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ’ ; +$upperConsonants = [БВГДЃЖЗЅЈКЛЉМНЊПРСТЌФХЦЧЏШ] ; +$lowerConsonants = [бвгдѓжзѕјклљмнњпрстќфхцчџш’] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕИОУ] ; +$lowerVowels = [аеиоу] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +# +# +######################################################################## +# +# BGN Page 70 Rule 1: +# +# The character ѓ should be romanized g when it occurs before 
е +# and и. In all other instances, it should be romanized đ (Đ). +# +######################################################################## +# +Ѓ}[ЕеИи] → G ; # CYRILLIC CAPITAL LETTER GJE +ѓ}[ЕеИи] → g ; # CYRILLIC SMALL LETTER GJE +Ѓ → Đ ; # CYRILLIC CAPITAL LETTER GJE +ѓ → đ ; # CYRILLIC SMALL LETTER GJE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Е → E ; # CYRILLIC CAPITAL LETTER DE +е → e ; # CYRILLIC SMALL LETTER DE +Ж → Ž ; # CYRILLIC CAPITAL LETTER ZHE +ж → ž ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +Ѕ} $lower → Dz ; # CYRILLIC CAPITAL LETTER DZE +Ѕ → DZ ; # CYRILLIC CAPITAL LETTER DZE +ѕ → dz ; # CYRILLIC SMALL LETTER DZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Ј → J ; # CYRILLIC CAPITAL LETTER JE +ј → j ; # CYRILLIC SMALL LETTER JE +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +Љ} $lower → Lj ; # CYRILLIC CAPITAL LETTER LJE +Љ → LJ ; # CYRILLIC CAPITAL LETTER LJE +љ → lj ; # CYRILLIC SMALL LETTER LJE +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +Њ} $lower → Nj ; # CYRILLIC CAPITAL LETTER NJE +Њ → NJ ; # CYRILLIC CAPITAL LETTER NJE +њ → nj ; # CYRILLIC SMALL LETTER NJE +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +# +# +######################################################################## +# +# BGN Page 70 
Rule 2: +# +# The character ќ should be romanized k when it occurs before е +# and и. In all other instances, it should be romanized c\u0301. +# +######################################################################## +# +Ќ}[ЕеИи] → K ; # CYRILLIC CAPITAL LETTER KJE +ќ}[ЕеИи] → k ; # CYRILLIC SMALL LETTER KJE +Ќ → C\u0301 ; # CYRILLIC CAPITAL LETTER KJE +ќ → c\u0301 ; # CYRILLIC SMALL LETTER KJE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х → H ; # CYRILLIC CAPITAL LETTER HA +х → h ; # CYRILLIC SMALL LETTER HA +Ц → C ; # CYRILLIC CAPITAL LETTER TSE +ц → c ; # CYRILLIC SMALL LETTER TSE +Ч → Č ; # CYRILLIC CAPITAL LETTER CHE +ч → č ; # CYRILLIC SMALL LETTER CHE +Џ} $lower → Dž ; # CYRILLIC CAPITAL LETTER SHA +Џ → DŽ ; # CYRILLIC CAPITAL LETTER SHA +џ → dž ; # CYRILLIC SMALL LETTER SHA +Ш → Š ; # CYRILLIC CAPITAL LETTER SHA +ш → š ; # CYRILLIC SMALL LETTER SHA +# +# +######################################################################## +# +# BGN Page 69 Rule 32, maps the symbol onto itself and +# is ignored here for computational efficiency. +# +# $prime → $prime ; # RIGHT SINGLE QUOTATION MARK +# +######################################################################## + diff --git a/icu4c/source/data/translit/mn_mn_Latn_BGN.txt b/icu4c/source/data/translit/mn_mn_Latn_BGN.txt new file mode 100644 index 00000000000..dbed925d0d6 --- /dev/null +++ b/icu4c/source/data/translit/mn_mn_Latn_BGN.txt @@ -0,0 +1,157 @@ +# © 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: mn_mn_Latn_BGN.txt +# Generated from CLDR +# + +######################################################################## +# BGN/PCGN 1964 System +# +# The BGN/PCGN system for Mongolian was adopted by the BGN in 1957 +# and by the PCGN in 1964 for use in romanizing names written in +# the Mongolian Cyrillic alphabet. The Mongolian Cyrillic alphabet +# contains two letters not present in the Russian alphabet, Өө +# and Үү. Names written in the indigenous Mongolian alphabet, which +# is still utilized in the Inner Mongolia Autonomous Region of China, +# are not romanized by BGN and PCGN. Instead, for such names, +# BGN and PCGN utilize the Roman-script spellings appearing in +# official sources published by the People's Republic of China +# +# The Mongolian Alphabet as defined by the BGN (Page 73): +# +# АБВГДЕЁЖЗИЙКЛМНОӨПРСТУҮФХЦЧШЩЪЫЬЭЮЯ +# абвгдеёжзийклмноөпрстуүфхцчшщъыьэюя +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Mongolian-Latin, works both in NFC and NFD +::[АБВГДЕЁЖЗИЙКЛМНОӨПРСТУҮФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмноөпрстуүфхцчшщъыьэюя\u0308]; +::NFC; +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$upperConsonants = [БВГДЖЙКЛМНПРСТФХЦЧШЩЭ] ; +$lowerConsonants = [бвгджйклмнпрстфхцчшщэ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁЭИОУЫЮЯ] ; +$lowerVowels = [аеёэиоуыюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +$wordBoundary = [^[:L:][:M:][:N:]] ; 
+######################################################################## +# Start of Alphabetic Transformations +######################################################################## +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е} $lower → Yö ; # CYRILLIC CAPITAL LETTER IE +Е → YÖ ; # CYRILLIC CAPITAL LETTER IE +е → yö ; # CYRILLIC SMALL LETTER IE +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж → J ; # CYRILLIC CAPITAL LETTER ZHE +ж → j ; # CYRILLIC SMALL LETTER ZHE +З} $lower → Dz ; # CYRILLIC CAPITAL LETTER ZE +З → DZ ; # CYRILLIC CAPITAL LETTER ZE +з → dz ; # CYRILLIC SMALL LETTER ZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ү → Ü ; # CYRILLIC CAPITAL 
LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х → H ; # CYRILLIC CAPITAL LETTER HA +х → h ; # CYRILLIC SMALL LETTER HA +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +######################################################################## +# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). +# шч becomes sh·ch +######################################################################## +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +######################################################################## +# End Implied rule +######################################################################## +Ъ → $prime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $prime ; # CYRILLIC SMALL LETTER HARD SIGN +Ы → Ï ; # CYRILLIC CAPITAL LETTER YERU +ы → ï ; # CYRILLIC SMALL LETTER YERU +Ь → Ĭ ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → ĭ ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +######################################################################## +# +# BGN Page 74 Rule 7 +# +# In monosyllables, the character ю is romanized yu or yü depending on +# pronunciation; in polysyllables, it is romanized yu when followed by +# a, o, or u, buy yü when followed by i, e, ö, or ü. +# +# This rule is lexical and has not been implemented in this file. 
+# +######################################################################## +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +######################################################################## +# +# End Rule 7 +# +######################################################################## +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA + diff --git a/icu4c/source/data/translit/ps_ps_Latn_BGN.txt b/icu4c/source/data/translit/ps_ps_Latn_BGN.txt new file mode 100644 index 00000000000..90f48df9ef7 --- /dev/null +++ b/icu4c/source/data/translit/ps_ps_Latn_BGN.txt @@ -0,0 +1,237 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ps_ps_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1968 System +# +# This system was adopted in 1968 for the romanization of Pashto +# geographic names in Afghanistan. Persian names in Afghanistan are +# romanized in accordance with the Romanization System for Persian +# (BGN/PCGN 1958 System), shown on pages 87-92). 
+# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Pashto-Latin +# +:: [ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064E\u064F\u0650\u0651\u0652\u0654٠١٢٣٤٥٦٧٨٩ټپځڅچډړږژښگڰڼیۍې] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$alef = ’; +$ayin = ‘; +$disambig = \u0331 ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR +[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR +٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR +٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR +# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate +، ↔ ',' ; # ARABIC COMMA +؛ ↔ ';' ; # ARABIC SEMICOLON +؟ ↔ '?' 
; # ARABIC QUESTION MARK +٪ ↔ '%' ; # ARABIC PERCENT SIGN +٠ ↔ 0 $disambig ; # ARABIC-INDIC DIGIT ZERO +١ ↔ 1 $disambig ; # ARABIC-INDIC DIGIT ONE +٢ ↔ 2 $disambig ; # ARABIC-INDIC DIGIT TWO +٣ ↔ 3 $disambig ; # ARABIC-INDIC DIGIT THREE +٤ ↔ 4 $disambig ; # ARABIC-INDIC DIGIT FOUR +٥ ↔ 5 $disambig ; # ARABIC-INDIC DIGIT FIVE +٦ ↔ 6 $disambig ; # ARABIC-INDIC DIGIT SIX +٧ ↔ 7 $disambig ; # ARABIC-INDIC DIGIT SEVEN +٨ ↔ 8 $disambig ; # ARABIC-INDIC DIGIT EIGHT +٩ ↔ 9 $disambig ; # ARABIC-INDIC DIGIT NINE +۰ ↔ 0 ; # EXTENDED ARABIC-INDIC DIGIT ZERO +۱ ↔ 1 ; # EXTENDED ARABIC-INDIC DIGIT ONE +۲ ↔ 2 ; # EXTENDED ARABIC-INDIC DIGIT TWO +۳ ↔ 3 ; # EXTENDED ARABIC-INDIC DIGIT THREE +۴ ↔ 4 ; # EXTENDED ARABIC-INDIC DIGIT FOUR +۵ ↔ 5 ; # EXTENDED ARABIC-INDIC DIGIT FIVE +۶ ↔ 6 ; # EXTENDED ARABIC-INDIC DIGIT SIX +۷ ↔ 7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN +۸ ↔ 8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT +۹ ↔ 9 ; # EXTENDED ARABIC-INDIC DIGIT NINE +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +######################################################################## +# +# BGN Page 89 Rule 4 +# +# The character sequences كه , زه , سه , and گه may be romanized k·h, z·h, +# s·h, and g·h in order to differentiate those romanizations from the +# digraphs kh, zh, sh, and gh. 
+# +######################################################################## +# +كه → k·h ; # ARABIC LETTER KAF + HEH +زه → z·h ; # ARABIC LETTER ZAIN + HEH +سه → s·h ; # ARABIC LETTER SEEN + HEH +گه → g·h ; # ARABIC LETTER GAF + HEH +# +# +######################################################################## +# +# End Rule 4 +# +######################################################################## +# +######################################################################## +# +# BGN Page 91 Rule 7 +# +# Doubles consonant sounds are represented in Arabic script by +# placing a shaddah ( \u0651 ) over a consonant character. In romanization +# the letter should be doubled. [The remainder of this rule deals with +# the definite article and is lexical.] +# +######################################################################## +# +ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA +پ\u0651 → pp ; # ARABIC LETTER PEH + SHADDA +ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA +ټ\u0651 → ṯṯ ; # ARABIC LETTER TEH WITH RING + SHADDA +ث\u0651 → s\u0304s\u0304 ; # ARABIC LETTER THEH + SHADDA +ج\u0651 → jj ; # ARABIC LETTER JEEM + SHADDA +چ\u0651 → chch ; # ARABIC LETTER TCHEH + SHADDA +\u0651څ → tsts ; # ARABIC LETTER HAH WITH THREE DOTS ABOVE + SHADDA +\u0651ځ → dz ; # ARABIC LETTER HAH WITH HAMZA ABOVE + SHADDA +ح\u0651 → ḥḥ ; # ARABIC LETTER HAH + SHADDA +خ\u0651 → khkh ; # ARABIC LETTER KHAH + SHADDA +د\u0651 → dd ; # ARABIC LETTER DAL + SHADDA +\u0651ډ → ḏḏ ; # ARABIC LETTER DAL WITH RING + SHADDA +ذ\u0651 → z\u0304z\u0304 ; # ARABIC LETTER THAL + SHADDA +ر\u0651 → rr ; # ARABIC LETTER REH + SHADDA +\u0651ړ → ṟṟ ; # ARABIC LETTER REH WITH RING + SHADDA +ز\u0651 → zz ; # ARABIC LETTER ZAIN + SHADDA +ژ\u0651 → zhzh ; # ARABIC LETTER JEH + SHADDA +\u0651ژ → z\u035Fhz\u035Fh ; # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE + SHADDA +س\u0651 → ss ; # ARABIC LETTER SEEN + SHADDA +ش\u0651 → shsh ; # ARABIC LETTER SHEEN + SHADDA +\u0651ښ → s\u035Fhs\u035Fh ; # ARABIC LETTER SEEN 
WITH DOT BELOW AND DOT ABOVE +ص\u0651 → ṣṣ ; # ARABIC LETTER SAD + SHADDA +ض\u0651 → ḍḍ ; # ARABIC LETTER DAD + SHADDA +ط\u0651 → ṭṭ ; # ARABIC LETTER TAH + SHADDA +ظ\u0651 → ẓẓ ; # ARABIC LETTER ZAH + SHADDA +ع\u0651 → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA +غ\u0651 → ghgh ; # ARABIC LETTER GHAIN + SHADDA +ف\u0651 → ff ; # ARABIC LETTER FEH + SHADDA +ق\u0651 → qq ; # ARABIC LETTER QAF + SHADDA +ك\u0651 → kk ; # ARABIC LETTER KAF + SHADDA +\u0651گ → gg ; # ARABIC LETTER GAF + SHADDA +\u0651ڰ → gg ; # ARABIC LETTER GAF WITH RING + SHADDA +ل\u0651 → ll ; # ARABIC LETTER LAM + SHADDA +م\u0651 → mm ; # ARABIC LETTER MEEM + SHADDA +ن\u0651 → nn ; # ARABIC LETTER NOON + SHADDA +\u0651ڼ → ṉṉ ; # ARABIC LETTER NOON WITH RING + SHADDA +ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA +و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA +\u0651ی → yy ; # ARABIC LETTER FARSI YEH + SHADDA +ى\u0651 → yy ; # ARABIC LETTER YEH + SHADDA +# +# +######################################################################## +# +# End Rule 7 +# +######################################################################## +# +######################################################################## +# +# Start of Transformations +# +######################################################################## +# +$wordBoundary{ء → ; # ARABIC LETTER HAMZA +ء → $alef ; # ARABIC LETTER HAMZA +$wordBoundary{ا → ; # ARABIC LETTER ALEF +آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE +ب → b ; # ARABIC LETTER BEH +پ → p ; # ARABIC LETTER PEH +ت → t ; # ARABIC LETTER TEH +ټ → ṯ ; # ARABIC LETTER TEH WITH RING +ة → h ; # ARABIC LETTER TEH MARBUTA +ث → s\u0304 ; # ARABIC LETTER THEH +ج → j ; # ARABIC LETTER JEEM +چ → ch ; # ARABIC LETTER TCHEH +څ → ts ; # ARABIC LETTER HAH WITH THREE DOTS ABOVE +ځ → dz ; # ARABIC LETTER HAH WITH HAMZA ABOVE +ح → ḥ ; # ARABIC LETTER HAH +خ → kh ; # ARABIC LETTER KHAH +د → d ; # ARABIC LETTER DAL +ډ → ḏ ; # ARABIC LETTER DAL WITH RING +ذ → z\u0304 ; # ARABIC LETTER THAL +ر → r ; # 
ARABIC LETTER REH +ړ → ṟ ; # ARABIC LETTER REH WITH RING +ز → z ; # ARABIC LETTER ZAIN +ژ → zh ; # ARABIC LETTER JEH +ږ → z\u035Fh ; # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE +س → s ; # ARABIC LETTER SEEN +ش → sh ; # ARABIC LETTER SHEEN +ښ → s\u035Fh ; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE +ص → ṣ ; # ARABIC LETTER SAD +ض → ẕ ; # ARABIC LETTER DAD +ط → ṭ ; # ARABIC LETTER TAH +ظ → ẓ ; # ARABIC LETTER ZAH +ع → $ayin ; # ARABIC LETTER AIN +غ → gh ; # ARABIC LETTER GHAIN +ف → f ; # ARABIC LETTER FEH +ق → q ; # ARABIC LETTER QAF +ك → k ; # ARABIC LETTER KAF +گ → g ; # ARABIC LETTER GAF +ڰ → g ; # ARABIC LETTER GAF WITH RING +ل → l ; # ARABIC LETTER LAM +م → m ; # ARABIC LETTER MEEM +ن → n ; # ARABIC LETTER NOON +ڼ → ṉ ; # ARABIC LETTER NOON WITH RING +و → w ; # ARABIC LETTER WAW +ه → h ; # ARABIC LETTER HEH +\u0654ی → ey ; # ARABIC LETTER FARSI YEH + HAMZA ABOVE +ی → y ; # ARABIC LETTER FARSI YEH +ى → y ; # ARABIC LETTER YEH +ې → e ; # ARABIC LETTER E +\u064Eا → ā ; # ARABIC FATHA + ALEF +\u064Eى\u0652 → ay ; # ARABIC FATHA + FARSI YEH + SUKUN +\u064Eى → á ; # ARABIC FATHA + ALEF MAKSURA +\u064E\u0652ۍ → êy ; # ARABIC FATHA + SUKUN + YEH WITH TAIL +\u064E\u0652 → ê ; # ARABIC FATHA + SUKUN +\u064E → a ; # ARABIC FATHA +\u0650\u0652ي → ey ; # ARABIC KASRA + FARSI YEH + SUKUN +\u0650ي → ī ; # ARABIC KASRA + FARSI YEH +\u0650ى → ī ; # ARABIC KASRA + YEH +\u0650و → ew ; # ARABIC KASRA + WAW +\u0650 → i ; # ARABIC KASRA +\u064Fو\u0652 → ow ; # ARABIC DAMMA + WAW + SUKUN +\u064Fوی → ūy ; # ARABIC DAMMA + WAW + FARSI YEH +\u064Fو → ū ; # ARABIC DAMMA + WAW +\u064F → u ; # ARABIC DAMMA +\u0652 → ; # ARABIC SUKUN +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/root.txt b/icu4c/source/data/translit/root.txt index c6ad57e006a..0c8afd569f7 100644 --- a/icu4c/source/data/translit/root.txt +++ b/icu4c/source/data/translit/root.txt @@ -12,6 +12,19 @@ root { Digit-Tone { alias 
{"NumericPinyin-Pinyin"} } + Amharic-Latin/BGN { + alias {"am-am_Latn/BGN"} + } + am-Latn-t-am-m0-bgn { + alias {"am-am_Latn/BGN"} + } + am-am_Latn/BGN { + file { + resource:process(transliterator) {"am_am_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + und-t-d0-accents { alias {"Any-Accents"} } @@ -75,6 +88,58 @@ root { } } + Arabic-Latin/BGN { + alias {"ar-ar_Latn/BGN"} + } + ar-Latn-t-ar-m0-bgn { + alias {"ar-ar_Latn/BGN"} + } + ar-ar_Latn/BGN { + file { + resource:process(transliterator) {"ar_ar_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + + Armenian-Latin/BGN { + alias {"hy-hy_Latn/BGN"} + } + hy-Latn-t-hy-m0-bgn { + alias {"hy-hy_Latn/BGN"} + } + hy-hy_Latn/BGN { + file { + resource:process(transliterator) {"hy_hy_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + + Azerbaijani-Latin/BGN { + alias {"az_Cyrl-az/BGN"} + } + az-t-az-cyrl-m0-bgn { + alias {"az_Cyrl-az/BGN"} + } + az_Cyrl-az/BGN { + file { + resource:process(transliterator) {"az_Cyrl_az_BGN.txt"} + direction {"FORWARD"} + } + } + + Belarusian-Latin/BGN { + alias {"be-be_Latn/BGN"} + } + be-Latn-t-be-m0-bgn { + alias {"be-be_Latn/BGN"} + } + be-be_Latn/BGN { + file { + resource:process(transliterator) {"be_be_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + Bengali-Arabic { alias {"Beng-Arab"} } @@ -222,6 +287,19 @@ root { } } + Bulgarian-Latin/BGN { + alias {"bg-bg_Latn/BGN"} + } + bg-Latn-t-bg-m0-bgn { + alias {"bg-bg_Latn/BGN"} + } + bg-bg_Latn/BGN { + file { + resource:process(transliterator) {"bg_bg_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + Cyrillic-Latin { alias {"Cyrl-Latn"} } @@ -438,6 +516,32 @@ root { } } + Georgian-Latin/BGN { + alias {"ka-ka_Latn/BGN"} + } + ka-Latn-t-ka-m0-bgn { + alias {"ka-ka_Latn/BGN"} + } + ka-Latn-t-ka-m0-bgn-2009 { + alias {"ka-ka_Latn/BGN"} + } + ka-ka_Latn/BGN { + file { + resource:process(transliterator) {"ka_ka_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + + ka-Latn-t-ka-m0-bgn-1981 { + alias {"ka-ka_Latn/BGN_1981"} + } + ka-ka_Latn/BGN_1981 { + 
file { + resource:process(transliterator) {"ka_ka_Latn_BGN_1981.txt"} + direction {"FORWARD"} + } + } + Greek-Latin { alias {"Grek-Latn"} } @@ -463,6 +567,19 @@ root { } } + Greek-Latin/BGN { + alias {"el-el_Latn/BGN"} + } + el-Latn-t-el-m0-bgn { + alias {"el-el_Latn/BGN"} + } + el-el_Latn/BGN { + file { + resource:process(transliterator) {"el_el_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + Greek-Latin/UNGEGN { alias {"Grek-Latn/UNGEGN"} } @@ -856,6 +973,19 @@ root { } } + Hebrew-Latin/BGN { + alias {"he-he_Latn/BGN"} + } + he-Latn-t-he-m0-bgn { + alias {"he-he_Latn/BGN"} + } + he-he_Latn/BGN { + file { + resource:process(transliterator) {"he_he_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + Hiragana-Katakana { alias {"Hira-Kana"} } @@ -1175,6 +1305,58 @@ root { } } + Katakana-Latin/BGN { + alias {"ja_Hrkt-ja_Latn/BGN"} + } + ja-Latn-t-ja-hrkt-m0-bgn { + alias {"ja_Hrkt-ja_Latn/BGN"} + } + ja_Hrkt-ja_Latn/BGN { + file { + resource:process(transliterator) {"ja_Hrkt_ja_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + + Kazakh-Latin/BGN { + alias {"kk-kk_Latn/BGN"} + } + kk-Latn-t-kk-m0-bgn { + alias {"kk-kk_Latn/BGN"} + } + kk-kk_Latn/BGN { + file { + resource:process(transliterator) {"kk_kk_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + + Kirghiz-Latin/BGN { + alias {"ky-ky_Latn/BGN"} + } + ky-Latn-t-ky-m0-bgn { + alias {"ky-ky_Latn/BGN"} + } + ky-ky_Latn/BGN { + file { + resource:process(transliterator) {"ky_ky_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + + Korean-Latin/BGN { + alias {"ko-ko_Latn/BGN"} + } + ko-Latn-t-ko-m0-bgn { + alias {"ko-ko_Latn/BGN"} + } + ko-ko_Latn/BGN { + file { + resource:process(transliterator) {"ko_ko_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + und-t-d0-ascii { alias {"Latin-ASCII"} } @@ -1464,6 +1646,19 @@ root { } } + Macedonian-Latin/BGN { + alias {"mk-mk_Latn/BGN"} + } + mk-Latn-t-mk-m0-bgn { + alias {"mk-mk_Latn/BGN"} + } + mk-mk_Latn/BGN { + file { + resource:process(transliterator) {"mk_mk_Latn_BGN.txt"} + 
direction {"FORWARD"} + } + } + Malayalam-Arabic { alias {"Mlym-Arab"} } @@ -1611,6 +1806,32 @@ root { } } + Maldivian-Latin/BGN { + alias {"dv-dv_Latn/BGN"} + } + dv-Latn-t-dv-m0-bgn { + alias {"dv-dv_Latn/BGN"} + } + dv-dv_Latn/BGN { + file { + resource:process(transliterator) {"dv_dv_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + + Mongolian-Latin/BGN { + alias {"mn-mn_Latn/BGN"} + } + mn-Latn-t-mn-m0-bgn { + alias {"mn-mn_Latn/BGN"} + } + mn-mn_Latn/BGN { + file { + resource:process(transliterator) {"mn_mn_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + Oriya-Arabic { alias {"Orya-Arab"} } @@ -1758,6 +1979,32 @@ root { } } + Pashto-Latin/BGN { + alias {"ps-ps_Latn/BGN"} + } + ps-Latn-t-ps-m0-bgn { + alias {"ps-ps_Latn/BGN"} + } + ps-ps_Latn/BGN { + file { + resource:process(transliterator) {"ps_ps_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + + Persian-Latin/BGN { + alias {"fa-fa_Latn/BGN"} + } + fa-Latn-t-fa-m0-bgn { + alias {"fa-fa_Latn/BGN"} + } + fa-fa_Latn/BGN { + file { + resource:process(transliterator) {"fa_fa_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + Pinyin-NumericPinyin { file { resource:process(transliterator) {"Pinyin_NumericPinyin.txt"} @@ -1771,6 +2018,19 @@ root { } } + Russian-Latin/BGN { + alias {"ru-ru_Latn/BGN"} + } + ru-Latn-t-ru-m0-bgn { + alias {"ru-ru_Latn/BGN"} + } + ru-ru_Latn/BGN { + file { + resource:process(transliterator) {"ru_ru_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + Serbian-Latin/BGN { alias {"sr-sr_Latn/BGN"} } @@ -2180,6 +2440,45 @@ root { } } + Turkmen-Latin/BGN { + alias {"tk_Cyrl-tk/BGN"} + } + tk-t-tk-cyrl-m0-bgn { + alias {"tk_Cyrl-tk/BGN"} + } + tk_Cyrl-tk/BGN { + file { + resource:process(transliterator) {"tk_Cyrl_tk_BGN.txt"} + direction {"FORWARD"} + } + } + + Ukrainian-Latin/BGN { + alias {"uk-uk_Latn/BGN"} + } + uk-Latn-t-uk-m0-bgn { + alias {"uk-uk_Latn/BGN"} + } + uk-uk_Latn/BGN { + file { + resource:process(transliterator) {"uk_uk_Latn_BGN.txt"} + direction {"FORWARD"} + } + } + + 
Uzbek-Latin/BGN { + alias {"uz_Cyrl-uz/BGN"} + } + uz-t-uz-cyrl-m0-bgn { + alias {"uz_Cyrl-uz/BGN"} + } + uz_Cyrl-uz/BGN { + file { + resource:process(transliterator) {"uz_Cyrl_uz_BGN.txt"} + direction {"FORWARD"} + } + } + am-fonipa-t-am { alias {"am-am_FONIPA"} } @@ -3319,6 +3618,19 @@ root { } } + Latin-Russian/BGN { + alias {"ru_Latn-ru/BGN"} + } + ru-t-ru-latn-m0-bgn { + alias {"ru_Latn-ru/BGN"} + } + ru_Latn-ru/BGN { + file { + resource:process(transliterator) {"ru_Latn_ru_BGN.txt"} + direction {"FORWARD"} + } + } + am-t-sat { alias {"sat-am"} } diff --git a/icu4c/source/data/translit/ru_Latn_ru_BGN.txt b/icu4c/source/data/translit/ru_Latn_ru_BGN.txt new file mode 100644 index 00000000000..19196b05aee --- /dev/null +++ b/icu4c/source/data/translit/ru_Latn_ru_BGN.txt @@ -0,0 +1,103 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ru_Latn_ru_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1947 System for Russian, in direction ru_Latn → ru +# http://geonames.nga.mil/gns/html/Romanization/Romanization_Russian.pdf +$prime = ʹ; +$doublePrime = ʺ; +$wordBoundary = [^[:L:][:M:][:N:]]; +$upperConsonant = [БВГДЖЙКЛМНПРСТФХЦЧШЩЭ]; +$lowerConsonant = [бвгджйклмнпрстфхцчшщэ]; +$consonant = [$upperConsonant $lowerConsonant]; +::NFC; +[:Upper:] {$prime} [^[:Lower:]] → Ь; +$prime → ь; +[:Upper:] {$doublePrime} [^[:Lower:]] → Ъ; +$doublePrime → ъ; +K[Hh] → Х; +k[Hh] → х; +T·S → ТС; +T·s → Тс; +t·S → тС; +t·s → тс; +T[Ss] → Ц; +t[Ss] → ц; +C[Hh] → Ч; +c[Hh] → ч; +S[Hh]·C[Hh] → ШЧ; +S[Hh]·c[Hh] → Шч; +s[Hh]·C[Hh] → шЧ; +s[Hh]·c[Hh] → шч; +S[Hh][Cc][Hh] → Щ; +s[Hh][Cc][Hh] → щ; +S[Hh] → Ш; +s[Hh] → ш; +Y[Ee] → Е; +y[Ee] → е; +Y[Ëë] → Ё; +y[Ëë] → ё; +Y[Uu] → Ю; +y[Uu] → ю; +Y[Aa] → Я; +y[Aa] → я; +{yy} $wordBoundary → ый; +$wordBoundary {Y} [^aeëiouyAEËIOUY] → Ы; +$wordBoundary {y} [^aeëiouyAEËIOUY] → ы; +$consonant {Y} → Ы; +$consonant {y} → ы; +Y → Й; +y → й; +$wordBoundary {E} → Э; 
+$wordBoundary {e} → э; +·E → Э; +·e → э; +E → Е; +e → е; +A → А; +a → а; +B → Б; +b → б; +V → В; +v → в; +G → Г; +g → г; +D → Д; +d → д; +Ë → Ё; +ë → ё; +Z[Hh] → Ж; +z[Hh] → ж; +Z → З; +z → з; +I → И; +i → и; +K → К; +k → к; +L → Л; +l → л; +M → М; +m → м; +N → Н; +n → н; +O → О; +o → о; +P → П; +p → п; +R → Р; +r → р; +S → С; +s → с; +T → Т; +t → т; +U → У; +u → у; +F → Ф; +f → ф; +·Y → Ы; +·y → ы; +· → ; + diff --git a/icu4c/source/data/translit/ru_ru_Latn_BGN.txt b/icu4c/source/data/translit/ru_ru_Latn_BGN.txt new file mode 100644 index 00000000000..be3a4e87664 --- /dev/null +++ b/icu4c/source/data/translit/ru_ru_Latn_BGN.txt @@ -0,0 +1,241 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: ru_ru_Latn_BGN.txt +# Generated from CLDR +# + +# BGN/PCGN 1947 System +# +# The BGN/PCGN system for Russian was adopted by the BGN in 1944 and +# by the PCGN in 1947 for use in romanizing names written in the +# Russian Cyrillic alphabet. 
+# +# The Russian Alphabet as defined by the BGN (Page 93): +# АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ +# абвгдеёжзийклмнопрстуфхцчшщъыьэюя +# +# Originally prepared by Michael Everson everson@evertype.com +# Fixed by Frank Yung-Fong Tang ftang@google.com +# +# Test Data from http://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian +######################################################################## +# MINIMAL FILTER: Russian-Latin +::[АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя]; +::NFC; +######################################################################## +# Define All Transformation Variables +######################################################################## +$prime = ʹ ; +$doublePrime = ʺ ; +$wordBoundary = [^[:L:][:M:][:N:]] ; +$upperVowels = [АЕЁЭИОУЫЮЯ] ; +$lowerVowels = [аеёэиоуыюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$upperConsonants = [[:Uppercase:]-$vowels] ; +$lowerConsonants = [[:Lowercase:]-$vowels] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upper = [:Uppercase:]; +$lower = [:Lowercase:]; +######################################################################## +# Rules moved to front to avoid masking +######################################################################## +$lowerVowels { ы → ·y ; +$upperVowels { [Ыы] } $lower → ·y ; +$upperVowels { [Ыы] } → ·Y ; +[$consonants - [Йй]]{Э → ·E ; +[$consonants - [Йй]]{э → ·e ; +[$upperVowels [ЙЪЬ]] { Е } $upper → YE ; # CYRILLIC CAPITAL LETTER IE +[$upperVowels [ЙЪЬ]] { Е → Ye ; # CYRILLIC CAPITAL LETTER IE +[$upperVowels $lowerVowels [ЙйЪъЬь]] { е → ye ; # CYRILLIC SMALL LETTER IE +[$upperVowels [ЙЪЬ]] { Ё } $upper → YË ; # CYRILLIC CAPITAL LETTER IO +[$upperVowels [ЙЪЬ]] { Ё → Yë ; # CYRILLIC CAPITAL LETTER IO +[$upperVowels $lowerVowels [ЙйЪъЬь]] { ё → yë ; # CYRILLIC SMALL LETTER IO +# Since in the above rule we look at the Cyrillic context before the E/Ё/ё, +# we have to transform these in a separate pass before we change the vowels. 
+# The ::Null forces a separate pass. +::Null; +######################################################################## +# Start of Alphabetic Transformations +######################################################################## +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +######################################################################## +# BGN Page 94 Rule 1: +# # The character e should be romanized ye +# initially, after the vowel # characters a, e, ё, и, о, у, ы, э, ю, +# and я, and after й, ъ, and ь. +# In all other instances, it should +# be romanized e. +######################################################################## +# BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER +# Е}[$upperVowels [ЙЪЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE +# Е}[$lowerVowels [йъь]] → Ye ; # CYRILLIC CAPITAL LETTER IE +$wordBoundary{Е} $upper → YE ; # CYRILLIC CAPITAL LETTER IE +$wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE +Е → E ; # CYRILLIC CAPITAL LETTER IE +# +# BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER +# е}[$upperVowels $lowerVowels [ЙйЪъЬь]] → ye ; # CYRILLIC SMALL LETTER IE +$wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +######################################################################## +# End of Rule 1 +######################################################################## +######################################################################## +# BGN Page 94 Rule 2: +# +# The character ё is not considered a separate character of the +# Russian alphabet and the dieresis is generally not shown. 
When the +# dieresis is shown, the character should be romanized yë initially, +# after the vowel characters a, e, ё, и, о, у, ы, э, ю, and я, and +# after й, ъ, and ь, In all other instances, it should be romanized +# ё. When the dieresis is not shown, the character may still be +# romanized in the preceding manner or, alternatively, in accordance +# with note 1. +######################################################################## +# BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER +# Ё}[$upperVowels [ЙЪЬ]] → YË ; # CYRILLIC CAPITAL LETTER IO +# Ё}[$lowerVowels [йъь]] → Yë ; # CYRILLIC CAPITAL LETTER IO +$wordBoundary {Ё} [·]? $upper → YË ; # CYRILLIC CAPITAL LETTER IO +$wordBoundary {Ё} [·]? $lower → Yë ; # CYRILLIC CAPITAL LETTER IO +Ё → Ë ; # CYRILLIC CAPITAL LETTER IO +# BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER +# ё}[$upperVowels $lowerVowels [ЙйЪъЬь]] → yë ; # CYRILLIC SMALL LETTER IO +$wordBoundary{ё → yë ; # CYRILLIC SMALL LETTER IO +ё → ë ; # CYRILLIC SMALL LETTER IO +######################################################################## +# End of Rule 2 +######################################################################## +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +######################################################################## +# BGN Page 94 Rule 3.4 +# э after any consonant character except +# й becomes ·е +######################################################################## +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +# BUG(ftang) The following two lines said those consonant becomes ·е +# [$consonants - [Йй]]}Э → ·Е ; +# [$consonants - [Йй]]}э → ·е ; +######################################################################## +# End of Rule 3.4 +######################################################################## +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; 
# CYRILLIC SMALL LETTER I +######################################################################## +# BGN Page 94 Rule 3: +# +# Unusual Russian character sequences occurring primarily in +# non-Russian-language names may be romanized as shown below in order +# to provide differentiation from regularly-occurring digraphs and +# character sequences. +# +# BGN Page 94 Rule 3.1 +# й before а, у, ы, or э becomes у· +######################################################################## +Й}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER I +й}[АаУуЫыЭэ] → y· ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +######################################################################## +# End Rule 3.1 +######################################################################## +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +######################################################################## +# BGN Page 94 Rule 3.5 +# тс becomes t·s +######################################################################## +ТС → T·S ; # CYRILLIC CAPITAL LETTER TE +Тс → T·s ; # CYRILLIC CAPITAL LETTER TE +тс → t·s ; # CYRILLIC SMALL LETTER TE +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +######################################################################## +# End Rule 3.5 +######################################################################## +У → U ; # CYRILLIC CAPITAL LETTER U +у 
→ u ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +######################################################################## +# BGN Page 94 Rule 3.6 +# шч becomes sh·ch +######################################################################## +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +######################################################################## +# End Rule 3.6 +######################################################################## +Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN +######################################################################## +# BGN Page 94 Rule 3.2 +# ы before а, у, ы, or э becomes у· +# +# BGN Page 94 Rule 3.3 +# ы after any vowel character becomes ·у +######################################################################## +# +# BUG(ftang) the following line said the vowels will change +# $vowels}Ы → ·Y ; # CYRILLIC CAPITAL LETTER I +# $vowels}ы → ·y ; # CYRILLIC CAPITAL LETTER I +Ы}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER YERU +ы}[ауыэ] → y· ; # CYRILLIC SMALL LETTER YERU +Ы → Y ; # CYRILLIC CAPITAL LETTER YERU +ы → y ; # CYRILLIC SMALL LETTER YERU 
+######################################################################## +# End Rule 3.2 and 3.3 +######################################################################## +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA + diff --git a/icu4c/source/data/translit/tk_Cyrl_tk_BGN.txt b/icu4c/source/data/translit/tk_Cyrl_tk_BGN.txt new file mode 100644 index 00000000000..da7785433ac --- /dev/null +++ b/icu4c/source/data/translit/tk_Cyrl_tk_BGN.txt @@ -0,0 +1,308 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: tk_Cyrl_tk_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Turkmen was designed for use in +# romanizing names written in the Turkmen alphabet. +# The Turkmen alphabet contains five letters not present +# in the Russian alphabet: Җҗ, Ңң, Өө, Үү, and Әә. 
+# +# The Turkmen Cyrillic Alphabet as defined by the BGN (Page 103): +# +# АБВГДЕЁЖҖЗИЙКЛМНҢОӨПРСТУҮФХЦЧШЩЪЫЬЭӘЮЯ +# абвгдеёжзҗийклмнңоөпрстуүфхцчшщъыьэәюя +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Turkmen-Latin +# +# :: [АБВГДЕЁЖҖЗИЙКЛМНҢОӨПРСТУҮФХЦЧШЩЪЫЬЭӘЮЯабвгдеёжзҗийклмнңоөпрстуүфхцчшщъыьэәюя] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГДЖҖЗЙКЛМНҢПРСТФХЦЧШЩЪЬ] ; +$lowerConsonants = [бвгджҗзйклмнңпрстфхцчшщъь] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁИОӨУҮЫЭӘЮЯ] ; +$lowerVowels = [аеёиоөуүыэәюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → W ; # CYRILLIC CAPITAL LETTER VE +в → w ; # CYRILLIC SMALL LETTER VE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +# +# +######################################################################## +# +# BGN Page 104 Rule 1: 
+# +# The character e should be romanized ye initially, after the vowel +# characters a, e, ё, и, о, ө, у, ү, ы, э, ю, and я, and after й, ъ, and ь. +# In all other instances, it should be romanized e. +# +######################################################################## +# +Е}[[$upperVowels - [Ә]] [ЙЪЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE +Е}[[$lowerVowels - [ә]] [йъь]] → Ye ; # CYRILLIC CAPITAL LETTER IE +$wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE +Е → E ; # CYRILLIC CAPITAL LETTER IE +е}[[$upperVowels - [Ә]] [$lowerVowels - [ә]] [ЙйЪъЬь]] → ye ; # CYRILLIC SMALL LETTER IE +$wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +# +# +######################################################################## +# +# End of Rule 1 +# +######################################################################## +# +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +# +# +######################################################################## +# +# BGN Page 104 Rule 2 +# +# The character sequences зх, нг, сх, and цх may be romanized z·h, +# n·g, s·h, and ts·h in order to differentiate those romanizations from +# the digraphs zh, ng, sh, and the letter sequence tsh, which are used +# to render the characters ж, ң, ш, and the character sequence тш. 
+# +######################################################################## +# +ЗХ → Z·H ; # CYRILLIC CAPITAL LETTER ZE +Зх → Z·h ; # CYRILLIC CAPITAL LETTER ZE +зх → z·h ; # CYRILLIC SMALL LETTER ZE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER SHORT I +й → y ; # CYRILLIC SMALL LETTER SHORT I +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +# +# +######################################################################## +# +# BGN Page 104 Rule 2 +# +# нг becomes n·g +# +######################################################################## +# +НГ → N·G ; # CYRILLIC CAPITAL LETTER EN +Нг → N·g ; # CYRILLIC CAPITAL LETTER EN +нг → n·g ; # CYRILLIC SMALL LETTER EN +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Ң} $lower → Ng ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +Ң → NG ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +ң → ng ; # CYRILLIC SMALL LETTER EN WITH DESCENDER +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +Ө → Ö ; # CYRILLIC CAPITAL LETTER BARRED O +ө → ö ; # CYRILLIC SMALL LETTER BARRED O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +# +# +######################################################################## +# +# BGN Page 104 Rule 2 +# +# сх becomes s·h 
+# +######################################################################## +# +СХ → S·H ; # CYRILLIC CAPITAL LETTER ES +Сх → S·h ; # CYRILLIC CAPITAL LETTER ES +сх → s·h ; # CYRILLIC SMALL LETTER ES +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ү → Ü ; # CYRILLIC CAPITAL LETTER STRAIGHT U +ү → ü ; # CYRILLIC SMALL LETTER STRAIGHT U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х → H ; # CYRILLIC CAPITAL LETTER HA +х → h ; # CYRILLIC SMALL LETTER HA +# +# +######################################################################## +# +# BGN Page 104 Rule 2 +# +# цх becomes ts·h +# +######################################################################## +# +ЦХ → TS·H ; # CYRILLIC CAPITAL LETTER TSE +Цх → Ts·h ; # CYRILLIC CAPITAL LETTER TSE +цх → ts·h ; # CYRILLIC SMALL LETTER TSE +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +# +# +######################################################################## +# +# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). 
+# +# шч becomes sh·ch +# +######################################################################## +# +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Implied rule +# +######################################################################## +# +Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN +# +# +######################################################################## +# +# BGN Page 104 Note 3 +# +# The character Ыы may be romanized Ɨɨ instead of Yy, if so desired. +# +######################################################################## +# +Ы → Y ; # CYRILLIC CAPITAL LETTER YERU +ы → y ; # CYRILLIC SMALL LETTER YERU +# +# +# Alternative rule to implement the option described here. To apply +# uncomment the following by removing the '#' mark at the start of the +# line and insert before the two rule lines above. 
+# +#Ы → Ɨ ; # CYRILLIC CAPITAL LETTER YERU +#ы → ɨ ; # CYRILLIC SMALL LETTER YERU +# +######################################################################## +# +# End BGN Page 104 Note 3 +# +######################################################################## +# +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ә → Ä ; # CYRILLIC CAPITAL LETTER SCHWA +ә → ä ; # CYRILLIC SMALL LETTER SCHWA +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/uk_uk_Latn_BGN.txt b/icu4c/source/data/translit/uk_uk_Latn_BGN.txt new file mode 100644 index 00000000000..c2da7942049 --- /dev/null +++ b/icu4c/source/data/translit/uk_uk_Latn_BGN.txt @@ -0,0 +1,289 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: uk_uk_Latn_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Ukrainian was designed for use in romanizing +# names written in the Ukrainian alphabet. The Ukrainian alphabet +# contains five letters not present in the Russian alphabet: +# Ґґ, Єє, Іі, Її, and ’. 
+# +# The Ukrainian Alphabet as defined by the BGN (Page 105): +# +# АБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЮЯЬ +# абвгґдеєжзиіїйклмнопрстуфхцчшщюяь’ +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Ukrainian-Latin +# +:: [АБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЮЯЬабвгґдеєжзиіїйклмнопрстуфхцчшщюяь’] ; +:: NFC ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГҐДЖЗЙКЛМНПРСТФХЦЧШЩЬ] ; +$lowerConsonants = [бвгґджзйклмнпрстфхцчшщь’] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЄИІЇОУЮЯ] ; +$lowerVowels = [аеєиіїоуюя] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → V ; # CYRILLIC CAPITAL LETTER VE +в → v ; # CYRILLIC SMALL LETTER VE +# +# +######################################################################## +# +# Comment. The BGN gives h as the transliteration for both г and ґ. +# This is an error: г is h and ґ is g. 
+# +######################################################################## +# +Г → H ; # CYRILLIC CAPITAL LETTER GHE +г → h ; # CYRILLIC SMALL LETTER GHE +Ґ → G ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +ґ → g ; # CYRILLIC SMALL LETTER GHE WITH UPTURN +# +# +######################################################################## +# +# End Comment. +# +######################################################################## +# +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +Е → E ; # CYRILLIC CAPITAL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +Є} $lower → Ye ; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +Є → YE ; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +є → ye ; # CYRILLIC SMALL LETTER UKRAINIAN IE +Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE +Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE +ж → zh ; # CYRILLIC SMALL LETTER ZHE +# +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# The character sequences зг, кг, сг, тс, and цг may be romanized +# z·h, k·h, s·h, t·s, and ts·h in order to differentiate those +# romanizations from the digraphs zh, kh, sh, ts, and the letter +# sequence tsh, which are used to render the characters ж, х, ш, ц +# and the character sequence тш. 
+# +######################################################################## +# +ЗГ → Z·H ; # CYRILLIC CAPITAL LETTER ZE +Зг → Z·h ; # CYRILLIC CAPITAL LETTER ZE +зг → z·h ; # CYRILLIC SMALL LETTER ZE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +И → Y ; # CYRILLIC CAPITAL LETTER I +и → y ; # CYRILLIC SMALL LETTER I +І → I ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +і → i ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +Ї} $lower → Yi ; # CYRILLIC CAPITAL LETTER YI +Ї → YI ; # CYRILLIC CAPITAL LETTER YI +ї → yi ; # CYRILLIC SMALL LETTER YI +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +# +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# кг becomes k·h +# +######################################################################## +# +КГ → K·H ; # CYRILLIC CAPITAL LETTER KA +Кг → K·h ; # CYRILLIC CAPITAL LETTER KA +кг → k·h ; # CYRILLIC SMALL LETTER KA +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R ; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +# +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# сг becomes s·h +# 
+######################################################################## +# +СГ → S·H ; # CYRILLIC CAPITAL LETTER ES +Сг → S·h ; # CYRILLIC CAPITAL LETTER ES +сг → s·h ; # CYRILLIC SMALL LETTER ES +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# тс becomes t·s +# +######################################################################## +# +ТС → T·S ; # CYRILLIC CAPITAL LETTER TE +Тс → T·s ; # CYRILLIC CAPITAL LETTER TE +тс → t·s ; # CYRILLIC SMALL LETTER TE +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +У → U ; # CYRILLIC CAPITAL LETTER U +у → u ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +# +# +######################################################################## +# +# BGN Page 105 Rule 1 +# +# цг becomes ts·h +# +######################################################################## +# +ЦГ → TS·H ; # CYRILLIC CAPITAL LETTER TSE +Цг → Ts·h ; # CYRILLIC CAPITAL LETTER TSE +цг → ts·h ; # CYRILLIC SMALL LETTER TSE +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +# +# +######################################################################## +# +# End Rule 1 +# +######################################################################## +# +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC 
CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +# +# +######################################################################## +# +# BGN Page 94 Rule 3.6 +# +# шч becomes sh·ch +# +######################################################################## +# +ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA +Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA +шч → sh·ch ; # CYRILLIC SMALL LETTER SHA +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA +Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA +щ → shch ; # CYRILLIC SMALL LETTER SHCHA +# +# +######################################################################## +# +# End Rule 3.6 +# +######################################################################## +# +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +’ → $doublePrime ; # LEFT SINGLE QUOTATION MARK +# +# +######################################################################## + diff --git a/icu4c/source/data/translit/uz_Cyrl_uz_BGN.txt b/icu4c/source/data/translit/uz_Cyrl_uz_BGN.txt new file mode 100644 index 00000000000..84898e65e10 --- /dev/null +++ b/icu4c/source/data/translit/uz_Cyrl_uz_BGN.txt @@ -0,0 +1,267 @@ +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: uz_Cyrl_uz_BGN.txt +# Generated from CLDR +# + +# +######################################################################## +# BGN/PCGN 1979 System +# +# The BGN/PCGN system for Uzbek was designed for use in +# romanizing names written in the Uzbek alphabet. 
+# The Uzbek alphabet contains four letters not present +# in the Russian alphabet: Ўў, Ққ, Ғғ, and Ҳҳ. +# +# The Uzbek Alphabet as defined by the BGN (Page 107): +# +# АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЪЬЭЮЯЎҚҒҲ +# абвгдеёжзийклмнопрстуфхцчшъьэюяўқғҳ +# +# Originally prepared by Michael Everson +######################################################################## +# +# MINIMAL FILTER: Uzbek-Latin +# +:: [АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЪЬЭЮЯЎҚҒҲабвгдеёжзийклмнопрстуфхцчшъьэюяўқғҳ] ; +:: NFD (NFC) ; +# +# +######################################################################## +# +######################################################################## +# +# Define All Transformation Variables +# +######################################################################## +# +$prime = ʹ ; +$doublePrime = ʺ ; +$upperConsonants = [БВГДЖЗЙКЛМНПРСТФХЦЧШЪЬҚҒҲ] ; +$lowerConsonants = [бвгджзйклмнпрстфхцчшъьқғҳ] ; +$consonants = [$upperConsonants $lowerConsonants] ; +$upperVowels = [АЕЁИОУЭЮЯЎ] ; +$lowerVowels = [аеёиоуэюяў] ; +$vowels = [$upperVowels $lowerVowels] ; +$lower = [$lowerConsonants $lowerVowels] ; +# +# +# Use this $wordBoundary until bug 2034 is fixed in ICU: +# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest +# +$wordBoundary = [^[:L:][:M:][:N:]] ; +# +# +######################################################################## +# +######################################################################## +# +# Rules moved to front to avoid masking +# +######################################################################## +# +$lowerVowels ы → y ; +$upperVowels[Ыы] → Y ; +# +# +######################################################################## +# +# Start of Alphabetic Transformations +# +######################################################################## +# +А → A ; # CYRILLIC CAPITAL LETTER A +а → a ; # CYRILLIC SMALL LETTER A +Б → B ; # CYRILLIC CAPITAL LETTER BE +б → b ; # CYRILLIC SMALL LETTER BE +В → W ; # 
CYRILLIC CAPITAL LETTER VE +в → w ; # CYRILLIC SMALL LETTER VE +# +# +######################################################################## +# +# BGN Page 108 Rule 2 +# +# The character sequences гҳ, кҳ, сҳ, and цҳ may be romanized g·h, +# k·h, s·h, and ts·h in order to differentiate those romanizations from +# the digraphs gh, kh, sh, and the letter sequence tsh, which are used +# to render the characters ғ, х, ш, and the character sequence тш. +# +######################################################################## +# +ГҲ → G·H ; # CYRILLIC CAPITAL LETTER GHE +Гҳ → G·h ; # CYRILLIC CAPITAL LETTER GHE +гҳ → g·h ; # CYRILLIC SMALL LETTER GHE +Г → G ; # CYRILLIC CAPITAL LETTER GHE +г → g ; # CYRILLIC SMALL LETTER GHE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Д → D ; # CYRILLIC CAPITAL LETTER DE +д → d ; # CYRILLIC SMALL LETTER DE +# +# +######################################################################## +# +# BGN Page 108 Rule 1: +# +# The character e should be romanized ye initially, after the vowel +# characters a, e, ё, и, о, у, э, ю, я, and ў, and after й and ь. +# In all other instances, it should be romanized e.
+# +######################################################################## +# +Е}[$upperVowels [ЙЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE +Е}[$lowerVowels [йь]] → Ye ; # CYRILLIC CAPITAL LETTER IE +$wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE +Е → E ; # CYRILLIC CAPITAL LETTER IE +е}[$upperVowels $lowerVowels [ЙйЬь]] → ye ; # CYRILLIC SMALL LETTER IE +$wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE +е → e ; # CYRILLIC SMALL LETTER IE +# +# +######################################################################## +# +# End of Rule 1 +# +######################################################################## +# +Ё} $lower → Yo ; # CYRILLIC CAPITAL LETTER IO +Ё → YO ; # CYRILLIC CAPITAL LETTER IO +ё → yo ; # CYRILLIC SMALL LETTER IO +Ж → J ; # CYRILLIC CAPITAL LETTER ZHE +ж → j ; # CYRILLIC SMALL LETTER ZHE +З → Z ; # CYRILLIC CAPITAL LETTER ZE +з → z ; # CYRILLIC SMALL LETTER ZE +И → I ; # CYRILLIC CAPITAL LETTER I +и → i ; # CYRILLIC SMALL LETTER I +Й → Y ; # CYRILLIC CAPITAL LETTER I +й → y ; # CYRILLIC SMALL LETTER I +# +# +######################################################################## +# +# BGN Page 108 Rule 2 +# +# кҳ becomes k·h +# +######################################################################## +# +КҲ → K·H ; # CYRILLIC CAPITAL LETTER KA +Кҳ → K·h ; # CYRILLIC CAPITAL LETTER KA +кҳ → k·h ; # CYRILLIC SMALL LETTER KA +К → K ; # CYRILLIC CAPITAL LETTER KA +к → k ; # CYRILLIC SMALL LETTER KA +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Л → L ; # CYRILLIC CAPITAL LETTER EL +л → l ; # CYRILLIC SMALL LETTER EL +М → M ; # CYRILLIC CAPITAL LETTER EM +м → m ; # CYRILLIC SMALL LETTER EM +Н → N ; # CYRILLIC CAPITAL LETTER EN +н → n ; # CYRILLIC SMALL LETTER EN +О → O ; # CYRILLIC CAPITAL LETTER O +о → o ; # CYRILLIC SMALL LETTER O +П → P ; # CYRILLIC CAPITAL LETTER PE +п → p ; # CYRILLIC SMALL LETTER PE +Р → R 
; # CYRILLIC CAPITAL LETTER ER +р → r ; # CYRILLIC SMALL LETTER ER +# +# +######################################################################## +# +# BGN Page 108 Rule 2 +# +# сҳ becomes s·h +# +######################################################################## +# +СҲ → S·H ; # CYRILLIC CAPITAL LETTER ES +Сҳ → S·h ; # CYRILLIC CAPITAL LETTER ES +сҳ → s·h ; # CYRILLIC SMALL LETTER ES +С → S ; # CYRILLIC CAPITAL LETTER ES +с → s ; # CYRILLIC SMALL LETTER ES +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Т → T ; # CYRILLIC CAPITAL LETTER TE +т → t ; # CYRILLIC SMALL LETTER TE +У → Ū ; # CYRILLIC CAPITAL LETTER U +у → ū ; # CYRILLIC SMALL LETTER U +Ф → F ; # CYRILLIC CAPITAL LETTER EF +ф → f ; # CYRILLIC SMALL LETTER EF +Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA +Х → KH ; # CYRILLIC CAPITAL LETTER HA +х → kh ; # CYRILLIC SMALL LETTER HA +# +# +######################################################################## +# +# BGN Page 108 Rule 2 +# +# цҳ becomes ts·h +# +######################################################################## +# +ЦҲ → TS·H ; # CYRILLIC CAPITAL LETTER TSE +Цҳ → Ts·h ; # CYRILLIC CAPITAL LETTER TSE +цҳ → ts·h ; # CYRILLIC SMALL LETTER TSE +Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE +Ц → TS ; # CYRILLIC CAPITAL LETTER TSE +ц → ts ; # CYRILLIC SMALL LETTER TSE +# +# +######################################################################## +# +# End Rule 2 +# +######################################################################## +# +Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE +Ч → CH ; # CYRILLIC CAPITAL LETTER CHE +ч → ch ; # CYRILLIC SMALL LETTER CHE +Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA +Ш → SH ; # CYRILLIC CAPITAL LETTER SHA +ш → sh ; # CYRILLIC SMALL LETTER SHA +Ъ → $prime ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ → $prime ; # CYRILLIC SMALL LETTER HARD SIGN +Ь → $prime ; #
CYRILLIC CAPITAL LETTER SOFT SIGN +ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN +Э → E ; # CYRILLIC CAPITAL LETTER E +э → e ; # CYRILLIC SMALL LETTER E +Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU +Ю → YU ; # CYRILLIC CAPITAL LETTER YU +ю → yu ; # CYRILLIC SMALL LETTER YU +Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA +Я → YA ; # CYRILLIC CAPITAL LETTER YA +я → ya ; # CYRILLIC SMALL LETTER YA +Ў → Ŭ ; # CYRILLIC CAPITAL LETTER SHORT U +ў → ŭ ; # CYRILLIC SMALL LETTER SHORT U +Қ → Q ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +қ → q ; # CYRILLIC SMALL LETTER KA WITH DESCENDER +Ғ} $lower → Gh ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +Ғ → GH ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +ғ → gh ; # CYRILLIC SMALL LETTER GHE WITH STROKE +Ҳ → H ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +ҳ → h ; # CYRILLIC SMALL LETTER HA WITH DESCENDER +# +# +######################################################################## + diff --git a/icu4c/source/data/unit/de.txt b/icu4c/source/data/unit/de.txt index da7fa7f8a4f..de19d3023c0 100644 --- a/icu4c/source/data/unit/de.txt +++ b/icu4c/source/data/unit/de.txt @@ -1,7 +1,7 @@ // © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html#License de{ - Version{"2.1.37.11"} + Version{"2.1.37.96"} durationUnits{ hm{"h:mm"} hms{"h:mm:ss"} diff --git a/icu4c/source/data/unit/resfiles.mk b/icu4c/source/data/unit/resfiles.mk index e72be18f610..8f02edaf732 100644 --- a/icu4c/source/data/unit/resfiles.mk +++ b/icu4c/source/data/unit/resfiles.mk @@ -1,6 +1,6 @@ # © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License -UNIT_CLDR_VERSION = 32 +UNIT_CLDR_VERSION = 32.0.1 # A list of txt's to build # Note: # diff --git a/icu4c/source/data/zone/de.txt b/icu4c/source/data/zone/de.txt index 2bc39ccb56f..07ce76892d2 100644 --- a/icu4c/source/data/zone/de.txt +++ b/icu4c/source/data/zone/de.txt @@ -1,7 +1,7 @@ // © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html#License de{ - Version{"2.1.37.11"} + Version{"2.1.37.96"} zoneStrings{ "Africa:Abidjan"{ ec{"Abidjan"} diff --git a/icu4c/source/data/zone/resfiles.mk b/icu4c/source/data/zone/resfiles.mk index cf4ab64b233..ecf2bd272ee 100644 --- a/icu4c/source/data/zone/resfiles.mk +++ b/icu4c/source/data/zone/resfiles.mk @@ -1,6 +1,6 @@ # © 2016 and later: Unicode, Inc. and others. # License & terms of use: http://www.unicode.org/copyright.html#License -ZONE_CLDR_VERSION = 32 +ZONE_CLDR_VERSION = 32.0.1 # A list of txt's to build # Note: # diff --git a/icu4c/source/test/intltest/dtfmttst.cpp b/icu4c/source/test/intltest/dtfmttst.cpp index 77dd1c817a9..fd3a9f1faf0 100644 --- a/icu4c/source/test/intltest/dtfmttst.cpp +++ b/icu4c/source/test/intltest/dtfmttst.cpp @@ -5180,9 +5180,9 @@ void DateFormatTest::TestDayPeriodWithLocales() { // assertEquals("hh:mm:ss bbbb | 00:00:00 | de", "12:00:00 Mitternacht", // sdf.format(k000000, out.remove())); - assertEquals("hh:mm:ss bbbb | 00:00:00 | de", "12:00:00 AM", + assertEquals("hh:mm:ss bbbb | 00:00:00 | de", "12:00:00 vorm.", sdf.format(k000000, out.remove())); - assertEquals("hh:mm:ss bbbb | 12:00:00 | de", "12:00:00 PM", + assertEquals("hh:mm:ss bbbb | 12:00:00 | de", "12:00:00 nachm.", sdf.format(k120000, out.remove())); // Locale ee has a rule that wraps around midnight (21h - 4h). 
diff --git a/icu4c/source/test/intltest/dtifmtts.cpp b/icu4c/source/test/intltest/dtifmtts.cpp index f578b22fc65..f840b7d695d 100644 --- a/icu4c/source/test/intltest/dtifmtts.cpp +++ b/icu4c/source/test/intltest/dtifmtts.cpp @@ -904,7 +904,7 @@ void DateIntervalFormatTest::testFormat() { "de", "2007 10 10 10:10:10", "2008 10 10 10:10:10", "M", "10.2007 \\u2013 10.2008", - "de", "2007 10 10 10:10:10", "2008 10 10 10:10:10", "hm", "10.10.2007, 10:10 AM \\u2013 10.10.2008, 10:10 AM", + "de", "2007 10 10 10:10:10", "2008 10 10 10:10:10", "hm", "10.10.2007, 10:10 vorm. \\u2013 10.10.2008, 10:10 vorm.", "de", "2007 10 10 10:10:10", "2008 10 10 10:10:10", "Hm", "10.10.2007, 10:10 \\u2013 10.10.2008, 10:10", "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "EEEEdMMMy", "Mittwoch, 10. Okt. \\u2013 Samstag, 10. Nov. 2007", @@ -930,7 +930,7 @@ void DateIntervalFormatTest::testFormat() { "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "MMM", "Okt.\\u2013Nov.", - "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "hms", "10.10.2007, 10:10:10 AM \\u2013 10.11.2007, 10:10:10 AM", + "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "hms", "10.10.2007, 10:10:10 vorm. \\u2013 10.11.2007, 10:10:10 vorm.", "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "Hms", "10.10.2007, 10:10:10 \\u2013 10.11.2007, 10:10:10", "de", "2007 11 10 10:10:10", "2007 11 20 10:10:10", "EEEEdMMMy", "Samstag, 10. \\u2013 Dienstag, 20. Nov. 2007", @@ -955,7 +955,7 @@ void DateIntervalFormatTest::testFormat() { "de", "2007 11 10 10:10:10", "2007 11 20 10:10:10", "y", "2007", - "de", "2007 11 10 10:10:10", "2007 11 20 10:10:10", "hmv", "10.11.2007, 10:10 AM Los Angeles Zeit \\u2013 20.11.2007, 10:10 AM Los Angeles Zeit", + "de", "2007 11 10 10:10:10", "2007 11 20 10:10:10", "hmv", "10.11.2007, 10:10 vorm. Los Angeles Zeit \\u2013 20.11.2007, 10:10 vorm. Los Angeles Zeit", "de", "2007 01 10 10:00:10", "2007 01 10 14:10:10", "EEEEdMMMy", "Mittwoch, 10. Jan. 
2007", @@ -968,33 +968,33 @@ void DateIntervalFormatTest::testFormat() { /* Following is an important test, because the 'h' in 'Uhr' is interpreted as a pattern if not escaped properly. */ - "de", "2007 01 10 10:00:10", "2007 01 10 14:10:10", "h", "10 Uhr AM \\u2013 2 Uhr PM", + "de", "2007 01 10 10:00:10", "2007 01 10 14:10:10", "h", "10 Uhr vorm. \\u2013 2 Uhr nachm.", "de", "2007 01 10 10:00:10", "2007 01 10 14:10:10", "H", "10\\u201314 Uhr", "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "EEEEdMMM", "Mittwoch, 10. Jan.", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hmv", "10:00\\u201310:20 AM Los Angeles Zeit", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hmv", "10:00\\u201310:20 vorm. Los Angeles Zeit", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hmz", "10:00\\u201310:20 AM GMT-8", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hmz", "10:00\\u201310:20 vorm. GMT-8", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "h", "10 Uhr AM", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "h", "10 Uhr vorm.", "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "H", "10 Uhr", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hz", "10 Uhr AM GMT-8", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hz", "10 Uhr vorm. GMT-8", "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "EEEEdMMMy", "Mittwoch, 10. Jan. 2007", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hmv", "10:10 AM Los Angeles Zeit", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hmv", "10:10 vorm. Los Angeles Zeit", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hmz", "10:10 AM GMT-8", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hmz", "10:10 vorm. GMT-8", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hv", "10 Uhr AM Los Angeles Zeit", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hv", "10 Uhr vorm. 
Los Angeles Zeit", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hz", "10 Uhr AM GMT-8", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hz", "10 Uhr vorm. GMT-8", // Thai (default calendar buddhist) diff --git a/icu4c/source/test/intltest/dtptngts.cpp b/icu4c/source/test/intltest/dtptngts.cpp index 2ecd32c0c4e..33d248d5f31 100644 --- a/icu4c/source/test/intltest/dtptngts.cpp +++ b/icu4c/source/test/intltest/dtptngts.cpp @@ -131,7 +131,7 @@ void IntlTestDateTimePatternGeneratorAPI::testAPI(/*char *par*/) UnicodeString("13. Jan."), // 05: MMMd UnicodeString("13. Januar"), // 06: MMMMd UnicodeString("Q1 1999"), // 07: yQQQ - UnicodeString("11:58 PM"), // 08: hhmm + UnicodeString("11:58 nachm."), // 08: hhmm UnicodeString("23:58"), // 09: HHmm UnicodeString("23:58"), // 10: jjmm UnicodeString("58:59"), // 11: mmss diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index 93e90337620..b2c496db801 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -3578,11 +3578,13 @@ void TransliteratorTest::TestIncrementalProgress(void) { // The following are forward-only, it is OK that creating an inverse will not work: // 1. Devanagari-Arabic // 2. Any-*/BGN + // 2a. Any-*/BGN_1981 // 3. Any-*/UNGEGN + // 4. Any-*/MNS // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work. 
if ( id.compare((UnicodeString)"Devanagari-Arabic/") != 0 && !(id.startsWith((UnicodeString)"Any-") && - (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS")) + (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS")) ) #if UCONFIG_NO_BREAK_ITERATION && id.compare((UnicodeString)"Latin-Thai/") != 0 diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar index 9a455f6c4e9..d75243b1bbc 100755 --- a/icu4j/main/shared/data/icudata.jar +++ b/icu4j/main/shared/data/icudata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2aa096cfd030ae188c9d51104a2432357666dd6bff8da5600cd99e46b4ccc600 -size 12459512 +oid sha256:820d2b44ae5c7e1b8da24e5c068014ffa80b985129c0b890a0cd4cb3887b8161 +size 12475724 diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatTest.java index 98869805306..ce714e53231 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatTest.java @@ -5164,8 +5164,8 @@ public class DateFormatTest extends TestFmwk { sdf.applyPattern("hh:mm:ss bbbb"); // assertEquals("hh:mm:ss bbbb | 00:00:00 | de", "12:00:00 Mitternacht", sdf.format(k000000)); - assertEquals("hh:mm:ss bbbb | 00:00:00 | de", "12:00:00 AM", sdf.format(k000000)); - assertEquals("hh:mm:ss bbbb | 12:00:00 | de", "12:00:00 PM", sdf.format(k120000)); + assertEquals("hh:mm:ss bbbb | 00:00:00 | de", "12:00:00 vorm.", sdf.format(k000000)); + assertEquals("hh:mm:ss bbbb | 12:00:00 | de", "12:00:00 nachm.", sdf.format(k120000)); // Locale ee has a rule that wraps around midnight (21h - 4h). 
sdf = new SimpleDateFormat("", new ULocale("ee")); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java index 6129b0d3375..ec22af2cbee 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java @@ -531,7 +531,7 @@ public class DateIntervalFormatTest extends TestFmwk { "de", "2007 10 10 10:10:10", "2008 10 10 10:10:10", "MMM", "Okt. 2007 \\u2013 Okt. 2008", - "de", "2007 10 10 10:10:10", "2008 10 10 10:10:10", "hm", "10.10.2007, 10:10 AM \\u2013 10.10.2008, 10:10 AM", + "de", "2007 10 10 10:10:10", "2008 10 10 10:10:10", "hm", "10.10.2007, 10:10 vorm. \\u2013 10.10.2008, 10:10 vorm.", "de", "2007 10 10 10:10:10", "2008 10 10 10:10:10", "jm", "10.10.2007, 10:10 \\u2013 10.10.2008, 10:10", @@ -560,11 +560,11 @@ public class DateIntervalFormatTest extends TestFmwk { "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "MMM", "Okt.\\u2013Nov.", - "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "hmv", "10.10.2007, 10:10 AM Los Angeles Zeit \\u2013 10.11.2007, 10:10 AM Los Angeles Zeit", + "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "hmv", "10.10.2007, 10:10 vorm. Los Angeles Zeit \\u2013 10.11.2007, 10:10 vorm. Los Angeles Zeit", "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "jmv", "10.10.2007, 10:10 Los Angeles Zeit \\u2013 10.11.2007, 10:10 Los Angeles Zeit", - "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "hms", "10.10.2007, 10:10:10 AM \\u2013 10.11.2007, 10:10:10 AM", + "de", "2007 10 10 10:10:10", "2007 11 10 10:10:10", "hms", "10.10.2007, 10:10:10 vorm. \\u2013 10.11.2007, 10:10:10 vorm.", "de", "2007 11 10 10:10:10", "2007 11 20 10:10:10", "EEEEdMMMy", "Samstag, 10. \\u2013 Dienstag, 20. Nov. 
2007", @@ -592,7 +592,7 @@ public class DateIntervalFormatTest extends TestFmwk { "de", "2007 11 10 10:10:10", "2007 11 20 10:10:10", "M", "11", - "de", "2007 11 10 10:10:10", "2007 11 20 10:10:10", "hmv", "10.11.2007, 10:10 AM Los Angeles Zeit \\u2013 20.11.2007, 10:10 AM Los Angeles Zeit", + "de", "2007 11 10 10:10:10", "2007 11 20 10:10:10", "hmv", "10.11.2007, 10:10 vorm. Los Angeles Zeit \\u2013 20.11.2007, 10:10 vorm. Los Angeles Zeit", "de", "2007 11 10 10:10:10", "2007 11 20 10:10:10", "jmv", "10.11.2007, 10:10 Los Angeles Zeit \\u2013 20.11.2007, 10:10 Los Angeles Zeit", @@ -606,43 +606,43 @@ public class DateIntervalFormatTest extends TestFmwk { "de", "2007 01 10 10:00:10", "2007 01 10 14:10:10", "EEEEdMMM", "Mittwoch, 10. Jan.", - "de", "2007 01 10 10:00:10", "2007 01 10 14:10:10", "hmz", "10:00 AM \\u2013 2:10 PM GMT-8", + "de", "2007 01 10 10:00:10", "2007 01 10 14:10:10", "hmz", "10:00 vorm. \\u2013 2:10 nachm. GMT-8", - "de", "2007 01 10 10:00:10", "2007 01 10 14:10:10", "h", "10 Uhr AM \\u2013 2 Uhr PM", + "de", "2007 01 10 10:00:10", "2007 01 10 14:10:10", "h", "10 Uhr vorm. \\u2013 2 Uhr nachm.", "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "EEEEdMMM", "Mittwoch, 10. Jan.", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hm", "10:00\\u201310:20 AM", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hm", "10:00\\u201310:20 vorm.", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hmv", "10:00\\u201310:20 AM Los Angeles Zeit", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hmv", "10:00\\u201310:20 vorm. Los Angeles Zeit", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hmz", "10:00\\u201310:20 AM GMT-8", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hmz", "10:00\\u201310:20 vorm. 
GMT-8", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "h", "10 Uhr AM", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "h", "10 Uhr vorm.", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hv", "10 Uhr AM Los Angeles Zeit", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hv", "10 Uhr vorm. Los Angeles Zeit", - "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hz", "10 Uhr AM GMT-8", + "de", "2007 01 10 10:00:10", "2007 01 10 10:20:10", "hz", "10 Uhr vorm. GMT-8", "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "EEEEdMMMy", "Mittwoch, 10. Jan. 2007", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hm", "10:10 AM", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hm", "10:10 vorm.", "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "jm", "10:10", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hmv", "10:10 AM Los Angeles Zeit", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hmv", "10:10 vorm. Los Angeles Zeit", "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "jmv", "10:10 Los Angeles Zeit", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hmz", "10:10 AM GMT-8", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hmz", "10:10 vorm. GMT-8", "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "jmz", "10:10 GMT-8", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "h", "10 Uhr AM", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "h", "10 Uhr vorm.", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hv", "10 Uhr AM Los Angeles Zeit", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hv", "10 Uhr vorm. Los Angeles Zeit", - "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hz", "10 Uhr AM GMT-8", + "de", "2007 01 10 10:10:10", "2007 01 10 10:10:20", "hz", "10 Uhr vorm. 
GMT-8", // Thai (default calendar buddhist) diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java index fe4e9111097..8cfa7e65fa2 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java @@ -415,7 +415,7 @@ public class DateTimeGeneratorTest extends TestFmwk { new String[] {"MMMd", "13. Jan."}, new String[] {"MMMMd", "13. Januar"}, new String[] {"yQQQ", "Q1 1999"}, - new String[] {"hhmm", "11:58 PM"}, + new String[] {"hhmm", "11:58 nachm."}, new String[] {"HHmm", "23:58"}, new String[] {"jjmm", "23:58"}, new String[] {"mmss", "58:59"},