From 84663727157287dee123e42c6ff09e2a73ddb282 Mon Sep 17 00:00:00 2001 From: Maxime Serrano Date: Fri, 17 Aug 2012 17:09:01 +0000 Subject: [PATCH] ICU-9353 fix windows makefile, remove mentions of word_ja.txt X-SVN-Rev: 32191 --- icu4c/source/common/brkeng.cpp | 7 +- icu4c/source/data/brkitr/ja.txt | 1 - icu4c/source/data/brkitr/word_ja.txt | 223 --------------------------- icu4c/source/data/makedata.mak | 19 ++- 4 files changed, 18 insertions(+), 232 deletions(-) delete mode 100644 icu4c/source/data/brkitr/word_ja.txt diff --git a/icu4c/source/common/brkeng.cpp b/icu4c/source/common/brkeng.cpp index abc201e9f54..381ce7dd3d1 100644 --- a/icu4c/source/common/brkeng.cpp +++ b/icu4c/source/common/brkeng.cpp @@ -288,10 +288,15 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* } if (U_SUCCESS(status) && dictfname) { UChar *extStart = u_strchr(dictfname, 0x002e); + int32_t extLen = u_strlen(extStart+1); + if (extLen > sizeof(ext) - 1) { + ures_close(b); + return NULL; + } int len = 0; if (extStart != NULL) { len = (int)(extStart - dictfname); - u_UCharsToChars(extStart+1, ext, sizeof(ext)-1); // null-terminates the buffer + u_UCharsToChars(extStart+1, ext, extLen); // null-terminates the buffer u_UCharsToChars(dictfname, dictnbuf, len); } dictnbuf[len] = '\0'; // null-terminate diff --git a/icu4c/source/data/brkitr/ja.txt b/icu4c/source/data/brkitr/ja.txt index 898c696d647..161b1982f93 100644 --- a/icu4c/source/data/brkitr/ja.txt +++ b/icu4c/source/data/brkitr/ja.txt @@ -10,6 +10,5 @@ ja{ Version{"1.1"} boundaries{ line:process(dependency){"line_ja.brk"} - word:process(dependency){"word_ja.brk"} } } diff --git a/icu4c/source/data/brkitr/word_ja.txt b/icu4c/source/data/brkitr/word_ja.txt deleted file mode 100644 index a870345fa1a..00000000000 --- a/icu4c/source/data/brkitr/word_ja.txt +++ /dev/null @@ -1,223 +0,0 @@ -# -# Copyright (C) 2002-2011, International Business Machines Corporation -# and others. All Rights Reserved. -# -# file: word_ja.txt -# -# ICU Word Break Rules -# See Unicode Standard Annex #29. -# These rules are based on UAX #29 Revision 19 for Unicode Version 6.1 -# -# Note: Updates to word.txt will usually need to be merged into -# word_POSIX.txt and word_ja.txt also. - -############################################################################## -# -# Character class definitions from TR 29 -# -############################################################################## - -!!chain; - - -# -# Character Class Definitions. -# - -$CR = [\p{Word_Break = CR}]; -$LF = [\p{Word_Break = LF}]; -$Newline = [\p{Word_Break = Newline}]; -$Extend = [\p{Word_Break = Extend}]; -$Format = [\p{Word_Break = Format}]; -$Katakana = [\p{Word_Break = Katakana}]; -$ALetter = [\p{Word_Break = ALetter}]; -$MidNumLet = [\p{Word_Break = MidNumLet}]; -$MidLetter = [\p{Word_Break = MidLetter}]; -$MidNum = [\p{Word_Break = MidNum}]; -$Numeric = [\p{Word_Break = Numeric}]; -$ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; - - -# Dictionary character set, for triggering language-based break engines. Currently -# limited to LineBreak=Complex_Context. Note that this set only works in Unicode -# 5.0 or later as the definition of Complex_Context was corrected to include all -# characters requiring dictionary break. - -$dictionary = [:LineBreak = Complex_Context:]; -$Control = [\p{Grapheme_Cluster_Break = Control}]; -$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; # Note: default ALetter does not - # include the dictionary characters. - -# -# Rules 4 Ignore Format and Extend characters, -# except when they appear at the beginning of a region of text. -# -$KatakanaEx = $Katakana ($Extend | $Format)*; -$ALetterEx = $ALetterPlus ($Extend | $Format)*; -$MidNumLetEx = $MidNumLet ($Extend | $Format)*; -$MidLetterEx = $MidLetter ($Extend | $Format)*; -$MidNumEx = $MidNum ($Extend | $Format)*; -$NumericEx = $Numeric ($Extend | $Format)*; -$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*; - -$Hiragana = [\p{script=Hiragana}]; -$Ideographic = [\p{Ideographic} [\u3005 \u3007 \u303B]]; -$HiraganaEx = $Hiragana ($Extend | $Format)*; -$IdeographicEx = $Ideographic ($Extend | $Format)*; - -## ------------------------------------------------- - -!!forward; - - -# Rule 3 - CR x LF -# -$CR $LF; - -# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning -# of a region of Text. The rule here comes into play when the start of text -# begins with a group of Format chars, or with a "word" consisting of a single -# char that is not in any of the listed word break categories followed by -# format char(s). -[^$CR $LF $Newline]? ($Extend | $Format)+; - -$NumericEx {100}; -$ALetterEx {200}; -$KatakanaEx {300}; # note: these status values override those from rule 5 -$HiraganaEx {300}; # by virtual of being numerically larger. -$IdeographicEx {400}; # - -# -# rule 5 -# Do not break between most letters. -# -$ALetterEx $ALetterEx {200}; - -# rule 6 and 7 -$ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200}; - -# rule 8 - -$NumericEx $NumericEx {100}; - -# rule 9 - -$ALetterEx $NumericEx {200}; - -# rule 10 - -$NumericEx $ALetterEx {200}; - -# rule 11 and 12 - -$NumericEx ($MidNumEx | $MidNumLetEx) $NumericEx {100}; - -# rule 13 - -$KatakanaEx $KatakanaEx {300}; -$HiraganaEx $HiraganaEx {300}; -$IdeographicEx $IdeographicEx {400}; - - -# rule 13a/b - -$ALetterEx $ExtendNumLetEx {200}; # (13a) -$NumericEx $ExtendNumLetEx {100}; # (13a) -$KatakanaEx $ExtendNumLetEx {300}; # (13a) -$ExtendNumLetEx $ExtendNumLetEx {200}; # (13a) - -$ExtendNumLetEx $ALetterEx {200}; # (13b) -$ExtendNumLetEx $NumericEx {100}; # (13b) -$ExtendNumLetEx $KatakanaEx {300}; # (13b) - - - -## ------------------------------------------------- - -!!reverse; - -$BackALetterEx = ($Format | $Extend)* $ALetterPlus; -$BackMidNumLetEx = ($Format | $Extend)* $MidNumLet; -$BackNumericEx = ($Format | $Extend)* $Numeric; -$BackMidNumEx = ($Format | $Extend)* $MidNum; -$BackMidLetterEx = ($Format | $Extend)* $MidLetter; -$BackKatakanaEx = ($Format | $Extend)* $Katakana; -$BackHiraganaEx = ($Format | $Extend)* $Hiragana; -$BackIdeographicEx = ($Format | $Extend)* $Ideographic; -$BackExtendNumLetEx= ($Format | $Extend)* $ExtendNumLet; - -# rule 3 -$LF $CR; - -# rule 4 -($Format | $Extend)* [^$CR $LF $Newline]?; - -# rule 5 - -$BackALetterEx $BackALetterEx; - -# rule 6 and 7 - -$BackALetterEx ($BackMidLetterEx | $BackMidNumLetEx) $BackALetterEx; - - -# rule 8 - -$BackNumericEx $BackNumericEx; - -# rule 9 - -$BackNumericEx $BackALetterEx; - -# rule 10 - -$BackALetterEx $BackNumericEx; - -# rule 11 and 12 - -$BackNumericEx ($BackMidNumEx | $BackMidNumLetEx) $BackNumericEx; - -# rule 13 - -$BackKatakanaEx $BackKatakanaEx; -$BackHiraganaEx $BackHiraganaEx; -$BackIdeographicEx $BackIdeographicEx; - - - -# rules 13 a/b -# -$BackExtendNumLetEx ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx); -($BackALetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx; - -## ------------------------------------------------- - -!!safe_reverse; - -# rule 3 -($Extend | $Format)+ .?; - -# rule 6 -($MidLetter | $MidNumLet) $BackALetterEx; - -# rule 11 -($MidNum | $MidNumLet) $BackNumericEx; - -# For dictionary-based break -$dictionary $dictionary; - -## ------------------------------------------------- - -!!safe_forward; - -# rule 4 -($Extend | $Format)+ .?; - -# rule 6 -($MidLetterEx | $MidNumLetEx) $ALetterEx; - -# rule 11 -($MidNumEx | $MidNumLetEx) $NumericEx; - -# For dictionary-based break -$dictionary $dictionary; diff --git a/icu4c/source/data/makedata.mak b/icu4c/source/data/makedata.mak index b64722ff665..6c66f274b50 100644 --- a/icu4c/source/data/makedata.mak +++ b/icu4c/source/data/makedata.mak @@ -253,9 +253,9 @@ BRK_FILES=$(BRK_FILES:.txt=.brk) BRK_FILES=$(BRK_FILES:brkitr\ =brkitr\) !IFDEF BRK_DICT_SOURCE -BRK_DICT_FILES = $(ICUBRK)\$(BRK_DICT_SOURCE):.txt=.dict brkitr\) +BRK_DICT_FILES = $(ICUBRK)\$(BRK_DICT_SOURCE:.txt =.dict brkitr\) BRK_DICT_FILES = $(BRK_DICT_FILES:.txt=.dict) -BRK_DICT_FILES = $(BRK_DICT_FILES:brkitr\ =) +BRK_DICT_FILES = $(BRK_DICT_FILES:brkitr\ =brkitr\) !ENDIF !IFDEF BRK_RES_SOURCE @@ -360,9 +360,6 @@ ZONE_SOURCE=$(ZONE_SOURCE) $(ZONE_SOURCE_LOCAL) !MESSAGE Warning: cannot find "zone\resfiles.mk" !ENDIF -BRK_DICT_FILES = $(ICUBRK)\$(BRK_DICT_SOURCE):.txt=.dict brkitr\) -BRK_DICT_FILES = $(BRK_DICT_FILES:.txt=.dict) -BRK_DICT_FILES = $(BRK_DICT_FILES:brkitr\ =) !IFDEF ZONE_SOURCE ZONE_FILES = zone\root.txt $(ZONE_ALIAS_SOURCE) $(ZONE_SOURCE) ZONE_RES_FILES = $(ZONE_FILES:.txt =.res zone\) @@ -640,7 +637,7 @@ $(TRANSLIT_RES_FILES:.res =.res ) $(BRK_FILES:.brk =.brk ) -$(BRK_DICT_FILES:.dict=.dict +$(BRK_DICT_FILES:.dict =.dict ) $(BRK_RES_FILES:.res =.res ) @@ -740,7 +737,15 @@ CLEAN : GODATA #RBBI .dict file generation. {$(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)}.txt.dict: @echo Creating $@ - @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --uchars -i "$(ICUBLD_PKG)" $< $(ICUBLD_PKG)\$@ + @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --uchars $< $(ICUBLD_PKG)\$@ + +$(ICUBRK)\thaidict.dict: + @echo Creating $(ICUBRK)\thaidict.dict + @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0xe00 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\thaidict.txt $(ICUBLD_PKG)\$(ICUBRK)\thaidict.dict + +$(ICUBRK)\khmerdict.dict: + @echo Creating $(ICUBRK)\khmerdict.dict + @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0x1780 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\khmerdict.txt $(ICUBLD_PKG)\$(ICUBRK)\khmerdict.dict !IFNDEF ICUDATA_SOURCE_ARCHIVE # Rule for creating converters -- 2.40.0