From d06ffe0e270aa794eff67502e3fabb1e1c615897 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Thu, 8 Oct 2015 22:20:33 +0000 Subject: [PATCH] ICU-10616 break iterator data files, separate rules, dictionaries and locales. X-SVN-Rev: 38052 --- icu4c/source/data/Makefile.in | 14 ++++++++------ .../data/brkitr/{ => dictionaries}/burmesedict.txt | 2 +- .../data/brkitr/{ => dictionaries}/cjdict.txt | 0 .../data/brkitr/{ => dictionaries}/khmerdict.txt | 2 +- .../data/brkitr/{ => dictionaries}/laodict.txt | 2 +- .../data/brkitr/{ => dictionaries}/thaidict.txt | 0 icu4c/source/data/brkitr/{ => rules}/char.txt | 2 +- icu4c/source/data/brkitr/{ => rules}/line.txt | 0 icu4c/source/data/brkitr/{ => rules}/line_fi.txt | 0 .../source/data/brkitr/{ => rules}/line_loose.txt | 0 .../data/brkitr/{ => rules}/line_loose_cj.txt | 0 .../data/brkitr/{ => rules}/line_loose_fi.txt | 0 .../source/data/brkitr/{ => rules}/line_normal.txt | 0 .../data/brkitr/{ => rules}/line_normal_cj.txt | 0 .../data/brkitr/{ => rules}/line_normal_fi.txt | 0 icu4c/source/data/brkitr/{ => rules}/sent.txt | 0 icu4c/source/data/brkitr/{ => rules}/sent_el.txt | 0 icu4c/source/data/brkitr/{ => rules}/title.txt | 2 +- icu4c/source/data/brkitr/{ => rules}/word.txt | 2 +- .../source/data/brkitr/{ => rules}/word_POSIX.txt | 2 +- icu4c/source/data/makedata.mak | 12 ++++++------ 21 files changed, 21 insertions(+), 19 deletions(-) rename icu4c/source/data/brkitr/{ => dictionaries}/burmesedict.txt (99%) rename icu4c/source/data/brkitr/{ => dictionaries}/cjdict.txt (100%) rename icu4c/source/data/brkitr/{ => dictionaries}/khmerdict.txt (99%) rename icu4c/source/data/brkitr/{ => dictionaries}/laodict.txt (99%) rename icu4c/source/data/brkitr/{ => dictionaries}/thaidict.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/char.txt (96%) rename icu4c/source/data/brkitr/{ => rules}/line.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/line_fi.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/line_loose.txt (100%) 
rename icu4c/source/data/brkitr/{ => rules}/line_loose_cj.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/line_loose_fi.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/line_normal.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/line_normal_cj.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/line_normal_fi.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/sent.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/sent_el.txt (100%) rename icu4c/source/data/brkitr/{ => rules}/title.txt (96%) rename icu4c/source/data/brkitr/{ => rules}/word.txt (99%) rename icu4c/source/data/brkitr/{ => rules}/word_POSIX.txt (99%) diff --git a/icu4c/source/data/Makefile.in b/icu4c/source/data/Makefile.in index 9bc96cc7c04..e007b872c56 100644 --- a/icu4c/source/data/Makefile.in +++ b/icu4c/source/data/Makefile.in @@ -101,6 +101,8 @@ TRANSLITBLDDIR=$(BUILDDIR)/translit MISCSRCDIR=$(SRCDATADIR)/misc BRKSRCDIR=$(SRCDATADIR)/brkitr BRKBLDDIR=$(BUILDDIR)/brkitr +DICTSRCDIR=$(BRKSRCDIR)/dictionaries +BRKRULESRCDIR=$(BRKSRCDIR)/rules MISCSRCDIR=$(SRCDATADIR)/misc UCMSRCDIR=$(SRCDATADIR)/mappings SPREPSRCDIR=$(SRCDATADIR)/sprep @@ -541,7 +543,7 @@ $(BUILDDIR)/%.spp: $(SPREPSRCDIR)/%.txt $(TOOLBINDIR)/gensprep$(TOOLEXEEXT) $(BU #################################################### BRK # BRK FILES -$(BRKBLDDIR)/%.brk: $(BRKSRCDIR)/%.txt $(TOOLBINDIR)/genbrk$(TOOLEXEEXT) $(DAT_FILES) +$(BRKBLDDIR)/%.brk: $(BRKRULESRCDIR)/%.txt $(TOOLBINDIR)/genbrk$(TOOLEXEEXT) $(DAT_FILES) $(INVOKE) $(TOOLBINDIR)/genbrk -c -i $(BUILDDIR) -r $< -o $@ #################################################### DICT @@ -550,20 +552,20 @@ $(BRKBLDDIR)/%.brk: $(BRKSRCDIR)/%.txt $(TOOLBINDIR)/genbrk$(TOOLEXEEXT) $(DAT_F # .dict file generated regardless of whether dictionary file exists $(BRKBLDDIR)/%.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES) - $(INVOKE) $(TOOLBINDIR)/gendict --uchars -c -i $(BUILDDIR) $(BRKSRCDIR)/$(*F).txt $@ + $(INVOKE) $(TOOLBINDIR)/gendict --uchars -c -i 
$(BUILDDIR) $(DICTSRCDIR)/$(*F).txt $@ $(BRKBLDDIR)/thaidict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES) - $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x0e00 -c -i $(BUILDDIR) $(BRKSRCDIR)/thaidict.txt $(BRKBLDDIR)/thaidict.dict + $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x0e00 -c -i $(BUILDDIR) $(DICTSRCDIR)/thaidict.txt $(BRKBLDDIR)/thaidict.dict $(BRKBLDDIR)/laodict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES) - $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x0e80 -c -i $(BUILDDIR) $(BRKSRCDIR)/laodict.txt $(BRKBLDDIR)/laodict.dict + $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x0e80 -c -i $(BUILDDIR) $(DICTSRCDIR)/laodict.txt $(BRKBLDDIR)/laodict.dict $(BRKBLDDIR)/burmesedict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES) - $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1000 -c -i $(BUILDDIR) $(BRKSRCDIR)/burmesedict.txt $(BRKBLDDIR)/burmesedict.dict + $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1000 -c -i $(BUILDDIR) $(DICTSRCDIR)/burmesedict.txt $(BRKBLDDIR)/burmesedict.dict # TODO: figure out why combining characters are here? 
$(BRKBLDDIR)/khmerdict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES) - $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(BRKSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict + $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(DICTSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict #################################################### CFU # CFU FILES diff --git a/icu4c/source/data/brkitr/burmesedict.txt b/icu4c/source/data/brkitr/dictionaries/burmesedict.txt similarity index 99% rename from icu4c/source/data/brkitr/burmesedict.txt rename to icu4c/source/data/brkitr/dictionaries/burmesedict.txt index 512c44d0862..a42a62bfe16 100644 --- a/icu4c/source/data/brkitr/burmesedict.txt +++ b/icu4c/source/data/brkitr/dictionaries/burmesedict.txt @@ -1,4 +1,4 @@ - # Copyright (c) 2014 International Business Machines Corporation + # Copyright (c) 2015 International Business Machines Corporation # and others. All Rights Reserved. # # burmese-word-list.txt from LeRoy Benjamin Sharon, diff --git a/icu4c/source/data/brkitr/cjdict.txt b/icu4c/source/data/brkitr/dictionaries/cjdict.txt similarity index 100% rename from icu4c/source/data/brkitr/cjdict.txt rename to icu4c/source/data/brkitr/dictionaries/cjdict.txt diff --git a/icu4c/source/data/brkitr/khmerdict.txt b/icu4c/source/data/brkitr/dictionaries/khmerdict.txt similarity index 99% rename from icu4c/source/data/brkitr/khmerdict.txt rename to icu4c/source/data/brkitr/dictionaries/khmerdict.txt index 2bd9343610c..3954e3b35f9 100644 --- a/icu4c/source/data/brkitr/khmerdict.txt +++ b/icu4c/source/data/brkitr/dictionaries/khmerdict.txt @@ -1,4 +1,4 @@ - # Copyright (c) 2011-2012 International Business Machines Corporation + # Copyright (c) 2011-2015 International Business Machines Corporation # and others. All Rights Reserved. 
ក កក diff --git a/icu4c/source/data/brkitr/laodict.txt b/icu4c/source/data/brkitr/dictionaries/laodict.txt similarity index 99% rename from icu4c/source/data/brkitr/laodict.txt rename to icu4c/source/data/brkitr/dictionaries/laodict.txt index f76bcfef0d1..7dd6754f7d9 100644 --- a/icu4c/source/data/brkitr/laodict.txt +++ b/icu4c/source/data/brkitr/dictionaries/laodict.txt @@ -1,4 +1,4 @@ - # Copyright (c) 2013 International Business Machines Corporation + # Copyright (c) 2015 International Business Machines Corporation # and others. All Rights Reserved. # # Lao Word List from Brian Eugene Wilson, Robert Martin Campbell; diff --git a/icu4c/source/data/brkitr/thaidict.txt b/icu4c/source/data/brkitr/dictionaries/thaidict.txt similarity index 100% rename from icu4c/source/data/brkitr/thaidict.txt rename to icu4c/source/data/brkitr/dictionaries/thaidict.txt diff --git a/icu4c/source/data/brkitr/char.txt b/icu4c/source/data/brkitr/rules/char.txt similarity index 96% rename from icu4c/source/data/brkitr/char.txt rename to icu4c/source/data/brkitr/rules/char.txt index abf71fcf402..5164a682792 100644 --- a/icu4c/source/data/brkitr/char.txt +++ b/icu4c/source/data/brkitr/rules/char.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2002-2013, International Business Machines Corporation and others. +# Copyright (C) 2002-2015, International Business Machines Corporation and others. # All Rights Reserved. 
# # file: char.txt diff --git a/icu4c/source/data/brkitr/line.txt b/icu4c/source/data/brkitr/rules/line.txt similarity index 100% rename from icu4c/source/data/brkitr/line.txt rename to icu4c/source/data/brkitr/rules/line.txt diff --git a/icu4c/source/data/brkitr/line_fi.txt b/icu4c/source/data/brkitr/rules/line_fi.txt similarity index 100% rename from icu4c/source/data/brkitr/line_fi.txt rename to icu4c/source/data/brkitr/rules/line_fi.txt diff --git a/icu4c/source/data/brkitr/line_loose.txt b/icu4c/source/data/brkitr/rules/line_loose.txt similarity index 100% rename from icu4c/source/data/brkitr/line_loose.txt rename to icu4c/source/data/brkitr/rules/line_loose.txt diff --git a/icu4c/source/data/brkitr/line_loose_cj.txt b/icu4c/source/data/brkitr/rules/line_loose_cj.txt similarity index 100% rename from icu4c/source/data/brkitr/line_loose_cj.txt rename to icu4c/source/data/brkitr/rules/line_loose_cj.txt diff --git a/icu4c/source/data/brkitr/line_loose_fi.txt b/icu4c/source/data/brkitr/rules/line_loose_fi.txt similarity index 100% rename from icu4c/source/data/brkitr/line_loose_fi.txt rename to icu4c/source/data/brkitr/rules/line_loose_fi.txt diff --git a/icu4c/source/data/brkitr/line_normal.txt b/icu4c/source/data/brkitr/rules/line_normal.txt similarity index 100% rename from icu4c/source/data/brkitr/line_normal.txt rename to icu4c/source/data/brkitr/rules/line_normal.txt diff --git a/icu4c/source/data/brkitr/line_normal_cj.txt b/icu4c/source/data/brkitr/rules/line_normal_cj.txt similarity index 100% rename from icu4c/source/data/brkitr/line_normal_cj.txt rename to icu4c/source/data/brkitr/rules/line_normal_cj.txt diff --git a/icu4c/source/data/brkitr/line_normal_fi.txt b/icu4c/source/data/brkitr/rules/line_normal_fi.txt similarity index 100% rename from icu4c/source/data/brkitr/line_normal_fi.txt rename to icu4c/source/data/brkitr/rules/line_normal_fi.txt diff --git a/icu4c/source/data/brkitr/sent.txt b/icu4c/source/data/brkitr/rules/sent.txt similarity index 
100% rename from icu4c/source/data/brkitr/sent.txt rename to icu4c/source/data/brkitr/rules/sent.txt diff --git a/icu4c/source/data/brkitr/sent_el.txt b/icu4c/source/data/brkitr/rules/sent_el.txt similarity index 100% rename from icu4c/source/data/brkitr/sent_el.txt rename to icu4c/source/data/brkitr/rules/sent_el.txt diff --git a/icu4c/source/data/brkitr/title.txt b/icu4c/source/data/brkitr/rules/title.txt similarity index 96% rename from icu4c/source/data/brkitr/title.txt rename to icu4c/source/data/brkitr/rules/title.txt index 30c1c40d45b..5384497462d 100644 --- a/icu4c/source/data/brkitr/title.txt +++ b/icu4c/source/data/brkitr/rules/title.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2002-2014, International Business Machines Corporation and +# Copyright (c) 2002-2015, International Business Machines Corporation and # others. All Rights Reserved. # # Title Casing Break Rules diff --git a/icu4c/source/data/brkitr/word.txt b/icu4c/source/data/brkitr/rules/word.txt similarity index 99% rename from icu4c/source/data/brkitr/word.txt rename to icu4c/source/data/brkitr/rules/word.txt index f89a2fe7452..e7ea67cfeef 100644 --- a/icu4c/source/data/brkitr/word.txt +++ b/icu4c/source/data/brkitr/rules/word.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2002-2013, International Business Machines Corporation +# Copyright (C) 2002-2015, International Business Machines Corporation # and others. All Rights Reserved. # # file: word.txt diff --git a/icu4c/source/data/brkitr/word_POSIX.txt b/icu4c/source/data/brkitr/rules/word_POSIX.txt similarity index 99% rename from icu4c/source/data/brkitr/word_POSIX.txt rename to icu4c/source/data/brkitr/rules/word_POSIX.txt index 17cc473a889..df305db8104 100644 --- a/icu4c/source/data/brkitr/word_POSIX.txt +++ b/icu4c/source/data/brkitr/rules/word_POSIX.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2002-2014, International Business Machines Corporation +# Copyright (C) 2002-2015, International Business Machines Corporation # and others. All Rights Reserved. 
# # file: word_POSIX.txt diff --git a/icu4c/source/data/makedata.mak b/icu4c/source/data/makedata.mak index 9246834ccd2..bf14fdde6e8 100644 --- a/icu4c/source/data/makedata.mak +++ b/icu4c/source/data/makedata.mak @@ -753,30 +753,30 @@ CLEAN : GODATA # RBBI .brk file generation. -{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)}.txt.brk: +{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\rules}.txt.brk: @echo Creating $@ @"$(ICUTOOLS)\genbrk\$(CFG)\genbrk" -c -r $< -o $@ -d"$(ICUBLD_PKG)" -i "$(ICUBLD_PKG)" #RBBI .dict file generation. -{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)}.txt.dict: +{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\dictionaries}.txt.dict: @echo Creating $@ @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --uchars $< "$(ICUBLD_PKG)\$@" $(ICUBRK)\thaidict.dict: @echo Creating $(ICUBRK)\thaidict.dict - @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0x0e00 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\thaidict.txt "$(ICUBLD_PKG)\$(ICUBRK)\thaidict.dict" + @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0x0e00 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\dictionaries\thaidict.txt "$(ICUBLD_PKG)\$(ICUBRK)\thaidict.dict" $(ICUBRK)\laodict.dict: @echo Creating $(ICUBRK)\laodict.dict - @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0x0e80 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\laodict.txt "$(ICUBLD_PKG)\$(ICUBRK)\laodict.dict" + @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0x0e80 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\dictionaries\laodict.txt "$(ICUBLD_PKG)\$(ICUBRK)\laodict.dict" $(ICUBRK)\burmesedict.dict: @echo Creating $(ICUBRK)\burmesedict.dict - @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0x1000 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\burmesedict.txt "$(ICUBLD_PKG)\$(ICUBRK)\burmesedict.dict" + @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0x1000 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\dictionaries\burmesedict.txt "$(ICUBLD_PKG)\$(ICUBRK)\burmesedict.dict" 
$(ICUBRK)\khmerdict.dict: @echo Creating $(ICUBRK)\khmerdict.dict - @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0x1780 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\khmerdict.txt "$(ICUBLD_PKG)\$(ICUBRK)\khmerdict.dict" + @"$(ICUTOOLS)\gendict\$(CFG)\gendict" -c --bytes --transform offset-0x1780 $(ICUSRCDATA_RELATIVE_PATH)\$(ICUBRK)\dictionaries\khmerdict.txt "$(ICUBLD_PKG)\$(ICUBRK)\khmerdict.dict" !IFNDEF ICUDATA_SOURCE_ARCHIVE # Rule for creating converters -- 2.40.0