From 2f02059ddad6f56091d1f698023d758698ededc0 Mon Sep 17 00:00:00 2001 From: Peter Edberg Date: Thu, 29 Aug 2013 05:13:36 +0000 Subject: [PATCH] ICU-10299 Fix CjkBreakEngine fSet to include 30FC,FF70; fix broken test data (ICU4C) X-SVN-Rev: 34118 --- icu4c/source/common/dictbe.cpp | 5 +++-- icu4c/source/test/testdata/rbbitst.txt | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/icu4c/source/common/dictbe.cpp b/icu4c/source/common/dictbe.cpp index 15df9fb8dce..bbedf14694d 100644 --- a/icu4c/source/common/dictbe.cpp +++ b/icu4c/source/common/dictbe.cpp @@ -1,6 +1,6 @@ /** ******************************************************************************* - * Copyright (C) 2006-2012, International Business Machines Corporation + * Copyright (C) 2006-2013, International Business Machines Corporation * and others. All Rights Reserved. ******************************************************************************* */ @@ -667,7 +667,8 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType cjSet.addAll(fHanWordSet); cjSet.addAll(fKatakanaWordSet); cjSet.addAll(fHiraganaWordSet); - cjSet.add(UNICODE_STRING_SIMPLE("\\uff70\\u30fc")); + cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK setCharacters(cjSet); } } diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt index aef6b939a2c..3301893101c 100644 --- a/icu4c/source/test/testdata/rbbitst.txt +++ b/icu4c/source/test/testdata/rbbitst.txt @@ -757,11 +757,11 @@ Bangkok)• -•私<400>達<400>に<400>一<400>〇<400>〇〇<400>の<400>コンピュ<400>ー<400>タ<400>が<400>ある<400>。<0>奈々<400>は<400>ワ<400>ー<400>ド<400>で<400>ある<400>。• +•私<400>達<400>に<400>一<400>〇<400>〇〇<400>の<400>コンピュータ<400>が<400>ある<400>。<0>奈々<400>は<400>ワード<400>で<400>ある<400>。• -•私<400>達<400>に<400>一<400>〇<400>〇〇<400>の<400>コンピュ<400>ー<400>タ<400>が<400>ある<400>。<0>奈々<400>は<400>ワ<400>ー<400>ド<400>で<400>ある<400>。• +•私<400>達<400>に<400>一<400>〇<400>〇〇<400>の<400>コンピュータ<400>が<400>ある<400>。<0>奈々<400>は<400>ワード<400>で<400>ある<400>。• # UBreakIteratorType UBRK_SENTENCE, Locale "el" # Add break after Greek question mark (cldrbug #2069). -- 2.40.0