#
-# Copyright (C) 2002-2012, International Business Machines Corporation and others.
+# Copyright (C) 2002-2013, International Business Machines Corporation and others.
# All Rights Reserved.
#
# file: char.txt
## -------------------------------------------------
+# Safe rules are not logically needed for character breaks, but if none are provided at all,
+# the engine's preceding() and following() will fall back to the
+# old-style, inefficient algorithm.
!!safe_reverse;
-
+$LF $CR;
## -------------------------------------------------
!!safe_forward;
+$CR $LF;
int32_t expectedBreak = BreakIterator::DONE;
// For supplementaries, back up to the start of the character.
- int32_t currentCharStart = i < t->dataToBreak.length()? t->dataToBreak.getChar32Start(i) : i;
+ // int32_t currentCharStart = i < t->dataToBreak.length()? t->dataToBreak.getChar32Start(i) : i;
- for (int32_t j=currentCharStart-1; j >= 0; j--) {
- // for (int32_t j=i-1; j >= 0; j--) {
+ // for (int32_t j=currentCharStart-1; j >= 0; j--) {
+ for (int32_t j=i-1; j >= 0; j--) {
if (t->expectedBreaks->elementAti(j) != 0) {
expectedBreak = j;
break;
# Temp debugging tests
-<line>
-<data>•\ufffc•\u30e3\u000c<100>\u1b39\u300a\u002f\u203a\u200b•\ufffc•\uaf64•\udcfb•</data>
+<char>
+<data>•\U00010020•\U00010000\u0301•x•</data>
+<data>•\U00010020•\U00010000\N{COMBINING MACRON}•</data>
########################################################################################
#
<data>•abc\U00010300<200> •abc\N{DESERET SMALL LETTER ENG}<200> •abc\N{MATHEMATICAL BOLD SMALL Z}<200> •abc\N{MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL}<200> •</data>
# Unassigned code points
-# TODO: This case should pass.
-#<data>•abc<200>\U0001D800•def<200>\U0001D3FF• •</data>
+<data>•abc<200>\U0001D800•def<200>\U0001D3FF• •</data>
# Hiragana & Katakana stay together, but separates from each other and Latin.
# *** what to do about theoretical combos of chars? i.e. hiragana + accent
# Surrogate line break tests.
#
-#<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data> #TODO: should be same as the next line.
+<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data> #This line and the following are equivalent.
<data>•\u4e01•\U20001•\u4e02•abc •\ue000 •\Uf0001•</data>
# Regression for bug 836
<locale fi>
<line>
+# TODO: problems with Finnish line break rules cause the two commented-out lines below to fail; shortened variants of them follow.
#<data>•abc •- •def •abc •-def •abc- •def •abc-•def•</data> # With ASCII hyphen
#<data>•abc •‐ •def •abc •‐def •abc‐ •def •abc‐•def•</data> # With Unicode u2010 hyphen
+<data>•abc •- •def •abc •-def •abc- •def •</data> # With ASCII hyphen
+<data>•abc •‐ •def •abc •‐def •abc‐ •def •</data> # With Unicode u2010 hyphen