From c68b5d9d38e9e09640f00db1ae890a8e2b89ade0 Mon Sep 17 00:00:00 2001
From: Andy Heninger <andy.heninger@gmail.com>
Date: Fri, 15 Feb 2013 07:17:59 +0000
Subject: [PATCH] ICU-9077 Enhancements to break iteration tests.

X-SVN-Rev: 33233
---
 icu4c/source/data/brkitr/char.txt      |  8 ++++++--
 icu4c/source/test/intltest/rbbitst.cpp |  6 +++---
 icu4c/source/test/testdata/rbbitst.txt | 13 ++++++++-----
 3 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/icu4c/source/data/brkitr/char.txt b/icu4c/source/data/brkitr/char.txt
index c0d9731c199..abf71fcf402 100644
--- a/icu4c/source/data/brkitr/char.txt
+++ b/icu4c/source/data/brkitr/char.txt
@@ -1,5 +1,5 @@
 #
-#   Copyright (C) 2002-2012, International Business Machines Corporation and others.
+#   Copyright (C) 2002-2013, International Business Machines Corporation and others.
 #       All Rights Reserved.
 #
 #   file:  char.txt 
@@ -66,11 +66,15 @@ $SpacingMark [^$Control $CR $LF];
 
 
 ## -------------------------------------------------
+#  Character breaks do not logically need safe-point rules, but if none are
+#  provided at all, the engine's preceding() and following() fall back to the
+#  old-style, inefficient algorithm.
 
 !!safe_reverse;
-
+$LF $CR;
 
 ## -------------------------------------------------
 
 !!safe_forward;
+$CR $LF;
 
diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp
index 3d861bb1bcc..4281bfab01e 100644
--- a/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@@ -992,10 +992,10 @@ void RBBITest::executeTest(TestParams *t) {
         int32_t expectedBreak = BreakIterator::DONE;
 
         // For supplementaries, back up to the start of the character.
-        int32_t currentCharStart = i < t->dataToBreak.length()? t->dataToBreak.getChar32Start(i) : i;
+        // int32_t currentCharStart = i < t->dataToBreak.length()? t->dataToBreak.getChar32Start(i) : i;
 
-        for (int32_t j=currentCharStart-1; j >= 0; j--) {
-        // for (int32_t j=i-1; j >= 0; j--) {
+        // for (int32_t j=currentCharStart-1; j >= 0; j--) {
+        for (int32_t j=i-1; j >= 0; j--) {
             if (t->expectedBreaks->elementAti(j) != 0) {
                 expectedBreak = j;
                 break;
diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt
index 5a240f19ed9..49f310c8f60 100644
--- a/icu4c/source/test/testdata/rbbitst.txt
+++ b/icu4c/source/test/testdata/rbbitst.txt
@@ -33,8 +33,9 @@
 
 
 #   Temp debugging tests 
-<line>
-<data>â¢\ufffcâ¢\u30e3\u000c<100>\u1b39\u300a\u002f\u203a\u200bâ¢\ufffcâ¢\uaf64â¢\udcfbâ¢</data>
+<char>
+<data>â¢\U00010020â¢\U00010000\u0301â¢xâ¢</data>
+<data>â¢\U00010020â¢\U00010000\N{COMBINING MACRON}â¢</data>
 
 ########################################################################################
 #
@@ -167,8 +168,7 @@
 <data>â¢abc\U00010300<200> â¢abc\N{DESERET SMALL LETTER ENG}<200> â¢abc\N{MATHEMATICAL BOLD SMALL Z}<200> â¢abc\N{MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL}<200> â¢</data>
 
 # Unassigned code points
-# TODO: This case should pass.
-#<data>â¢abc<200>\U0001D800â¢def<200>\U0001D3FFâ¢ â¢</data>
+<data>â¢abc<200>\U0001D800â¢def<200>\U0001D3FFâ¢ â¢</data>
 
 # Hiragana & Katakana stay together, but separates from each other and Latin.
 # *** what to do about theoretical combos of chars? i.e. hiragana + accent
@@ -539,7 +539,7 @@ What is the proper use of the abbreviation pp.? â¢Yes, I am definatelly 12" tal
 
 #      Surrogate line break tests.
 #
-#<data>â¢\u4e01â¢\ud840\udc01â¢\u4e02â¢abc â¢\ue000 â¢\udb80\udc01â¢</data> #TODO: should be same as the next line.
+<data>â¢\u4e01â¢\ud840\udc01â¢\u4e02â¢abc â¢\ue000 â¢\udb80\udc01â¢</data>  #This line and the following are equivalent.
 <data>â¢\u4e01â¢\U20001â¢\u4e02â¢abc â¢\ue000 â¢\Uf0001â¢</data>
 
 #      Regression for bug 836
@@ -819,6 +819,9 @@ Bangkok)â¢</data>
 
 <locale fi>
 <line>
+# TODO: Problems with the Finnish line break rules cause these two lines to fail.
 #<data>â¢abc â¢- â¢def    â¢abc â¢-def    â¢abc- â¢def   â¢abc-â¢defâ¢</data>   # With ASCII hyphen
 #<data>â¢abc â¢â â¢def    â¢abc â¢âdef    â¢abcâ â¢def   â¢abcââ¢defâ¢</data>   # With Unicode u2010 hyphen
 
+<data>â¢abc â¢- â¢def    â¢abc â¢-def    â¢abc- â¢def   â¢</data>   # With ASCII hyphen
+<data>â¢abc â¢â â¢def    â¢abc â¢âdef    â¢abcâ â¢def   â¢</data>   # With Unicode u2010 hyphen
-- 
2.49.0