granicus.if.org Git - icu/commitdiff
ICU-10583 Fixed a minor problem in illegal lead byte handling in the UTF-8 charset...
author Yoshito Umaoka <y.umaoka@gmail.com>
Fri, 10 Jan 2014 16:12:09 +0000 (16:12 +0000)
committer Yoshito Umaoka <y.umaoka@gmail.com>
Fri, 10 Jan 2014 16:12:09 +0000 (16:12 +0000)
X-SVN-Rev: 34857

icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_UTF8.java

index 454e81c97c8e4940535f60bd0339dd674b6e7960..37357329b86440278a7cb89aeb3942a42c1f23c3 100644 (file)
@@ -1,6 +1,6 @@
 /**
 *******************************************************************************
-* Copyright (C) 2005 - 2013, International Business Machines Corporation and  *
+* Copyright (C) 2005 - 2014, International Business Machines Corporation and  *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@@ -48,10 +48,7 @@ class CharsetRecog_UTF8 extends CharsetRecognizer {
                 trailBytes = 3;
             } else {
                 numInvalid++;
-                if (numInvalid > 5) {
-                    break;
-                }
-                trailBytes = 0;
+                continue;
             }
                 
             // Verify that we've got the right number of trail bytes in the sequence
@@ -70,7 +67,6 @@ class CharsetRecog_UTF8 extends CharsetRecognizer {
                     break;
                 }
             }
-                        
         }
         
         // Cook up some sort of confidence score, based on presense of a BOM