From 47d5e79a13a3940b168741fe6e0e4d1baae5f138 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 4 Mar 2014 23:46:53 +0000 Subject: [PATCH] ICU-7203 remove MAX_CP_COUNT=200 limitation from Punycode X-SVN-Rev: 35338 --- .../core/src/com/ibm/icu/impl/Punycode.java | 171 +++++++----------- 1 file changed, 66 insertions(+), 105 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Punycode.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Punycode.java index cef7866a6b8..e67bc3bf1b2 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Punycode.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Punycode.java @@ -1,7 +1,7 @@ /* ******************************************************************************* - * Copyright (C) 2003-2010, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 2003-2014, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ package com.ibm.icu.impl; @@ -28,8 +28,8 @@ public final class Punycode { private static final int INITIAL_N = 0x80; /* "Basic" Unicode/ASCII code points */ - private static final int HYPHEN = 0x2d; - private static final int DELIMITER = HYPHEN; + private static final char HYPHEN = 0x2d; + private static final char DELIMITER = HYPHEN; private static final int ZERO = 0x30; //private static final int NINE = 0x39; @@ -39,10 +39,7 @@ public final class Punycode { private static final int CAPITAL_A = 0x41; private static final int CAPITAL_Z = 0x5a; - private static final int MAX_CP_COUNT = 200; - //private static final int UINT_MAGIC = 0x80000000; - //private static final long ULONG_MAGIC = 0x8000000000000000L; - + private static int adaptBias(int delta, int length, boolean firstTime){ if(firstTime){ delta /=DAMP; @@ -133,35 +130,22 @@ public final class Punycode { * @return An array of ASCII code points. */ public static StringBuilder encode(CharSequence src, boolean[] caseFlags) throws StringPrepParseException{ - - int[] cpBuffer = new int[MAX_CP_COUNT]; - int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount; + int n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, srcCPCount; char c, c2; int srcLength = src.length(); - int destCapacity = MAX_CP_COUNT; - char[] dest = new char[destCapacity]; - StringBuilder result = new StringBuilder(); + int[] cpBuffer = new int[srcLength]; + StringBuilder dest = new StringBuilder(srcLength); /* * Handle the basic code points and * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): */ - srcCPCount=destLength=0; + srcCPCount=0; for(j=0; j0) { - if(destLength state to , but guard against overflow: */ - if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) { + if(m-n>(0x7fffffff-delta)/(handledCPCount+1)) { throw new IllegalStateException("Internal program error"); } delta+=(m-n)*(handledCPCount+1); @@ -251,15 +232,11 @@ public final class Punycode { break; } - if(destLength0;) { if(src.charAt(--j)==DELIMITER) { break; } } - destLength=basicLength=destCPCount=j; + basicLength=destCPCount=j; - while(j>0) { - b=src.charAt(--j); + for(j=0; j1) { - firstSupplementaryIndex=codeUnitIndex; - } else { - ++firstSupplementaryIndex; - } + cpLength=Character.charCount(n); + int codeUnitIndex; + + /* + * Handle indexes when supplementary code points are present. + * + * In almost all cases, there will be only BMP code points before i + * and even in the entire string. + * This is handled with the same efficiency as with UTF-32. + * + * Only the rare cases with supplementary code points are handled + * more slowly - but not too bad since this is an insertion anyway. + */ + if(i<=firstSupplementaryIndex) { + codeUnitIndex=i; + if(cpLength>1) { + firstSupplementaryIndex=codeUnitIndex; } else { - codeUnitIndex=firstSupplementaryIndex; - codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex); + ++firstSupplementaryIndex; } + } else { + codeUnitIndex=dest.offsetByCodePoints(firstSupplementaryIndex, i-firstSupplementaryIndex); + } - /* use the UChar index codeUnitIndex instead of the code point index i */ - if(codeUnitIndex