/**
*******************************************************************************
-* Copyright (C) 2006-2014, International Business Machines Corporation and *
-* others. All Rights Reserved. *
-*******************************************************************************
-*
+* Copyright (C) 2006-2015, International Business Machines Corporation and
+* others. All Rights Reserved.
*******************************************************************************
*/
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.nio.charset.spi.CharsetProvider;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
import java.util.Map;
import com.ibm.icu.impl.InvalidFormatException;
* @stable ICU 3.6
*/
public final class CharsetProviderICU extends CharsetProvider{
- private String optionsString;
-
+ /**
+ * List of available ICU Charsets, empty during static initialization.
+ */
+ private static List<Charset> icuCharsets = Collections.<Charset>emptyList();
+ /**
+ * Maps uppercased Java charset names and aliases to canonical Java charset names.
+ */
+ private static final Map<String, String> javaNamesMap = new HashMap<String, String>();
+
+ static {
+ // Fill javaNamesMap from every charset that Charset.availableCharsets() reports.
+ // This loop will exclude ICU charsets because Charset.availableCharsets() calls
+ // our charsets() which returns an empty iterator
+ // until we have tried to open all of the ICU charsets and built icuCharsets.
+ // We can only open ICU charsets when we have the javaNamesMap, for getting the Java canonical name.
+ for (Map.Entry<String, Charset> nameAndCharset : Charset.availableCharsets().entrySet()) {
+ String canonicalName = nameAndCharset.getKey(); // availableCharsets() is keyed by canonical name
+ javaNamesMap.put(ASCII.toUpperCase(canonicalName), canonicalName); // canonical name maps to itself
+ for (String alias : nameAndCharset.getValue().aliases()) {
+ javaNamesMap.put(ASCII.toUpperCase(alias), canonicalName); // each alias maps to the canonical name
+ }
+ }
+ }
+
+ /**
+ * Simpler/faster methods for ASCII than ones based on Unicode data.
+ * <p>{@code toUpperCase(String)} converts only the letters 'a'..'z' to 'A'..'Z' and
+ * copies every other char unchanged. When the input contains no such letter,
+ * the input String itself is returned and no StringBuilder is created.
+ * TODO: Check whether equivalent code already exists elsewhere and reuse it.
+ */
+ private static final class ASCII {
+ static String toUpperCase(String s) {
+ for (int i = 0; i < s.length(); ++i) { // look for the first lowercase ASCII letter
+ char c = s.charAt(i);
+ if ('a' <= c && c <= 'z') { // found one: build an uppercased copy from here on
+ StringBuilder sb = new StringBuilder(s.length());
+ sb.append(s, 0, i).append((char)(c - 0x20)); // unchanged prefix, then this letter ('a' - 'A' == 0x20)
+ while (++i < s.length()) { // convert the remainder using the same index
+ c = s.charAt(i);
+ if ('a' <= c && c <= 'z') { c = (char)(c - 0x20); }
+ sb.append(c);
+ }
+ return sb.toString();
+ }
+ }
+ return s; // no lowercase ASCII letter found: return the input as is
+ }
+ }
+
/**
* Default constructor
+ * (performs no initialization).
* @stable ICU 3.6
*/
public CharsetProviderICU() {
- optionsString = null;
}
/**
- * Constructs a charset for the given charset name.
+ * Constructs a Charset for the given charset name.
* Implements the abstract method of super class.
* @param charsetName charset name
- * @return charset objet for the given charset name, null if unsupported
+ * @return Charset object for the given charset name, null if unsupported
* @stable ICU 3.6
*/
public final Charset charsetForName(String charsetName){
try{
// extract the options from the charset name
- charsetName = processOptions(charsetName);
+ String optionsString = "";
+ if (charsetName.endsWith(UConverterConstants.OPTION_SWAP_LFNL_STRING)) {
+ /* Remove and save the swap lfnl option string portion of the charset name. */
+ optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING;
+ charsetName = charsetName.substring(0, charsetName.length() - optionsString.length());
+ }
// get the canonical name
String icuCanonicalName = getICUCanonicalName(charsetName);
-
- // create the converter object and return it
+
+ // create the converter object and return it
if(icuCanonicalName==null || icuCanonicalName.length()==0){
// Try the original name, may be something added and not in the alias table.
// Will get an unsupported encoding exception if it doesn't work.
- return getCharset(charsetName);
+ icuCanonicalName = charsetName;
}
- return getCharset(icuCanonicalName);
+ return getCharset(icuCanonicalName, optionsString);
}catch(UnsupportedCharsetException ex){
}catch(IOException ex){
}
throw new UnsupportedCharsetException(enc);
}
}
- private Charset getCharset(String icuCanonicalName) throws IOException{
- String[] aliases = getAliases(icuCanonicalName);
+ private static final Charset getCharset(String icuCanonicalName, String optionsString)
+ throws IOException { // callers in this class pass "" as optionsString when there is no option
+ String[] aliases = getAliases(icuCanonicalName); // looked up with the plain ICU name, without options
String canonicalName = getJavaCanonicalName(icuCanonicalName);
-
+
/* Concat the option string to the icuCanonicalName so that the options can be handled properly
* by the actual charset.
*/
- if (optionsString != null) {
- icuCanonicalName = icuCanonicalName.concat(optionsString);
- optionsString = null;
- }
-
- return (CharsetICU.getCharset(icuCanonicalName,canonicalName, aliases));
+ return (CharsetICU.getCharset(icuCanonicalName + optionsString, canonicalName, aliases)); // only the ICU name carries the option suffix
}
/**
* Gets the canonical name of the converter as defined by Java
cName = "x-"+ name;
}
}
- /* After getting the java canonical name from ICU alias table, get the
- * java canonical name from the current JDK. This is neccessary because
+ /* After getting the Java canonical name from the ICU alias table, get the
+ * Java canonical name from the current JDK. This is necessary because
* different versions of the JVM (Sun and IBM) may have a different
- * canonical name then the one given by ICU. So the java canonical name
- * will depend on the current JVM. Since java cannot use the ICU canonical
- * we have to try to use a java compatible name.
+ * canonical name than the one given by ICU. So the Java canonical name
+ * will depend on the current JVM. Since Java cannot use the ICU canonical name,
+ * we have to try to use a Java-compatible name.
*/
if (cName != null) {
- try {
- if (Charset.isSupported(cName)) {
- String testName = Charset.forName(cName).name();
- /* Ensure that the java canonical name works in ICU */
- if (!testName.equals(cName)) {
- if (getICUCanonicalName(testName).length() > 0) {
- cName = testName;
- }
- }
- }
- } catch (Exception e) {
- // Any exception in the try block above
- // must result Java's canonical name to be
- // null. This block is necessary to reset
- // gettingJavaCanonicalName to true always.
- // See #9966.
- // Note: The use of static gettingJavaCanonicalName
- // looks really dangerous and obviously thread unsafe.
- // We should revisit this code later. See #9973
- cName = null;
+ String testName = javaNamesMap.get(ASCII.toUpperCase(cName));
+ if (testName != null && !testName.equals(cName) &&
+ getICUCanonicalName(testName).length() > 0) {
+ cName = testName;
}
}
return cName;
}
- private void putCharsets(Map<Charset, String> map){
+ /**
+ * Lazy-init the icuCharsets list.
+ * Tries to create a Charset for every converter name reported by UConverterAlias
+ * and keeps the ones that can be created; the result replaces icuCharsets as an
+ * unmodifiable list. Returns without doing anything if icuCharsets is already
+ * non-empty or if javaNamesMap is still empty.
+ * Could be done during static initialization if constructing all of the Charsets
+ * were cheap enough. See ICU ticket #11481.
+ */
+ private static final synchronized void loadAvailableICUCharsets() {
+ // The Java names Map is empty during static initialization when we are
+ // just about to build it.
+ if (!icuCharsets.isEmpty() || javaNamesMap.isEmpty()) { // already loaded, or too early to load
+ return;
+ }
+ List<Charset> icucs = new LinkedList<Charset>();
int num = UConverterAlias.countAvailable();
- for(int i=0;i<num;i++) {
+ for (int i = 0; i < num; ++i) {
String name = UConverterAlias.getAvailableName(i);
try {
- Charset cs = getCharset(name);
- map.put(cs, getJavaCanonicalName(name));
- }catch(UnsupportedCharsetException ex){
- }catch (IOException e) {
+ Charset cs = getCharset(name, ""); // "" = no option suffix
+ icucs.add(cs);
+ } catch(UnsupportedCharsetException ex) { // deliberately ignored: this name is skipped
+ } catch(IOException e) { // deliberately ignored: this name is skipped
}
// add only charsets that can be created!
}
+ // Unmodifiable so that remove() on the Iterator returned by charsets() cannot change it.
+ icuCharsets = Collections.unmodifiableList(icucs);
}
/**
- * Returns an iterator for the available charsets.
+ * Returns an iterator for the available ICU Charsets.
+ * The Charsets come from the shared icuCharsets list, which
+ * loadAvailableICUCharsets() fills lazily; that list starts out empty and is
+ * replaced by an unmodifiable list, so elements cannot be removed through the iterator.
* Implements the abstract method of super class.
- * @return Iterator the charset name iterator
+ * @return the Charset iterator
* @stable ICU 3.6
*/
- public final Iterator<Charset> charsets(){
- HashMap<Charset, String> map = new HashMap<Charset, String>();
- putCharsets(map);
- return map.keySet().iterator();
+ public final Iterator<Charset> charsets() {
+ loadAvailableICUCharsets(); // does nothing when the list is already filled
+ return icuCharsets.iterator();
}
-
+
/**
- * Gets the canonical names of available converters
+ * Gets the canonical names of available ICU converters
+ * (the Charset.name() of each entry in icuCharsets, in list order;
+ * a new array is created on every call).
* @return array of available converter names
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
- public static final String[] getAvailableNames(){
- CharsetProviderICU provider = new CharsetProviderICU();
- HashMap<Charset, String> map = new HashMap<Charset, String>();
- provider.putCharsets(map);
- return map.values().toArray(new String[0]);
+ public static final String[] getAvailableNames() {
+ loadAvailableICUCharsets(); // make sure the list has been built before reading it
+ String[] names = new String[icuCharsets.size()];
+ int i = 0;
+ for (Charset cs : icuCharsets) {
+ names[i++] = cs.name(); // Charset.name() is the canonical name
+ }
+ return names;
}
-
+
/**
* Return all names available
* @return String[] an array of all available names
}
return names;
}
-
- private String processOptions(String charsetName) {
- if (charsetName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING) > -1) {
- /* Remove and save the swap lfnl option string portion of the charset name. */
- optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING;
-
- charsetName = charsetName.substring(0, charsetName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING));
- }
-
- return charsetName;
- }
}
/**
*******************************************************************************
-* Copyright (C) 2006-2014, International Business Machines Corporation and *
-* others. All Rights Reserved. *
-*******************************************************************************
-*
+* Copyright (C) 2006-2015, International Business Machines Corporation and
+* others. All Rights Reserved.
*******************************************************************************
*/
import com.ibm.icu.text.UnicodeSet;
public class TestCharset extends TestFmwk {
- private String m_encoding = "UTF-16";
- CharsetDecoder m_decoder = null;
- CharsetEncoder m_encoder = null;
- Charset m_charset =null;
- static final String unistr = "abcd\ud800\udc00\u1234\u00a5\u3000\r\n";
- static final byte[] byteStr ={
- (byte) 0x00,(byte) 'a',
- (byte) 0x00,(byte) 'b',
- (byte) 0x00,(byte) 'c',
- (byte) 0x00,(byte) 'd',
- (byte) 0xd8,(byte) 0x00,
- (byte) 0xdc,(byte) 0x00,
- (byte) 0x12,(byte) 0x34,
- (byte) 0x00,(byte) 0xa5,
- (byte) 0x30,(byte) 0x00,
- (byte) 0x00,(byte) 0x0d,
- (byte) 0x00,(byte) 0x0a };
- static final byte[] expectedByteStr ={
- (byte) 0xfe,(byte) 0xff,
- (byte) 0x00,(byte) 'a',
- (byte) 0x00,(byte) 'b',
- (byte) 0x00,(byte) 'c',
- (byte) 0x00,(byte) 'd',
- (byte) 0xd8,(byte) 0x00,
- (byte) 0xdc,(byte) 0x00,
- (byte) 0x12,(byte) 0x34,
- (byte) 0x00,(byte) 0xa5,
- (byte) 0x30,(byte) 0x00,
- (byte) 0x00,(byte) 0x0d,
- (byte) 0x00,(byte) 0x0a };
-
- protected void init(){
- try{
- CharsetProviderICU provider = new CharsetProviderICU();
- //Charset charset = CharsetICU.forName(encoding);
- m_charset = provider.charsetForName(m_encoding);
- m_decoder = (CharsetDecoder) m_charset.newDecoder();
- m_encoder = (CharsetEncoder) m_charset.newEncoder();
- }catch(MissingResourceException ex){
- warnln("Could not load charset data");
- }
- }
-
public static void main(String[] args) throws Exception {
new TestCharset().run(args);
}
// }
- public void TestAPISemantics(/*String encoding*/)
- throws Exception {
- int rc;
+ public void TestAPISemantics(/*String encoding*/) {
+ String encoding = "UTF-16";
+ CharsetDecoder decoder = null;
+ CharsetEncoder encoder = null;
+ try {
+ CharsetProviderICU provider = new CharsetProviderICU();
+ Charset charset = provider.charsetForName(encoding);
+ decoder = charset.newDecoder();
+ encoder = charset.newEncoder();
+ } catch(MissingResourceException ex) {
+ warnln("Could not load charset data: " + encoding);
+ return;
+ }
+
+ final String unistr = "abcd\ud800\udc00\u1234\u00a5\u3000\r\n";
+ final byte[] byteStr = {
+ (byte) 0x00,(byte) 'a',
+ (byte) 0x00,(byte) 'b',
+ (byte) 0x00,(byte) 'c',
+ (byte) 0x00,(byte) 'd',
+ (byte) 0xd8,(byte) 0x00,
+ (byte) 0xdc,(byte) 0x00,
+ (byte) 0x12,(byte) 0x34,
+ (byte) 0x00,(byte) 0xa5,
+ (byte) 0x30,(byte) 0x00,
+ (byte) 0x00,(byte) 0x0d,
+ (byte) 0x00,(byte) 0x0a
+ };
+ final byte[] expectedByteStr = {
+ (byte) 0xfe,(byte) 0xff,
+ (byte) 0x00,(byte) 'a',
+ (byte) 0x00,(byte) 'b',
+ (byte) 0x00,(byte) 'c',
+ (byte) 0x00,(byte) 'd',
+ (byte) 0xd8,(byte) 0x00,
+ (byte) 0xdc,(byte) 0x00,
+ (byte) 0x12,(byte) 0x34,
+ (byte) 0x00,(byte) 0xa5,
+ (byte) 0x30,(byte) 0x00,
+ (byte) 0x00,(byte) 0x0d,
+ (byte) 0x00,(byte) 0x0a
+ };
+
ByteBuffer byes = ByteBuffer.wrap(byteStr);
CharBuffer uniVal = CharBuffer.wrap(unistr);
ByteBuffer expected = ByteBuffer.wrap(expectedByteStr);
-
- rc = 0;
- if(m_decoder==null){
+
+ int rc = 0;
+ if(decoder==null){
warnln("Could not load decoder.");
return;
}
- m_decoder.reset();
+ decoder.reset();
/* Convert the whole buffer to Unicode */
try {
CharBuffer chars = CharBuffer.allocate(unistr.length());
- CoderResult result = m_decoder.decode(byes, chars, false);
+ CoderResult result = decoder.decode(byes, chars, false);
if (result.isError()) {
errln("ToChars encountered Error");
try {
CharBuffer chars = CharBuffer.allocate(unistr.length());
ByteBuffer b = ByteBuffer.wrap(byteStr);
- m_decoder.reset();
+ decoder.reset();
CoderResult result=null;
for (int i = 1; i <= byteStr.length; i++) {
b.limit(i);
- result = m_decoder.decode(b, chars, false);
+ result = decoder.decode(b, chars, false);
if(result.isOverflow()){
errln("ToChars single threw an overflow exception");
}
/* Convert the buffer one at a time to Unicode */
try {
CharBuffer chars = CharBuffer.allocate(unistr.length());
- m_decoder.reset();
+ decoder.reset();
byes.rewind();
for (int i = 1; i <= byteStr.length; i++) {
byes.limit(i);
- CoderResult result = m_decoder.decode(byes, chars, false);
+ CoderResult result = decoder.decode(byes, chars, false);
if (result.isError()) {
errln("Error while decoding: "+result.toString());
}
/* Convert the whole buffer from unicode */
try {
ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length);
- m_encoder.reset();
- CoderResult result = m_encoder.encode(uniVal, bytes, false);
+ encoder.reset();
+ CoderResult result = encoder.encode(uniVal, bytes, false);
if (result.isError()) {
errln("FromChars reported error: " + result.toString());
rc = 1;
try {
ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length);
CharBuffer c = CharBuffer.wrap(unistr);
- m_encoder.reset();
+ encoder.reset();
CoderResult result= null;
for (int i = 1; i <= unistr.length(); i++) {
c.limit(i);
- result = m_encoder.encode(c, bytes, false);
+ result = encoder.encode(c, bytes, false);
if(result.isOverflow()){
errln("FromChars single threw an overflow exception");
}
/* Convert one char at a time to unicode */
try {
ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length);
- m_encoder.reset();
+ encoder.reset();
char[] temp = unistr.toCharArray();
CoderResult result=null;
for (int i = 0; i <= temp.length; i++) {
uniVal.limit(i);
- result = m_encoder.encode(uniVal, bytes, false);
+ result = encoder.encode(uniVal, bytes, false);
if(result.isOverflow()){
errln("FromChars simple threw an overflow exception");
}
rc = 9;
}
if (rc != 0) {
- errln("Test Simple FromChars " + m_encoding + " --FAILED");
+ errln("Test Simple FromChars " + encoding + " --FAILED");
}
}
'\u22B5','\u22B6','\u22B7','\u22B8','\u22B9',
'\u22BA','\u22BB','\u22BC','\u22BD','\u22BE'
};
- if(m_encoder==null){
- warnln("Could not load encoder.");
+ String encoding = "UTF-16";
+ CharsetEncoder encoder = null;
+ try {
+ CharsetProviderICU provider = new CharsetProviderICU();
+ Charset charset = provider.charsetForName(encoding);
+ encoder = charset.newEncoder();
+ } catch(MissingResourceException ex) {
+ warnln("Could not load charset data: " + encoding);
return;
}
- m_encoder.reset();
- if (!m_encoder.canEncode(new String(mySource))) {
- errln("Test canConvert() " + m_encoding + " failed. "+m_encoder);
+ if (!encoder.canEncode(new String(mySource))) {
+ errln("Test canConvert() " + encoding + " failed. "+encoder);
}
}
}
public void convertAllTest(ByteBuffer bSource, CharBuffer uSource) throws Exception {
+ String encoding = "UTF-16";
+ CharsetDecoder decoder = null;
+ CharsetEncoder encoder = null;
+ try {
+ CharsetProviderICU provider = new CharsetProviderICU();
+ Charset charset = provider.charsetForName(encoding);
+ decoder = charset.newDecoder();
+ encoder = charset.newEncoder();
+ } catch(MissingResourceException ex) {
+ warnln("Could not load charset data: " + encoding);
+ return;
+ }
{
try {
- m_decoder.reset();
+ decoder.reset();
ByteBuffer mySource = bSource.duplicate();
- CharBuffer myTarget = m_decoder.decode(mySource);
+ CharBuffer myTarget = decoder.decode(mySource);
if (!equals(myTarget, uSource)) {
errln(
"--Test convertAll() "
- + m_encoding
+ + encoding
+ " to Unicode --FAILED");
}
} catch (Exception e) {
}
{
try {
- m_encoder.reset();
+ encoder.reset();
CharBuffer mySource = CharBuffer.wrap(uSource);
- ByteBuffer myTarget = m_encoder.encode(mySource);
+ ByteBuffer myTarget = encoder.encode(mySource);
if (!equals(myTarget, bSource)) {
errln(
"--Test convertAll() "
- + m_encoding
+ + encoding
+ " to Unicode --FAILED");
}
} catch (Exception e) {