From 8e6898ae3d9f8a0192fcead3bb7cc5c4117b1305 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 20 Feb 2015 19:31:33 +0000 Subject: [PATCH] ICU-11296 based on patch from Jungshik, approved option name UCONFIG_ONLY_HTML_CONVERSION, turn off UTF-32, simplify changes, fix warnings X-SVN-Rev: 37045 --- icu4c/source/common/ucnv2022.cpp | 54 +++++++++++++++++++++------ icu4c/source/common/ucnv_bld.cpp | 50 ++++++++++++++++++++----- icu4c/source/common/ucnv_ct.c | 4 +- icu4c/source/common/ucnv_lmb.c | 4 +- icu4c/source/common/ucnv_u32.c | 4 +- icu4c/source/common/ucnv_u7.c | 4 +- icu4c/source/common/ucnv_u8.c | 21 ++++++++--- icu4c/source/common/ucnvbocu.cpp | 4 +- icu4c/source/common/ucnvhz.c | 6 +-- icu4c/source/common/ucnvisci.c | 4 +- icu4c/source/common/ucnvscsu.c | 4 +- icu4c/source/common/unicode/uconfig.h | 19 +++++++++- icu4c/source/i18n/csdetect.cpp | 4 +- icu4c/source/i18n/csr2022.cpp | 6 ++- icu4c/source/i18n/csr2022.h | 4 +- icu4c/source/i18n/csrsbcs.cpp | 8 +++- icu4c/source/i18n/csrsbcs.h | 6 ++- 17 files changed, 155 insertions(+), 51 deletions(-) diff --git a/icu4c/source/common/ucnv2022.cpp b/icu4c/source/common/ucnv2022.cpp index 9556dd2c096..fcdda5870a0 100644 --- a/icu4c/source/common/ucnv2022.cpp +++ b/icu4c/source/common/ucnv2022.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2014, International Business Machines +* Copyright (C) 2000-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnv2022.cpp @@ -75,8 +75,10 @@ */ #endif +#if !UCONFIG_ONLY_HTML_CONVERSION static const char SHIFT_IN_STR[] = "\x0F"; // static const char SHIFT_OUT_STR[] = "\x0E"; +#endif #define CR 0x0D #define LF 0x0A @@ -152,7 +154,11 @@ typedef enum { } StateEnum; /* is the StateEnum charset value for a DBCS charset? */ +#if UCONFIG_ONLY_HTML_CONVERSION +#define IS_JP_DBCS(cs) (JISX208==(cs)) +#else #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) +#endif #define CSM(cs) ((uint16_t)1<<(cs)) @@ -165,13 +171,19 @@ typedef enum { * all versions, not just JIS7 and JIS8. * - ICU does not distinguish between different versions of JIS X 0208. */ +#if UCONFIG_ONLY_HTML_CONVERSION +enum { MAX_JA_VERSION=0 }; +#else enum { MAX_JA_VERSION=4 }; +#endif static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), +#if !UCONFIG_ONLY_HTML_CONVERSION CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) +#endif }; typedef enum { @@ -358,15 +370,16 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = { ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 }; - /* Type def for refactoring changeState_2022 code*/ typedef enum{ #ifdef U_ENABLE_GENERIC_ISO_2022 ISO_2022=0, #endif ISO_2022_JP=1, +#if !UCONFIG_ONLY_HTML_CONVERSION ISO_2022_KR=2, ISO_2022_CN=3 +#endif } Variant2022; /*********** ISO 2022 Converter Protos ***********/ @@ -397,8 +410,11 @@ namespace { /*const UConverterSharedData _ISO2022Data;*/ extern const UConverterSharedData _ISO2022JPData; + +#if !UCONFIG_ONLY_HTML_CONVERSION extern const UConverterSharedData _ISO2022KRData; extern const UConverterSharedData _ISO2022CNData; +#endif } // namespace @@ -511,6 +527,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); myConverterData->name[len+1]='\0'; } +#if !UCONFIG_ONLY_HTML_CONVERSION else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && (myLocale[2]=='_' || myLocale[2]=='\0')) { @@ -580,6 +597,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); } } +#endif // !UCONFIG_ONLY_HTML_CONVERSION else{ #ifdef U_ENABLE_GENERIC_ISO_2022 myConverterData->isFirstBuffer = TRUE; @@ -714,6 +732,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE }; +#if !UCONFIG_ONLY_HTML_CONVERSION /*************** to unicode *******************/ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { /* 0 1 2 3 4 5 6 7 8 9 */ @@ -726,6 +745,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE }; +#endif static UCNV_TableStates_2022 @@ -898,6 +918,7 @@ DONE: } } break; +#if !UCONFIG_ONLY_HTML_CONVERSION case ISO_2022_CN: { StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; @@ -959,6 +980,7 @@ DONE: *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; } break; +#endif // !UCONFIG_ONLY_HTML_CONVERSION default: *err = U_ILLEGAL_ESCAPE_SEQUENCE; @@ -1001,6 +1023,7 @@ DONE: } } +#if !UCONFIG_ONLY_HTML_CONVERSION /*Checks the characters of the buffer against valid 2022 escape sequences *if the match we return a pointer to the initial start of the sequence otherwise *we return sourceLimit @@ -1055,7 +1078,7 @@ getEndOfBuffer_2022(const char** source, return mySource; #endif } - +#endif /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c * any future change in _MBCSFromUChar32() function should be reflected here. @@ -2269,6 +2292,7 @@ endloop: } +#if !UCONFIG_ONLY_HTML_CONVERSION /*************************************************************** * Rules for ISO-2022-KR encoding * i) The KSC5601 designator sequence should appear only once in a file, @@ -3412,6 +3436,7 @@ endloop: args->target = myTarget; args->source = mySource; } +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ static void _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { @@ -3638,6 +3663,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, sa->addRange(sa->set, HWKANA_START, HWKANA_END); } break; +#if !UCONFIG_ONLY_HTML_CONVERSION case 'c': case 'z': /* include ASCII for CN */ @@ -3649,6 +3675,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, cnvData->currentConverter, sa, which, pErrorCode); /* the loop over myConverterArray[] will simply not find another converter */ break; +#endif default: break; } @@ -3669,9 +3696,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, for (i=0; imyConverterArray[i]!=NULL) { - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && - cnvData->version==0 && i==CNS_11643 - ) { + if(cnvData->locale[0]=='j' && i==JISX208) { + /* + * Only add code points that map to Shift-JIS codes + * corresponding to JIS X 0208. + */ + filter=UCNV_SET_FILTER_SJIS; +#if !UCONFIG_ONLY_HTML_CONVERSION + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && + cnvData->version==0 && i==CNS_11643) { /* * Version-specific for CN: * CN version 0 does not map CNS planes 3..7 although @@ -3680,18 +3713,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, * The two versions create different Unicode sets. */ filter=UCNV_SET_FILTER_2022_CN; - } else if(cnvData->locale[0]=='j' && i==JISX208) { - /* - * Only add code points that map to Shift-JIS codes - * corresponding to JIS X 0208. - */ - filter=UCNV_SET_FILTER_SJIS; } else if(i==KSC5601) { /* * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables) * are broader than GR94. */ filter=UCNV_SET_FILTER_GR94DBCS; +#endif } else { filter=UCNV_SET_FILTER_NONE; } @@ -3829,6 +3857,7 @@ const UConverterSharedData _ISO2022JPData={ } // namespace +#if !UCONFIG_ONLY_HTML_CONVERSION /************* KR ***************/ static const UConverterImpl _ISO2022KRImpl={ UCNV_ISO_2022, @@ -3945,5 +3974,6 @@ const UConverterSharedData _ISO2022CNData={ }; } // namespace +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/icu4c/source/common/ucnv_bld.cpp b/icu4c/source/common/ucnv_bld.cpp index e75926ec544..58335d1877e 100644 --- a/icu4c/source/common/ucnv_bld.cpp +++ b/icu4c/source/common/ucnv_bld.cpp @@ -1,11 +1,11 @@ /* ******************************************************************** * COPYRIGHT: - * Copyright (c) 1996-2014, International Business Machines Corporation and + * Copyright (c) 1996-2015, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************** * - * uconv_bld.cpp: + * ucnv_bld.cpp: * * Defines functions that are used in the creation/initialization/deletion * of converters and related structures. @@ -64,33 +64,51 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ #endif &_Latin1Data, - &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData, + &_UTF8Data, &_UTF16BEData, &_UTF16LEData, +#if UCONFIG_ONLY_HTML_CONVERSION + NULL, NULL, +#else + &_UTF32BEData, &_UTF32LEData, +#endif NULL, #if UCONFIG_NO_LEGACY_CONVERSION NULL, +#else + &_ISO2022Data, +#endif + +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, #else - &_ISO2022Data, &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, &_HZData, #endif +#if UCONFIG_ONLY_HTML_CONVERSION + NULL, +#else &_SCSUData, +#endif -#if UCONFIG_NO_LEGACY_CONVERSION + +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION NULL, #else &_ISCIIData, #endif &_ASCIIData, +#if UCONFIG_ONLY_HTML_CONVERSION + NULL, NULL, &_UTF16Data, NULL, NULL, NULL, +#else &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, +#endif -#if UCONFIG_NO_LEGACY_CONVERSION +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION NULL, #else &_CompoundTextData @@ -105,18 +123,24 @@ static struct { const char *name; const UConverterType type; } const cnvNameType[] = { +#if !UCONFIG_ONLY_HTML_CONVERSION { "bocu1", UCNV_BOCU1 }, { "cesu8", UCNV_CESU8 }, -#if !UCONFIG_NO_LEGACY_CONVERSION +#endif +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION { "hz",UCNV_HZ }, #endif +#if !UCONFIG_ONLY_HTML_CONVERSION { "imapmailboxname", UCNV_IMAP_MAILBOX }, -#if !UCONFIG_NO_LEGACY_CONVERSION +#endif +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION { "iscii", UCNV_ISCII }, +#endif +#if !UCONFIG_NO_LEGACY_CONVERSION { "iso2022", UCNV_ISO_2022 }, #endif { "iso88591", UCNV_LATIN_1 }, -#if !UCONFIG_NO_LEGACY_CONVERSION +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION { "lmbcs1", UCNV_LMBCS_1 }, { "lmbcs11",UCNV_LMBCS_11 }, { "lmbcs16",UCNV_LMBCS_16 }, @@ -130,7 +154,9 @@ static struct { { "lmbcs6", UCNV_LMBCS_6 }, { "lmbcs8", UCNV_LMBCS_8 }, #endif +#if !UCONFIG_ONLY_HTML_CONVERSION { "scsu", UCNV_SCSU }, +#endif { "usascii", UCNV_US_ASCII }, { "utf16", UCNV_UTF16 }, { "utf16be", UCNV_UTF16_BigEndian }, @@ -142,6 +168,7 @@ static struct { { "utf16oppositeendian", UCNV_UTF16_BigEndian}, { "utf16platformendian", UCNV_UTF16_LittleEndian }, #endif +#if !UCONFIG_ONLY_HTML_CONVERSION { "utf32", UCNV_UTF32 }, { "utf32be", UCNV_UTF32_BigEndian }, { "utf32le", UCNV_UTF32_LittleEndian }, @@ -152,9 +179,14 @@ static struct { { "utf32oppositeendian", UCNV_UTF32_BigEndian }, { "utf32platformendian", UCNV_UTF32_LittleEndian }, #endif +#endif +#if !UCONFIG_ONLY_HTML_CONVERSION { "utf7", UCNV_UTF7 }, +#endif { "utf8", UCNV_UTF8 }, +#if !UCONFIG_ONLY_HTML_CONVERSION { "x11compoundtext", UCNV_COMPOUND_TEXT} +#endif }; diff --git a/icu4c/source/common/ucnv_ct.c b/icu4c/source/common/ucnv_ct.c index ec0e9c2bb5a..165dfe3f908 100644 --- a/icu4c/source/common/ucnv_ct.c +++ b/icu4c/source/common/ucnv_ct.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2010-2014, International Business Machines +* Copyright (C) 2010-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnv_ct.c @@ -14,7 +14,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION #include "unicode/ucnv.h" #include "unicode/uset.h" diff --git a/icu4c/source/common/ucnv_lmb.c b/icu4c/source/common/ucnv_lmb.c index 1d921dd7b37..c2b4872428f 100644 --- a/icu4c/source/common/ucnv_lmb.c +++ b/icu4c/source/common/ucnv_lmb.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2014, International Business Machines +* Copyright (C) 2000-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnv_lmb.cpp @@ -25,7 +25,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION #include "unicode/ucnv_err.h" #include "unicode/ucnv.h" diff --git a/icu4c/source/common/ucnv_u32.c b/icu4c/source/common/ucnv_u32.c index c24aaeeed79..59141ed1014 100644 --- a/icu4c/source/common/ucnv_u32.c +++ b/icu4c/source/common/ucnv_u32.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2002-2011, International Business Machines +* Copyright (C) 2002-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnv_u32.c @@ -16,7 +16,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION #include "unicode/ucnv.h" #include "unicode/utf.h" diff --git a/icu4c/source/common/ucnv_u7.c b/icu4c/source/common/ucnv_u7.c index 42943f4129a..d51bee47da2 100644 --- a/icu4c/source/common/ucnv_u7.c +++ b/icu4c/source/common/ucnv_u7.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2002-2011, International Business Machines +* Copyright (C) 2002-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnv_u7.c @@ -16,7 +16,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION #include "unicode/ucnv.h" #include "ucnv_bld.h" diff --git a/icu4c/source/common/ucnv_u8.c b/icu4c/source/common/ucnv_u8.c index 8ee9fe54764..dba2ca1a292 100644 --- a/icu4c/source/common/ucnv_u8.c +++ b/icu4c/source/common/ucnv_u8.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2002-2012, International Business Machines +* Copyright (C) 2002-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnv_u8.c @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = { static const uint32_t utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; +static UBool hasCESU8Data(const UConverter *cnv) +{ +#if UCONFIG_ONLY_HTML_CONVERSION + return FALSE; +#else + return (UBool)(cnv->sharedData == &_CESU8Data); +#endif +} + static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, UErrorCode * err) { @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; const UChar *targetLimit = args->targetLimit; unsigned char *toUBytes = cnv->toUBytes; - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); + UBool isCESU8 = hasCESU8Data(cnv); uint32_t ch, ch2 = 0; int32_t i, inBytes; - + /* Restore size of current sequence */ if (cnv->toUnicodeStatus && myTarget < targetLimit) { @@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args, const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; const UChar *targetLimit = args->targetLimit; unsigned char *toUBytes = cnv->toUBytes; - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); + UBool isCESU8 = hasCESU8Data(cnv); uint32_t ch, ch2 = 0; int32_t i, inBytes; @@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args, UChar32 ch; uint8_t tempBuf[4]; int32_t indexToWrite; - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); + UBool isNotCESU8 = !hasCESU8Data(cnv); if (cnv->fromUChar32 && myTarget < targetLimit) { @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar int32_t offsetNum, nextSourceIndex; int32_t indexToWrite; uint8_t tempBuf[4]; - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); + UBool isNotCESU8 = !hasCESU8Data(cnv); if (cnv->fromUChar32 && myTarget < targetLimit) { diff --git a/icu4c/source/common/ucnvbocu.cpp b/icu4c/source/common/ucnvbocu.cpp index b97d6662c7d..1a8f6ad73fb 100644 --- a/icu4c/source/common/ucnvbocu.cpp +++ b/icu4c/source/common/ucnvbocu.cpp @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2002-2011, International Business Machines +* Copyright (C) 2002-2015, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -19,7 +19,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION #include "unicode/ucnv.h" #include "unicode/ucnv_cb.h" diff --git a/icu4c/source/common/ucnvhz.c b/icu4c/source/common/ucnvhz.c index 3760c3913d0..c10ee3f3780 100644 --- a/icu4c/source/common/ucnvhz.c +++ b/icu4c/source/common/ucnvhz.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2014, International Business Machines +* Copyright (C) 2000-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnvhz.c @@ -16,7 +16,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION #include "cmemory.h" #include "unicode/ucnv.h" @@ -635,4 +635,4 @@ const UConverterSharedData _HZData={ 0 }; -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */ diff --git a/icu4c/source/common/ucnvisci.c b/icu4c/source/common/ucnvisci.c index fe61d40ac6c..8b883819b44 100644 --- a/icu4c/source/common/ucnvisci.c +++ b/icu4c/source/common/ucnvisci.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2012, International Business Machines +* Copyright (C) 2000-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnvisci.c @@ -17,7 +17,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION #include "unicode/ucnv.h" #include "unicode/ucnv_cb.h" diff --git a/icu4c/source/common/ucnvscsu.c b/icu4c/source/common/ucnvscsu.c index c6e96e1f074..cb49fa9f98d 100644 --- a/icu4c/source/common/ucnvscsu.c +++ b/icu4c/source/common/ucnvscsu.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2000-2011, International Business Machines +* Copyright (C) 2000-2015, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -21,7 +21,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION #include "unicode/ucnv.h" #include "unicode/ucnv_cb.h" diff --git a/icu4c/source/common/unicode/uconfig.h b/icu4c/source/common/unicode/uconfig.h index ed073b63ea6..f6223bb8a87 100644 --- a/icu4c/source/common/unicode/uconfig.h +++ b/icu4c/source/common/unicode/uconfig.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2002-2014, International Business Machines +* Copyright (C) 2002-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: uconfig.h @@ -200,7 +200,7 @@ * It does not turn off legacy conversion because that is necessary * for ICU to work on EBCDIC platforms (for the default converter). * If you want "only collation" and do not build for EBCDIC, - * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well. + * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well. * * @stable ICU 2.4 */ @@ -269,6 +269,21 @@ # define UCONFIG_NO_LEGACY_CONVERSION 1 #endif +/** + * \def UCONFIG_ONLY_HTML_CONVERSION + * This switch turns off all of the converters NOT listed in + * the HTML encoding standard: + * http://www.w3.org/TR/encoding/#names-and-labels + * + * This is not possible on EBCDIC platforms + * because they need ibm-37 or ibm-1047 default converters. + * + * @draft ICU 55 + */ +#ifndef UCONFIG_ONLY_HTML_CONVERSION +# define UCONFIG_ONLY_HTML_CONVERSION 0 +#endif + /** * \def UCONFIG_NO_LEGACY_CONVERSION * This switch turns off all converters except for diff --git a/icu4c/source/i18n/csdetect.cpp b/icu4c/source/i18n/csdetect.cpp index 3efbd49260b..66d8f3a2ae3 100644 --- a/icu4c/source/i18n/csdetect.cpp +++ b/icu4c/source/i18n/csdetect.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2013, International Business Machines + * Copyright (C) 2005-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), +#if !UCONFIG_ONLY_HTML_CONVERSION new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) +#endif }; int32_t rCount = ARRAY_SIZE(tempArray); diff --git a/icu4c/source/i18n/csr2022.cpp b/icu4c/source/i18n/csr2022.cpp index 3db0bc9f36a..236a5267145 100644 --- a/icu4c/source/i18n/csr2022.cpp +++ b/icu4c/source/i18n/csr2022.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2012, International Business Machines + * Copyright (C) 2005-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = { {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 }; +#if !UCONFIG_ONLY_HTML_CONVERSION static const uint8_t escapeSequences_2022KR[][5] = { {0x1b, 0x24, 0x29, 0x43, 0x00} }; @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = { {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 }; +#endif CharsetRecog_2022JP::~CharsetRecog_2022JP() {} @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const return (confidence > 0); } +#if !UCONFIG_ONLY_HTML_CONVERSION CharsetRecog_2022KR::~CharsetRecog_2022KR() {} const char *CharsetRecog_2022KR::getName() const { @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const results->set(textIn, this, confidence); return (confidence > 0); } +#endif CharsetRecog_2022::~CharsetRecog_2022() { // nothing to do diff --git a/icu4c/source/i18n/csr2022.h b/icu4c/source/i18n/csr2022.h index 2ac2b87db8d..f63e3374070 100644 --- a/icu4c/source/i18n/csr2022.h +++ b/icu4c/source/i18n/csr2022.h @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2012, International Business Machines + * Copyright (C) 2005-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -65,6 +65,7 @@ public: UBool match(InputText *textIn, CharsetMatch *results) const; }; +#if !UCONFIG_ONLY_HTML_CONVERSION class CharsetRecog_2022KR :public CharsetRecog_2022 { public: virtual ~CharsetRecog_2022KR(); @@ -84,6 +85,7 @@ public: UBool match(InputText *textIn, CharsetMatch *results) const; }; +#endif U_NAMESPACE_END diff --git a/icu4c/source/i18n/csrsbcs.cpp b/icu4c/source/i18n/csrsbcs.cpp index d03367cc4ee..72fb959e31d 100644 --- a/icu4c/source/i18n/csrsbcs.cpp +++ b/icu4c/source/i18n/csrsbcs.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2013, International Business Machines + * Copyright (C) 2005-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det) return (int32_t) (rawPercent * 300.0); } +#if !UCONFIG_ONLY_HTML_CONVERSION static const uint8_t unshapeMap_IBM420[] = { /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det) } } } +#endif CharsetRecog_sbcs::CharsetRecog_sbcs() { @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = { 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, }; +#if !UCONFIG_ONLY_HTML_CONVERSION static const int32_t ngrams_IBM424_he_rtl[] = { 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= { /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, }; +#endif //ISO-8859-1,2,5,6,7,8,9 Ngrams @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const return (confidence > 0); } +#if !UCONFIG_ONLY_HTML_CONVERSION CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() { // nothing to do @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results results->set(textIn, this, confidence); return (confidence > 0); } +#endif U_NAMESPACE_END #endif diff --git a/icu4c/source/i18n/csrsbcs.h b/icu4c/source/i18n/csrsbcs.h index 2579c02905f..bd2a264530e 100644 --- a/icu4c/source/i18n/csrsbcs.h +++ b/icu4c/source/i18n/csrsbcs.h @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2013, International Business Machines + * Copyright (C) 2005-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -50,6 +50,7 @@ public: }; +#if !UCONFIG_ONLY_HTML_CONVERSION class NGramParser_IBM420 : public NGramParser { private: @@ -61,6 +62,7 @@ private: public: NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); }; +#endif class CharsetRecog_sbcs : public CharsetRecognizer @@ -229,6 +231,7 @@ public: virtual UBool match(InputText *det, CharsetMatch *results) const; }; +#if !UCONFIG_ONLY_HTML_CONVERSION class CharsetRecog_IBM424_he : public CharsetRecog_sbcs { public: @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar { virtual UBool match(InputText *det, CharsetMatch *results) const; }; +#endif U_NAMESPACE_END -- 2.40.0