/*
**********************************************************************
-* Copyright (C) 2000-2014, International Business Machines
+* Copyright (C) 2000-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv2022.cpp
*/
#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
static const char SHIFT_IN_STR[] = "\x0F";
// static const char SHIFT_OUT_STR[] = "\x0E";
+#endif
#define CR 0x0D
#define LF 0x0A
} StateEnum;
/* is the StateEnum charset value for a DBCS charset? */
+#if UCONFIG_ONLY_HTML_CONVERSION
+#define IS_JP_DBCS(cs) (JISX208==(cs)) /* HTML-only build: only JISX208 is treated as a DBCS charset */
+#else /* full build: any StateEnum value in the range JISX208..KSC5601 counts as DBCS */
#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
+#endif /* UCONFIG_ONLY_HTML_CONVERSION */
#define CSM(cs) ((uint16_t)1<<(cs))
* all versions, not just JIS7 and JIS8.
* - ICU does not distinguish between different versions of JIS X 0208.
*/
+#if UCONFIG_ONLY_HTML_CONVERSION
+enum { MAX_JA_VERSION=0 }; /* HTML-only build: jpCharsetMasks[] has a single entry (index 0) */
+#else
enum { MAX_JA_VERSION=4 };
+#endif /* UCONFIG_ONLY_HTML_CONVERSION */
static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
+#if !UCONFIG_ONLY_HTML_CONVERSION /* entries 1..4 exist only when MAX_JA_VERSION is 4 */
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
+#endif /* !UCONFIG_ONLY_HTML_CONVERSION */
};
typedef enum {
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
};
-
/* Type def for refactoring changeState_2022 code*/
typedef enum{
#ifdef U_ENABLE_GENERIC_ISO_2022
ISO_2022=0,
#endif
ISO_2022_JP=1,
+#if !UCONFIG_ONLY_HTML_CONVERSION /* NOTE(review): when this is compiled out the list ends with a trailing comma after ISO_2022_JP=1 - confirm all target compilers accept it */
ISO_2022_KR=2,
ISO_2022_CN=3
+#endif /* !UCONFIG_ONLY_HTML_CONVERSION */
} Variant2022;
/*********** ISO 2022 Converter Protos ***********/
/*const UConverterSharedData _ISO2022Data;*/
extern const UConverterSharedData _ISO2022JPData;
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
extern const UConverterSharedData _ISO2022KRData;
extern const UConverterSharedData _ISO2022CNData;
+#endif
} // namespace
myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
myConverterData->name[len+1]='\0';
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
(myLocale[2]=='_' || myLocale[2]=='\0'))
{
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
}
}
+#endif // !UCONFIG_ONLY_HTML_CONVERSION
else{
#ifdef U_ENABLE_GENERIC_ISO_2022
myConverterData->isFirstBuffer = TRUE;
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
};
+#if !UCONFIG_ONLY_HTML_CONVERSION
/*************** to unicode *******************/
static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
/* 0 1 2 3 4 5 6 7 8 9 */
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
};
+#endif
static UCNV_TableStates_2022
}
}
break;
+#if !UCONFIG_ONLY_HTML_CONVERSION
case ISO_2022_CN:
{
StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
}
break;
+#endif // !UCONFIG_ONLY_HTML_CONVERSION
default:
*err = U_ILLEGAL_ESCAPE_SEQUENCE;
}
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
/*Checks the characters of the buffer against valid 2022 escape sequences
*if they match we return a pointer to the initial start of the sequence otherwise
*we return sourceLimit
return mySource;
#endif
}
-
+#endif
/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
* any future change in _MBCSFromUChar32() function should be reflected here.
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
/***************************************************************
* Rules for ISO-2022-KR encoding
* i) The KSC5601 designator sequence should appear only once in a file,
args->target = myTarget;
args->source = mySource;
}
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
static void
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
sa->addRange(sa->set, HWKANA_START, HWKANA_END);
}
break;
+#if !UCONFIG_ONLY_HTML_CONVERSION
case 'c':
case 'z':
/* include ASCII for CN */
cnvData->currentConverter, sa, which, pErrorCode);
/* the loop over myConverterArray[] will simply not find another converter */
break;
+#endif
default:
break;
}
for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
UConverterSetFilter filter;
if(cnvData->myConverterArray[i]!=NULL) {
- if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
- cnvData->version==0 && i==CNS_11643
- ) {
+ if(cnvData->locale[0]=='j' && i==JISX208) {
+ /*
+ * Only add code points that map to Shift-JIS codes
+ * corresponding to JIS X 0208.
+ */
+ filter=UCNV_SET_FILTER_SJIS;
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
+ cnvData->version==0 && i==CNS_11643) {
/*
* Version-specific for CN:
* CN version 0 does not map CNS planes 3..7 although
* The two versions create different Unicode sets.
*/
filter=UCNV_SET_FILTER_2022_CN;
- } else if(cnvData->locale[0]=='j' && i==JISX208) {
- /*
- * Only add code points that map to Shift-JIS codes
- * corresponding to JIS X 0208.
- */
- filter=UCNV_SET_FILTER_SJIS;
} else if(i==KSC5601) {
/*
* Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables)
* are broader than GR94.
*/
filter=UCNV_SET_FILTER_GR94DBCS;
+#endif
} else {
filter=UCNV_SET_FILTER_NONE;
}
} // namespace
+#if !UCONFIG_ONLY_HTML_CONVERSION
/************* KR ***************/
static const UConverterImpl _ISO2022KRImpl={
UCNV_ISO_2022,
};
} // namespace
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
/*
********************************************************************
* COPYRIGHT:
- * Copyright (c) 1996-2014, International Business Machines Corporation and
+ * Copyright (c) 1996-2015, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*
- * uconv_bld.cpp:
+ * ucnv_bld.cpp:
*
* Defines functions that are used in the creation/initialization/deletion
* of converters and related structures.
#endif
&_Latin1Data,
- &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData,
+ &_UTF8Data, &_UTF16BEData, &_UTF16LEData,
+#if UCONFIG_ONLY_HTML_CONVERSION
+ NULL, NULL,
+#else
+ &_UTF32BEData, &_UTF32LEData,
+#endif
NULL,
#if UCONFIG_NO_LEGACY_CONVERSION
NULL,
+#else
+ &_ISO2022Data,
+#endif
+
+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL,
#else
- &_ISO2022Data,
&_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
&_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
&_HZData,
#endif
+#if UCONFIG_ONLY_HTML_CONVERSION
+ NULL,
+#else
&_SCSUData,
+#endif
-#if UCONFIG_NO_LEGACY_CONVERSION
+
+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
NULL,
#else
&_ISCIIData,
#endif
&_ASCIIData,
+#if UCONFIG_ONLY_HTML_CONVERSION
+ NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
+#else
&_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
+#endif
-#if UCONFIG_NO_LEGACY_CONVERSION
+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
NULL,
#else
&_CompoundTextData
const char *name;
const UConverterType type;
} const cnvNameType[] = {
+#if !UCONFIG_ONLY_HTML_CONVERSION
{ "bocu1", UCNV_BOCU1 },
{ "cesu8", UCNV_CESU8 },
-#if !UCONFIG_NO_LEGACY_CONVERSION
+#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
{ "hz",UCNV_HZ },
#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
{ "imapmailboxname", UCNV_IMAP_MAILBOX },
-#if !UCONFIG_NO_LEGACY_CONVERSION
+#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
{ "iscii", UCNV_ISCII },
+#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION
{ "iso2022", UCNV_ISO_2022 },
#endif
{ "iso88591", UCNV_LATIN_1 },
-#if !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
{ "lmbcs1", UCNV_LMBCS_1 },
{ "lmbcs11",UCNV_LMBCS_11 },
{ "lmbcs16",UCNV_LMBCS_16 },
{ "lmbcs6", UCNV_LMBCS_6 },
{ "lmbcs8", UCNV_LMBCS_8 },
#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
{ "scsu", UCNV_SCSU },
+#endif
{ "usascii", UCNV_US_ASCII },
{ "utf16", UCNV_UTF16 },
{ "utf16be", UCNV_UTF16_BigEndian },
{ "utf16oppositeendian", UCNV_UTF16_BigEndian},
{ "utf16platformendian", UCNV_UTF16_LittleEndian },
#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
{ "utf32", UCNV_UTF32 },
{ "utf32be", UCNV_UTF32_BigEndian },
{ "utf32le", UCNV_UTF32_LittleEndian },
{ "utf32oppositeendian", UCNV_UTF32_BigEndian },
{ "utf32platformendian", UCNV_UTF32_LittleEndian },
#endif
+#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
{ "utf7", UCNV_UTF7 },
+#endif
{ "utf8", UCNV_UTF8 },
+#if !UCONFIG_ONLY_HTML_CONVERSION
{ "x11compoundtext", UCNV_COMPOUND_TEXT}
+#endif
};
/*
**********************************************************************
-* Copyright (C) 2010-2014, International Business Machines
+* Copyright (C) 2010-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_ct.c
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/uset.h"
/*
**********************************************************************
-* Copyright (C) 2000-2014, International Business Machines
+* Copyright (C) 2000-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_lmb.cpp
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
#include "unicode/ucnv_err.h"
#include "unicode/ucnv.h"
/*
**********************************************************************
-* Copyright (C) 2002-2011, International Business Machines
+* Copyright (C) 2002-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u32.c
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/utf.h"
/*
**********************************************************************
-* Copyright (C) 2002-2011, International Business Machines
+* Copyright (C) 2002-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u7.c
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
#include "unicode/ucnv.h"
#include "ucnv_bld.h"
/*
**********************************************************************
-* Copyright (C) 2002-2012, International Business Machines
+* Copyright (C) 2002-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u8.c
static const uint32_t
utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
+static UBool hasCESU8Data(const UConverter *cnv) /* returns whether cnv->sharedData is the _CESU8Data shared data */
+{
+#if UCONFIG_ONLY_HTML_CONVERSION
+ return FALSE; /* _CESU8Data is not referenced in this configuration, so no converter is treated as CESU-8 */
+#else
+ return (UBool)(cnv->sharedData == &_CESU8Data); /* identity comparison of the shared-data pointer */
+#endif /* UCONFIG_ONLY_HTML_CONVERSION */
+}
+
static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
const UChar *targetLimit = args->targetLimit;
unsigned char *toUBytes = cnv->toUBytes;
- UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
+ UBool isCESU8 = hasCESU8Data(cnv);
uint32_t ch, ch2 = 0;
int32_t i, inBytes;
-
+
/* Restore size of current sequence */
if (cnv->toUnicodeStatus && myTarget < targetLimit)
{
const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
const UChar *targetLimit = args->targetLimit;
unsigned char *toUBytes = cnv->toUBytes;
- UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
+ UBool isCESU8 = hasCESU8Data(cnv);
uint32_t ch, ch2 = 0;
int32_t i, inBytes;
UChar32 ch;
uint8_t tempBuf[4];
int32_t indexToWrite;
- UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
+ UBool isNotCESU8 = !hasCESU8Data(cnv);
if (cnv->fromUChar32 && myTarget < targetLimit)
{
int32_t offsetNum, nextSourceIndex;
int32_t indexToWrite;
uint8_t tempBuf[4];
- UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
+ UBool isNotCESU8 = !hasCESU8Data(cnv);
if (cnv->fromUChar32 && myTarget < targetLimit)
{
/*
******************************************************************************
*
-* Copyright (C) 2002-2011, International Business Machines
+* Copyright (C) 2002-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_cb.h"
/*
**********************************************************************
-* Copyright (C) 2000-2014, International Business Machines
+* Copyright (C) 2000-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnvhz.c
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
#include "cmemory.h"
#include "unicode/ucnv.h"
0
};
-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
+#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */
/*
**********************************************************************
-* Copyright (C) 2000-2012, International Business Machines
+* Copyright (C) 2000-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnvisci.c
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_cb.h"
/*
******************************************************************************
*
-* Copyright (C) 2000-2011, International Business Machines
+* Copyright (C) 2000-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_cb.h"
/*
**********************************************************************
-* Copyright (C) 2002-2014, International Business Machines
+* Copyright (C) 2002-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uconfig.h
* It does not turn off legacy conversion because that is necessary
* for ICU to work on EBCDIC platforms (for the default converter).
* If you want "only collation" and do not build for EBCDIC,
- * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
+ * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
*
* @stable ICU 2.4
*/
# define UCONFIG_NO_LEGACY_CONVERSION 1
#endif
+/**
+ * \def UCONFIG_ONLY_HTML_CONVERSION
+ * This switch turns off all of the converters NOT listed in
+ * the HTML encoding standard:
+ * http://www.w3.org/TR/encoding/#names-and-labels
+ *
+ * This is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * Defaults to 0 when not already defined, which leaves this switch off.
+ *
+ * @draft ICU 55
+ */
+#ifndef UCONFIG_ONLY_HTML_CONVERSION
+# define UCONFIG_ONLY_HTML_CONVERSION 0
+#endif /* UCONFIG_ONLY_HTML_CONVERSION */
+
/**
* \def UCONFIG_NO_LEGACY_CONVERSION
* This switch turns off all converters except for
/*
**********************************************************************
- * Copyright (C) 2005-2013, International Business Machines
+ * Copyright (C) 2005-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
+#if !UCONFIG_ONLY_HTML_CONVERSION
new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
+#endif
};
int32_t rCount = ARRAY_SIZE(tempArray);
/*
**********************************************************************
- * Copyright (C) 2005-2012, International Business Machines
+ * Copyright (C) 2005-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
{0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7
};
+#if !UCONFIG_ONLY_HTML_CONVERSION
static const uint8_t escapeSequences_2022KR[][5] = {
{0x1b, 0x24, 0x29, 0x43, 0x00}
};
{0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2
{0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3
};
+#endif
CharsetRecog_2022JP::~CharsetRecog_2022JP() {}
return (confidence > 0);
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
CharsetRecog_2022KR::~CharsetRecog_2022KR() {}
const char *CharsetRecog_2022KR::getName() const {
results->set(textIn, this, confidence);
return (confidence > 0);
}
+#endif
CharsetRecog_2022::~CharsetRecog_2022() {
// nothing to do
/*
**********************************************************************
- * Copyright (C) 2005-2012, International Business Machines
+ * Copyright (C) 2005-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
UBool match(InputText *textIn, CharsetMatch *results) const;
};
+#if !UCONFIG_ONLY_HTML_CONVERSION
class CharsetRecog_2022KR :public CharsetRecog_2022 {
public:
virtual ~CharsetRecog_2022KR();
UBool match(InputText *textIn, CharsetMatch *results) const;
};
+#endif
U_NAMESPACE_END
/*
**********************************************************************
- * Copyright (C) 2005-2013, International Business Machines
+ * Copyright (C) 2005-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
return (int32_t) (rawPercent * 300.0);
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
static const uint8_t unshapeMap_IBM420[] = {
/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
}
}
}
+#endif
CharsetRecog_sbcs::CharsetRecog_sbcs()
{
0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
};
+#if !UCONFIG_ONLY_HTML_CONVERSION
static const int32_t ngrams_IBM424_he_rtl[] = {
0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
/* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
/* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
};
+#endif
//ISO-8859-1,2,5,6,7,8,9 Ngrams
return (confidence > 0);
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
{
// nothing to do
results->set(textIn, this, confidence);
return (confidence > 0);
}
+#endif
U_NAMESPACE_END
#endif
/*
**********************************************************************
- * Copyright (C) 2005-2013, International Business Machines
+ * Copyright (C) 2005-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
};
+#if !UCONFIG_ONLY_HTML_CONVERSION
class NGramParser_IBM420 : public NGramParser
{
private:
public:
NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
};
+#endif
class CharsetRecog_sbcs : public CharsetRecognizer
virtual UBool match(InputText *det, CharsetMatch *results) const;
};
+#if !UCONFIG_ONLY_HTML_CONVERSION
class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
{
public:
virtual UBool match(InputText *det, CharsetMatch *results) const;
};
+#endif
U_NAMESPACE_END