// Table of charset recognizer entries. Each element is a heap-allocated
// CSRecognizerInfo wrapping a freshly constructed recognizer object.
// NOTE(review): every entry shown passes TRUE as the second constructor
// argument; presumably an "enabled by default" flag -- confirm against the
// CSRecognizerInfo declaration.
// NOTE(review): the lines beginning with "-#if" / "-#endif" look like removal
// markers left over from a patch, not compilable preprocessor directives --
// confirm before building this text as-is.
CSRecognizerInfo *tempArray[] = {
new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE),
-#if !UCONFIG_ONLY_HTML_CONVERSION
new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE),
-#endif
new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE),
-#if !UCONFIG_ONLY_HTML_CONVERSION
new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE),
new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE),
-#endif
new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE),
new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE),
// NOTE(review): the initializer list appears to be cut off at this point; the
// comment and closing brace below may belong to a different definition -- confirm.
// nothing to do
}
-#if !UCONFIG_ONLY_HTML_CONVERSION
// Destructor of CharsetRecog_UTF_16_BE; the visible body holds only a
// "nothing to do" comment.
// NOTE(review): the lines that follow the destructor header appear to splice in
// fragments of other definitions (a body returning a charset name and the
// lead-in comment of a confidence routine) -- confirm against the full file.
CharsetRecog_UTF_16_BE::~CharsetRecog_UTF_16_BE()
{
// nothing to do
{
// Yields the string literal "UTF-16BE".
return "UTF-16BE";
}
-#endif
// UTF-16 confidence calculation. Very simple minded, but better than nothing.
// Any 8 bit non-control characters bump the confidence up. These have a zero high byte,
}
-#if !UCONFIG_ONLY_HTML_CONVERSION
// Records a match for this recognizer and reports whether it is usable.
//
// textIn  - input text descriptor; its fRawInput member is read here.
// results - receives the match via results->set(textIn, this, confidence).
// Returns the value of (confidence > 0).
//
// NOTE(review): `confidence` is used but not declared in the visible lines, and
// `input` is assigned but never read here; the code that computes the
// confidence seems to have been elided -- confirm against the full file.
UBool CharsetRecog_UTF_16_BE::match(InputText* textIn, CharsetMatch *results) const
{
const uint8_t *input = textIn->fRawInput;
results->set(textIn, this, confidence);
return (confidence > 0);
}
-#endif
// Destructor of CharsetRecog_UTF_16_LE.
// NOTE(review): the body shown returns a value and refers to `confidence`,
// which is not declared in the visible lines; this looks like the tail of
// another function spliced under the destructor header -- confirm.
CharsetRecog_UTF_16_LE::~CharsetRecog_UTF_16_LE()
{
return (confidence > 0);
}
-#if !UCONFIG_ONLY_HTML_CONVERSION
// Destructor of CharsetRecog_UTF_32.
// NOTE(review): the return statement below does not fit a destructor and uses
// `input` / `index`, neither of which is declared in the visible lines; it
// looks like the body of a separate helper spliced in here -- confirm.
CharsetRecog_UTF_32::~CharsetRecog_UTF_32()
{
// nothing to do
// Combines four consecutive elements of `input`, starting at `index`, into one
// value with input[index + 0] in the lowest 8 bits and input[index + 3]
// shifted up by 24 (i.e. least-significant byte first).
// NOTE(review): if the elements are 8-bit unsigned, they are promoted to int
// before shifting, so `<< 24` can reach the sign bit -- confirm the element
// type and the intended return type.
return input[index + 3] << 24 | input[index + 2] << 16 |
input[index + 1] << 8 | input[index + 0];
}
-#endif
U_NAMESPACE_END
#endif
};
-#if !UCONFIG_ONLY_HTML_CONVERSION
// Recognizer class derived publicly from CharsetRecog_Unicode.
// NOTE(review): only one member is visible here; further members may have been
// elided from this excerpt -- confirm against the full header.
class CharsetRecog_UTF_16_BE : public CharsetRecog_Unicode
{
public:
// Const member taking the input text and a CharsetMatch to fill in; returns UBool.
UBool match(InputText* textIn, CharsetMatch *results) const;
};
-#endif
// Recognizer class derived publicly from CharsetRecog_Unicode.
// NOTE(review): no access specifier is visible, so as written match() has the
// default (private) access of a `class`, whereas CharsetRecog_UTF_16_BE
// declares it under `public:` -- confirm whether a `public:` line was elided.
class CharsetRecog_UTF_16_LE : public CharsetRecog_Unicode
{
// Const member taking the input text and a CharsetMatch to fill in; returns UBool.
UBool match(InputText* textIn, CharsetMatch *results) const;
};
-#if !UCONFIG_ONLY_HTML_CONVERSION
// Class derived publicly from CharsetRecog_Unicode.
// NOTE(review): only one member is visible here; further members may have been
// elided from this excerpt -- confirm against the full header.
class CharsetRecog_UTF_32 : public CharsetRecog_Unicode
{
protected:
// Const accessor returning a C string; declared with protected access.
const char* getName() const;
};
-#endif
U_NAMESPACE_END