const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
- if(allModes!=NULL) {
- allModes->impl.getFCDTrie(errorCode);
- return &allModes->fcd;
- } else {
- return NULL;
- }
+ return allModes!=NULL ? &allModes->fcd : NULL;  // getFCDTrie() is no longer called here; yields NULL when getInstance() returned NULL.
}
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
return &((Normalizer2WithImpl *)norm2)->impl;
}
-const UTrie2 *
-Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
- Norm2AllModes *allModes=
- Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
- if(allModes!=NULL) {
- return allModes->impl.getFCDTrie(errorCode);
- } else {
- return NULL;
- }
-}
-
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
const char *name,
case UNORM2_DECOMPOSE:
return &allModes->decomp;
case UNORM2_FCD:
- allModes->impl.getFCDTrie(errorCode);
return &allModes->fcd;
case UNORM2_COMPOSE_CONTIGUOUS:
return &allModes->fcc;
}
U_CFUNC uint16_t
-unorm_getFCD16Simple(UChar32 c) {
+unorm_getFCD16(UChar32 c) {  // Replaces unorm_getFCD16Simple(); same code point parameter and uint16_t result.
UErrorCode errorCode=U_ZERO_ERROR;
- const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode);
+ const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);  // FCD data now comes from the NFC implementation object, not a separate FCD trie.
if(U_SUCCESS(errorCode)) {
- return UTRIE2_GET16(trie, c);
+ return impl->getFCD16(c);  // Per getFCD16(): lccc(c) in bits 15..8, tccc(c) in bits 7..0.
} else {
return 0;
}
}
-U_CAPI const uint16_t * U_EXPORT2
-unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
- const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
- if(U_SUCCESS(*pErrorCode)) {
- fcdHighStart=trie->highStart;
- return trie->index;
- } else {
- return NULL;
- }
-}
-
#endif // !UCONFIG_NO_NORMALIZATION
Normalizer2Impl::~Normalizer2Impl() {
udata_close(memory);
utrie2_close(normTrie);
- UTrie2Singleton(fcdTrieSingleton).deleteInstance();
delete (CanonIterData *)canonIterDataSingleton.fInstance;
}
return iter.codePointStart;
}
-class FCDTrieSingleton : public UTrie2Singleton {
-public:
- FCDTrieSingleton(SimpleSingleton &s, Normalizer2Impl &ni, UErrorCode &ec) :
- UTrie2Singleton(s), impl(ni), errorCode(ec) {}
- UTrie2 *getInstance(UErrorCode &errorCode) {
- return UTrie2Singleton::getInstance(createInstance, this, errorCode);
- }
- static void *createInstance(const void *context, UErrorCode &errorCode);
- UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
- if(value!=0) {
- impl.setFCD16FromNorm16(start, end, (uint16_t)value, newFCDTrie, errorCode);
- }
- return U_SUCCESS(errorCode);
- }
-
- Normalizer2Impl &impl;
- UTrie2 *newFCDTrie;
- UErrorCode &errorCode;
-};
-
-U_CDECL_BEGIN
-
-// Set the FCD value for a range of same-norm16 characters.
-static UBool U_CALLCONV
-enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
- return ((FCDTrieSingleton *)context)->rangeHandler(start, end, value);
-}
-
-// Collect (OR together) the FCD values for a range of supplementary characters,
-// for their lead surrogate code unit.
-static UBool U_CALLCONV
-enumRangeOrValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) {
- *((uint32_t *)context)|=value;
- return TRUE;
-}
-
-U_CDECL_END
-
-void *FCDTrieSingleton::createInstance(const void *context, UErrorCode &errorCode) {
- FCDTrieSingleton *me=(FCDTrieSingleton *)context;
- me->newFCDTrie=utrie2_open(0, 0, &errorCode);
- if(U_SUCCESS(errorCode)) {
- utrie2_enum(me->impl.getNormTrie(), NULL, enumRangeHandler, me);
- for(UChar lead=0xd800; lead<0xdc00; ++lead) {
- uint32_t oredValue=utrie2_get32(me->newFCDTrie, lead);
- utrie2_enumForLeadSurrogate(me->newFCDTrie, lead, NULL, enumRangeOrValue, &oredValue);
- if(oredValue!=0) {
- // Set a "bad" value for makeFCD() to break the quick check loop
- // and look up the value for the supplementary code point.
- // If there is any lccc, then set the worst-case lccc of 1.
- // The ORed-together value's tccc is already the worst case.
- if(oredValue>0xff) {
- oredValue=0x100|(oredValue&0xff);
- }
- utrie2_set32ForLeadSurrogateCodeUnit(me->newFCDTrie, lead, oredValue, &errorCode);
- }
- }
- utrie2_freeze(me->newFCDTrie, UTRIE2_16_VALUE_BITS, &errorCode);
- if(U_SUCCESS(errorCode)) {
- return me->newFCDTrie;
- }
- }
- utrie2_close(me->newFCDTrie);
- return NULL;
-}
-
-void Normalizer2Impl::setFCD16FromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
- UTrie2 *newFCDTrie, UErrorCode &errorCode) const {
- // Only loops for 1:1 algorithmic mappings.
- for(;;) {
- if(norm16>=MIN_NORMAL_MAYBE_YES) {
- norm16&=0xff;
- norm16|=norm16<<8;
- } else if(norm16<=minYesNo || minMaybeYes<=norm16) {
- // no decomposition or Hangul syllable, all zeros
- break;
- } else if(limitNoNo<=norm16) {
- int32_t delta=norm16-(minMaybeYes-MAX_DELTA-1);
- if(start==end) {
- start+=delta;
- norm16=getNorm16(start);
- } else {
- // the same delta leads from different original characters to different mappings
- do {
- UChar32 c=start+delta;
- setFCD16FromNorm16(c, c, getNorm16(c), newFCDTrie, errorCode);
- } while(++start<=end);
- break;
- }
- } else {
- // c decomposes, get everything from the variable-length extra data
- const uint16_t *mapping=getMapping(norm16);
- uint16_t firstUnit=*mapping;
- if((firstUnit&MAPPING_LENGTH_MASK)==0) {
- // A character that is deleted (maps to an empty string) must
- // get the worst-case lccc and tccc values because arbitrary
- // characters on both sides will become adjacent.
- norm16=0x1ff;
- } else {
- norm16=firstUnit>>8; // tccc
- if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
- norm16|=*(mapping-1)&0xff00; // lccc
- }
- }
- }
- utrie2_setRange32(newFCDTrie, start, end, norm16, TRUE, &errorCode);
- break;
- }
-}
-
-const UTrie2 *Normalizer2Impl::getFCDTrie(UErrorCode &errorCode) const {
- // Logically const: Synchronized instantiation.
- Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this);
- return FCDTrieSingleton(me->fcdTrieSingleton, *me, errorCode).getInstance(errorCode);
-}
+// Note: normalizer2impl.cpp r30982 (2011-nov-27)
+// still had getFCDTrie() which built and cached an FCD trie.
+// That provided faster access to FCD data than getFCD16FromNormData()
+// but required synchronization and consumed some 10kB of heap memory
+// in any process that uses FCD (e.g., via collation).
+// tccc180[] and smallFCD[] are intended to help with any loss of performance,
+// at least for Latin & CJK.
// Gets the FCD value from the regular normalization data.
uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
prevBoundary=src;
// We know that the previous character's lccc==0.
// Fetching the fcd16 value was deferred for this below-U+0300 code point.
- prevFCD16=getFCD16FromSingleLead(*(src-1));
+ prevFCD16=getFCD16(*(src-1));
if(prevFCD16>1) {
--prevBoundary;
}
// The exception is the call to decomposeShort() which uses the buffer
// in the normal way.
- const UTrie2 *trie=fcdTrie();
-
const UChar *prevSrc;
UChar32 c=0;
uint16_t fcd16=0;
if((c=*src)<MIN_CCC_LCCC_CP) {
prevFCD16=~c;
++src;
- } else if((fcd16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c))<=0xff) {
- prevFCD16=fcd16;
+ } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
+ prevFCD16=0;
++src;
- } else if(!U16_IS_SURROGATE(c)) {
- break;
} else {
- UChar c2;
- if(U16_IS_SURROGATE_LEAD(c)) {
- if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
- c=U16_GET_SUPPLEMENTARY(c, c2);
- }
- } else /* trail surrogate */ {
- if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
- --src;
- c=U16_GET_SUPPLEMENTARY(c2, c);
+ if(U16_IS_SURROGATE(c)) {
+ UChar c2;
+ if(U16_IS_SURROGATE_LEAD(c)) {
+ if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ }
+ } else /* trail surrogate */ {
+ if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
+ --src;
+ c=U16_GET_SUPPLEMENTARY(c2, c);
+ }
}
}
- if((fcd16=getFCD16(c))<=0xff) {
+ if((fcd16=getFCD16FromNormData(c))<=0xff) {
prevFCD16=fcd16;
src+=U16_LENGTH(c);
} else {
// We know that the previous character's lccc==0.
if(prevFCD16<0) {
// Fetching the fcd16 value was deferred for this below-U+0300 code point.
- prevFCD16=getFCD16FromSingleLead((UChar)~prevFCD16);
+ UChar32 prev=~prevFCD16;
+ prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
if(prevFCD16>1) {
--prevBoundary;
}
--p;
// Need to fetch the previous character's FCD value because
// prevFCD16 was just for the trail surrogate code point.
- prevFCD16=getFCD16FromSurrogatePair(p[0], p[1]);
+ prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1]));
// Still known to have lccc==0 because its lead surrogate unit had lccc==0.
}
if(prevFCD16>1) {
}
const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const {
- BackwardUTrie2StringIterator iter(fcdTrie(), start, p);
- uint16_t fcd16;
- do {
- fcd16=iter.previous16();
- } while(fcd16>0xff);
- return iter.codePointStart;
+ while(start<p && previousFCD16(start, p)>0xff) {}  // previousFCD16() moves p back; keep going while the value has non-zero bits 15..8 (lccc).
+ return p;  // Either p reached start, or p is where the last examined code point (value<=0xff) begins.
}
const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const {
- ForwardUTrie2StringIterator iter(fcdTrie(), p, limit);
- uint16_t fcd16;
- do {
- fcd16=iter.next16();
- } while(fcd16>0xff);
- return iter.codePointStart;
+ while(p<limit) {
+ const UChar *codePointStart=p;  // Remember where this code point begins; nextFCD16() advances p past it.
+ if(nextFCD16(p, limit)<=0xff) {  // <=0xff means bits 15..8 (lccc) are zero: the boundary is before this code point.
+ return codePointStart;
+ }
+ }
+ return p;  // Reached limit without finding a code point whose value is <=0xff.
}
// CanonicalIterator data -------------------------------------------------- ***
class U_COMMON_API Normalizer2Impl : public UMemory {
public:
Normalizer2Impl() : memory(NULL), normTrie(NULL) {
- fcdTrieSingleton.fInstance=NULL;
canonIterDataSingleton.fInstance=NULL;
}
~Normalizer2Impl();
// low-level properties ------------------------------------------------ ***
const UTrie2 *getNormTrie() const { return normTrie; }
- const UTrie2 *getFCDTrie(UErrorCode &errorCode) const ;
UBool ensureCanonIterData(UErrorCode &errorCode) const;
return norm16>=MIN_NORMAL_MAYBE_YES ? (uint8_t)norm16 : 0;
}
- uint16_t getFCD16(UChar32 c) const { return UTRIE2_GET16(fcdTrie(), c); }
- uint16_t getFCD16FromSingleLead(UChar c) const {
- return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(fcdTrie(), c);
+ /**
+ * Returns the FCD data for code point c.
+ * Negative c yields 0. Below U+0180 the value is read from tccc180[].
+ * For the rest of the BMP the smallFCD[] bit set is consulted first, and only
+ * if it does not rule out non-zero data is getFCD16FromNormData() called.
+ * Supplementary code points always go to getFCD16FromNormData().
+ * @param c A Unicode code point.
+ * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
+ */
+ uint16_t getFCD16(UChar32 c) const {
+ if(c<0) {
+ return 0;
+ } else if(c<0x180) {
+ return tccc180[c];  // Direct table lookup for U+0000..U+017F.
+ } else if(c<=0xffff) {
+ if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }  // Cheap filter: no code point in this group of 32 has FCD data.
+ }
+ return getFCD16FromNormData(c);  // Full lookup in the regular normalization data.
}
- uint16_t getFCD16FromSupplementary(UChar32 c) const {
- return UTRIE2_GET16_FROM_SUPP(fcdTrie(), c);
+ /**
+ * Returns the FCD data for the next code point (post-increment).
+ * Might skip only a lead surrogate rather than the whole surrogate pair if none of
+ * the supplementary code points associated with the lead surrogate have non-zero FCD data.
+ * On return, s has been advanced past the code unit(s) that were consumed.
+ * @param s A valid pointer into a string. Requires s!=limit.
+ * @param limit The end of the string, or NULL.
+ * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
+ */
+ uint16_t nextFCD16(const UChar *&s, const UChar *limit) const {
+ UChar32 c=*s++;
+ if(c<0x180) {
+ return tccc180[c];  // Direct table lookup for U+0000..U+017F.
+ } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
+ return 0;  // Filter says zero; for a lead surrogate only this one unit has been consumed.
+ }
+ UChar c2;
+ if(U16_IS_LEAD(c) && s!=limit && U16_IS_TRAIL(c2=*s)) {  // Lead surrogate followed by a trail surrogate: use the whole pair.
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ ++s;
+ }
+ return getFCD16FromNormData(c);  // c is a BMP code unit or the combined supplementary code point.
}
- uint16_t getFCD16FromSurrogatePair(UChar c, UChar c2) const {
- return getFCD16FromSupplementary(U16_GET_SUPPLEMENTARY(c, c2));
+ /**
+ * Returns the FCD data for the previous code point (pre-decrement).
+ * On return, s has been moved back to the first code unit that was consumed.
+ * @param start The start of the string.
+ * @param s A valid pointer into a string. Requires start<s.
+ * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
+ */
+ uint16_t previousFCD16(const UChar *start, const UChar *&s) const {
+ UChar32 c=*--s;
+ if(c<0x180) {
+ return tccc180[c];  // Direct table lookup for U+0000..U+017F.
+ }
+ if(!U16_IS_TRAIL(c)) {  // Only non-trail units are checked against the smallFCD[] filter.
+ if(!singleLeadMightHaveNonZeroFCD16(c)) {
+ return 0;
+ }
+ } else {
+ UChar c2;
+ if(start<s && U16_IS_LEAD(c2=*(s-1))) {  // Trail surrogate preceded by a lead surrogate: use the whole pair.
+ c=U16_GET_SUPPLEMENTARY(c2, c);
+ --s;
+ }
+ }
+ return getFCD16FromNormData(c);  // c is a BMP code unit or the combined supplementary code point.
}
+ /** Returns the FCD data for U+0000<=c<U+0180. Reads tccc180[c] directly, without a range check. */
+ uint16_t getFCD16FromBelow180(UChar32 c) const { return tccc180[c]; }
+ /** Returns TRUE if the single-or-lead code unit c might have non-zero FCD data. */
+ UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
+ // 0<=lead<=0xffff
+ uint8_t bits=smallFCD[lead>>8];  // One byte of flags per block of 256 code points.
+ if(bits==0) { return false; }  // No flag set anywhere in this block of 256.
+ return (UBool)((bits>>((lead>>5)&7))&1);  // Bit (lead>>5)&7 is the flag for the group of 32 code points containing lead.
+ }
+ /** Returns the FCD value from the regular normalization data. */
uint16_t getFCD16FromNormData(UChar32 c) const;
- void setFCD16FromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
- UTrie2 *newFCDTrie, UErrorCode &errorCode) const;
-
void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
CanonIterData &newData, UErrorCode &errorCode) const;
const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const;
- const UTrie2 *fcdTrie() const { return (const UTrie2 *)fcdTrieSingleton.fInstance; }
-
const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F
- SimpleSingleton fcdTrieSingleton;
SimpleSingleton canonIterDataSingleton;
};
// Get the Impl instance of the Normalizer2.
// Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.
static const Normalizer2Impl *getImpl(const Normalizer2 *norm2);
-
- static const UTrie2 *getFCDTrie(UErrorCode &errorCode);
private:
Normalizer2Factory(); // No instantiation.
};
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
/**
- * Get the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
+ * Gets the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
* @internal
*/
U_CFUNC uint16_t
-unorm_getFCD16Simple(UChar32 c);
-
-/**
- * Internal API, used by collation code.
- * Get access to the internal FCD trie table to be able to perform
- * incremental, per-code unit, FCD checks in collation.
- * One pointer is sufficient because the trie index values are offset
- * by the index size, so that the same pointer is used to access the trie data.
- * Code points at fcdHighStart and above have a zero FCD value.
- * @internal
- */
-U_CAPI const uint16_t * U_EXPORT2
-unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode);
-
-/**
- * Internal API, used by collation code.
- * Get the FCD value for a code unit, with
- * bits 15..8 lead combining class
- * bits 7..0 trail combining class
- *
- * If c is a lead surrogate and the value is not 0,
- * then some of c's associated supplementary code points have a non-zero FCD value.
- *
- * @internal
- */
-static inline uint16_t
-unorm_getFCD16(const uint16_t *fcdTrieIndex, UChar c) {
- return fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
-}
-
-/**
- * Internal API, used by collation code.
- * Get the FCD value of the next code point (post-increment), with
- * bits 15..8 lead combining class
- * bits 7..0 trail combining class
- *
- * @internal
- */
-static inline uint16_t
-unorm_nextFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart,
- const UChar *&s, const UChar *limit) {
- UChar32 c=*s++;
- uint16_t fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
- if(fcd!=0 && U16_IS_LEAD(c)) {
- UChar c2;
- if(s!=limit && U16_IS_TRAIL(c2=*s)) {
- ++s;
- c=U16_GET_SUPPLEMENTARY(c, c2);
- if(c<fcdHighStart) {
- fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_SUPP(fcdTrieIndex, c)];
- } else {
- fcd=0;
- }
- } else /* unpaired lead surrogate */ {
- fcd=0;
- }
- }
- return fcd;
-}
-
-/**
- * Internal API, used by collation code.
- * Get the FCD value of the previous code point (pre-decrement), with
- * bits 15..8 lead combining class
- * bits 7..0 trail combining class
- *
- * @internal
- */
-static inline uint16_t
-unorm_prevFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart,
- const UChar *start, const UChar *&s) {
- UChar32 c=*--s;
- uint16_t fcd;
- if(!U16_IS_SURROGATE(c)) {
- fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
- } else {
- UChar c2;
- if(U16_IS_SURROGATE_TRAIL(c) && s!=start && U16_IS_LEAD(c2=*(s-1))) {
- --s;
- c=U16_GET_SUPPLEMENTARY(c2, c);
- if(c<fcdHighStart) {
- fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_SUPP(fcdTrieIndex, c)];
- } else {
- fcd=0;
- }
- } else /* unpaired surrogate */ {
- fcd=0;
- }
- }
- return fcd;
-}
+unorm_getFCD16(UChar32 c);
/**
* Format of Normalizer2 .nrm data files.
}
#else
static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return unorm_getFCD16Simple(c)>>8;
+ return unorm_getFCD16(c)>>8;
}
#endif
}
#else
static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return unorm_getFCD16Simple(c)&0xff;
+ return unorm_getFCD16(c)&0xff;
}
#endif
#define ZERO_CC_LIMIT_ 0xC0
-// this is static pointer to the normalizer fcdTrieIndex
+// This is a static pointer to the NFC implementation instance.
// it is always the same between calls to u_cleanup
// and therefore writing to it is not synchronized.
// It is cleaned in ucol_cleanup
-static const uint16_t *fcdTrieIndex=NULL;
-// Code points at fcdHighStart and above have a zero FCD value.
-static UChar32 fcdHighStart = 0;
+static const Normalizer2Impl *g_nfcImpl = NULL;
// These are values from UCA required for
// implicit generation and supressing sort key compression
static UBool U_CALLCONV
ucol_cleanup(void)
{
- fcdTrieIndex = NULL;
+ g_nfcImpl = NULL;  // Forget the cached NFC implementation pointer; initializeFCD() fetches it again when it finds NULL.
return TRUE;
}
// init FCD data
static inline
UBool initializeFCD(UErrorCode *status) {
- if (fcdTrieIndex != NULL) {
+ if (g_nfcImpl != NULL) {
return TRUE;
} else {
// The result is constant, until the library is reloaded.
- fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
+ g_nfcImpl = Normalizer2Factory::getNFCImpl(*status);
+ // Note: Alternatively, we could also store this pointer in each collIterate struct,
+ // same as Normalizer2Factory::getImpl(collIterate->nfd).
ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
return U_SUCCESS(*status);
}
endP = NULL;
}
- // Get the trailing combining class of the current character. If it's zero,
- // we are OK.
- /* trie access */
- fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);
+ // Get the trailing combining class of the current character. If it's zero, we are OK.
+ fcd = g_nfcImpl->nextFCD16(srcP, endP);
if (fcd != 0) {
prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
{
const UChar *savedSrcP = srcP;
- /* trie access */
- fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);
+ fcd = g_nfcImpl->nextFCD16(srcP, endP);
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
if (leadingCC == 0) {
srcP = savedSrcP; // Hit char that is not part of combining sequence.
src = data->pos + 1;
/* Get the trailing combining class of the current character. */
- fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);
+ fcd = g_nfcImpl->previousFCD16(start, src);
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
return result;
}
- fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);
+ fcd = g_nfcImpl->previousFCD16(start, src);
trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
UColToken *tok = lh->first;
UColToken *expt = NULL;
uint32_t i = 0, j = 0;
- UChar32 fcdHighStart;
- const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
+ const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status);
while(tok != NULL && U_SUCCESS(*status)) {
/* first, check if there are any expansions */
if (!src->buildCCTabFlag && el.cSize > 0) {
// Check the trailing canonical combining class (tccc) of the last character.
const UChar *s = el.cPoints + el.cSize;
- uint16_t fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, el.cPoints, s);
+ uint16_t fcd = nfcImpl->previousFCD16(el.cPoints, s);
if ((fcd & 0xff) != 0) {
src->buildCCTabFlag = TRUE;
}
static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) {
UChar c;
- uint16_t fcd; // Hi byte is lead combining class.
- // lo byte is trailing combing class.
- const uint16_t *fcdTrieIndex;
- UChar32 fcdHighStart;
+ uint16_t fcd; // Hi byte is lead combining class. lo byte is trailing combing class.
UBool buildCMTable = (t->cmLookup==NULL); // flag for building combining class table
UChar *cm=NULL;
uint16_t index[256];
int32_t count=0;
- fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
+ const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status);
if (U_FAILURE(*status)) {
return;
}
uprv_memset(index, 0, sizeof(index));
}
for (c=0; c<0xffff; c++) {
- fcd = unorm_getFCD16(fcdTrieIndex, c);
+ if (U16_IS_LEAD(c)) {
+ fcd = 0;
+ if (nfcImpl->singleLeadMightHaveNonZeroFCD16(c)) {
+ UChar32 supp = U16_GET_SUPPLEMENTARY(c, 0xdc00);
+ UChar32 suppLimit = supp + 0x400;
+ while (supp < suppLimit) {
+ fcd |= nfcImpl->getFCD16FromNormData(supp++);
+ }
+ }
+ } else {
+ fcd = nfcImpl->getFCD16(c);
+ }
if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
(U16_IS_LEAD(c) && fcd != 0)) {// c is a leading surrogate with some FCD data
if (buildCMTable) {
CombinClassTable *cmLookup = t->cmLookup;
UChar newDecomp[256];
int32_t maxComp, newDecLen;
- UChar32 fcdHighStart;
- const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
+ const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status);
if (U_FAILURE(*status)) {
return;
}
- int16_t curClass = (unorm_getFCD16(fcdTrieIndex, c->tailoringCM) & 0xff);
+ int16_t curClass = nfcImpl->getFCD16(c->tailoringCM) & 0xff;
CompData *precomp = c->precomp;
int32_t compLen = c->compLen;
UChar *comp = c->comp;
UCAElements *el,
UErrorCode *status) {
CombinClassTable *cmLookup = t->cmLookup;
- UChar32 fcdHighStart;
- const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
+ const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status);
if (U_FAILURE(*status)) {
return;
}
- int16_t maxIndex = (unorm_getFCD16(fcdTrieIndex, cMark) & 0xff );
+ int16_t maxIndex = nfcImpl->getFCD16(cMark) & 0xff;
UCAElements element;
uint16_t *index;
UChar decomp[256];
return;
}
index = cmLookup->index;
- int32_t cClass=(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff);
- maxIndex = (int32_t)index[(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff)-1];
+ int32_t cClass=nfcImpl->getFCD16(cMark) & 0xff;
+ maxIndex = (int32_t)index[(nfcImpl->getFCD16(cMark) & 0xff)-1];
c.comp = comp;
c.decomp = decomp;
c.precomp = precomp;
// other combining mark combinations.
precomp[precompLen].cp=comp[0];
curClass = precomp[precompLen].cClass =
- index[unorm_getFCD16(fcdTrieIndex, decomp[1]) & 0xff];
+ index[nfcImpl->getFCD16(decomp[1]) & 0xff];
precompLen++;
replacedPos=0;
for (decompLen=0; decompLen< (int32_t)el->cSize; decompLen++) {
// This is a fix for tailoring contractions with accented
// character at the end of contraction string.
if ((len>2) &&
- (unorm_getFCD16(fcdTrieIndex, comp[len-2]) & 0xff00)==0) {
+ (nfcImpl->getFCD16(comp[len-2]) & 0xff00)==0) {
uprv_uca_addFCD4AccentedContractions(t, colEl, comp, len, &element, status);
}
UColToken *tok;
uint32_t i = 0, j = 0;
UChar baseChar, firstCM;
- UChar32 fcdHighStart;
- const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
context.nfcImpl=Normalizer2Factory::getNFCImpl(*status);
if(U_FAILURE(*status)) {
return 0;
}
if(src->UCA != NULL) {
for(j = 0; j<el.cSize; j++) {
- int16_t fcd = unorm_getFCD16(fcdTrieIndex, el.cPoints[j]);
+ int16_t fcd = context.nfcImpl->getFCD16(el.cPoints[j]);
if ( (fcd & 0xff) == 0 ) {
baseChar = el.cPoints[j]; // last base character
firstCM=0; // reset combining mark value
#define SECOND_LAST_BYTE_SHIFT_ 8
#define SUPPLEMENTARY_MIN_VALUE_ 0x10000
-static const uint16_t *fcdTrieIndex = NULL;
-static UChar32 fcdHighStart = 0;
+static const Normalizer2Impl *g_nfcImpl = NULL;
// internal methods -------------------------------------------------
U_CDECL_BEGIN
static UBool U_CALLCONV
usearch_cleanup(void) {
- fcdTrieIndex = NULL;
+ g_nfcImpl = NULL;  // Forget the cached NFC implementation pointer; initializeFCD() fetches it again when it finds NULL.
return TRUE;
}
U_CDECL_END
static
inline void initializeFCD(UErrorCode *status)
{
- if (fcdTrieIndex == NULL) {
- fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
+ if (g_nfcImpl == NULL) {  // Only fetch while no pointer is cached.
+ g_nfcImpl = Normalizer2Factory::getNFCImpl(*status);  // Errors are reported through *status; the returned pointer is not checked here.
ucln_i18n_registerCleanup(UCLN_I18N_USEARCH, usearch_cleanup);
}
}
int32_t strlength)
{
const UChar *temp = str + *offset;
- uint16_t result = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, temp, str + strlength);
+ uint16_t result = g_nfcImpl->nextFCD16(temp, str + strlength);
*offset = (int32_t)(temp - str);
return result;
}