inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
+ inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
+
public:
/**
* Construct a hashtable
*/
Hashtable(UBool ignoreKeyCase, UErrorCode& status);
+ /**
+ * Construct a hashtable keyed by UnicodeString with a caller-chosen
+ * initial size.
+ * @param ignoreKeyCase If true, keys are case insensitive.
+ * @param size Requested initial size; the table is initialized through
+ * uhash_initSize() with this value.
+ * @param status Error code. If it already indicates a failure on entry,
+ * the table is left uninitialized.
+ */
+ Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
+
/**
* Construct a hashtable
* @param keyComp Comparator for comparing the keys
int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
void* get(const UnicodeString& key) const;
-
+
int32_t geti(const UnicodeString& key) const;
-
+
void* remove(const UnicodeString& key);
int32_t removei(const UnicodeString& key);
* @see uhash_nextElement
*/
const UHashElement* nextElement(int32_t& pos) const;
-
+
UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
-
+
UValueComparator* setValueComparator(UValueComparator* valueComp);
UBool equals(const Hashtable& that) const;
* Implementation
********************************************************************/
-inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
+inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
UValueComparator *valueComp, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
}
-inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
+/**
+ * Initialize the embedded hash object with the given initial size.
+ * Nothing happens when status already signals a failure on entry. On
+ * success, `hash` is pointed at the embedded object and
+ * uprv_deleteUObject is installed as its key deleter.
+ */
+inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
+                                UValueComparator *valueComp, int32_t size, UErrorCode& status) {
+    if (U_SUCCESS(status)) {
+        uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
+    }
+    if (U_FAILURE(status)) {
+        // Either a failure on entry or a failed initialization: leave `hash` untouched.
+        return;
+    }
+    hash = &hashObj;
+    uhash_setKeyDeleter(hash, uprv_deleteUObject);
+}
+
+inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
UErrorCode& status) : hash(0) {
init( uhash_hashUnicodeString, keyComp, valueComp, status);
}
+
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
: hash(0)
{
status);
}
+/**
+ * Construct a hashtable with UnicodeString keys and the given initial size.
+ * The caseless hasher/comparator pair is used when ignoreKeyCase is true,
+ * the case-sensitive pair otherwise; no value comparator is installed.
+ */
+inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
+    : hash(0)
+{
+    if (ignoreKeyCase) {
+        initSize(uhash_hashCaselessUnicodeString,
+                 uhash_compareCaselessUnicodeString,
+                 NULL, size, status);
+    } else {
+        initSize(uhash_hashUnicodeString,
+                 uhash_compareUnicodeString,
+                 NULL, size, status);
+    }
+}
+
inline Hashtable::Hashtable(UErrorCode& status)
: hash(0)
{
inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
return uhash_setKeyComparator(hash, keyComp);
}
-
+
inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
return uhash_setValueComparator(hash, valueComp);
}
* prime number while being less than a power of two.
*/
static const int32_t PRIMES[] = {
- 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
+ 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
1073741789, 2147483647 /*, 4294967291 */
};
#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
-#define DEFAULT_PRIME_INDEX 3
+#define DEFAULT_PRIME_INDEX 4
/* These ratios are tuned to the PRIMES array such that a resize
* places the table back into the zone of non-resizing. That is,
emptytok.pointer = NULL; /* Only one of these two is needed */
emptytok.integer = 0; /* but we don't know which one. */
-
+
limit = p + hash->length;
while (p < limit) {
p->key = emptytok;
static UHashtable*
_uhash_init(UHashtable *result,
- UHashFunction *keyHash,
+ UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t primeIndex,
}
static UHashtable*
-_uhash_create(UHashFunction *keyHash,
+_uhash_create(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t primeIndex,
if (U_FAILURE(*status)) {
hash->elements = old;
- hash->length = oldLength;
+ hash->length = oldLength;
return;
}
********************************************************************/
U_CAPI UHashtable* U_EXPORT2
-uhash_open(UHashFunction *keyHash,
+uhash_open(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
UErrorCode *status) {
}
U_CAPI UHashtable* U_EXPORT2
-uhash_openSize(UHashFunction *keyHash,
+uhash_openSize(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t size,
U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable *fillinResult,
- UHashFunction *keyHash,
+ UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
UErrorCode *status) {
return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
}
+U_CAPI UHashtable* U_EXPORT2
+uhash_initSize(UHashtable *fillinResult,
+               UHashFunction *keyHash,
+               UKeyComparator *keyComp,
+               UValueComparator *valueComp,
+               int32_t size,
+               UErrorCode *status) {
+    /* Select the first entry of PRIMES that is >= size. When no entry is
+     * large enough the scan stops at the last index of the table. */
+    int32_t primeIndex;
+    for (primeIndex = 0; primeIndex < (PRIMES_LENGTH-1); ++primeIndex) {
+        if (PRIMES[primeIndex] >= size) {
+            break;
+        }
+    }
+    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, primeIndex, status);
+}
+
U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash) {
if (hash == NULL) {
hash->keyComparator = fn;
return result;
}
-U_CAPI UValueComparator *U_EXPORT2
+U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
UValueComparator *result = hash->valueComparator;
hash->valueComparator = fn;
UErrorCode status = U_ZERO_ERROR;
_uhash_internalSetResizePolicy(hash, policy);
hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
- hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
+ hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
_uhash_rehash(hash, &status);
}
return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
}
-U_CAPI UBool U_EXPORT2
+U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
int32_t count1, count2, pos, i;
if(count1!=count2){
return FALSE;
}
-
+
pos=UHASH_FIRST;
for(i=0; i<count1; i++){
const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
const UHashTok key1 = elem1->key;
const UHashTok val1 = elem1->value;
/* here the keys are not compared, instead the key form hash1 is used to fetch
- * value from hash2. If the hashes are equal then then both hashes should
+ * value from hash2. If the hashes are equal then then both hashes should
* contain equal values for the same key!
*/
const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
* If NULL won't do anything */
/* Size parameters */
-
+
int32_t count; /* The number of key-value pairs in this table.
* 0 <= count <= length. In practice we
* never let count == length (see code). */
* and values. Must be prime. */
/* Rehashing thresholds */
-
+
int32_t highWaterMark; /* If count > highWaterMark, rehash */
int32_t lowWaterMark; /* If count < lowWaterMark, rehash */
float highWaterRatio; /* 0..1; high water as a fraction of length */
float lowWaterRatio; /* 0..1; low water as a fraction of length */
-
+
int8_t primeIndex; /* Index into our prime table for length.
* length == PRIMES[primeIndex] */
UBool allocated; /* Was this UHashtable allocated? */
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize
*/
-U_CAPI UHashtable* U_EXPORT2
+U_CAPI UHashtable* U_EXPORT2
uhash_open(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_open
*/
-U_CAPI UHashtable* U_EXPORT2
+U_CAPI UHashtable* U_EXPORT2
uhash_openSize(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize
*/
-U_CAPI UHashtable* U_EXPORT2
+U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable *hash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
UErrorCode *status);
+/**
+ * Initialize an existing UHashtable with a caller-chosen initial size.
+ * @param hash The caller-provided UHashtable to initialize.
+ * @param keyHash A pointer to the key hashing function. Must not be
+ * NULL.
+ * @param keyComp A pointer to the function that compares keys. Must
+ * not be NULL.
+ * @param valueComp A pointer to the function that compares values.
+ * May be NULL.
+ * @param size The initial capacity of this hash table.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UHashtable, or 0 if an error occurred.
+ * @see uhash_openSize
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_initSize(UHashtable *hash,
+ UHashFunction *keyHash,
+ UKeyComparator *keyComp,
+ UValueComparator *valueComp,
+ int32_t size,
+ UErrorCode *status);
+
/**
* Close a UHashtable, releasing the memory used.
* @param hash The UHashtable to close. If hash is NULL no operation is performed.
*/
-U_CAPI void U_EXPORT2
+U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash);
* @param fn the function to be used hash keys; must not be NULL
* @return the previous key hasher; non-NULL
*/
-U_CAPI UHashFunction *U_EXPORT2
+U_CAPI UHashFunction *U_EXPORT2
uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
/**
* @param fn the function to be used compare keys; must not be NULL
* @return the previous key comparator; non-NULL
*/
-U_CAPI UKeyComparator *U_EXPORT2
+U_CAPI UKeyComparator *U_EXPORT2
uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
/**
* @param fn the function to be used compare keys; must not be NULL
* @return the previous key comparator; non-NULL
*/
-U_CAPI UValueComparator *U_EXPORT2
+U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
/**
* @param fn the function to be used delete keys, or NULL
* @return the previous key deleter; may be NULL
*/
-U_CAPI UObjectDeleter *U_EXPORT2
+U_CAPI UObjectDeleter *U_EXPORT2
uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
/**
* @param fn the function to be used delete values, or NULL
* @return the previous value deleter; may be NULL
*/
-U_CAPI UObjectDeleter *U_EXPORT2
+U_CAPI UObjectDeleter *U_EXPORT2
uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
/**
* @param hash The UHashtable to set
* @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED}
*/
-U_CAPI void U_EXPORT2
+U_CAPI void U_EXPORT2
uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
/**
* @param hash The UHashtable to query.
* @return The number of key-value pairs stored in hash.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_count(const UHashtable *hash);
/**
* @return The previous value, or NULL if none.
* @see uhash_get
*/
-U_CAPI void* U_EXPORT2
+U_CAPI void* U_EXPORT2
uhash_put(UHashtable *hash,
void *key,
void *value,
* @return The previous value, or NULL if none.
* @see uhash_get
*/
-U_CAPI void* U_EXPORT2
+U_CAPI void* U_EXPORT2
uhash_iput(UHashtable *hash,
int32_t key,
void* value,
* @return The previous value, or 0 if none.
* @see uhash_get
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_puti(UHashtable *hash,
void* key,
int32_t value,
* @return The previous value, or 0 if none.
* @see uhash_get
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_iputi(UHashtable *hash,
int32_t key,
int32_t value,
* @param key A pointer key stored in a hashtable
* @return The requested item, or NULL if not found.
*/
-U_CAPI void* U_EXPORT2
-uhash_get(const UHashtable *hash,
+U_CAPI void* U_EXPORT2
+uhash_get(const UHashtable *hash,
const void *key);
/**
* @param key An integer key stored in a hashtable
* @return The requested item, or NULL if not found.
*/
-U_CAPI void* U_EXPORT2
+U_CAPI void* U_EXPORT2
uhash_iget(const UHashtable *hash,
int32_t key);
* @param key A pointer key stored in a hashtable
* @return The requested item, or 0 if not found.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_geti(const UHashtable *hash,
const void* key);
/**
* @param key An integer key stored in a hashtable
* @return The requested item, or 0 if not found.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_igeti(const UHashtable *hash,
int32_t key);
* @param key A key stored in a hashtable
* @return The item removed, or NULL if not found.
*/
-U_CAPI void* U_EXPORT2
+U_CAPI void* U_EXPORT2
uhash_remove(UHashtable *hash,
const void *key);
* @param key An integer key stored in a hashtable
* @return The item removed, or NULL if not found.
*/
-U_CAPI void* U_EXPORT2
+U_CAPI void* U_EXPORT2
uhash_iremove(UHashtable *hash,
int32_t key);
* @param key An key stored in a hashtable
* @return The item removed, or 0 if not found.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_removei(UHashtable *hash,
const void* key);
* @param key An integer key stored in a hashtable
* @return The item removed, or 0 if not found.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_iremovei(UHashtable *hash,
int32_t key);
* Remove all items from a UHashtable.
* @param hash The target UHashtable.
*/
-U_CAPI void U_EXPORT2
+U_CAPI void U_EXPORT2
uhash_removeAll(UHashtable *hash);
/**
* @param key A key stored in a hashtable
* @return a hash element, or NULL if the key is not found.
*/
-U_CAPI const UHashElement* U_EXPORT2
+U_CAPI const UHashElement* U_EXPORT2
uhash_find(const UHashtable *hash, const void* key);
/**
* @return a hash element, or NULL if no further key-value pairs
* exist in the table.
*/
-U_CAPI const UHashElement* U_EXPORT2
+U_CAPI const UHashElement* U_EXPORT2
uhash_nextElement(const UHashtable *hash,
int32_t *pos);
* modified.
* @return the value that was removed.
*/
-U_CAPI void* U_EXPORT2
+U_CAPI void* U_EXPORT2
uhash_removeElement(UHashtable *hash, const UHashElement* e);
/********************************************************************
* @param i The given integer
* @return a UHashTok for an integer.
*/
-/*U_CAPI UHashTok U_EXPORT2
+/*U_CAPI UHashTok U_EXPORT2
uhash_toki(int32_t i);*/
/**
* @param p The given pointer
* @return a UHashTok for a pointer.
*/
-/*U_CAPI UHashTok U_EXPORT2
+/*U_CAPI UHashTok U_EXPORT2
uhash_tokp(void* p);*/
/********************************************************************
* @param key The string (const UChar*) to hash.
* @return A hash code for the key.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_hashUChars(const UHashTok key);
/**
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key);
/**
* @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise.
*/
-U_CAPI UBool U_EXPORT2
+U_CAPI UBool U_EXPORT2
uhash_compareUChars(const UHashTok key1, const UHashTok key2);
/**
* @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise.
*/
-U_CAPI UBool U_EXPORT2
+U_CAPI UBool U_EXPORT2
uhash_compareChars(const UHashTok key1, const UHashTok key2);
/**
* @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise.
*/
-U_CAPI UBool U_EXPORT2
+U_CAPI UBool U_EXPORT2
uhash_compareIChars(const UHashTok key1, const UHashTok key2);
/********************************************************************
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key);
/**
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_hashCaselessUnicodeString(const UElement key);
/********************************************************************
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
-U_CAPI int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
uhash_hashLong(const UHashTok key);
/**
* @param Key2 The integer for comparison
* @return true if key1 and key2 are equal, return false otherwise
*/
-U_CAPI UBool U_EXPORT2
+U_CAPI UBool U_EXPORT2
uhash_compareLong(const UHashTok key1, const UHashTok key2);
/********************************************************************
* Deleter for Hashtable objects.
* @param obj The object to be deleted
*/
-U_CAPI void U_EXPORT2
+U_CAPI void U_EXPORT2
uhash_deleteHashtable(void *obj);
/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
* @param hash2
* @return true if the hashtables are equal and false if not.
*/
-U_CAPI UBool U_EXPORT2
+U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2);
static const UChar TARGET_SEP = 45; // '-'
static const UChar VARIANT_SEP = 47; // '/'
-static const UChar ANY[] = {65,110,121,0}; // "Any"
+static const UChar ANY[] = {0x41,0x6E,0x79,0}; // "Any"
static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null"
-static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-"
+static const UChar LATIN_PIVOT[] = {0x2D,0x4C,0x61,0x74,0x6E,0x3B,0x4C,0x61,0x74,0x6E,0x2D,0}; // "-Latn;Latn-"
+
+// initial size for an Any-XXXX transform's cache of script-XXXX transforms
+// (will grow as necessary, but we don't expect to have source text with more than 7 scripts)
+#define ANY_TRANS_CACHE_INIT_SIZE 7
//------------------------------------------------------------
Transliterator(id, NULL),
targetScript(theTargetScript)
{
- cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
+ cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
if (U_FAILURE(ec)) {
return;
}
{
// Don't copy the cache contents
UErrorCode ec = U_ZERO_ERROR;
- cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
+ cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
if (U_FAILURE(ec)) {
return;
}
}
if (t == NULL) {
UErrorCode ec = U_ZERO_ERROR;
- UnicodeString sourceName(uscript_getName(source), -1, US_INV);
+ UnicodeString sourceName(uscript_getShortName(source), -1, US_INV);
UnicodeString id(sourceName);
id.append(TARGET_SEP).append(target);
//static const UChar VARIANT_SEP = 0x002F; // '/'
// String constants
-static const UChar ANY[] = { 65, 110, 121, 0 }; // Any
+static const UChar ANY[] = { 0x41, 0x6E, 0x79, 0 }; // Any
+static const UChar LAT[] = { 0x4C, 0x61, 0x74, 0 }; // Lat
// empty string
#define NO_VARIANT UnicodeString()
+// initial estimate for specDAG size
+// ICU 60 Transliterator::countAvailableSources()
+#define SPECDAG_INIT_SIZE 149
+
+// initial estimate for number of variant names
+#define VARIANT_LIST_INIT_SIZE 11
+#define VARIANT_LIST_MAX_SIZE 31
+
+// initial estimate for availableIDs count (default estimate is 8 => multiple reallocs)
+// ICU 60 Transliterator::countAvailableIDs()
+#define AVAILABLE_IDS_INIT_SIZE 641
+
+// initial estimate for number of targets for source "Any", "Lat"
+// ICU 60 Transliterator::countAvailableTargets("Any")/("Latn")
+#define ANY_TARGETS_INIT_SIZE 125
+#define LAT_TARGETS_INIT_SIZE 23
+
/**
* Resource bundle key for the RuleBasedTransliterator rule.
*/
TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
registry(TRUE, status),
- specDAG(TRUE, status),
- availableIDs(status)
+ specDAG(TRUE, SPECDAG_INIT_SIZE, status),
+ variantList(VARIANT_LIST_INIT_SIZE, status),
+ availableIDs(AVAILABLE_IDS_INIT_SIZE, status)
{
registry.setValueDeleter(deleteEntry);
+ variantList.setDeleter(uprv_deleteUObject);
+ variantList.setComparer(uhash_compareCaselessUnicodeString);
+ UnicodeString *emptyString = new UnicodeString();
+ if (emptyString != NULL) {
+ variantList.addElement(emptyString, status);
+ }
availableIDs.setDeleter(uprv_deleteUObject);
availableIDs.setComparer(uhash_compareCaselessUnicodeString);
specDAG.setValueDeleter(uhash_deleteHashtable);
if (targets == 0) {
return 0;
}
- UVector *variants = (UVector*) targets->get(target);
- // variants may be 0 if the source/target are invalid
- return (variants == 0) ? 0 : variants->size();
+ int32_t varMask = targets->geti(target);
+ int32_t varCount = 0;
+ while (varMask > 0) {
+ if (varMask & 1) {
+ varCount++;
+ }
+ varMask >>= 1;
+ }
+ return varCount;
}
UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
result.truncate(0); // invalid source
return result;
}
- UVector *variants = (UVector*) targets->get(target);
- if (variants == 0) {
- result.truncate(0); // invalid target
- return result;
- }
- UnicodeString *v = (UnicodeString*) variants->elementAt(index);
- if (v == 0) {
- result.truncate(0); // invalid index
- } else {
- result = *v;
+ int32_t varMask = targets->geti(target);
+ int32_t varCount = 0;
+ int32_t varListIndex = 0;
+ while (varMask > 0) {
+ if (varMask & 1) {
+ if (varCount == index) {
+ UnicodeString *v = (UnicodeString*) variantList.elementAt(varListIndex);
+ if (v != NULL) {
+ result = *v;
+ return result;
+ }
+ break;
+ }
+ varCount++;
+ }
+ varMask >>= 1;
+ varListIndex++;
}
+ result.truncate(0); // invalid target or index
return result;
}
UnicodeString *newID = (UnicodeString *)ID.clone();
// Check to make sure newID was created.
if (newID != NULL) {
- // NUL-terminate the ID string
- newID->getTerminatedBuffer();
- availableIDs.addElement(newID, status);
+ // NUL-terminate the ID string
+ newID->getTerminatedBuffer();
+ availableIDs.addElement(newID, status);
}
}
} else {
/**
* Register a source-target/variant in the specDAG. Variant may be
- * empty, but source and target must not be. If variant is empty then
- * the special variant NO_VARIANT is stored in slot zero of the
- * UVector of variants.
+ * empty, but source and target must not be.
*/
void TransliteratorRegistry::registerSTV(const UnicodeString& source,
const UnicodeString& target,
UErrorCode status = U_ZERO_ERROR;
Hashtable *targets = (Hashtable*) specDAG.get(source);
if (targets == 0) {
- targets = new Hashtable(TRUE, status);
- if (U_FAILURE(status) || targets == 0) {
+ int32_t size = 3;
+ if (source.compare(ANY,3) == 0) {
+ size = ANY_TARGETS_INIT_SIZE;
+ } else if (source.compare(LAT,3) == 0) {
+ size = LAT_TARGETS_INIT_SIZE;
+ }
+ targets = new Hashtable(TRUE, size, status);
+ if (U_FAILURE(status) || targets == NULL) {
return;
}
- targets->setValueDeleter(uprv_deleteUObject);
specDAG.put(source, targets, status);
}
- UVector *variants = (UVector*) targets->get(target);
- if (variants == 0) {
- variants = new UVector(uprv_deleteUObject,
- uhash_compareCaselessUnicodeString, status);
- if (variants == 0) {
+ int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
+ if (variantListIndex < 0) {
+ if (variantList.size() >= VARIANT_LIST_MAX_SIZE) {
+ // can't handle any more variants
return;
}
- targets->put(target, variants, status);
- }
- // assert(NO_VARIANT == "");
- // We add the variant string. If it is the special "no variant"
- // string, that is, the empty string, we add it at position zero.
- if (!variants->contains((void*) &variant)) {
- UnicodeString *tempus; // Used for null pointer check.
- if (variant.length() > 0) {
- tempus = new UnicodeString(variant);
- if (tempus != NULL) {
- variants->addElement(tempus, status);
- }
- } else {
- tempus = new UnicodeString(); // = NO_VARIANT
- if (tempus != NULL) {
- variants->insertElementAt(tempus, 0, status);
- }
+ UnicodeString *variantEntry = new UnicodeString(variant);
+ if (variantEntry != NULL) {
+ variantList.addElement(variantEntry, status);
+ if (U_SUCCESS(status)) {
+ variantListIndex = variantList.size() - 1;
+ }
+ }
+ if (variantListIndex < 0) {
+ return;
}
}
+ int32_t addMask = 1 << variantListIndex;
+ int32_t varMask = targets->geti(target);
+ targets->puti(target, varMask | addMask, status);
}
/**
const UnicodeString& variant) {
// assert(source.length() > 0);
// assert(target.length() > 0);
-// UErrorCode status = U_ZERO_ERROR;
+ UErrorCode status = U_ZERO_ERROR;
Hashtable *targets = (Hashtable*) specDAG.get(source);
- if (targets == 0) {
+ if (targets == NULL) {
return; // should never happen for valid s-t/v
}
- UVector *variants = (UVector*) targets->get(target);
- if (variants == 0) {
+ int32_t varMask = targets->geti(target);
+ if (varMask == 0) {
return; // should never happen for valid s-t/v
}
- variants->removeElement((void*) &variant);
- if (variants->size() == 0) {
+ int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
+ if (variantListIndex < 0) {
+ return; // should never happen for valid s-t/v
+ }
+ int32_t remMask = 1 << variantListIndex;
+ varMask &= (~remMask);
+ if (varMask != 0) {
+ targets->puti(target, varMask, status);
+ } else {
targets->remove(target); // should delete variants
if (targets->count() == 0) {
specDAG.remove(source); // should delete targets
UVector* rbts = new UVector(entry->u.dataVector->size(), status);
// Check for null pointer
if (rbts == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
}
int32_t passNumber = 1;
for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
/**
* DAG of visible IDs by spec. Hashtable: source => (Hashtable:
- * target => (UVector: variant)) The UVector of variants is never
- * empty. For a source-target with no variant, the special
- * variant NO_VARIANT (the empty string) is stored in slot zero of
- * the UVector.
+ * target => variant bitmask)
*/
Hashtable specDAG;
+ /**
+ * Vector of all variant names
+ */
+ UVector variantList;
+
/**
* Vector of public full IDs.
*/
static void TestExtractBetween(void);
static void TestUnicodeIDs(void);
static void TestGetRulesAndSourceSet(void);
+static void TestDataVariantsCompounds(void);
static void _expectRules(const char*, const char*, const char*);
static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
TEST(TestExtractBetween);
TEST(TestUnicodeIDs);
TEST(TestGetRulesAndSourceSet);
+ TEST(TestDataVariantsCompounds);
}
/*------------------------------------------------------------------
}
}
+/* One test case for TestDataVariantsCompounds(). Each string is run
+ * through u_unescape() before use, so \uXXXX escapes are allowed. */
+typedef struct {
+ const char * transID; /* transliterator ID handed to utrans_openU() */
+ const char * sourceText; /* input text to transliterate */
+ const char * targetText; /* expected text after transliteration */
+} TransIDSourceTarg;
+
+/* Test cases for TestDataVariantsCompounds(): plain source-target IDs,
+ * IDs with a variant ("/Names", "/BGN", "/UNGEGN") and a compound ID.
+ * The list is terminated by an entry whose transID is NULL. */
+static const TransIDSourceTarg dataVarCompItems[] = {
+ { "Simplified-Traditional",
+ "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u4ECE\\u7B80\\u4F53\\u8F6C\\u6362\\u4E3A\\u7E41\\u4F53\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002",
+ "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u5F9E\\u7C21\\u9AD4\\u8F49\\u63DB\\u70BA\\u7E41\\u9AD4\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002" },
+ { "Halfwidth-Fullwidth",
+ "Sample text, \\uFF7B\\uFF9D\\uFF8C\\uFF9F\\uFF99\\uFF83\\uFF77\\uFF7D\\uFF84.",
+ "\\uFF33\\uFF41\\uFF4D\\uFF50\\uFF4C\\uFF45\\u3000\\uFF54\\uFF45\\uFF58\\uFF54\\uFF0C\\u3000\\u30B5\\u30F3\\u30D7\\u30EB\\u30C6\\u30AD\\u30B9\\u30C8\\uFF0E" },
+ { "Han-Latin/Names; Latin-Bopomofo",
+ "\\u4E07\\u4FDF\\u919C\\u5974\\u3001\\u533A\\u695A\\u826F\\u3001\\u4EFB\\u70E8\\u3001\\u5CB3\\u98DB",
+ "\\u3107\\u311B\\u02CB \\u3111\\u3127\\u02CA \\u3114\\u3121\\u02C7 \\u310B\\u3128\\u02CA\\u3001 \\u3121 \\u3114\\u3128\\u02C7 \\u310C\\u3127\\u3124\\u02CA\\u3001 \\u3116\\u3123\\u02CA \\u3127\\u311D\\u02CB\\u3001 \\u3129\\u311D\\u02CB \\u3108\\u311F" },
+ { "Greek-Latin",
+ "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
+ "A \\u0100I H\\u0100I RH" },
+ { "Greek-Latin/BGN",
+ "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
+ "A\\u0313 A\\u0345 A\\u0314\\u0345 \\u1FEC" },
+ { "Greek-Latin/UNGEGN",
+ "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
+ "A A A R" },
+ { NULL, NULL, NULL }
+};
+
+/* Capacity, in bytes, of the char buffers used to print mismatching text. */
+enum { kBBufMax = 384 };
+
+/**
+ * For every entry of dataVarCompItems: open the transliterator named by
+ * transID, transliterate sourceText in place and compare the result with
+ * targetText. A transliterator that cannot be opened is reported as a data
+ * error and the entry is skipped; any other problem is reported with
+ * log_err.
+ */
+static void TestDataVariantsCompounds(void) {
+    const TransIDSourceTarg* itemsPtr;
+    for (itemsPtr = dataVarCompItems; itemsPtr->transID != NULL; itemsPtr++) {
+        UErrorCode status = U_ZERO_ERROR;
+        UChar utrid[kUBufMax];
+        int32_t utridlen = u_unescape(itemsPtr->transID, utrid, kUBufMax);
+        UTransliterator* utrans = utrans_openU(utrid, utridlen, UTRANS_FORWARD, NULL, 0, NULL, &status);
+        if (U_FAILURE(status)) {
+            log_data_err("FAIL: utrans_openU(%s) failed, error=%s (Are you missing data?)\n", itemsPtr->transID, u_errorName(status));
+            continue;
+        }
+        UChar text[kUBufMax];
+        int32_t textLen = u_unescape(itemsPtr->sourceText, text, kUBufMax);
+        int32_t textLim = textLen;
+        utrans_transUChars(utrans, text, &textLen, kUBufMax, 0, &textLim, &status);
+        if (U_FAILURE(status)) {
+            log_err("FAIL: utrans_transUChars(%s) failed, error=%s\n", itemsPtr->transID, u_errorName(status));
+        } else {
+            UChar expect[kUBufMax];
+            int32_t expectLen = u_unescape(itemsPtr->targetText, expect, kUBufMax);
+            if (textLen != expectLen || u_strncmp(text, expect, textLen) != 0) {
+                char btext[kBBufMax], bexpect[kBBufMax];
+                /* The comparison is done, so the UChar buffers may be
+                 * terminated in place to bound what u_austrncpy() reads. */
+                text[textLen < kUBufMax ? textLen : kUBufMax - 1] = 0;
+                expect[expectLen < kUBufMax ? expectLen : kUBufMax - 1] = 0;
+                /* u_austrncpy() uses its last argument as the limit of the
+                 * destination and writes no terminator once that limit is
+                 * reached. Pass the char buffer capacity rather than the
+                 * UChar count (converted text may need several bytes per
+                 * UChar) and terminate explicitly before printing. */
+                u_austrncpy(btext, text, kBBufMax - 1);
+                btext[kBBufMax - 1] = 0;
+                u_austrncpy(bexpect, expect, kBBufMax - 1);
+                bexpect[kBBufMax - 1] = 0;
+                log_err("FAIL: utrans_transUChars(%s),\n expect %s\n get %s\n", itemsPtr->transID, bexpect, btext);
+            }
+        }
+        utrans_close(utrans);
+    }
+}
static void _expectRules(const char* crules,
const char* cfrom,