Microsoft Visual Studio Solution File, Format Version 11.00
-# Visual Studio 2010
+# Visual C++ Express 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcxproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cintltst", "..\test\cintltst\cintltst.vcxproj", "{3D1246AE-1B32-479B-BECA-AEFA97BE2321}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icupkg", "..\tools\icupkg\icupkg.vcxproj", "{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}"
EndProject
-Project("{9D4211F7-2C77-439C-82F0-30A4E43BA569}") = "gendict", "..\tools\gendict\gendict.vcxproj", "{9D4211F7-2C77-439C-82F0-30A4E43BA569}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gendict", "..\tools\gendict\gendict.vcxproj", "{9D4211F7-2C77-439C-82F0-30A4E43BA569}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "letest", "..\test\letest\letest.vcxproj", "{67351485-4D18-4245-BE39-A7EF0675ACD2}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testplug", "..\tools\icuinfo\testplug.vcxproj", "{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}"
EndProject
Global
- GlobalSection(SubversionScc) = preSolution
- Svn-Managed = True
- Manager = AnkhSVN - Subversion Support for Visual Studio
- EndGlobalSection
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.Build.0 = Release|x64
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.ActiveCfg = Debug|Win32
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.Build.0 = Debug|Win32
- {0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|x64
- {0178B127-6269-407D-B112-93877BB62776}.Debug|x64.Build.0 = Debug|x64
+ {0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|Win32
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.ActiveCfg = Release|Win32
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.Build.0 = Release|Win32
- {0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|x64
- {0178B127-6269-407D-B112-93877BB62776}.Release|x64.Build.0 = Release|x64
+ {0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.ActiveCfg = Debug|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.Build.0 = Debug|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.ActiveCfg = Debug|x64
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
+ GlobalSection(SubversionScc) = preSolution
+ Svn-Managed = True
+ Manager = AnkhSVN - Subversion Support for Visual Studio
+ EndGlobalSection
EndGlobal
csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o \
wintzimpl.o windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o zonemeta.o \
upluralrules.o plurrule.o plurfmt.o selfmt.o dtitvfmt.o dtitvinf.o udateintervalformat.o \
-tmunit.o tmutamt.o tmutfmt.o colldata.o bmsearch.o bms.o currpinf.o \
+tmunit.o tmutamt.o tmutfmt.o currpinf.o \
uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o uspoof_wsconf.o decfmtst.o smpdtfst.o \
ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o locdspnm.o \
decNumber.o decContext.o alphaindex.o tznames.o tznames_impl.o tzgnames.o \
+++ /dev/null
-/*
- * Copyright (C) 2008-2011, International Business Machines Corporation and Others.
- * All rights reserved.
- */
-
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include "unicode/bms.h"
-#include "unicode/unistr.h"
-#include "unicode/colldata.h"
-#include "unicode/bmsearch.h"
-
-
-#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
-
-/*
- * Cast helpers used by the C wrappers in this file.
- * When USE_SAFE_CASTS is defined they expand to static_cast / const_cast;
- * otherwise they expand to plain C-style casts. The define directly below
- * is commented out, so the C-style forms are used unless USE_SAFE_CASTS is
- * supplied from elsewhere.
- */
-//#define USE_SAFE_CASTS
-#ifdef USE_SAFE_CASTS
-#define STATIC_CAST(type,value) static_cast<type>(value)
-#define CONST_CAST(type,value) const_cast<type>(value)
-#else
-#define STATIC_CAST(type,value) (type) (value)
-#define CONST_CAST(type,value) (type) (value)
-#endif
-
-U_NAMESPACE_USE
-/**
- * C wrapper that opens collation data for a collator.
- *
- * Forwards to CollData::open(coll, *status) and returns the result as an
- * opaque UCD pointer.
- *
- * @param coll the collator handed through to CollData::open.
- * @param status error code, passed on by reference; it is dereferenced
- * unconditionally, so it must not be NULL.
- * @return whatever CollData::open returned, cast to UCD *.
- */
-U_CAPI UCD * U_EXPORT2
-ucd_open(UCollator *coll, UErrorCode *status)
-{
- return STATIC_CAST(UCD *, CollData::open(coll, *status));
-}
-/**
- * C wrapper that releases a UCD obtained from ucd_open.
- *
- * A NULL ucd is ignored; any other value is cast back to CollData * and
- * handed to CollData::close.
- *
- * @param ucd the object to release, or NULL.
- */
-U_CAPI void U_EXPORT2
-ucd_close(UCD *ucd)
-{
- if (ucd != NULL) {
- CollData *data = STATIC_CAST(CollData *, ucd);
-
- CollData::close(data);
- }
-}
-/**
- * C wrapper that returns the collator associated with a UCD.
- *
- * NOTE(review): unlike ucd_close, there is no NULL check here; a NULL ucd
- * is cast and dereferenced.
- *
- * @param ucd the object to query; must not be NULL.
- * @return the result of CollData::getCollator() on the underlying object.
- */
-U_CAPI UCollator * U_EXPORT2
-ucd_getCollator(UCD *ucd)
-{
- CollData *data = STATIC_CAST(CollData *, ucd);
-
- return data->getCollator();
-}
-/**
- * C wrapper around the static CollData::freeCollDataCache().
- * Takes no arguments and returns nothing.
- */
-U_CAPI void U_EXPORT2
-ucd_freeCache()
-{
- CollData::freeCollDataCache();
-}
-/**
- * C wrapper around the static CollData::flushCollDataCache().
- * Takes no arguments and returns nothing.
- */
-U_CAPI void U_EXPORT2
-ucd_flushCache()
-{
- CollData::flushCollDataCache();
-}
-/**
- * State behind the opaque BMS handle used by the bms_* C functions.
- *
- * The struct itself is allocated with uprv_malloc in bms_open. The search
- * object is created with new in bms_open; the target string is created with
- * new in bms_open or bms_setTargetString. bms_close deletes both members
- * and frees the struct.
- */
-struct BMS
-{
- BoyerMooreSearch *bms; // C++ search object the wrappers forward to; NULL if creation failed
- const UnicodeString *targetString; // private copy of the caller's target text, or NULL
-};
-/**
- * Creates a BMS handle that searches for a pattern in an optional target.
- *
- * The pattern is wrapped in a local UnicodeString; the target, when given,
- * is copied into a heap-allocated UnicodeString owned by the handle. A
- * BoyerMooreSearch is then constructed from the collation data, the pattern
- * and the target copy.
- *
- * NOTE(review): *status is not tested on entry (bms_setTargetString does
- * test it), so work is done even when the caller passes in a failure code.
- *
- * @param ucd collation data, cast to CollData * without a NULL check.
- * @param pattern pattern text.
- * @param patternLength length passed to the UnicodeString constructor.
- * @param target target text, or NULL to create the handle without a target.
- * @param targetLength length passed to the UnicodeString constructor.
- * @param status set to U_MEMORY_ALLOCATION_ERROR when an allocation made
- * directly by this function fails; also passed to the
- * BoyerMooreSearch constructor.
- * @return NULL only when the BMS struct itself cannot be allocated. In every
- * other case, including later failures, the handle is returned and
- * the caller is expected to inspect *status.
- */
-U_CAPI BMS * U_EXPORT2
-bms_open(UCD *ucd,
- const UChar *pattern, int32_t patternLength,
- const UChar *target, int32_t targetLength,
- UErrorCode *status)
-{
- BMS *bms = STATIC_CAST(BMS *, uprv_malloc(sizeof(BMS)));
-
- if (bms == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- CollData *data = (CollData *) ucd;
- UnicodeString patternString(pattern, patternLength);
-
- if (target != NULL) {
- bms->targetString = new UnicodeString(target, targetLength);
-
- if (bms->targetString == NULL) {
- bms->bms = NULL; // keep the handle in a state bms_close can release
- *status = U_MEMORY_ALLOCATION_ERROR;
- return bms;
- }
- } else {
- bms->targetString = NULL;
- }
-
- bms->bms = new BoyerMooreSearch(data, patternString, bms->targetString, *status);
-
- if (bms->bms == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- }
-
- return bms;
-}
-/**
- * Destroys a handle created by bms_open: deletes the search object and the
- * target-string copy, then frees the struct with uprv_free.
- *
- * NOTE(review): bms is dereferenced without a NULL check, although bms_open
- * returns NULL when the struct cannot be allocated. Callers must not pass
- * NULL.
- *
- * @param bms the handle to destroy; must not be NULL.
- */
-U_CAPI void U_EXPORT2
-bms_close(BMS *bms)
-{
- delete bms->bms;
-
- delete bms->targetString;
-
- uprv_free(bms);
-}
-/**
- * Reports whether the pattern produced no collation elements, by forwarding
- * to BoyerMooreSearch::empty().
- *
- * NOTE(review): neither bms nor bms->bms is checked for NULL; bms_open can
- * return a handle whose bms member is NULL after an allocation failure.
- *
- * @param bms the search handle.
- * @return the result of BoyerMooreSearch::empty().
- */
-U_CAPI UBool U_EXPORT2
-bms_empty(BMS *bms)
-{
- return bms->bms->empty();
-}
-/**
- * Returns the collation data the search object was created with, as an
- * opaque UCD pointer (the result of BoyerMooreSearch::getData()).
- *
- * @param bms the search handle; dereferenced without a NULL check.
- * @return the search object's CollData, cast to UCD *.
- */
-U_CAPI UCD * U_EXPORT2
-bms_getData(BMS *bms)
-{
- return STATIC_CAST(UCD *, bms->bms->getData());
-}
-/**
- * Runs one search by forwarding to BoyerMooreSearch::search.
- *
- * @param bms the search handle; dereferenced without a NULL check.
- * @param offset position in the target at which to begin searching.
- * @param start receives the match start; dereferenced, so must not be NULL.
- * @param end receives the match limit; dereferenced, so must not be NULL.
- * @return the result of BoyerMooreSearch::search.
- */
-U_CAPI UBool U_EXPORT2
-bms_search(BMS *bms, int32_t offset, int32_t *start, int32_t *end)
-{
- return bms->bms->search(offset, *start, *end);
-}
-/**
- * Replaces the text that a BMS handle searches.
- *
- * Does nothing when *status already indicates failure. Otherwise the old
- * target copy is deleted, a new copy is made (or NULL stored when target is
- * NULL), and the search object is pointed at the new copy.
- *
- * NOTE(review): the result of new is not checked here (bms_open does check
- * it); a failed allocation is handed on as a NULL target.
- *
- * @param bms the search handle; dereferenced without a NULL check.
- * @param target new target text, or NULL for no target.
- * @param targetLength length passed to the UnicodeString constructor.
- * @param status checked on entry and passed to
- * BoyerMooreSearch::setTargetString.
- */
-U_CAPI void U_EXPORT2
-bms_setTargetString(BMS *bms, const UChar *target, int32_t targetLength, UErrorCode *status)
-{
- if (U_FAILURE(*status)) {
- return;
- }
-
- if (bms->targetString != NULL) {
- delete bms->targetString;
- }
-
- if (target != NULL) {
- bms->targetString = new UnicodeString(target, targetLength);
- } else {
- bms->targetString = NULL;
- }
-
- bms->bms->setTargetString(bms->targetString, *status);
-}
-
-#endif
+++ /dev/null
-/*
- ******************************************************************************
- * Copyright (C) 1996-2012, International Business Machines *
- * Corporation and others. All Rights Reserved. *
- ******************************************************************************
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/unistr.h"
-#include "unicode/putil.h"
-#include "unicode/usearch.h"
-
-#include "cmemory.h"
-#include "unicode/coll.h"
-#include "unicode/tblcoll.h"
-#include "unicode/coleitr.h"
-#include "unicode/ucoleitr.h"
-
-#include "unicode/regex.h" // TODO: make conditional on regexp being built.
-
-#include "unicode/uniset.h"
-#include "unicode/uset.h"
-#include "unicode/ustring.h"
-#include "hash.h"
-#include "uhash.h"
-#include "ucol_imp.h"
-#include "normalizer2impl.h"
-
-#include "unicode/colldata.h"
-#include "unicode/bmsearch.h"
-
-U_NAMESPACE_BEGIN
-/*
- * ARRAY_SIZE: element count of a true array (not of a pointer).
- * NEW_ARRAY: allocates room for count objects of type with uprv_malloc; no
- * constructors run, and the result may be NULL.
- * DELETE_ARRAY: releases memory with uprv_free.
- */
-#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
-#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
-#define DELETE_ARRAY(array) uprv_free((void *) (array))
-
-/**
- * One buffered collation element together with the collation-element
- * iterator offsets recorded around fetching it (see Target::nextCE and
- * Target::prevCE).
- */
-struct CEI
-{
- uint32_t order; // CE masked to the collator strength, or UCOL_NULLORDER
- int32_t lowOffset; // smaller of the two iterator offsets recorded for this CE
- int32_t highOffset; // larger of the two iterator offsets recorded for this CE
-};
-/**
- * Wraps the text being searched: holds a collation-element iterator and a
- * character break iterator over it, plus a small buffer (ceb) of the most
- * recently fetched collation elements.
- *
- * The UnicodeString passed in is not copied; only the pointer and its
- * buffer pointer are stored, so it must outlive its use here.
- */
-class Target : public UMemory
-{
-public:
- Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status);
- ~Target();
-
- void setTargetString(const UnicodeString *target);
-
- const CEI *nextCE(int32_t offset); // offset is an index into ceb, not into the text
- const CEI *prevCE(int32_t offset); // offset is an index into ceb, not into the text
-
- int32_t stringLength();
- UChar charAt(int32_t offset);
-
- UBool isBreakBoundary(int32_t offset);
- int32_t nextBreakBoundary(int32_t offset);
- int32_t nextSafeBoundary(int32_t offset);
-
- UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);
-
- void setOffset(int32_t offset);
- void setLast(int32_t last);
- int32_t getOffset();
-
-private:
- CEI *ceb; // buffer of bufferSize entries, allocated with NEW_ARRAY
- int32_t bufferSize;
- int32_t bufferMin; // first valid index in ceb
- int32_t bufferMax; // one past the last valid index in ceb
-
- uint32_t strengthMask; // bits of a CE kept at the collator's strength
- UCollationStrength strength;
- uint32_t variableTop;
- UBool toShift; // TRUE when alternate handling is UCOL_SHIFTED
- UCollator *coll;
- const Normalizer2 &nfd;
-
- const UnicodeString *targetString;
- const UChar *targetBuffer;
- int32_t targetLength;
-
- UCollationElements *elements;
- UBreakIterator *charBreakIterator;
-};
-/**
- * Builds a Target for the given collator and optional text.
- *
- * Reads strength, alternate handling and variable top from the collator,
- * sizes the CE buffer from the pattern length and the collator's largest
- * expansion, installs the target text and computes strengthMask.
- *
- * NOTE(review): the pointer returned by Normalizer2Factory::getNFDInstance
- * is dereferenced in the initializer list without a check - confirm it
- * cannot be NULL when status reports failure.
- * NOTE(review): when the ceb allocation fails the constructor returns before
- * the target is installed and before strengthMask is computed.
- *
- * @param theCollator collator used for all CE and boundary operations.
- * @param target text to search, or NULL to leave the target unset.
- * @param patternLength number of pattern CEs; used only to size the buffer.
- * @param status receives U_MEMORY_ALLOCATION_ERROR if the buffer cannot be
- * allocated; also passed to the ucol_* attribute getters.
- */
-Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
- : bufferSize(0), bufferMin(0), bufferMax(0),
- strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
- nfd(*Normalizer2Factory::getNFDInstance(status)),
- targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
-{
- strength = ucol_getStrength(coll);
- toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
- variableTop = ucol_getVariableTop(coll, &status);
-
- // find the largest expansion
- uint8_t maxExpansion = 0;
- for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) { // scan stops at the first 0 entry
- if (*expansion > maxExpansion) {
- maxExpansion = *expansion;
- }
- }
-
- // room for an extra character on each end, plus 4 for safety
- bufferSize = patternLength + (2 * maxExpansion) + 4;
-
- ceb = NEW_ARRAY(CEI, bufferSize);
-
- if (ceb == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- if (target != NULL) {
- setTargetString(target);
- }
-
- switch (strength) // cases fall through so weaker levels are always included
- {
- default:
- strengthMask |= UCOL_TERTIARYORDERMASK;
- /* fall through */
-
- case UCOL_SECONDARY:
- strengthMask |= UCOL_SECONDARYORDERMASK;
- /* fall through */
-
- case UCOL_PRIMARY:
- strengthMask |= UCOL_PRIMARYORDERMASK;
- }
-}
-/**
- * Closes the break iterator and the collation-element iterator, then frees
- * the CE buffer. The target UnicodeString is not deleted here.
- */
-Target::~Target()
-{
- ubrk_close(charBreakIterator);
- ucol_closeElements(elements);
-
- DELETE_ARRAY(ceb);
-}
-/**
- * Points this Target at new text, or clears it when target is NULL.
- *
- * Existing iterators are closed first. For non-NULL text a new
- * collation-element iterator and a character break iterator (opened for the
- * collator's valid locale) are created over the string's buffer. Errors from
- * those calls go into a local status and are not reported to the caller.
- *
- * NOTE(review): after the old iterators are closed, charBreakIterator and
- * elements are not reset. When target is NULL they keep the closed handles,
- * so the destructor or a later call closes them a second time.
- *
- * @param target text to search (not copied; must outlive its use here), or
- * NULL.
- */
-void Target::setTargetString(const UnicodeString *target)
-{
- if (charBreakIterator != NULL) {
- ubrk_close(charBreakIterator);
- ucol_closeElements(elements);
- }
-
- targetString = target;
-
- if (targetString != NULL) {
- UErrorCode status = U_ZERO_ERROR;
-
- targetBuffer = targetString->getBuffer();
- targetLength = targetString->length();
-
- elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status);
- ucol_forceHanImplicit(elements, &status);
-
- charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
- targetBuffer, targetLength, &status);
- } else {
- targetBuffer = NULL;
- targetLength = 0;
- }
-}
-/**
- * Returns buffer entry number offset, reading forward from the
- * collation-element iterator when that entry has not been fetched yet.
- *
- * An entry already inside [bufferMin, bufferMax) is returned as is. A new
- * entry is fetched only when offset equals bufferMax and the buffer is not
- * full; otherwise NULL is returned. While fetching, CEs that mask down to
- * UCOL_IGNORABLE are skipped. With shifted alternate handling, a CE below
- * variableTop with a non-zero primary is reduced to its primary weight at
- * quaternary strength or above, and treated as ignorable otherwise.
- *
- * @param offset index into the CE buffer (not a text offset).
- * @return the buffered entry, or NULL if it cannot be produced. At the end
- * of the text the entry's order is UCOL_NULLORDER.
- */
-const CEI *Target::nextCE(int32_t offset)
-{
- UErrorCode status = U_ZERO_ERROR;
- int32_t low = -1, high = -1;
- uint32_t order;
- UBool cont = FALSE;
-
- if (offset >= bufferMin && offset < bufferMax) {
- return &ceb[offset];
- }
-
- if (bufferMax >= bufferSize || offset != bufferMax) {
- return NULL;
- }
-
- do {
- low = ucol_getOffset(elements); // iterator offset before the CE is read
- order = ucol_next(elements, &status);
- high = ucol_getOffset(elements); // iterator offset after the CE is read
-
- if (order == (uint32_t)UCOL_NULLORDER) {
- //high = low = -1;
- break;
- }
-
- cont = isContinuation(order); // remembered because the mask below may clear the marker
- order &= strengthMask;
-
- if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
- if (strength >= UCOL_QUATERNARY) {
- order &= UCOL_PRIMARYORDERMASK;
- } else {
- order = UCOL_IGNORABLE;
- }
- }
- } while (order == UCOL_IGNORABLE);
-
- if (cont) {
- order |= UCOL_CONTINUATION_MARKER;
- }
-
- ceb[offset].order = order;
- ceb[offset].lowOffset = low;
- ceb[offset].highOffset = high;
-
- bufferMax += 1;
-
- return &ceb[offset];
-}
-/**
- * Returns buffer entry number offset, reading backward from the
- * collation-element iterator when that entry has not been fetched yet.
- *
- * Mirrors nextCE: the same buffer checks, ignorable skipping and shifted
- * handling apply, but CEs come from ucol_previous, so highOffset is the
- * iterator offset before the read and lowOffset the offset after it.
- *
- * @param offset index into the CE buffer (not a text offset).
- * @return the buffered entry, or NULL when the buffer is full or offset is
- * not the next free slot. At the start of the text the entry's order
- * is UCOL_NULLORDER.
- */
-const CEI *Target::prevCE(int32_t offset)
-{
- UErrorCode status = U_ZERO_ERROR;
- int32_t low = -1, high = -1;
- uint32_t order;
- UBool cont = FALSE;
-
- if (offset >= bufferMin && offset < bufferMax) {
- return &ceb[offset];
- }
-
- if (bufferMax >= bufferSize || offset != bufferMax) {
- return NULL;
- }
-
- do {
- high = ucol_getOffset(elements); // iterator offset before stepping back
- order = ucol_previous(elements, &status);
- low = ucol_getOffset(elements); // iterator offset after stepping back
-
- if (order == (uint32_t)UCOL_NULLORDER) {
- break;
- }
-
- cont = isContinuation(order); // remembered because the mask below may clear the marker
- order &= strengthMask;
-
- if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
- if (strength >= UCOL_QUATERNARY) {
- order &= UCOL_PRIMARYORDERMASK;
- } else {
- order = UCOL_IGNORABLE;
- }
- }
- } while (order == UCOL_IGNORABLE);
-
- bufferMax += 1;
-
- if (cont) {
- order |= UCOL_CONTINUATION_MARKER;
- }
-
- ceb[offset].order = order;
- ceb[offset].lowOffset = low;
- ceb[offset].highOffset = high;
-
- return &ceb[offset];
-}
-/**
- * @return the length of the current target text, or 0 when no target is set.
- */
-int32_t Target::stringLength()
-{
- if (targetString != NULL) {
- return targetLength;
- }
-
- return 0;
-}
-/**
- * Returns the code unit at offset in the target text. The offset is not
- * range-checked.
- *
- * @param offset index into the target buffer.
- * @return the code unit, or 0x0000 when no target is set.
- */
-UChar Target::charAt(int32_t offset)
-{
- if (targetString != NULL) {
- return targetBuffer[offset];
- }
-
- return 0x0000;
-}
-/**
- * Empties the CE buffer and moves the collation-element iterator to offset.
- * The status from ucol_setOffset is local and discarded.
- *
- * @param offset text offset handed to ucol_setOffset.
- */
-void Target::setOffset(int32_t offset)
-{
- UErrorCode status = U_ZERO_ERROR;
-
- bufferMin = 0;
- bufferMax = 0;
-
- ucol_setOffset(elements, offset, &status);
-}
-/**
- * Resets the CE buffer to a single UCOL_NULLORDER entry whose low and high
- * offsets are both last, then moves the collation-element iterator to last.
- * The status from ucol_setOffset is local and discarded.
- *
- * @param last text offset recorded in the entry and handed to
- * ucol_setOffset.
- */
-void Target::setLast(int32_t last)
-{
- UErrorCode status = U_ZERO_ERROR;
-
- bufferMin = 0;
- bufferMax = 1;
-
- ceb[0].order = (uint32_t)UCOL_NULLORDER;
- ceb[0].lowOffset = last;
- ceb[0].highOffset = last;
-
- ucol_setOffset(elements, last, &status);
-}
-/**
- * @return the current offset of the collation-element iterator, as reported
- * by ucol_getOffset.
- */
-int32_t Target::getOffset()
-{
- return ucol_getOffset(elements);
-}
-/**
- * @param offset text offset to test.
- * @return the result of ubrk_isBoundary on the character break iterator.
- */
-UBool Target::isBreakBoundary(int32_t offset)
-{
- return ubrk_isBoundary(charBreakIterator, offset);
-}
-/**
- * @param offset text offset to start from.
- * @return the result of ubrk_following on the character break iterator.
- */
-int32_t Target::nextBreakBoundary(int32_t offset)
-{
- return ubrk_following(charBreakIterator, offset);
-}
-/**
- * Scans forward from offset for the first code unit that is either a lead
- * surrogate or one the collator does not flag as unsafe (ucol_unsafeCP).
- *
- * @param offset text offset to start scanning from.
- * @return the index of that code unit, or targetLength if none is found
- * (including when offset is already at or past the end).
- */
-int32_t Target::nextSafeBoundary(int32_t offset)
-{
- while (offset < targetLength) {
- //UChar ch = charAt(offset);
- UChar ch = targetBuffer[offset];
-
- if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) {
- return offset;
- }
-
- offset += 1;
- }
-
- return targetLength;
-}
-/**
- * Checks a candidate match at identical strength.
- *
- * Below UCOL_IDENTICAL strength this always returns TRUE. Otherwise the
- * target range [start, end) and the pattern are both NFD-normalized and the
- * results compared.
- *
- * @param pattern the pattern text.
- * @param start start of the candidate match in the target.
- * @param end limit of the candidate match in the target.
- * @return TRUE if the strength needs no check or the normalized strings are
- * equal; FALSE if they differ or normalization failed.
- */
-UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)
-{
- if (strength < UCOL_IDENTICAL) {
- return TRUE;
- }
-
- // Note: We could use Normalizer::compare() or similar, but for short strings
- // which may not be in FCD it might be faster to just NFD them.
- UErrorCode status = U_ZERO_ERROR;
- UnicodeString t2, p2;
- nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, status);
- nfd.normalize(pattern, p2, status);
- // return FALSE if NFD failed
- return U_SUCCESS(status) && t2 == p2;
-}
-/* Number of slots in BadCharacterTable's skip array; BadCharacterTable::hash reduces a CE's primary weight modulo this value. */
-#define HASH_TABLE_SIZE 257
-/**
- * Bad-character shift table for the Boyer-Moore search, indexed by a hash
- * of a collation element's primary weight. Also caches, for each pattern CE
- * index, the minimum-length value obtained from CollData.
- */
-class BadCharacterTable : public UMemory
-{
-public:
- BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);
- ~BadCharacterTable();
-
- int32_t operator[](uint32_t ce) const;
- int32_t getMaxSkip() const;
- int32_t minLengthInChars(int32_t index);
-
-private:
- static int32_t hash(uint32_t ce);
-
- int32_t maxSkip; // set by the constructor only when it runs to completion
- int32_t badCharacterTable[HASH_TABLE_SIZE];
-
- int32_t *minLengthCache; // plen + 1 entries, or NULL if construction bailed out
-};
-/**
- * Builds the bad-character table for a pattern.
- *
- * minLengthCache[p] holds data->minLengthInChars(&patternCEs, p, history),
- * clamped so the values never increase with p and never go negative;
- * minLengthCache[plen] is 0. Every table slot starts at maxSkip (the value
- * for p == 0); then, for each pattern CE except the last, the slot for that
- * CE's hash is set to minLengthCache[p + 1], later CEs overwriting earlier
- * ones that share a slot.
- *
- * NOTE(review): when status is already a failure, the pattern is empty, or
- * an allocation fails, the constructor returns with maxSkip and
- * badCharacterTable uninitialized; getMaxSkip() and operator[] then read
- * indeterminate values.
- *
- * @param patternCEs collation elements of the pattern.
- * @param data collation data used for the minimum-length computation.
- * @param status checked on entry; set to U_MEMORY_ALLOCATION_ERROR if an
- * allocation fails.
- */
-BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status)
- : minLengthCache(NULL)
-{
- int32_t plen = patternCEs.size();
-
- // **** need a better way to deal with this ****
- if (U_FAILURE(status) || plen == 0) {
- return;
- }
-
- int32_t *history = NEW_ARRAY(int32_t, plen); // scratch array handed to minLengthInChars; freed before returning
-
- if (history == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- for (int32_t i = 0; i < plen; i += 1) {
- history[i] = -1;
- }
-
- minLengthCache = NEW_ARRAY(int32_t, plen + 1);
-
- if (minLengthCache == NULL) {
- DELETE_ARRAY(history);
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history);
-
- for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
- badCharacterTable[j] = maxSkip;
- }
-
- for(int32_t p = 1; p < plen; p += 1) {
- minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);
-
- // Make sure this entry is not bigger than the previous one.
- // Otherwise, we might skip too far in some cases.
- if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) {
- minLengthCache[p] = minLengthCache[p - 1];
- }
- }
-
- minLengthCache[plen] = 0;
-
- for(int32_t p = 0; p < plen - 1; p += 1) {
- badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];
- }
-
- DELETE_ARRAY(history);
-}
-/**
- * Frees the minimum-length cache (which is NULL if construction bailed out
- * before allocating it).
- */
-BadCharacterTable::~BadCharacterTable()
-{
- DELETE_ARRAY(minLengthCache);
-}
-/**
- * @param ce a collation element.
- * @return the shift stored in the slot selected by hash(ce).
- */
-int32_t BadCharacterTable::operator[](uint32_t ce) const
-{
- return badCharacterTable[hash(ce)];
-}
-/**
- * @return maxSkip, i.e. the minimum-length value computed for pattern index
- * 0. The value is only meaningful if the constructor completed.
- */
-int32_t BadCharacterTable::getMaxSkip() const
-{
- return maxSkip;
-}
-/**
- * Returns the cached minimum-length value for a pattern CE index. The index
- * is not range-checked and the cache pointer is not checked for NULL.
- *
- * @param index pattern CE index; the constructor fills entries 0 through
- * the pattern size inclusive.
- * @return minLengthCache[index].
- */
-int32_t BadCharacterTable::minLengthInChars(int32_t index)
-{
- return minLengthCache[index];
-}
-/**
- * Maps a collation element to a table slot.
- *
- * @param ce a collation element.
- * @return the CE's primary weight (UCOL_PRIMARYORDER) modulo HASH_TABLE_SIZE.
- */
-int32_t BadCharacterTable::hash(uint32_t ce)
-{
- return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE;
-}
-/**
- * Good-suffix shift table for the Boyer-Moore search, with one entry per
- * pattern collation element.
- */
-class GoodSuffixTable : public UMemory
-{
-public:
- GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status);
- ~GoodSuffixTable();
-
- int32_t operator[](int32_t offset) const;
-
-private:
- int32_t *goodSuffixTable; // one entry per pattern CE, or NULL if construction bailed out
-};
-/**
- * Builds the good-suffix table for a pattern.
- *
- * First computes suff[i], the length of the longest run of CEs ending at
- * index i that matches a suffix of the pattern. Every table entry starts at
- * the bad-character table's maxSkip; entries are then filled from suffixes
- * that are also prefixes of the pattern, and finally from each suff[i],
- * using the cached minimum-length values as shift distances.
- *
- * Nothing is built (the table stays NULL) when status is already a failure
- * or the pattern is empty.
- *
- * @param patternCEs collation elements of the pattern.
- * @param badCharacterTable supplies maxSkip and the minimum-length values.
- * @param status checked on entry; set to U_MEMORY_ALLOCATION_ERROR if an
- * allocation fails.
- */
-GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status)
- : goodSuffixTable(NULL)
-{
- int32_t patlen = patternCEs.size();
-
- // **** need a better way to deal with this ****
- if (U_FAILURE(status) || patlen <= 0) {
- return;
- }
-
- int32_t *suff = NEW_ARRAY(int32_t, patlen);
- int32_t start = patlen - 1, end = - 1;
- int32_t maxSkip = badCharacterTable.getMaxSkip();
-
- if (suff == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- // initialize suff
- suff[patlen - 1] = patlen;
-
- for (int32_t i = patlen - 2; i >= 0; i -= 1) {
- // (i > start) means we're inside the last suffix match we found
- // ((patlen - 1) - end) is how far the end of that match is from end of pattern
- // (i - start) is how far we are from start of that match
- // (i + (patlen - 1) - end) is index of same character at end of pattern
- // so if any suffix match at that character doesn't extend beyond the last match,
- // it's the suffix for this character as well
- if (i > start && suff[i + patlen - 1 - end] < i - start) {
- suff[i] = suff[i + patlen - 1 - end];
- } else {
- start = end = i;
-
- int32_t s = patlen;
-
- while (start >= 0 && patternCEs[start] == patternCEs[--s]) { // walk left while CEs equal those at the pattern's end
- start -= 1;
- }
-
- suff[i] = end - start;
- }
- }
-
- // now build goodSuffixTable
- goodSuffixTable = NEW_ARRAY(int32_t, patlen);
-
- if (goodSuffixTable == NULL) {
- DELETE_ARRAY(suff);
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
-
- // initialize entries to minLengthInChars of the pattern
- for (int32_t i = 0; i < patlen; i += 1) {
- goodSuffixTable[i] = maxSkip;
- }
-
- int32_t prefix = 0;
-
- for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) {
- if (suff[i] == i + 1) {
- // this matching suffix is a prefix of the pattern
- int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);
-
- // for any mis-match before this suffix, we should skip
- // so that the front of the pattern (i.e. the prefix)
- // lines up with the front of the suffix.
- // (patlen - 1 - i) is the start of the suffix
- while (prefix < patlen - 1 - i) {
- // value of maxSkip means never set...
- if (goodSuffixTable[prefix] == maxSkip) {
- goodSuffixTable[prefix] = prefixSkip;
- }
-
- prefix += 1;
- }
- }
- }
-
- for (int32_t i = 0; i < patlen - 1; i += 1) {
- goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1);
- }
-
- DELETE_ARRAY(suff);
-}
-/**
- * Frees the table (which is NULL if construction bailed out before
- * allocating it).
- */
-GoodSuffixTable::~GoodSuffixTable()
-{
- DELETE_ARRAY(goodSuffixTable);
-}
-/**
- * Returns the shift stored for a pattern CE index. The index is not
- * range-checked and the table pointer is not checked for NULL.
- *
- * @param offset pattern CE index.
- * @return goodSuffixTable[offset].
- */
-int32_t GoodSuffixTable::operator[](int32_t offset) const
-{
- return goodSuffixTable[offset];
-}
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)
-
-/**
- * @return TRUE when the pattern's CE list has no elements. patCEs is
- * dereferenced without a NULL check.
- */
-UBool BoyerMooreSearch::empty()
-{
- return patCEs->size() <= 0;
-}
-/**
- * @return the CollData pointer passed to the constructor.
- */
-CollData *BoyerMooreSearch::getData()
-{
- return data;
-}
-/**
- * @return the pattern's CE list; NULL if the constructor returned before
- * creating it.
- */
-CEList *BoyerMooreSearch::getPatternCEs()
-{
- return patCEs;
-}
-/**
- * @return the bad-character table; NULL if the constructor returned before
- * creating it.
- */
-BadCharacterTable *BoyerMooreSearch::getBadCharacterTable()
-{
- return badCharacterTable;
-}
-/**
- * @return the good-suffix table; NULL if the constructor returned before
- * creating it.
- */
-GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()
-{
- return goodSuffixTable;
-}
-/**
- * Prepares a search for patternString using the collator held by theData.
- *
- * Builds, in order, the pattern's CE list, the bad-character table and the
- * good-suffix table, and a Target when targetString is non-NULL. The
- * constructor returns early, leaving the remaining members NULL, if status
- * is already a failure or a step fails.
- *
- * NOTE(review): the results of new for goodSuffixTable and target are not
- * checked for NULL, and status is not re-tested between those two steps.
- *
- * @param theData collation data; stored, not copied.
- * @param patternString the pattern; copied into the pattern member.
- * @param targetString text to search (not copied), or NULL to set it later
- * through setTargetString.
- * @param status checked on entry and passed to each construction step.
- */
-BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString,
- UErrorCode &status)
- : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL)
-{
-
- if (U_FAILURE(status)) {
- return;
- }
-
- UCollator *collator = data->getCollator();
-
- patCEs = new CEList(collator, patternString, status);
-
- if (patCEs == NULL || U_FAILURE(status)) {
- return;
- }
-
- badCharacterTable = new BadCharacterTable(*patCEs, data, status);
-
- if (badCharacterTable == NULL || U_FAILURE(status)) {
- return;
- }
-
- goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status);
-
- if (targetString != NULL) {
- target = new Target(collator, targetString, patCEs->size(), status);
- }
-}
-/**
- * Deletes the Target, both shift tables and the pattern CE list. The
- * CollData passed to the constructor is not released here.
- */
-BoyerMooreSearch::~BoyerMooreSearch()
-{
- delete target;
- delete goodSuffixTable;
- delete badCharacterTable;
- delete patCEs;
-}
-/**
- * Sets or replaces the text to search.
- *
- * Creates the Target on first use; afterwards forwards to
- * Target::setTargetString. Does nothing when status is already a failure.
- *
- * NOTE(review): patCEs is dereferenced when creating the Target; it is NULL
- * if the constructor returned early. The result of new is not checked.
- *
- * @param targetString text to search (not copied), or NULL.
- * @param status checked on entry; passed to the Target constructor.
- */
-void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status)
-{
- if (U_FAILURE(status)) {
- return;
- }
-
- if (target == NULL) {
- target = new Target(data->getCollator(), targetString, patCEs->size(), status);
- } else {
- target->setTargetString(targetString);
- }
-}
-/**
- * Searches the target text for the pattern, starting at offset.
- *
- * tOffset marks the candidate end of a match and starts at offset + maxSkip.
- * On each pass the target's CE buffer is positioned so its entries run
- * backward from tOffset, and the pattern CEs are compared last-to-first
- * against them. On a mismatch tOffset advances by the larger of the
- * good-suffix shift and the bad-character shift. On a full CE match the
- * candidate is accepted only if its ends fall on character break boundaries
- * and pass the other checks below; otherwise tOffset advances by
- * goodSuffixTable[0] and the search continues.
- *
- * NOTE(review): target, patCEs and the two tables are dereferenced without
- * NULL checks; target is NULL until a target string has been supplied.
- * NOTE(review): getMaxSkip() is read before the plen <= 0 test, and
- * BadCharacterTable leaves maxSkip unset for an empty pattern.
- * NOTE(review): Target::prevCE / nextCE return NULL when the CE buffer is
- * full, and the results are dereferenced here without a check.
- *
- * @param offset text offset at which to begin searching.
- * @param start receives the match start, or -1 if nothing is found.
- * @param end receives the match limit, or -1 if nothing is found.
- * @return TRUE if a match was found, FALSE otherwise.
- */
-// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. ****
-/*
- * TODO:
- * * deal with trailing (and leading?) ignorables.
- * * Adding BoyerMooreSearch object slowed it down. How can we speed it up?
- */
-UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)
-{
- /*UCollator *coll =*/ data->getCollator();
- int32_t plen = patCEs->size();
- int32_t tlen = target->stringLength();
- int32_t maxSkip = badCharacterTable->getMaxSkip();
- int32_t tOffset = offset + maxSkip;
-
- if (plen <= 0) {
- // Searching for a zero length pattern always fails.
- start = end = -1;
- return FALSE;
- }
-
- while (tOffset <= tlen) {
- int32_t pIndex = plen - 1;
- int32_t tIndex = 0;
- int32_t lIndex = 0;
-
- if (tOffset < tlen) {
- // **** we really want to skip ahead enough to ****
- // **** be sure we get at least 1 non-ignorable ****
- // **** CE after the end of the pattern. ****
- int32_t next = target->nextSafeBoundary(tOffset + 1);
-
- target->setOffset(next);
-
- for (lIndex = 0; ; lIndex += 1) { // walk back from the safe boundary to the CE at tOffset
- const CEI *cei = target->prevCE(lIndex);
- int32_t low = cei->lowOffset;
- int32_t high = cei->highOffset;
-
- if (high == 0 || (low < high && low <= tOffset)) {
- if (low < tOffset) {
- while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {
- lIndex -= 1;
- }
-
- if (high > tOffset) {
- tOffset = high;
- }
- }
-
- break;
- }
- }
- } else {
- target->setLast(tOffset);
- lIndex = 0;
- }
-
- tIndex = ++lIndex;
-
- // Iterate backward until we hit the beginning of the pattern
- while (pIndex >= 0) {
- uint32_t pce = (*patCEs)[pIndex];
- const CEI *tcei = target->prevCE(tIndex++);
-
-
- if (tcei->order != pce) {
- // There is a mismatch at this position. Decide how far
- // over to shift the pattern, then try again.
-
- int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];
-#ifdef EXTRA_CAUTIOUS
- int32_t old = tOffset;
-#endif
-
- tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1);
-
- if (gsOffset > tOffset) {
- tOffset = gsOffset;
- }
-
-#ifdef EXTRA_CAUTIOUS
- // Make sure we don't skip backwards...
- if (tOffset <= old) {
- tOffset = old + 1;
- }
-#endif
-
- break;
- }
-
- pIndex -= 1;
- }
-
- if (pIndex < 0) {
- // We made it back to the beginning of the pattern,
- // which means we matched it all. Return the location.
- const CEI firstCEI = *target->prevCE(tIndex - 1); // copied by value: setOffset below resets the buffer
- const CEI lastCEI = *target->prevCE(lIndex);
- int32_t mStart = firstCEI.lowOffset;
- int32_t minLimit = lastCEI.lowOffset;
- int32_t maxLimit = lastCEI.highOffset;
- int32_t mLimit;
- UBool found = TRUE;
-
- target->setOffset(/*tOffset*/maxLimit);
-
- const CEI nextCEI = *target->nextCE(0);
-
- if (nextCEI.lowOffset > maxLimit) {
- maxLimit = nextCEI.lowOffset;
- }
-
- if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) {
- found = FALSE;
- }
-
- if (! target->isBreakBoundary(mStart)) {
- found = FALSE;
- }
-
- if (firstCEI.lowOffset == firstCEI.highOffset) {
- found = FALSE;
- }
-
- mLimit = maxLimit;
- if (minLimit < maxLimit) {
- // When the last CE's low index is same with its high index, the CE is likely
- // a part of expansion. In this case, the index is located just after the
- // character corresponding to the CEs compared above. If the index is right
- // at the break boundary, move the position to the next boundary will result
- // incorrect match length when there are ignorable characters exist between
- // the position and the next character produces CE(s). See ticket#8482.
- if (minLimit == lastCEI.highOffset && target->isBreakBoundary(minLimit)) {
- mLimit = minLimit;
- } else {
- int32_t nbb = target->nextBreakBoundary(minLimit);
-
- if (nbb >= lastCEI.highOffset) {
- mLimit = nbb;
- }
- }
- }
-
- if (mLimit > maxLimit) {
- found = FALSE;
- }
-
- if (! target->isBreakBoundary(mLimit)) {
- found = FALSE;
- }
-
- if (! target->isIdentical(pattern, mStart, mLimit)) {
- found = FALSE;
- }
-
- if (found) {
- start = mStart;
- end = mLimit;
-
- return TRUE;
- }
-
- tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip?
- }
- // Otherwise, we're here because of a mismatch, so keep going....
- }
-
- // no match
- start = -1;
- end = -1;
- return FALSE;
-}
-
-U_NAMESPACE_END
-
-#endif // #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
</ItemDefinitionGroup>\r
<ItemGroup>\r
<ClCompile Include="alphaindex.cpp" />\r
- <ClCompile Include="bms.cpp" />\r
- <ClCompile Include="bmsearch.cpp" />\r
<ClCompile Include="bocsu.cpp" />\r
<ClCompile Include="coleitr.cpp" />\r
<ClCompile Include="coll.cpp" />\r
- <ClCompile Include="colldata.cpp" />\r
<ClCompile Include="search.cpp" />\r
<ClCompile Include="sortkey.cpp" />\r
<ClCompile Include="stsearch.cpp" />\r
</Command>\r
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- </CustomBuild>\r
- <CustomBuild Include="unicode\bms.h">\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- </CustomBuild>\r
- <CustomBuild Include="unicode\bmsearch.h">\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode\r
</Command>\r
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
</CustomBuild>\r
</Command>\r
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- </CustomBuild>\r
- <CustomBuild Include="unicode\colldata.h">\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode\r
-</Command>\r
- <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
- <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode\r
</Command>\r
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>\r
</CustomBuild>\r
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />\r
<ImportGroup Label="ExtensionTargets">\r
</ImportGroup>\r
-</Project>
+</Project>
\ No newline at end of file
</Filter>\r
</ItemGroup>\r
<ItemGroup>\r
- <ClCompile Include="bms.cpp">\r
- <Filter>collation</Filter>\r
- </ClCompile>\r
- <ClCompile Include="bmsearch.cpp">\r
- <Filter>collation</Filter>\r
- </ClCompile>\r
<ClCompile Include="coleitr.cpp">\r
<Filter>collation</Filter>\r
</ClCompile>\r
<ClCompile Include="coll.cpp">\r
<Filter>collation</Filter>\r
</ClCompile>\r
- <ClCompile Include="colldata.cpp">\r
- <Filter>collation</Filter>\r
- </ClCompile>\r
<ClCompile Include="search.cpp">\r
<Filter>collation</Filter>\r
</ClCompile>\r
</ResourceCompile>\r
</ItemGroup>\r
<ItemGroup>\r
- <CustomBuild Include="unicode\bms.h">\r
- <Filter>collation</Filter>\r
- </CustomBuild>\r
- <CustomBuild Include="unicode\bmsearch.h">\r
- <Filter>collation</Filter>\r
- </CustomBuild>\r
<CustomBuild Include="unicode\coleitr.h">\r
<Filter>collation</Filter>\r
</CustomBuild>\r
<CustomBuild Include="unicode\coll.h">\r
<Filter>collation</Filter>\r
</CustomBuild>\r
- <CustomBuild Include="unicode\colldata.h">\r
- <Filter>collation</Filter>\r
- </CustomBuild>\r
<CustomBuild Include="unicode\search.h">\r
<Filter>collation</Filter>\r
</CustomBuild>\r
<Filter>formatting</Filter>\r
</CustomBuild>\r
</ItemGroup>\r
-</Project>\r
+</Project>
\ No newline at end of file
UCLN_I18N_UCOL_RES,
UCLN_I18N_UCOL_BLD,
UCLN_I18N_CSDET,
- UCLN_I18N_COLL_DATA,
UCLN_I18N_INDEX_CHARACTERS,
UCLN_I18N_GENDERINFO,
UCLN_I18N_CDFINFO,
+++ /dev/null
-/*
- * Copyright (C) 1996-2012, International Business Machines Corporation and Others.
- * All rights reserved.
- */
-
-/**
- * \file
- * \brief C API: Boyer-Moore StringSearch prototype.
- * \internal
- */
-
-#ifndef _BMS_H
-#define _BMS_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/ucol.h"
-
-#ifndef U_HIDE_INTERNAL_API
-
-/**
- * A <code>UCD</code> object holds the Collator-specific data needed to
- * compute the length of the shortest string that can
- * generate a partcular list of CEs.
- *
- * <code>UCD</code> objects are quite expensive to compute. Because
- * of this, they are cached. When you call <code>ucd_open</code> it
- * returns a reference counted cached object. When you call <code>ucd_close</code>
- * the reference count on the object is decremented but the object is not deleted.
- *
- * If you do not need to reuse any unreferenced objects in the cache, you can call
- * <code>ucd_flushCCache</code>. If you no longer need any <code>UCD</code>
- * objects, you can call <code>ucd_freeCache</code>
- *
- * @internal ICU 4.0.1 technology preview
- */
-typedef void UCD;
-
-/**
- * Open a <code>UCD</code> object.
- *
- * @param coll - the collator
- * @param status - will be set if any errors occur.
- *
- * @return the <code>UCD</code> object. You must call
- * <code>ucd_close</code> when you are done using the object.
- *
- * Note: if on return status is set to an error, the only safe
- * thing to do with the returned object is to call <code>ucd_close</code>.
- *
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL UCD * U_EXPORT2
-ucd_open(UCollator *coll, UErrorCode *status);
-
-/**
- * Release a <code>UCD</code> object.
- *
- * @param ucd - the object
- *
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL void U_EXPORT2
-ucd_close(UCD *ucd);
-
-/**
- * Get the <code>UCollator</code> object used to create a <code>UCD</code> object.
- * The <code>UCollator</code> object returned may not be the exact
- * object that was used to create this object, but it will have the
- * same behavior.
- *
- * @param ucd - the <code>UCD</code> object
- *
- * @return the <code>UCollator</code> used to create the given
- * <code>UCD</code> object.
- *
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL UCollator * U_EXPORT2
-ucd_getCollator(UCD *ucd);
-
-/**
- * <code>UCD</code> objects are expensive to compute, and so
- * may be cached. This routine will free the cached objects and delete
- * the cache.
- *
- * WARNING: Don't call this until you are have called <code>close</code>
- * for each <code>UCD</code> object that you have used. also,
- * DO NOT call this if another thread may be calling <code>ucd_flushCache</code>
- * at the same time.
- *
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL void U_EXPORT2
-ucd_freeCache();
-
-/**
- * <code>UCD</code> objects are expensive to compute, and so
- * may be cached. This routine will remove any unused <code>UCD</code>
- * objects from the cache.
- *
- * @internal 4.0.1 technology preview
- */
-U_INTERNAL void U_EXPORT2
-ucd_flushCache();
-
-/**
- * BMS
- *
- * This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapulates
- * the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them,
- * and a reference to the text being searched.
- *
- * To do a search, you first need to get a <code>UCD</code> object by calling <code>ucd_open</code>.
- * Then you construct a <code>BMS</code> object from the <code>UCD</code> object, the pattern
- * string and the target string. Then you call the <code>search</code> method. Here's a code sample:
- *
- * <pre>
- * void boyerMooreExample(UCollator *collator, UChar *pattern, int32_t patternLen, UChar *target, int32_t targetLength)
- * {
- * UErrorCode status = U_ZERO_ERROR;
- * int32_t offset = 0, start = -1, end = -1;
- * UCD *ucd = NULL);
- * BMS *bms = NULL;
- *
- * ucd = ucd_open(collator, &status);
- * if (U_FAILURE(status)) {
- * // could not create a UCD object
- * return;
- * }
- *
- * BMS *bms = bms_open(ucd, pattern, patternLength, target, targetlength, &status);
- * if (U_FAILURE(status)) {
- * // could not create a BMS object
- * ucd_close(ucd);
- * return;
- * }
- *
- *
- * // Find all matches
- * while (bms_search(bms, offset, &start, &end)) {
- * // process the match between start and end
- * ...
- *
- * // advance past the match
- * offset = end;
- * }
- *
- * // at this point, if offset == 0, there were no matches
- * if (offset == 0) {
- * // handle the case of no matches
- * }
- *
- * bms_close(bms);
- * ucd_close(ucd);
- *
- * // UCD objects are cached, so the call to
- * // ucd_close doesn't delete the object.
- * // Call this if you don't need the object any more.
- * ucd_flushCache();
- * }
- * </pre>
- *
- * NOTE: This is a technology preview. The final version of this API may not bear any resenblence to this API.
- *
- * Knows linitations:
- * 1) Backwards searching has not been implemented.
- *
- * 2) For Han and Hangul characters, this code ignores any Collation tailorings. In general,
- * this isn't a problem, but in Korean locals, at strength 1, Hangul characters are tailored
- * to be equal to Han characters with the same pronounciation. Because this code ignroes
- * tailorings, searching for a Hangul character will not find a Han character and visa-versa.
- *
- * 3) In some cases, searching for a pattern that needs to be normalized and ends
- * in a discontiguous contraction may fail. The only known cases of this are with
- * the Tibetan script. For example searching for the pattern
- * "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've
- * been unable to find a pratical, real-world example of this failure.)
- *
- * NOTE: This is a technology preview. The final version of this API may not bear any resenblence to this API.
- *
- * @internal ICU 4.0.1 technology preview
- */
-struct BMS;
-typedef struct BMS BMS; /**< @see BMS */
-
-/**
- * Construct a <code>MBS</code> object.
- *
- * @param ucd - A <code>UCD</code> object holding the Collator-sensitive data
- * @param pattern - the string for which to search
- * @param patternLength - the length of the string for which to search
- * @param target - the string in which to search
- * @param targetLength - the length of the string in which to search
- * @param status - will be set if any errors occur.
- *
- * @return the <code>BMS</code> object.
- *
- * Note: if on return status is set to an error, the only safe
- * thing to do with the returned object is to call
- * <code>bms_close</code>.
- *
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL BMS * U_EXPORT2
-bms_open(UCD *ucd,
- const UChar *pattern, int32_t patternLength,
- const UChar *target, int32_t targetLength,
- UErrorCode *status);
-
-/**
- * Close a <code>BMS</code> object and release all the
- * storage associated with it.
- *
- * @param bms - the <code>BMS</code> object to close.
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL void U_EXPORT2
-bms_close(BMS *bms);
-
-/**
- * Test the pattern to see if it generates any CEs.
- *
- * @param bms - the <code>BMS</code> object
- * @return <code>TRUE</code> if the pattern string did not generate any CEs
- *
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL UBool U_EXPORT2
-bms_empty(BMS *bms);
-
-/**
- * Get the <code>UCD</code> object used to create
- * a given <code>BMS</code> object.
- *
- * @param bms - the <code>BMS</code> object
- *
- * @return - the <code>UCD</code> object used to create
- * the given <code>BMS</code> object.
- *
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL UCD * U_EXPORT2
-bms_getData(BMS *bms);
-
-/**
- * Search for the pattern string in the target string.
- *
- * @param bms - the <code>BMS</code> object
- * @param offset - the offset in the target string at which to begin the search
- * @param start - will be set to the starting offset of the match, or -1 if there's no match
- * @param end - will be set to the ending offset of the match, or -1 if there's no match
- *
- * @return <code>TRUE</code> if the match succeeds, <code>FALSE</code> otherwise.
- *
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL UBool U_EXPORT2
-bms_search(BMS *bms, int32_t offset, int32_t *start, int32_t *end);
-
-/**
- * Set the target string for the match.
- *
- * @param bms - the <code>BMS</code> object
- * @param target - the new target string
- * @param targetLength - the length of the new target string
- * @param status - will be set if any errors occur.
- *
- * @internal ICU 4.0.1 technology preview
- */
-U_INTERNAL void U_EXPORT2
-bms_setTargetString(BMS *bms, const UChar *target, int32_t targetLength, UErrorCode *status);
-
-#endif /* U_HIDE_INTERNAL_API */
-
-#endif
-
-#endif /* _BMS_H */
+++ /dev/null
-/*
- ******************************************************************************
- * Copyright (C) 1996-2011, International Business Machines *
- * Corporation and others. All Rights Reserved. *
- ******************************************************************************
- */
-
-/**
- * \file
- * \brief C++ API: Boyer-Moore StringSearch technology preview
- * \internal ICU 4.0.1 technology preview
- */
-
-#ifndef B_M_SEARCH_H
-#define B_M_SEARCH_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/uobject.h"
-#include "unicode/ucol.h"
-
-#include "unicode/colldata.h"
-
-U_NAMESPACE_BEGIN
-
-class BadCharacterTable;
-class GoodSuffixTable;
-class Target;
-
-#ifndef U_HIDE_INTERNAL_API
-/**
- * BoyerMooreSearch
- *
- * This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapulates
- * the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them,
- * and a reference to the text being searched.
- *
- * To do a search, you fist need to get a <code>CollData</code> object by calling <code>CollData::open</code>.
- * Then you construct a <code>BoyerMooreSearch</code> object from the <code>CollData</code> object, the pattern
- * string and the target string. Then you call the <code>search</code> method. Here's a code sample:
- *
- * <pre>
- * void boyerMooreExample(UCollator *collator, UnicodeString *pattern, UnicodeString *target)
- * {
- * UErrorCode status = U_ZERO_ERROR;
- * CollData *collData = CollData::open(collator, status);
- *
- * if (U_FAILURE(status)) {
- * // could not create a CollData object
- * return;
- * }
- *
- * BoyerMooreSearch *search = new BoyerMooreSearch(collData, *patternString, target, status);
- *
- * if (U_FAILURE(status)) {
- * // could not create a BoyerMooreSearch object
- * CollData::close(collData);
- * return;
- * }
- *
- * int32_t offset = 0, start = -1, end = -1;
- *
- * // Find all matches
- * while (search->search(offset, start, end)) {
- * // process the match between start and end
- * ...
- * // advance past the match
- * offset = end;
- * }
- *
- * // at this point, if offset == 0, there were no matches
- * if (offset == 0) {
- * // handle the case of no matches
- * }
- *
- * delete search;
- * CollData::close(collData);
- *
- * // CollData objects are cached, so the call to
- * // CollData::close doesn't delete the object.
- * // Call this if you don't need the object any more.
- * CollData::flushCollDataCache();
- * }
- * </pre>
- *
- * NOTE: This is a technology preview. The final version of this API may not bear any resenblence to this API.
- *
- * Knows linitations:
- * 1) Backwards searching has not been implemented.
- *
- * 2) For Han and Hangul characters, this code ignores any Collation tailorings. In general,
- * this isn't a problem, but in Korean locals, at strength 1, Hangul characters are tailored
- * to be equal to Han characters with the same pronounciation. Because this code ignroes
- * tailorings, searching for a Hangul character will not find a Han character and visa-versa.
- *
- * 3) In some cases, searching for a pattern that needs to be normalized and ends
- * in a discontiguous contraction may fail. The only known cases of this are with
- * the Tibetan script. For example searching for the pattern
- * "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've
- * been unable to find a pratical, real-world example of this failure.)
- *
- * @internal ICU 4.0.1 technology preview
- *
- * @see CollData
- */
-class U_I18N_API BoyerMooreSearch : public UObject
-{
-public:
- /**
- * Construct a <code>BoyerMooreSearch</code> object.
- *
- * @param theData - A <code>CollData</code> object holding the Collator-sensitive data
- * @param patternString - the string for which to search
- * @param targetString - the string in which to search or <code>NULL</code> if youu will
- * set it later by calling <code>setTargetString</code>.
- * @param status - will be set if any errors occur.
- *
- * Note: if on return, status is set to an error code,
- * the only safe thing to do with this object is to call
- * the destructor.
- *
- * @internal ICU 4.0.1 technology preview
- */
- BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, UErrorCode &status);
-
- /**
- * The desstructor
- *
- * @internal ICU 4.0.1 technology preview
- */
- ~BoyerMooreSearch();
-
- /**
- * Test the pattern to see if it generates any CEs.
- *
- * @return <code>TRUE</code> if the pattern string did not generate any CEs
- *
- * @internal ICU 4.0.1 technology preview
- */
- UBool empty();
-
- /**
- * Search for the pattern string in the target string.
- *
- * @param offset - the offset in the target string at which to begin the search
- * @param start - will be set to the starting offset of the match, or -1 if there's no match
- * @param end - will be set to the ending offset of the match, or -1 if there's no match
- *
- * @return <code>TRUE</code> if the match succeeds, <code>FALSE</code> otherwise.
- *
- * @internal ICU 4.0.1 technology preview
- */
- UBool search(int32_t offset, int32_t &start, int32_t &end);
-
- /**
- * Set the target string for the match.
- *
- * @param targetString - the new target string
- * @param status - will be set if any errors occur.
- *
- * @internal ICU 4.0.1 technology preview
- */
- void setTargetString(const UnicodeString *targetString, UErrorCode &status);
-
- // **** no longer need these? ****
- /**
- * Return the <code>CollData</code> object used for searching
- *
- * @return the <code>CollData</code> object used for searching
- *
- * @internal ICU 4.0.1 technology preview
- */
- CollData *getData();
-
- /**
- * Return the CEs generated by the pattern string.
- *
- * @return a <code>CEList</code> object holding the CEs generated by the pattern string.
- *
- * @internal ICU 4.0.1 technology preview
- */
- CEList *getPatternCEs();
-
- /**
- * Return the <code>BadCharacterTable</code> object computed for the pattern string.
- *
- * @return the <code>BadCharacterTable</code> object.
- *
- * @internal ICU 4.0.1 technology preview
- */
- BadCharacterTable *getBadCharacterTable();
-
- /**
- * Return the <code>GoodSuffixTable</code> object computed for the pattern string.
- *
- * @return the <code>GoodSuffixTable</code> object computed for the pattern string.
- *
- * @internal ICU 4.0.1 technology preview
- */
- GoodSuffixTable *getGoodSuffixTable();
-
- /**
- * UObject glue...
- * @internal ICU 4.0.1 technology preview
- */
- virtual UClassID getDynamicClassID() const;
- /**
- * UObject glue...
- * @internal ICU 4.0.1 technology preview
- */
- static UClassID getStaticClassID();
-
-private:
- CollData *data;
- CEList *patCEs;
- BadCharacterTable *badCharacterTable;
- GoodSuffixTable *goodSuffixTable;
- UnicodeString pattern;
- Target *target;
-};
-#endif /* U_HIDE_INTERNAL_API */
-
-U_NAMESPACE_END
-
-#endif // #if !UCONFIG_NO_COLLATION
-#endif // #ifndef B_M_SEARCH_H
LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M) $(LIB_THREAD)
OBJECTS = aliastst.o allcoll.o apicoll.o astrotst.o callimts.o calregts.o caltest.o \
-caltztst.o canittst.o citrtest.o cntabcol.o convtest.o currcoll.o \
+caltztst.o canittst.o citrtest.o cntabcol.o colldata.o convtest.o currcoll.o \
fldset.o dadrfmt.o dadrcal.o dadrcoll.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts.o dtfmttst.o \
dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \
itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \
#if !UCONFIG_NO_COLLATION
#include "unicode/unistr.h"
-#include "unicode/putil.h"
#include "unicode/usearch.h"
#include "cmemory.h"
#include "unicode/ustring.h"
#include "hash.h"
#include "uhash.h"
-#include "ucln_in.h"
#include "ucol_imp.h"
-#include "umutex.h"
#include "uassert.h"
-#include "unicode/colldata.h"
-
-U_NAMESPACE_BEGIN
+#include "colldata.h"
#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
#define DELETE_ARRAY(array) uprv_free((void *) (array))
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CEList)
-
-#ifdef INSTRUMENT_CELIST
-int32_t CEList::_active = 0;
-int32_t CEList::_histogram[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-#endif
-
CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
: ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0)
{
strengthMask |= UCOL_PRIMARYORDERMASK;
}
-#ifdef INSTRUMENT_CELIST
- _active += 1;
- _histogram[0] += 1;
-#endif
-
ces = ceBuffer;
while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) {
CEList::~CEList()
{
-#ifdef INSTRUMENT_CELIST
- _active -= 1;
-#endif
-
if (ces != ceBuffer) {
DELETE_ARRAY(ces);
}
if (listSize >= listMax) {
int32_t newMax = listMax + CELIST_BUFFER_SIZE;
-
-#ifdef INSTRUMENT_CELIST
- _histogram[listSize / CELIST_BUFFER_SIZE] += 1;
-#endif
-
uint32_t *newCEs = NEW_ARRAY(uint32_t, newMax);
if (newCEs == NULL) {
return listSize;
}
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringList)
-
-#ifdef INSTRUMENT_STRING_LIST
-int32_t StringList::_lists = 0;
-int32_t StringList::_strings = 0;
-int32_t StringList::_histogram[101] = {0};
-#endif
-
StringList::StringList(UErrorCode &status)
: strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0)
{
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
-
-#ifdef INSTRUMENT_STRING_LIST
- _lists += 1;
- _histogram[0] += 1;
-#endif
}
StringList::~StringList()
if (U_FAILURE(status)) {
return;
}
-
-#ifdef INSTRUMENT_STRING_LIST
- _strings += 1;
-#endif
-
if (listSize >= listMax) {
int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE;
UnicodeString *newStrings = new UnicodeString[newMax];
for (int32_t i=0; i<listSize; ++i) {
newStrings[i] = strings[i];
}
-
-#ifdef INSTRUMENT_STRING_LIST
- int32_t _h = listSize / STRING_LIST_BUFFER_SIZE;
-
- if (_h > 100) {
- _h = 100;
- }
-
- _histogram[_h] += 1;
-#endif
-
delete[] strings;
strings = newStrings;
listMax = newMax;
delete strings;
}
-static void U_CALLCONV
-deleteCEList(void *obj)
-{
- CEList *list = (CEList *) obj;
-
- delete list;
-}
-
-static void U_CALLCONV
-deleteUnicodeStringKey(void *obj)
-{
- UnicodeString *key = (UnicodeString *) obj;
-
- delete key;
-}
-
-static void U_CALLCONV
-deleteChars(void * /*obj*/)
-{
- // char *chars = (char *) obj;
- // All the key strings are owned by the
- // CollData objects and don't need to
- // be freed here.
- //DELETE_ARRAY(chars);
-}
-
U_CDECL_END
-class CEToStringsMap : public UMemory
+class CEToStringsMap
{
public:
-
CEToStringsMap(UErrorCode &status);
~CEToStringsMap();
StringList *getStringList(uint32_t ce) const;
private:
-
void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status);
UHashtable *map;
};
uhash_iput(map, ce, (void *) stringList, &status);
}
-class StringToCEsMap : public UMemory
-{
-public:
- StringToCEsMap(UErrorCode &status);
- ~StringToCEsMap();
-
- void put(const UnicodeString *string, const CEList *ces, UErrorCode &status);
- const CEList *get(const UnicodeString *string);
- void free(const CEList *list);
-
-private:
-
-
- UHashtable *map;
-};
-
-StringToCEsMap::StringToCEsMap(UErrorCode &status)
- : map(NULL)
-{
- if (U_FAILURE(status)) {
- return;
- }
-
- map = uhash_open(uhash_hashUnicodeString,
- uhash_compareUnicodeString,
- uhash_compareLong,
- &status);
-
- if (U_FAILURE(status)) {
- return;
- }
-
- uhash_setValueDeleter(map, deleteCEList);
- uhash_setKeyDeleter(map, deleteUnicodeStringKey);
-}
-
-StringToCEsMap::~StringToCEsMap()
-{
- uhash_close(map);
-}
-
-void StringToCEsMap::put(const UnicodeString *string, const CEList *ces, UErrorCode &status)
-{
- uhash_put(map, (void *) string, (void *) ces, &status);
-}
-
-const CEList *StringToCEsMap::get(const UnicodeString *string)
-{
- return (const CEList *) uhash_get(map, string);
-}
-
-class CollDataCacheEntry : public UMemory
-{
-public:
- CollDataCacheEntry(CollData *theData);
- ~CollDataCacheEntry();
-
- CollData *data;
- int32_t refCount;
-};
-
-CollDataCacheEntry::CollDataCacheEntry(CollData *theData)
- : data(theData), refCount(1)
-{
- // nothing else to do
-}
-
-CollDataCacheEntry::~CollDataCacheEntry()
-{
- // check refCount?
- delete data;
-}
-
-class CollDataCache : public UMemory
-{
-public:
- CollDataCache(UErrorCode &status);
- ~CollDataCache();
-
- CollData *get(UCollator *collator, UErrorCode &status);
- void unref(CollData *collData);
-
- void flush();
-
-private:
- static char *getKey(UCollator *collator, char *keyBuffer, int32_t *charBufferLength);
- static void deleteKey(char *key);
-
- UHashtable *cache;
-};
-static UMutex lock = U_MUTEX_INITIALIZER;
-
-U_CDECL_BEGIN
-static void U_CALLCONV
-deleteCollDataCacheEntry(void *obj)
-{
- CollDataCacheEntry *entry = (CollDataCacheEntry *) obj;
-
- delete entry;
-}
-U_CDECL_END
-
-CollDataCache::CollDataCache(UErrorCode &status)
- : cache(NULL)
-{
- if (U_FAILURE(status)) {
- return;
- }
-
- cache = uhash_open(uhash_hashChars, uhash_compareChars, uhash_compareLong, &status);
-
- if (U_FAILURE(status)) {
- return;
- }
-
- uhash_setValueDeleter(cache, deleteCollDataCacheEntry);
- uhash_setKeyDeleter(cache, deleteChars);
-}
-
-CollDataCache::~CollDataCache()
-{
- umtx_lock(&lock);
- uhash_close(cache);
- cache = NULL;
- umtx_unlock(&lock);
-}
-
-CollData *CollDataCache::get(UCollator *collator, UErrorCode &status)
-{
- char keyBuffer[KEY_BUFFER_SIZE];
- int32_t keyLength = KEY_BUFFER_SIZE;
- char *key = getKey(collator, keyBuffer, &keyLength);
- CollData *result = NULL, *newData = NULL;
- CollDataCacheEntry *entry = NULL, *newEntry = NULL;
-
- umtx_lock(&lock);
- entry = (CollDataCacheEntry *) uhash_get(cache, key);
-
- if (entry == NULL) {
- umtx_unlock(&lock);
-
- newData = new CollData(collator, key, keyLength, status);
- newEntry = new CollDataCacheEntry(newData);
-
- if (U_FAILURE(status) || newData == NULL || newEntry == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- umtx_lock(&lock);
- entry = (CollDataCacheEntry *) uhash_get(cache, key);
-
- if (entry == NULL) {
- uhash_put(cache, newData->key, newEntry, &status);
- umtx_unlock(&lock);
-
- if (U_FAILURE(status)) {
- delete newEntry;
- delete newData;
-
- return NULL;
- }
-
- return newData;
- }
- }
-
- result = entry->data;
- entry->refCount += 1;
- umtx_unlock(&lock);
-
- if (key != keyBuffer) {
- deleteKey(key);
- }
-
- if (newEntry != NULL) {
- delete newEntry;
- delete newData;
- }
-
- return result;
-}
-
-void CollDataCache::unref(CollData *collData)
-{
- CollDataCacheEntry *entry = NULL;
-
- umtx_lock(&lock);
- entry = (CollDataCacheEntry *) uhash_get(cache, collData->key);
-
- if (entry != NULL) {
- entry->refCount -= 1;
- }
- umtx_unlock(&lock);
-}
-
-char *CollDataCache::getKey(UCollator *collator, char *keyBuffer, int32_t *keyBufferLength)
-{
- UErrorCode status = U_ZERO_ERROR;
- int32_t len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status);
-
- if (len >= *keyBufferLength) {
- *keyBufferLength = (len + 2) & ~1; // round to even length, leaving room for terminating null
- keyBuffer = NEW_ARRAY(char, *keyBufferLength);
- status = U_ZERO_ERROR;
-
- len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status);
- }
-
- keyBuffer[len] = '\0';
-
- return keyBuffer;
-}
-
-void CollDataCache::flush()
-{
- const UHashElement *element;
- int32_t pos = -1;
-
- umtx_lock(&lock);
- while ((element = uhash_nextElement(cache, &pos)) != NULL) {
- CollDataCacheEntry *entry = (CollDataCacheEntry *) element->value.pointer;
-
- if (entry->refCount <= 0) {
- uhash_removeElement(cache, element);
- }
- }
- umtx_unlock(&lock);
-}
-
-void CollDataCache::deleteKey(char *key)
-{
- DELETE_ARRAY(key);
-}
-
-U_CDECL_BEGIN
-static UBool coll_data_cleanup(void) {
- CollData::freeCollDataCache();
- return TRUE;
-}
-U_CDECL_END
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollData)
-
-CollData::CollData()
-{
- // nothing
-}
-
#define CLONE_COLLATOR
-//#define CACHE_CELISTS
-CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, UErrorCode &status)
- : coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL)
+CollData::CollData(UCollator *collator, UErrorCode &status)
+ : coll(NULL), ceToCharsStartingWith(NULL)
{
// [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]]
// i.e. other, control, private use, format, surrogate
USet *contractions = uset_openEmpty();
int32_t itemCount;
-#ifdef CACHE_CELISTS
- charsToCEList = new StringToCEsMap(status);
-
- if (U_FAILURE(status)) {
- goto bail;
- }
-#else
- charsToCEList = NULL;
-#endif
-
ceToCharsStartingWith = new CEToStringsMap(status);
if (U_FAILURE(status)) {
goto bail;
}
- if (cacheKeyLength > KEY_BUFFER_SIZE) {
- key = NEW_ARRAY(char, cacheKeyLength);
-
- if (key == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- goto bail;
- }
- } else {
- key = keyBuffer;
- }
-
- ARRAY_COPY(key, cacheKey, cacheKeyLength);
-
#ifdef CLONE_COLLATOR
coll = ucol_safeClone(collator, NULL, NULL, &status);
ceToCharsStartingWith->put(ceList->get(0), st, status);
-#ifdef CACHE_CELISTS
- charsToCEList->put(st, ceList, status);
-#else
delete ceList;
delete st;
-#endif
}
} else if (len > 0) {
UnicodeString *st = new UnicodeString(buffer, len);
ceToCharsStartingWith->put(ceList->get(0), st, status);
-#ifdef CACHE_CELISTS
- charsToCEList->put(st, ceList, status);
-#else
delete ceList;
delete st;
-#endif
} else {
// shouldn't happen...
}
ucol_close(coll);
#endif
- if (key != keyBuffer) {
- DELETE_ARRAY(key);
- }
-
delete ceToCharsStartingWith;
-
-#ifdef CACHE_CELISTS
- delete charsToCEList;
-#endif
}
UCollator *CollData::getCollator() const
const CEList *CollData::getCEList(const UnicodeString *string) const
{
-#ifdef CACHE_CELISTS
- return charsToCEList->get(string);
-#else
UErrorCode status = U_ZERO_ERROR;
const CEList *list = new CEList(coll, *string, status);
}
return list;
-#endif
}
void CollData::freeCEList(const CEList *list)
{
-#ifndef CACHE_CELISTS
delete list;
-#endif
}
int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t *history) const
for (int32_t s = 0; s < stringCount; s += 1) {
const UnicodeString *string = strings->get(s);
-#ifdef CACHE_CELISTS
- const CEList *ceList2 = charsToCEList->get(string);
-#else
UErrorCode status = U_ZERO_ERROR;
const CEList *ceList2 = new CEList(coll, *string, status);
delete ceList2;
ceList2 = NULL;
}
-#endif
if (ceList->matchesAt(offset, ceList2)) {
U_ASSERT(ceList2 != NULL);
if (rlength <= 0) {
// delete before continue to avoid memory leak.
-#ifndef CACHE_CELISTS
delete ceList2;
-#endif
+
// ignore any dead ends
continue;
}
}
}
-#ifndef CACHE_CELISTS
delete ceList2;
-#endif
}
}
return minLength;
}
-CollData *CollData::open(UCollator *collator, UErrorCode &status)
-{
- if (U_FAILURE(status)) {
- return NULL;
- }
-
- CollDataCache *cache = getCollDataCache();
-
- return cache->get(collator, status);
-}
-
-void CollData::close(CollData *collData)
-{
- CollDataCache *cache = getCollDataCache();
-
- cache->unref(collData);
-}
-
-CollDataCache *CollData::collDataCache = NULL;
-
-CollDataCache *CollData::getCollDataCache()
-{
- UErrorCode status = U_ZERO_ERROR;
- CollDataCache *cache = NULL;
-
- UMTX_CHECK(NULL, collDataCache, cache);
-
- if (cache == NULL) {
- cache = new CollDataCache(status);
-
- if (U_FAILURE(status)) {
- delete cache;
- return NULL;
- }
-
- umtx_lock(NULL);
- if (collDataCache == NULL) {
- collDataCache = cache;
-
- ucln_i18n_registerCleanup(UCLN_I18N_COLL_DATA, coll_data_cleanup);
- }
- umtx_unlock(NULL);
-
- if (collDataCache != cache) {
- delete cache;
- }
- }
-
- return collDataCache;
-}
-
-void CollData::freeCollDataCache()
-{
- CollDataCache *cache = NULL;
-
- UMTX_CHECK(NULL, collDataCache, cache);
-
- if (cache != NULL) {
- umtx_lock(NULL);
- if (collDataCache != NULL) {
- collDataCache = NULL;
- } else {
- cache = NULL;
- }
- umtx_unlock(NULL);
-
- delete cache;
- }
-}
-
-void CollData::flushCollDataCache()
-{
- CollDataCache *cache = NULL;
-
- UMTX_CHECK(NULL, collDataCache, cache);
-
- // **** this will fail if the another ****
- // **** thread deletes the cache here ****
- if (cache != NULL) {
- cache->flush();
- }
-}
-
-U_NAMESPACE_END
-
#endif // #if !UCONFIG_NO_COLLATION
/**
* \file
- * \brief C++ API: Collation data used to compute minLengthInChars.
+ * \brief Originally added as a C++ API for Collation data used to compute minLengthInChars
* \internal
*/
-
+
+/*
+ * Note: This module was included in ICU 4.0.1 as an @internal technology preview for supporting
+ * the Boyer-Moore string search API. For now, only SSearchTest depends on this module. I temporarily
+ * moved the module from the i18n directory to intltest, because we have no plan to publish this
+ * as a public API. (2012-12-18 yoshito)
+ */
+
#ifndef COLL_DATA_H
#define COLL_DATA_H
#if !UCONFIG_NO_COLLATION
-#include "unicode/uobject.h"
#include "unicode/ucol.h"
-U_NAMESPACE_BEGIN
-
-#ifndef U_HIDE_INTERNAL_API
-/**
- * The size of the internal buffer for the Collator's short description string.
- * @internal ICU 4.0.1 technology preview
- */
-#define KEY_BUFFER_SIZE 64
-
/**
* The size of the internal CE buffer in a <code>CEList</code> object
- * @internal ICU 4.0.1 technology preview
*/
#define CELIST_BUFFER_SIZE 4
* \def INSTRUMENT_CELIST
* Define this to enable the <code>CEList</code> objects to collect
* statistics.
- * @internal ICU 4.0.1 technology preview
*/
-//#define INSTRUMENT_CELIST
/**
* The size of the initial list in a <code>StringList</code> object.
- * @internal ICU 4.0.1 technology preview
*/
#define STRING_LIST_BUFFER_SIZE 16
-/**
- * \def INSTRUMENT_STRING_LIST
- * Define this to enable the <code>StringList</code> objects to
- * collect statistics.
- * @internal ICU 4.0.1 technology preview
- */
-//#define INSTRUMENT_STRING_LIST
-
/**
* This object holds a list of CEs generated from a particular
* <code>UnicodeString</code>
*
- * @internal ICU 4.0.1 technology preview
*/
-class U_I18N_API CEList : public UObject
+class CEList
{
public:
/**
* Note: if on return, status is set to an error code,
* the only safe thing to do with this object is to call
* the destructor.
- *
- * @internal ICU 4.0.1 technology preview
*/
CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
/**
* The destructor.
- * @internal ICU 4.0.1 technology preview
*/
~CEList();
* Return the number of CEs in the list.
*
* @return the number of CEs in the list.
- *
- * @internal ICU 4.0.1 technology preview
*/
int32_t size() const;
* @param index - the index of the CE to return
*
* @return the CE, or <code>0</code> if <code>index</code> is out of range
- *
- * @internal ICU 4.0.1 technology preview
*/
uint32_t get(int32_t index) const;
* @param other - the other <code>CEList</code>
*
* @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
- *
- * @internal ICU 4.0.1 technology preview
*/
UBool matchesAt(int32_t offset, const CEList *other) const;
* @param index - the index
*
* @return a reference to the given CE in the list
- *
- * @internal ICU 4.0.1 technology preview
*/
uint32_t &operator[](int32_t index) const;
- /**
- * UObject glue...
- * @internal ICU 4.0.1 technology preview
- */
- virtual UClassID getDynamicClassID() const;
- /**
- * UObject glue...
- * @internal ICU 4.0.1 technology preview
- */
- static UClassID getStaticClassID();
-
private:
void add(uint32_t ce, UErrorCode &status);
uint32_t *ces;
int32_t listMax;
int32_t listSize;
-
-#ifdef INSTRUMENT_CELIST
- static int32_t _active;
- static int32_t _histogram[10];
-#endif
};
/**
* StringList
*
* This object holds a list of <code>UnicodeString</code> objects.
- *
- * @internal ICU 4.0.1 technology preview
*/
-class U_I18N_API StringList : public UObject
+class StringList
{
public:
/**
* Note: if on return, status is set to an error code,
* the only safe thing to do with this object is to call
* the destructor.
- *
- * @internal ICU 4.0.1 technology preview
*/
StringList(UErrorCode &status);
/**
* The destructor.
- *
- * @internal ICU 4.0.1 technology preview
*/
~StringList();
*
* @param string - the string to add
* @param status - will be set if any errors occur.
- *
- * @internal ICU 4.0.1 technology preview
*/
void add(const UnicodeString *string, UErrorCode &status);
* @param chars - the address of the array of code points
* @param count - the number of code points in the array
* @param status - will be set if any errors occur.
- *
- * @internal ICU 4.0.1 technology preview
*/
void add(const UChar *chars, int32_t count, UErrorCode &status);
*
* @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
* if <code>index</code> is out of bounds.
- *
- * @internal ICU 4.0.1 technology preview
*/
const UnicodeString *get(int32_t index) const;
* Get the number of strings in the list.
*
* @return the number of strings in the list.
- *
- * @internal ICU 4.0.1 technology preview
*/
int32_t size() const;
- /**
- * the UObject glue...
- * @internal ICU 4.0.1 technology preview
- */
- virtual UClassID getDynamicClassID() const;
- /**
- * the UObject glue...
- * @internal ICU 4.0.1 technology preview
- */
- static UClassID getStaticClassID();
-
private:
UnicodeString *strings;
int32_t listMax;
int32_t listSize;
-
-#ifdef INSTRUMENT_STRING_LIST
- static int32_t _lists;
- static int32_t _strings;
- static int32_t _histogram[101];
-#endif
};
-#endif /* U_HIDE_INTERNAL_API */
+
/*
* Forward references to internal classes.
*/
class StringToCEsMap;
class CEToStringsMap;
-class CollDataCache;
-#ifndef U_HIDE_INTERNAL_API
/**
* CollData
*
* If you do not need to reuse any unreferenced objects in the cache, you can call
* <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
* objects, you can call <code>CollData::freeCollDataCache</code>
- *
- * @internal ICU 4.0.1 technology preview
*/
-class U_I18N_API CollData : public UObject
+class CollData
{
public:
/**
*
* @param collator - the collator
* @param status - will be set if any errors occur.
- *
- * @return the <code>CollData</code> object. You must call
- * <code>close</code> when you are done using the object.
- *
- * Note: if on return, status is set to an error code,
- * the only safe thing to do with this object is to call
- * <code>CollData::close</code>.
- *
- * @internal ICU 4.0.1 technology preview
*/
- static CollData *open(UCollator *collator, UErrorCode &status);
+ CollData(UCollator *collator, UErrorCode &status);
/**
- * Release a <code>CollData</code> object.
- *
- * @param collData - the object
- *
- * @internal ICU 4.0.1 technology preview
+ * The destructor.
*/
- static void close(CollData *collData);
+ ~CollData();
/**
* Get the <code>UCollator</code> object used to create this object.
* The object returned may not be the exact object that was used to
* create this object, but it will have the same behavior.
- * @internal ICU 4.0.1 technology preview
*/
UCollator *getCollator() const;
* return a <code>StringList</code> object containing all
* the strings, or <code>NULL</code> if there are
* no such strings.
- *
- * @internal ICU 4.0.1 technology preview.
*/
const StringList *getStringList(int32_t ce) const;
* @return a <code>CEList</code> object containing the CEs. You
* must call <code>freeCEList</code> when you are finished
* using the <code>CEList</code>.
- *
- * @internal ICU 4.0.1 technology preview.
*/
const CEList *getCEList(const UnicodeString *string) const;
* Release a <code>CEList</code> returned by <code>getCEList</code>.
*
* @param list - the <code>CEList</code> to free.
- *
- * @internal ICU 4.0.1 technology preview
*/
void freeCEList(const CEList *list);
* @param offset - the offset of the first CE in the list to use.
*
* @return the length of the shortest string.
- *
- * @internal ICU 4.0.1 technology preview
*/
int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
* the number of CEs in the <code>CEList</code>
*
* @return the length of the shortest string.
- *
- * @internal ICU 4.0.1 technology preview
*/
int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
- /**
- * UObject glue...
- * @internal ICU 4.0.1 technology preview
- */
- virtual UClassID getDynamicClassID() const;
- /**
- * UObject glue...
- * @internal ICU 4.0.1 technology preview
- */
- static UClassID getStaticClassID();
-
- /**
- * <code>CollData</code> objects are expensive to compute, and so
- * may be cached. This routine will free the cached objects and delete
- * the cache.
- *
- * WARNING: Don't call this until you are have called <code>close</code>
- * for each <code>CollData</code> object that you have used. also,
- * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
- * at the same time.
- *
- * @internal 4.0.1 technology preview
- */
- static void freeCollDataCache();
-
- /**
- * <code>CollData</code> objects are expensive to compute, and so
- * may be cached. This routine will remove any unused <code>CollData</code>
- * objects from the cache.
- *
- * @internal 4.0.1 technology preview
- */
- static void flushCollDataCache();
-
private:
- friend class CollDataCache;
- friend class CollDataCacheEntry;
-
- CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status);
- ~CollData();
-
- CollData();
-
- static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);
-
- static CollDataCache *getCollDataCache();
-
UCollator *coll;
- StringToCEsMap *charsToCEList;
CEToStringsMap *ceToCharsStartingWith;
- char keyBuffer[KEY_BUFFER_SIZE];
- char *key;
-
- static CollDataCache *collDataCache;
-
uint32_t minHan;
uint32_t maxHan;
uint32_t jamoLimits[4];
};
-#endif /* U_HIDE_INTERNAL_API */
-
-U_NAMESPACE_END
#endif // #if !UCONFIG_NO_COLLATION
#endif // #ifndef COLL_DATA_H
-<?xml version="1.0" encoding="utf-8"?>\r
+<?xml version="1.0" encoding="utf-8"?>\r
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">\r
<ItemGroup Label="ProjectConfigurations">\r
<ProjectConfiguration Include="Debug|Win32">\r
</ItemDefinitionGroup>\r
<ItemGroup>\r
<ClCompile Include="bytestrietest.cpp" />\r
+ <ClCompile Include="colldata.cpp" />\r
<ClCompile Include="ucharstrietest.cpp" />\r
<ClCompile Include="itrbbi.cpp" />\r
<ClCompile Include="rbbiapts.cpp" />\r
<ClCompile Include="listformattertest.cpp" />\r
</ItemGroup>\r
<ItemGroup>\r
+ <ClInclude Include="colldata.h" />\r
<ClInclude Include="itrbbi.h" />\r
<ClInclude Include="rbbiapts.h" />\r
<ClInclude Include="rbbitst.h" />\r
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />\r
<ImportGroup Label="ExtensionTargets">\r
</ImportGroup>\r
-</Project>\r
+</Project>
\ No newline at end of file
<ClCompile Include="alphaindextst.cpp">\r
<Filter>collation</Filter>\r
</ClCompile>\r
+ <ClCompile Include="listformattertest.cpp" />\r
+ <ClCompile Include="colldata.cpp">\r
+ <Filter>collation</Filter>\r
+ </ClCompile>\r
</ItemGroup>\r
<ItemGroup>\r
<ClInclude Include="itrbbi.h">\r
<ClInclude Include="alphaindextst.h">\r
<Filter>collation</Filter>\r
</ClInclude>\r
+ <ClInclude Include="listformattertest.h" />\r
+ <ClInclude Include="colldata.h">\r
+ <Filter>collation</Filter>\r
+ </ClInclude>\r
</ItemGroup>\r
-</Project>\r
+</Project>
\ No newline at end of file
**********************************************************************
*/
-
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
-#include "unicode/unistr.h"
-#include "unicode/putil.h"
-#include "unicode/usearch.h"
-
#include "cmemory.h"
+#include "cstring.h"
+#include "ucol_imp.h"
+
#include "unicode/coll.h"
#include "unicode/tblcoll.h"
-#include "unicode/coleitr.h"
-#include "unicode/ucoleitr.h"
-
-#include "unicode/regex.h" // TODO: make conditional on regexp being built.
-
-#include "unicode/uniset.h"
+#include "unicode/usearch.h"
#include "unicode/uset.h"
#include "unicode/ustring.h"
-#include "hash.h"
-#include "uhash.h"
-#include "ucol_imp.h"
-#include "intltest.h"
-#include "ssearch.h"
-
-#include "unicode/colldata.h"
-#include "unicode/bmsearch.h"
-#include "unicode/bms.h"
+#include "unicode/coleitr.h"
+#include "unicode/regex.h" // TODO: make conditional on regexp being built.
+#include "colldata.h"
+#include "ssearch.h"
#include "xmlparser.h"
-#include "ucbuf.h"
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
+#include <stdio.h> // for sprintf
char testId[100];
if (exec) monkeyTest(params);
break;
- case 3: name = "bmMonkeyTest";
- if (exec) bmMonkeyTest(params);
- break;
-
- case 4: name = "boyerMooreTest";
- if (exec) boyerMooreTest();
+ case 3: name = "sharpSTest";
+ if (exec) sharpSTest();
break;
- case 5: name = "goodSuffixTest";
+ case 4: name = "goodSuffixTest";
if (exec) goodSuffixTest();
break;
- case 6: name = "searchTime";
+ case 5: name = "searchTime";
if (exec) searchTime();
break;
-
- case 7: name = "bmsTest";
- if (exec) bmsTest();
- break;
-
- case 8: name = "bmSearchTest";
- if (exec) bmSearchTest();
- break;
-
- case 9: name = "udhrTest";
- if (exec) udhrTest();
- break;
- case 10: name = "stringListTest";
- if (exec) stringListTest();
- break;
#endif
default: name = "";
break; //needed to end loop
#endif
}
-struct UdhrTestCase
-{
- const char *locale;
- const char *file;
-};
-
-void SSearchTest::udhrTest()
-{
- UErrorCode status = U_ZERO_ERROR;
- char path[PATH_BUFFER_SIZE];
- const char *udhrPath = getPath(path, "udhr");
-
- if (udhrPath == NULL) {
- // couldn't get path: error message already output...
- return;
- }
-
- UdhrTestCase testCases[] = {
- {"en", "udhr_eng.txt"},
- {"de", "udhr_deu_1996.txt"},
- {"fr", "udhr_fra.txt"},
- {"ru", "udhr_rus.txt"},
- {"th", "udhr_tha.txt"},
- {"ja", "udhr_jpn.txt"},
- {"ko", "udhr_kor.txt"},
- {"zh", "udhr_cmn_hans.txt"},
- {"zh_Hant", "udhr_cmn_hant.txt"}
- };
-
- int32_t testCount = ARRAY_SIZE(testCases);
-
- for (int32_t t = 0; t < testCount; t += 1) {
- int32_t len = 0;
- char *resolvedFileName = NULL;
- const char *encoding = NULL;
- UCHARBUF *ucharBuf = NULL;
-
- ucbuf_resolveFileName(udhrPath, testCases[t].file, NULL, &len, &status);
- resolvedFileName = NEW_ARRAY(char, len);
-
- if(resolvedFileName == NULL){
- continue;
- }
-
- if(status == U_BUFFER_OVERFLOW_ERROR){
- status = U_ZERO_ERROR;
- }
-
- ucbuf_resolveFileName(udhrPath, testCases[t].file, resolvedFileName, &len, &status);
- ucharBuf = ucbuf_open(resolvedFileName, &encoding, TRUE, FALSE, &status);
-
- DELETE_ARRAY(resolvedFileName);
-
- if(U_FAILURE(status)){
- infoln("Could not open the input file %s. Test skipped\n", testCases[t].file);
- continue;
- }
-
- int32_t targetLen = 0;
- const UChar *target = ucbuf_getBuffer(ucharBuf, &targetLen, &status);
-
- /* The first line of the file contains the pattern */
- int32_t start = 0, end = 0, plen = 0;
-
- for(end = start; ; end += 1) {
- UChar ch = target[end];
-
- if (ch == 0x000A || ch == 0x000D || ch == 0x2028) {
- break;
- }
- }
-
- plen = end - start;
-
- UChar *pattern = NEW_ARRAY(UChar, plen);
- for (int32_t i = 0; i < plen; i += 1) {
- pattern[i] = target[start++];
- }
-
- int32_t offset = 0;
- UCollator *coll = ucol_open(testCases[t].locale, &status);
- UCD *ucd = NULL;
- BMS *bms = NULL;
-
- if (U_FAILURE(status)) {
- errln("Could not open collator for %s", testCases[t].locale);
- goto delete_collator;
- }
-
- ucd = ucd_open(coll, &status);
-
- if (U_FAILURE(status)) {
- errln("Could not open CollData object for %s", testCases[t].locale);
- goto delete_ucd;
- }
-
- bms = bms_open(ucd, pattern, plen, target, targetLen, &status);
-
- if (U_FAILURE(status)) {
- errln("Could not open search object for %s", testCases[t].locale);
- goto delete_bms;
- }
-
- start = end = -1;
- while (bms_search(bms, offset, &start, &end)) {
- offset = end;
- }
-
- if (offset == 0) {
- errln("Could not find pattern - locale: %s, file: %s ", testCases[t].locale, testCases[t].file);
- }
-
-delete_bms:
- bms_close(bms);
-
-delete_ucd:
- ucd_close(ucd);
-
-delete_collator:
- ucol_close(coll);
-
- DELETE_ARRAY(pattern);
- ucbuf_close(ucharBuf);
- }
-
- ucd_flushCache();
-}
-
-void SSearchTest::bmSearchTest()
-{
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS
- UErrorCode status = U_ZERO_ERROR;
- char path[PATH_BUFFER_SIZE];
- const char *testFilePath = getPath(path, "ssearch.xml");
-
- if (testFilePath == NULL) {
- return; /* Couldn't get path: error message already output. */
- }
-
- UXMLParser *parser = UXMLParser::createParser(status);
- TEST_ASSERT_SUCCESS(status);
- UXMLElement *root = parser->parseFile(testFilePath, status);
- TEST_ASSERT_SUCCESS(status);
- if (U_FAILURE(status)) {
- return;
- }
-
- const UnicodeString *debugTestCase = root->getAttribute("debug");
- if (debugTestCase != NULL) {
-// setenv("USEARCH_DEBUG", "1", 1);
- }
-
-
- const UXMLElement *testCase;
- int32_t tc = 0;
-
- while((testCase = root->nextChildElement(tc)) != NULL) {
-
- if (testCase->getTagName().compare("test-case") != 0) {
- errln("ssearch, unrecognized XML Element in test file");
- continue;
- }
- const UnicodeString *id = testCase->getAttribute("id");
- *testId = 0;
- if (id != NULL) {
- id->extract(0, id->length(), testId, sizeof(testId), US_INV);
- }
-
- // If debugging test case has been specified and this is not it, skip to next.
- if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) {
- continue;
- }
- //
- // Get the requested collation strength.
- // Default is tertiary if the XML attribute is missing from the test case.
- //
- const UnicodeString *strength = testCase->getAttribute("strength");
- UColAttributeValue collatorStrength = UCOL_PRIMARY;
- if (strength==NULL) { collatorStrength = UCOL_TERTIARY;}
- else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;}
- else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;}
- else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;}
- else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}
- else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;}
- else {
- // Bogus value supplied for strength. Shouldn't happen, even from
- // typos, if the XML source has been validated.
- // This assert is a little deceiving in that strength can be
- // any of the allowed values, not just TERTIARY, but it will
- // do the job of getting the error output.
- TEST_ASSERT(*strength=="TERTIARY")
- }
-
- //
- // Get the collator normalization flag. Default is UCOL_OFF.
- //
- UColAttributeValue normalize = UCOL_OFF;
- const UnicodeString *norm = testCase->getAttribute("norm");
- TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF");
- if (norm!=NULL && *norm=="ON") {
- normalize = UCOL_ON;
- }
-
- //
- // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.
- //
- UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;
- const UnicodeString *alt = testCase->getAttribute("alternate_handling");
- TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE");
- if (alt != NULL && *alt == "SHIFTED") {
- alternateHandling = UCOL_SHIFTED;
- }
-
- const UnicodeString defLocale("en");
- char clocale[100];
- const UnicodeString *locale = testCase->getAttribute("locale");
- if (locale == NULL || locale->length()==0) {
- locale = &defLocale;
- };
- locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);
-
-
- UnicodeString text;
- UnicodeString target;
- UnicodeString pattern;
- int32_t expectedMatchStart = -1;
- int32_t expectedMatchLimit = -1;
- const UXMLElement *n;
- int32_t nodeCount = 0;
-
- n = testCase->getChildElement("pattern");
- TEST_ASSERT(n != NULL);
- if (n==NULL) {
- continue;
- }
- text = n->getText(FALSE);
- text = text.unescape();
- pattern.append(text);
- nodeCount++;
-
- n = testCase->getChildElement("pre");
- if (n!=NULL) {
- text = n->getText(FALSE);
- text = text.unescape();
- target.append(text);
- nodeCount++;
- }
-
- n = testCase->getChildElement("m");
- if (n!=NULL) {
- expectedMatchStart = target.length();
- text = n->getText(FALSE);
- text = text.unescape();
- target.append(text);
- expectedMatchLimit = target.length();
- nodeCount++;
- }
-
- n = testCase->getChildElement("post");
- if (n!=NULL) {
- text = n->getText(FALSE);
- text = text.unescape();
- target.append(text);
- nodeCount++;
- }
-
- // Check that there weren't extra things in the XML
- TEST_ASSERT(nodeCount == testCase->countChildren());
-
- // Open a collator and StringSearch based on the parameters
- // obtained from the XML.
- //
- status = U_ZERO_ERROR;
- UCollator *collator = ucol_open(clocale, &status);
- ucol_setStrength(collator, collatorStrength);
- ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status);
- ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, alternateHandling, &status);
- UCD *ucd = ucd_open(collator, &status);
- BMS *bms = bms_open(ucd, pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), &status);
-
- TEST_ASSERT_SUCCESS(status);
- if (U_FAILURE(status)) {
- bms_close(bms);
- ucd_close(ucd);
- ucol_close(collator);
- continue;
- }
-
- int32_t foundStart = 0;
- int32_t foundLimit = 0;
- UBool foundMatch;
-
- //
- // Do the search, check the match result against the expected results.
- //
- foundMatch = bms_search(bms, 0, &foundStart, &foundLimit);
- //TEST_ASSERT_SUCCESS(status);
- if ((foundMatch && expectedMatchStart < 0) ||
- (foundStart != expectedMatchStart) ||
- (foundLimit != expectedMatchLimit)) {
- TEST_ASSERT(FALSE); // ouput generic error position
- infoln("Found, expected match start = %d, %d \n"
- "Found, expected match limit = %d, %d",
- foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
- }
-
- bms_close(bms);
- ucd_close(ucd);
- ucol_close(collator);
- }
-
- ucd_flushCache();
- delete root;
- delete parser;
-#endif
-}
-
struct Order
{
int32_t order;
}
#endif
-#if 1
-
-struct PCE
-{
- uint64_t ce;
- int32_t lowOffset;
- int32_t highOffset;
-};
-
-class PCEList
-{
-public:
- PCEList(UCollator *coll, const UnicodeString &string);
- ~PCEList();
-
- int32_t size() const;
-
- const PCE *get(int32_t index) const;
-
- int32_t getLowOffset(int32_t index) const;
- int32_t getHighOffset(int32_t index) const;
- uint64_t getOrder(int32_t index) const;
-
- UBool matchesAt(int32_t offset, const PCEList &other) const;
-
- uint64_t operator[](int32_t index) const;
-
-private:
- void add(uint64_t ce, int32_t low, int32_t high);
-
- PCE *list;
- int32_t listMax;
- int32_t listSize;
-};
-
-PCEList::PCEList(UCollator *coll, const UnicodeString &string)
-{
- UErrorCode status = U_ZERO_ERROR;
- UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
- uint64_t order;
- int32_t low, high;
-
- list = new PCE[listMax];
-
- ucol_setOffset(elems, 0, &status);
-
- do {
- order = ucol_nextProcessed(elems, &low, &high, &status);
- add(order, low, high);
- } while (order != UCOL_PROCESSED_NULLORDER);
-
- ucol_closeElements(elems);
-}
-
-PCEList::~PCEList()
-{
- delete[] list;
-}
-
-void PCEList::add(uint64_t order, int32_t low, int32_t high)
-{
- if (listSize >= listMax) {
- listMax *= 2;
-
- PCE *newList = new PCE[listMax];
-
- uprv_memcpy(newList, list, listSize * sizeof(Order));
- delete[] list;
- list = newList;
- }
-
- list[listSize].ce = order;
- list[listSize].lowOffset = low;
- list[listSize].highOffset = high;
-
- listSize += 1;
-}
-
-const PCE *PCEList::get(int32_t index) const
-{
- if (index >= listSize) {
- return NULL;
- }
-
- return &list[index];
-}
-
-int32_t PCEList::getLowOffset(int32_t index) const
-{
- const PCE *pce = get(index);
-
- if (pce != NULL) {
- return pce->lowOffset;
- }
-
- return -1;
-}
-
-int32_t PCEList::getHighOffset(int32_t index) const
-{
- const PCE *pce = get(index);
-
- if (pce != NULL) {
- return pce->highOffset;
- }
-
- return -1;
-}
-
-uint64_t PCEList::getOrder(int32_t index) const
-{
- const PCE *pce = get(index);
-
- if (pce != NULL) {
- return pce->ce;
- }
-
- return UCOL_PROCESSED_NULLORDER;
-}
-
-int32_t PCEList::size() const
-{
- return listSize;
-}
-
-UBool PCEList::matchesAt(int32_t offset, const PCEList &other) const
-{
- // NOTE: sizes include the NULLORDER, which we don't want to compare.
- int32_t otherSize = other.size() - 1;
-
- if (listSize - 1 - offset < otherSize) {
- return FALSE;
- }
-
- for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) {
- if (getOrder(i) != other.getOrder(j)) {
- return FALSE;
- }
- }
-
- return TRUE;
-}
-
-uint64_t PCEList::operator[](int32_t index) const
-{
- return getOrder(index);
-}
-
-void SSearchTest::boyerMooreTest()
+//
+// sharpSTest: searches every entry of targets[] for both the long pattern "fuss"
+// and the short pattern "fu\u00DF" (sharp s), using a collator opened from the
+// short string "LEN_S1". Each target is expected to contain a match for both
+// patterns: a hit is logged with logln(), a miss is reported with errln().
+// The two search objects are created once and re-targeted with usearch_setText()
+// for each entry.
+//
+void SSearchTest::sharpSTest()
{
UErrorCode status = U_ZERO_ERROR;
UCollator *coll = NULL;
- CollData *data = NULL;
- const CEList* ce = NULL;
- const CEList* ce1 = NULL;
UnicodeString lp = "fuss";
UnicodeString sp = "fu\\u00DF";
- BoyerMooreSearch *longPattern = NULL;
- BoyerMooreSearch *shortPattern = NULL;
UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",
"ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",
"fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"};
int32_t start = -1, end = -1;
coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
- if (U_FAILURE(status)) {
- errcheckln(status, "Could not open collator. - %s", u_errorName(status));
- return;
- }
-
- data = CollData::open(coll, status);
- if (U_FAILURE(status)) {
- errln("Could not open CollData object.");
- goto close_data;
- }
-
- data->getDynamicClassID();
- if (U_FAILURE(status)) {
- errln("Could not get dynamic class ID of CollData.");
- goto close_patterns;
- }
-
- data->getStaticClassID();
- if (U_FAILURE(status)) {
- errln("Could not get static class ID of CollData.");
- goto close_patterns;
- }
-
- longPattern = new BoyerMooreSearch(data, lp.unescape(), NULL, status);
- shortPattern = new BoyerMooreSearch(data, sp.unescape(), NULL, status);
- if (U_FAILURE(status)) {
- errln("Could not create pattern objects.");
- goto close_patterns;
- }
-
- longPattern->getBadCharacterTable();
- shortPattern->getBadCharacterTable();
- if (U_FAILURE(status)) {
- errln("Could not get bad character table.");
- goto close_patterns;
- }
-
- longPattern->getGoodSuffixTable();
- shortPattern->getGoodSuffixTable();
- if (U_FAILURE(status)) {
- errln("Could not get good suffix table.");
- goto close_patterns;
- }
-
- longPattern->getDynamicClassID();
- shortPattern->getDynamicClassID();
- if (U_FAILURE(status)) {
- errln("Could not get dynamic class ID of BoyerMooreSearch.");
- goto close_patterns;
- }
-
- longPattern->getStaticClassID();
- shortPattern->getStaticClassID();
- if (U_FAILURE(status)) {
- errln("Could not get static class ID of BoyerMooreSearch.");
- goto close_patterns;
- }
-
- longPattern->getData();
- shortPattern->getData();
- if (U_FAILURE(status)) {
- errln("Could not get collate data.");
- goto close_patterns;
- }
-
- ce = longPattern->getPatternCEs();
- ce1 = shortPattern->getPatternCEs();
- if (U_FAILURE(status)) {
- errln("Could not get pattern CEs.");
- goto close_patterns;
- }
-
- ce->getDynamicClassID();
- ce1->getDynamicClassID();
- if (U_FAILURE(status)) {
- errln("Could not get dynamic class ID of CEList.");
- goto close_patterns;
- }
-
- ce->getStaticClassID();
- ce1->getStaticClassID();
- if (U_FAILURE(status)) {
- errln("Could not get static class ID of CEList.");
- goto close_patterns;
- }
-
- if(data->minLengthInChars(ce,0) != 3){
- errln("Minimal Length in Characters for 'data' with 'ce' was suppose to give 3.");
- goto close_patterns;
- }
-
- if(data->minLengthInChars(ce1,0) != 3){
- errln("Minimal Length in Characters for 'data' with 'ce1' was suppose to give 3.");
- goto close_patterns;
- }
-
- for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {
- UnicodeString target = targets[t].unescape();
-
- longPattern->setTargetString(&target, status);
- if (longPattern->search(0, start, end)) {
- logln("Test %d: found long pattern at [%d, %d].", t, start, end);
- } else {
- errln("Test %d: did not find long pattern.", t);
- }
-
- shortPattern->setTargetString(&target, status);
- if (shortPattern->search(0, start, end)) {
- logln("Test %d: found short pattern at [%d, %d].", t, start, end);
- } else {
- errln("Test %d: did not find short pattern.", t);
- }
-
- if(longPattern->empty()){
- errln("Test %d: Long pattern should not have been empty.");
- }
-
- if(shortPattern->empty()){
- errln("Test %d: Short pattern should not have been empty.");
- }
- }
-
-close_patterns:
- delete shortPattern;
- delete longPattern;
-
-close_data:
- CollData::close(data);
- ucol_close(coll);
-}
+ TEST_ASSERT_SUCCESS(status);
-void SSearchTest::bmsTest()
-{
- UErrorCode status = U_ZERO_ERROR;
- UCollator *coll = NULL;
- UCD *data = NULL;
- UnicodeString lp = "fuss";
- UnicodeString lpu = lp.unescape();
- UnicodeString sp = "fu\\u00DF";
- UnicodeString spu = sp.unescape();
- BMS *longPattern = NULL;
- BMS *shortPattern = NULL;
- UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",
- "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",
- "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"};
- int32_t start = -1, end = -1;
+ UnicodeString lpUnescaped = lp.unescape();
+ UnicodeString spUnescaped = sp.unescape();
- coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
- if (U_FAILURE(status)) {
- errcheckln(status, "Could not open collator. - %s", u_errorName(status));
- return;
- }
-
- data = ucd_open(coll, &status);
- if (U_FAILURE(status)) {
- errln("Could not open CollData object.");
- goto close_data;
- }
+ LocalUStringSearchPointer ussLong(usearch_openFromCollator(lpUnescaped.getBuffer(), lpUnescaped.length(),
+ lpUnescaped.getBuffer(), lpUnescaped.length(), // actual test data will be set later
+ coll,
+ NULL, // the break iterator
+ &status));
- longPattern = bms_open(data, lpu.getBuffer(), lpu.length(), NULL, 0, &status);
- shortPattern = bms_open(data, spu.getBuffer(), spu.length(), NULL, 0, &status);
- if (U_FAILURE(status)) {
- errln("Couldn't open pattern objects.");
- goto close_patterns;
- }
+ LocalUStringSearchPointer ussShort(usearch_openFromCollator(spUnescaped.getBuffer(), spUnescaped.length(),
+ spUnescaped.getBuffer(), spUnescaped.length(), // actual test data will be set later
+ coll,
+ NULL, // the break iterator
+ &status));
+ TEST_ASSERT_SUCCESS(status);
for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {
+ UBool bFound;
UnicodeString target = targets[t].unescape();
- bms_setTargetString(longPattern, target.getBuffer(), target.length(), &status);
- if (bms_search(longPattern, 0, &start, &end)) {
+ start = end = -1;
+ usearch_setText(ussLong.getAlias(), target.getBuffer(), target.length(), &status);
+ bFound = usearch_search(ussLong.getAlias(), 0, &start, &end, &status);
+ TEST_ASSERT_SUCCESS(status);
+ if (bFound) {
logln("Test %d: found long pattern at [%d, %d].", t, start, end);
} else {
errln("Test %d: did not find long pattern.", t);
}
- bms_setTargetString(shortPattern, target.getBuffer(), target.length(), &status);
- if (bms_search(shortPattern, 0, &start, &end)) {
- logln("Test %d: found short pattern at [%d, %d].", t, start, end);
+ // Same check for the short (sharp-s) pattern; the messages must name the
+ // short pattern so a failure is attributed to the right search object.
+ usearch_setText(ussShort.getAlias(), target.getBuffer(), target.length(), &status);
+ bFound = usearch_search(ussShort.getAlias(), 0, &start, &end, &status);
+ TEST_ASSERT_SUCCESS(status);
+ if (bFound) {
+ logln("Test %d: found short pattern at [%d, %d].", t, start, end);
} else {
- errln("Test %d: did not find short pattern.", t);
+ errln("Test %d: did not find short pattern.", t);
}
}
- /* Add better coverage for bms code. */
- if(bms_empty(longPattern)) {
- errln("FAIL: longgPattern is empty.");
- }
-
- if (!bms_getData(longPattern)) {
- errln("FAIL: bms_getData returned NULL.");
- }
-
- if (!ucd_getCollator(data)) {
- errln("FAIL: ucd_getCollator returned NULL.");
- }
-
-close_patterns:
- bms_close(shortPattern);
- bms_close(longPattern);
-
-close_data:
- ucd_close(data);
- ucd_freeCache();
ucol_close(coll);
}
{
UErrorCode status = U_ZERO_ERROR;
UCollator *coll = NULL;
- CollData *data = NULL;
UnicodeString pat = /*"gcagagag"*/ "fxeld";
UnicodeString target = /*"gcatcgcagagagtatacagtacg"*/ "cloveldfxeld";
- BoyerMooreSearch *pattern = NULL;
int32_t start = -1, end = -1;
+ UBool bFound;
coll = ucol_open(NULL, &status);
- if (U_FAILURE(status)) {
- errcheckln(status, "Couldn't open collator. - %s", u_errorName(status));
- return;
- }
-
- data = CollData::open(coll, status);
- if (U_FAILURE(status)) {
- errln("Couldn't open CollData object.");
- goto close_data;
- }
+ TEST_ASSERT_SUCCESS(status);
- pattern = new BoyerMooreSearch(data, pat, &target, status);
- if (U_FAILURE(status)) {
- errln("Couldn't open pattern object.");
- goto close_pattern;
- }
+ LocalUStringSearchPointer ss(usearch_openFromCollator(pat.getBuffer(), pat.length(),
+ target.getBuffer(), target.length(),
+ coll,
+ NULL, // the break iterator
+ &status));
+ TEST_ASSERT_SUCCESS(status);
- if (pattern->search(0, start, end)) {
+ bFound = usearch_search(ss.getAlias(), 0, &start, &end, &status);
+ TEST_ASSERT_SUCCESS(status);
+ if (bFound) {
logln("Found pattern at [%d, %d].", start, end);
} else {
errln("Did not find pattern.");
}
-close_pattern:
- delete pattern;
-
-close_data:
- CollData::close(data);
ucol_close(coll);
}
"Neither to been y-buried nor y-brent,\n"
"But maketh houndes ete hem in despyt. zet'\n";
-#define TEST_BOYER_MOORE 1
const char *cPattern = "maketh houndes ete hem";
//const char *cPattern = "Whylom";
//const char *cPattern = "zet";
LocalUCollatorPointer collator(ucol_open("en", &status));
- CollData *data = CollData::open(collator.getAlias(), status);
- if (U_FAILURE(status) || collator.isNull() || data == NULL) {
- errcheckln(status, "Unable to open UCollator or CollData. - %s", u_errorName(status));
- return;
- }
//ucol_setStrength(collator.getAlias(), collatorStrength);
//ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status);
UnicodeString uPattern = cPattern;
-#ifndef TEST_BOYER_MOORE
LocalUStringSearchPointer uss(usearch_openFromCollator(uPattern.getBuffer(), uPattern.length(),
target.getBuffer(), target.length(),
collator.getAlias(),
NULL, // the break iterator
&status));
TEST_ASSERT_SUCCESS(status);
-#else
- BoyerMooreSearch bms(data, uPattern, &target, status);
- TEST_ASSERT_SUCCESS(status);
-#endif
// int32_t foundStart;
// int32_t foundEnd;
int32_t refMatchPos = (int32_t)(pm - longishText);
int32_t icuMatchPos;
int32_t icuMatchEnd;
-#ifndef TEST_BOYER_MOORE
usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status);
TEST_ASSERT_SUCCESS(status);
-#else
- found = bms.search(0, icuMatchPos, icuMatchEnd);
-#endif
TEST_ASSERT_M(refMatchPos == icuMatchPos, "strstr and icu give different match positions.");
int32_t i;
// Try loopcounts around 100000 to some millions, depending on the operation,
// to get runtimes of at least several seconds.
for (i=0; i<10000; i++) {
-#ifndef TEST_BOYER_MOORE
found = usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status);
-#else
- found = bms.search(0, icuMatchPos, icuMatchEnd);
-#endif
//TEST_ASSERT_SUCCESS(status);
//TEST_ASSERT(found);
}
//printf("%ld, %d\n", pm-longishText, j);
-#ifdef TEST_BOYER_MOORE
- CollData::close(data);
-#endif
}
-#endif
//----------------------------------------------------------------------------------------
//
} while (! matches);
}
-//
-// Find the next acceptable boundary following the specified starting index
-// in the target text being searched.
-// TODO: refine what is an acceptable boundary. For the moment,
-// choose the next position not within a combining sequence.
-//
-#if 0
-static int32_t nextBoundaryAfter(const UnicodeString &string, int32_t startIndex) {
- const UChar *text = string.getBuffer();
- int32_t textLen = string.length();
-
- if (startIndex >= textLen) {
- return startIndex;
- }
-
- UChar32 c;
- int32_t i = startIndex;
-
- U16_NEXT(text, i, textLen, c);
-
- // If we are on a control character, stop without looking for combining marks.
- // Control characters do not combine.
- int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
- if (gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR) {
- return i;
- }
-
- // The initial character was not a control, and can thus accept trailing
- // combining characters. Advance over however many of them there are.
- int32_t indexOfLastCharChecked;
-
- for (;;) {
- indexOfLastCharChecked = i;
-
- if (i>=textLen) {
- break;
- }
-
- U16_NEXT(text, i, textLen, c);
- gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
-
- if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
- break;
- }
- }
-
- return indexOfLastCharChecked;
-}
-#endif
-
-#if 0
-static UBool isInCombiningSequence(const UnicodeString &string, int32_t index) {
- const UChar *text = string.getBuffer();
- int32_t textLen = string.length();
-
- if (index>=textLen || index<=0) {
- return FALSE;
- }
-
- // If the character at the current index is not a GRAPHEME_EXTEND
- // then we can not be within a combining sequence.
- UChar32 c;
- U16_GET(text, 0, index, textLen, c);
- int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
- if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
- return FALSE;
- }
-
- // We are at a combining mark. If the preceding character is anything
- // except a CONTROL, CR or LF, we are in a combining sequence.
- U16_PREV(text, 0, index, c);
- gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
-
- return !(gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR);
-}
-#endif
-
static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset, const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd)
{
UErrorCode status = U_ZERO_ERROR;
}
params.extract(m.start(1, status), paramLength, valString, sizeof(valString));
- val = strtol(valString, NULL, 10);
+ val = uprv_strtol(valString, NULL, 10);
// Delete this parameter from the params string.
m.reset();
notFoundCount += 1;
}
- return notFoundCount;
-}
-
-int32_t SSearchTest::bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
- BoyerMooreSearch *bms, BoyerMooreSearch *abms,
- const char *name, const char *strength, uint32_t seed)
-{
- UErrorCode status = U_ZERO_ERROR;
- int32_t actualStart = -1, actualEnd = -1;
- //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length();
- int32_t expectedStart = -1, expectedEnd = -1;
- int32_t notFoundCount = 0;
-
- // **** TODO: find *all* matches, not just first one ****
- simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);
-
- bms->setTargetString(&testCase, status);
- bms->search(0, actualStart, actualEnd);
-
- if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
- errln("Boyer-Moore Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
- " strength=%s seed=%d",
- name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
- errln(UNICODE_STRING_SIMPLE(" <pattern>: ") + prettify(pattern));
- }
-
- if (expectedStart == -1 && actualStart == -1) {
- notFoundCount += 1;
- }
-
- // **** TODO: find *all* matches, not just first one ****
- simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd);
-
- abms->setTargetString(&testCase, status);
- abms->search(0, actualStart, actualEnd);
-
- if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
- errln("Boyer-Moore Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
- " strength=%s seed=%d",
- name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
- errln(UNICODE_STRING_SIMPLE(" <alt_pattern>: ") + prettify(altPattern));
- }
-
- if (expectedStart == -1 && actualStart == -1) {
- notFoundCount += 1;
- }
-
-
return notFoundCount;
}
#endif
return;
}
- CollData *monkeyData = CollData::open(coll, status);
+ CollData *monkeyData = new CollData(coll, status);
USet *expansions = uset_openEmpty();
USet *contractions = uset_openEmpty();
uset_close(contractions);
uset_close(expansions);
uset_close(letters);
-
- CollData::close(monkeyData);
-
- ucol_close(coll);
-}
-
-void SSearchTest::bmMonkeyTest(char *params)
-{
- static const UChar skipChars[] = { 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0xAAB5, 0xAAB6, 0xAAB9, 0xAABB, 0xAABC, 0 }; // for timebomb
- // ook!
- UErrorCode status = U_ZERO_ERROR;
- UCollator *coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
-
- if (U_FAILURE(status)) {
- errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status));
- return;
- }
-
- CollData *monkeyData = CollData::open(coll, status);
-
- USet *expansions = uset_openEmpty();
- USet *contractions = uset_openEmpty();
-
- ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
-
- U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
- U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
- USet *letters = uset_openPattern(letter_pattern, 39, &status);
- SetMonkey letterMonkey(letters);
- StringSetMonkey contractionMonkey(contractions, coll, monkeyData);
- StringSetMonkey expansionMonkey(expansions, coll, monkeyData);
- UnicodeString testCase;
- UnicodeString alternate;
- UnicodeString pattern, altPattern;
- UnicodeString prefix, altPrefix;
- UnicodeString suffix, altSuffix;
-
- Monkey *monkeys[] = {
- &letterMonkey,
- &contractionMonkey,
- &expansionMonkey,
- &contractionMonkey,
- &expansionMonkey,
- &contractionMonkey,
- &expansionMonkey,
- &contractionMonkey,
- &expansionMonkey};
- int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);
- // int32_t nonMatchCount = 0;
-
- UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY};
- const char *strengthNames[] = {"primary", "secondary", "tertiary"};
- int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);
- int32_t loopCount = quick? 1000 : 10000;
- int32_t firstStrength = 0;
- int32_t lastStrength = strengthCount - 1; //*/ 0;
-
- if (params != NULL) {
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS
- UnicodeString p(params);
-
- loopCount = getIntParam("loop", p, loopCount);
- m_seed = getIntParam("seed", p, m_seed);
-
- RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status);
- if (m.find()) {
- UnicodeString breakType = m.group(1, status);
-
- for (int32_t s = 0; s < strengthCount; s += 1) {
- if (breakType == strengthNames[s]) {
- firstStrength = lastStrength = s;
- break;
- }
- }
-
- m.reset();
- p = m.replaceFirst("", status);
- }
-
- if (RegexMatcher("\\S", p, 0, status).find()) {
- // Each option is stripped out of the option string as it is processed.
- // All options have been checked. The option string should have been completely emptied..
- char buf[100];
- p.extract(buf, sizeof(buf), NULL, status);
- buf[sizeof(buf)-1] = 0;
- errln("Unrecognized or extra parameter: %s\n", buf);
- return;
- }
-#else
- infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");
-#endif
- }
-
- for(int32_t s = firstStrength; s <= lastStrength; s += 1) {
- int32_t notFoundCount = 0;
-
- logln("Setting strength to %s.", strengthNames[s]);
- ucol_setStrength(coll, strengths[s]);
-
- CollData *data = CollData::open(coll, status);
-
- UnicodeSet skipSet;
- if(isICUVersionBefore(51, 1)) {
- // timebomb until ticket #9156 (was #8081) is resolved
- UnicodeString skipString(skipChars);
- skipSet.addAll(skipString);
- }
- if(isICUVersionBefore(51, 1)) {
- // Time bomb until ticket #9490 is fixed.
- skipSet.add(0x12327);
- skipSet.add(0x1311b);
- skipSet.add(0x1200d);
- }
- skipSet.freeze();
- // TODO: try alternate prefix and suffix too?
- // TODO: alternates are only equal at primary strength. Is this OK?
- for(int32_t t = 0; t < loopCount; t += 1) {
- uint32_t seed = m_seed;
- // int32_t nmc = 0;
-
- generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);
- generateTestCase(coll, monkeys, monkeyCount, prefix, altPrefix);
- generateTestCase(coll, monkeys, monkeyCount, suffix, altSuffix);
-
- if (skipSet.containsSome(pattern)) {
- continue; // time bomb
- }
-
- BoyerMooreSearch pat(data, pattern, NULL, status);
- BoyerMooreSearch alt(data, altPattern, NULL, status);
-
- // **** need a better way to deal with this ****
-#if 0
- if (pat.empty() ||
- alt.empty()) {
- continue;
- }
-#endif
-
- // pattern
- notFoundCount += bmMonkeyTestCase(coll, pattern, pattern, altPattern, &pat, &alt, "pattern", strengthNames[s], seed);
-
- testCase.remove();
- testCase.append(prefix);
- testCase.append(/*alt*/pattern);
-
- // prefix + pattern
- notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern", strengthNames[s], seed);
-
- testCase.append(suffix);
-
- // prefix + pattern + suffix
- notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern + suffix", strengthNames[s], seed);
-
- testCase.remove();
- testCase.append(pattern);
- testCase.append(suffix);
-
- // pattern + suffix
- notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "pattern + suffix", strengthNames[s], seed);
- }
-
- CollData::close(data);
-
- logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
- }
-
- uset_close(contractions);
- uset_close(expansions);
- uset_close(letters);
-
- CollData::close(monkeyData);
+ delete monkeyData;
ucol_close(coll);
}
-void SSearchTest::stringListTest(){
- UErrorCode status = U_ZERO_ERROR;
- StringList *sl = new StringList(status);
- if(U_FAILURE(status)){
- errln("ERROR: stringListTest: Could not start StringList");
- }
-
- const UChar chars[] = {
- 0x0000
- };
- sl->add(chars, (int32_t) 0, status);
- if(U_FAILURE(status)){
- errln("ERROR: stringListTest: StringList::add");
- }
-
- if(sl->getDynamicClassID() != StringList::getStaticClassID()){
- errln("ERROR: stringListTest: getDynamicClassID and getStaticClassID does not match");
- }
- delete sl;
-}
-
#endif
#endif
/*
**********************************************************************
- * Copyright (C) 2005-2009, International Business Machines
+ * Copyright (C) 2005-2012, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/ucol.h"
-#include "unicode/bmsearch.h"
#include "intltest.h"
virtual void searchTest();
virtual void offsetTest();
virtual void monkeyTest(char *params);
-
- virtual void bmMonkeyTest(char *params);
- virtual void boyerMooreTest();
+ virtual void sharpSTest();
virtual void goodSuffixTest();
virtual void searchTime();
-
- virtual void bmsTest();
- virtual void bmSearchTest();
-
- virtual void udhrTest();
- virtual void stringListTest();
private:
virtual const char *getPath(char buffer[2048], const char *filename);
virtual int32_t monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
const char *name, const char *strength, uint32_t seed);
-
- virtual int32_t bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
- BoyerMooreSearch *bms, BoyerMooreSearch *abms,
- const char *name, const char *strength, uint32_t seed);
#endif
-
};
#endif
-#endif
+#endif
/********************************************************************
* COPYRIGHT:
- * Copyright (C) 2008-2009 IBM, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2012 IBM, Inc. All Rights Reserved.
*
********************************************************************/
/**
StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
:UPerfTest(argc,argv,status){
int32_t start, end;
-
-#ifdef TEST_BOYER_MOORE_SEARCH
- bms = NULL;
-#else
srch = NULL;
-#endif
-
pttrn = NULL;
if(status== U_ILLEGAL_ARGUMENT_ERROR || line_mode){
fprintf(stderr,gUsageString, "strsrchperf");
pttrn = temp; /* store word in pttrn */
#endif
-#ifdef TEST_BOYER_MOORE_SEARCH
- UnicodeString patternString(pttrn, pttrnLen);
- UCollator *coll = ucol_open(locale, &status);
- CollData *data = CollData::open(coll, status);
-
- targetString = new UnicodeString(src, srcLen);
- bms = new BoyerMooreSearch(data, patternString, targetString, status);
-#else
/* Create the StringSearch object to be use in performance test. */
srch = usearch_open(pttrn, pttrnLen, src, srcLen, locale, NULL, &status);
-#endif
if(U_FAILURE(status)){
fprintf(stderr, "FAILED to create UPerfTest object. Error: %s\n", u_errorName(status));
}
StringSearchPerformanceTest::~StringSearchPerformanceTest() {
- CollData *data = bms->getData();
- UCollator *coll = data->getCollator();
-
- delete bms;
- delete targetString;
- CollData::close(data);
- ucol_close(coll);
-
if (pttrn != NULL) {
free(pttrn);
}
-
-#ifndef TEST_BOYER_MOORE_SEARCH
if (srch != NULL) {
usearch_close(srch);
}
-#endif
}
UPerfFunction* StringSearchPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par) {
}
UPerfFunction* StringSearchPerformanceTest::Test_ICU_Forward_Search(){
-#ifdef TEST_BOYER_MOORE_SEARCH
- StringSearchPerfFunction *func = new StringSearchPerfFunction(ICUForwardSearch, bms, src, srcLen, pttrn, pttrnLen);
-#else
StringSearchPerfFunction* func = new StringSearchPerfFunction(ICUForwardSearch, srch, src, srcLen, pttrn, pttrnLen);
-#endif
return func;
}
UPerfFunction* StringSearchPerformanceTest::Test_ICU_Backward_Search(){
-#ifdef TEST_BOYER_MOORE_SEARCH
- StringSearchPerfFunction *func = new StringSearchPerfFunction(ICUBackwardSearch, bms, src, srcLen, pttrn, pttrnLen);
-#else
StringSearchPerfFunction* func = new StringSearchPerfFunction(ICUBackwardSearch, srch, src, srcLen, pttrn, pttrnLen);
-#endif
return func;
}
/********************************************************************
* COPYRIGHT:
- * Copyright (C) 2008-2009 IBM, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2012 IBM, Inc. All Rights Reserved.
*
********************************************************************/
#ifndef _STRSRCHPERF_H
#define _STRSRCHPERF_H
-#include "unicode/ubrk.h"
#include "unicode/usearch.h"
-#include "unicode/colldata.h"
-#include "unicode/bmsearch.h"
#include "unicode/uperf.h"
#include <stdlib.h>
#include <stdio.h>
-#define TEST_BOYER_MOORE_SEARCH
-
-#ifdef TEST_BOYER_MOORE_SEARCH
-typedef void (*StrSrchFn) (BoyerMooreSearch * bms, const UChar *src, int32_t srcLen, const UChar *pttrn, int32_t pttrnLen, UErrorCode *status);
-#else
typedef void (*StrSrchFn)(UStringSearch* srch, const UChar* src,int32_t srcLen, const UChar* pttrn, int32_t pttrnLen, UErrorCode* status);
-#endif
class StringSearchPerfFunction : public UPerfFunction {
private:
int32_t srcLen;
const UChar* pttrn;
int32_t pttrnLen;
-#ifdef TEST_BOYER_MOORE_SEARCH
- BoyerMooreSearch *bms;
-#else
UStringSearch* srch;
-#endif
public:
virtual void call(UErrorCode* status) {
-#ifdef TEST_BOYER_MOORE_SEARCH
- (*fn)(bms, src, srcLen, pttrn, pttrnLen, status);
-#else
(*fn)(srch, src, srcLen, pttrn, pttrnLen, status);
-#endif
}
virtual long getOperationsPerIteration() {
-#if 0
- return (long)(srcLen/pttrnLen);
-#else
return (long) srcLen;
-#endif
}
-#ifdef TEST_BOYER_MOORE_SEARCH
- StringSearchPerfFunction(StrSrchFn func, BoyerMooreSearch *search, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen) {
- fn = func;
- src = source;
- srcLen = sourceLen;
- pttrn = pattern;
- pttrnLen = patternLen;
- bms = search;
- }
-#else
StringSearchPerfFunction(StrSrchFn func, UStringSearch* search, const UChar* source,int32_t sourceLen, const UChar* pattern, int32_t patternLen) {
fn = func;
src = source;
pttrnLen = patternLen;
srch = search;
}
-#endif
};
class StringSearchPerformanceTest : public UPerfTest {
int32_t srcLen;
UChar* pttrn;
int32_t pttrnLen;
-#ifdef TEST_BOYER_MOORE_SEARCH
- UnicodeString *targetString;
- BoyerMooreSearch *bms;
-#else
UStringSearch* srch;
-#endif
public:
StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status);
~StringSearchPerformanceTest();
virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = NULL);
-
UPerfFunction* Test_ICU_Forward_Search();
-
UPerfFunction* Test_ICU_Backward_Search();
};
-#ifdef TEST_BOYER_MOORE_SEARCH
-void ICUForwardSearch(BoyerMooreSearch *bms, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen, UErrorCode * /*status*/) {
- int32_t offset = 0, start = -1, end = -1;
-
- while (bms->search(offset, start, end)) {
- offset = end;
- }
-}
-
-void ICUBackwardSearch(BoyerMooreSearch *bms, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen, UErrorCode * /*status*/) {
- int32_t offset = 0, start = -1, end = -1;
-
- /* NOTE: No Boyer-Moore backward search yet... */
- while (bms->search(offset, start, end)) {
- offset = end;
- }
-}
-#else
void ICUForwardSearch(UStringSearch *srch, const UChar* source, int32_t sourceLen, const UChar* pattern, int32_t patternLen, UErrorCode* status) {
int32_t match;
match = usearch_previous(srch, status);
}
}
-#endif
#endif /* _STRSRCHPERF_H */