/*
******************************************************************************
-* *
-* Copyright (C) 1999-2012, International Business Machines *
-* Corporation and others. All Rights Reserved. *
-* *
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
******************************************************************************
* file name: uresdata.h
* encoding: US-ASCII
* All values are URES_STRING_V2 strings.
*/
URES_ARRAY16=9
+
+ /* Resource type 15 is not defined but effectively used by RES_BOGUS=0xffffffff. */
} UResInternalType;
/*
typedef uint32_t Resource;
#define RES_BOGUS 0xffffffff
+#define RES_MAX_OFFSET 0x0fffffff
#define RES_GET_TYPE(res) ((int32_t)((res)>>28UL))
#define RES_GET_OFFSET(res) ((res)&0x0fffffff)
/* indexes[] value names; indexes are generally 32-bit (Resource) indexes */
enum {
- URES_INDEX_LENGTH, /* [0] contains URES_INDEX_TOP==the length of indexes[];
- * formatVersion==1: all bits contain the length of indexes[]
- * but the length is much less than 0xff;
- * formatVersion>1:
- * only bits 7..0 contain the length of indexes[],
- * bits 31..8 are reserved and set to 0 */
- URES_INDEX_KEYS_TOP, /* [1] contains the top of the key strings, */
- /* same as the bottom of resources or UTF-16 strings, rounded up */
- URES_INDEX_RESOURCES_TOP, /* [2] contains the top of all resources */
- URES_INDEX_BUNDLE_TOP, /* [3] contains the top of the bundle, */
- /* in case it were ever different from [2] */
- URES_INDEX_MAX_TABLE_LENGTH,/* [4] max. length of any table */
- URES_INDEX_ATTRIBUTES, /* [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */
- URES_INDEX_16BIT_TOP, /* [6] top of the 16-bit units (UTF-16 string v2 UChars, URES_TABLE16, URES_ARRAY16),
- * rounded up (new in formatVersion 2.0, ICU 4.4) */
- URES_INDEX_POOL_CHECKSUM, /* [7] checksum of the pool bundle (new in formatVersion 2.0, ICU 4.4) */
+ /**
+ * [0] contains the length of indexes[]
+ * which is at most URES_INDEX_TOP of the latest format version
+ *
+ * formatVersion==1: all bits contain the length of indexes[]
+ * but the length is much less than 0xff;
+ * formatVersion>1:
+ * only bits 7..0 contain the length of indexes[],
+ * bits 31..8 are reserved and set to 0
+ * formatVersion>=3:
+ * bits 7..0 still contain the length of indexes[],
+ * bits 31..8 contain poolStringIndexLimit bits 23..0
+ */
+ URES_INDEX_LENGTH,
+ /**
+ * [1] contains the top of the key strings,
+ * same as the bottom of resources or UTF-16 strings, rounded up
+ */
+ URES_INDEX_KEYS_TOP,
+ /** [2] contains the top of all resources */
+ URES_INDEX_RESOURCES_TOP,
+ /**
+ * [3] contains the top of the bundle,
+ * in case it were ever different from [2]
+ */
+ URES_INDEX_BUNDLE_TOP,
+ /** [4] max. length of any table */
+ URES_INDEX_MAX_TABLE_LENGTH,
+ /**
+ * [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2)
+ *
+ * formatVersion>=3:
+ * bits 31..16 contain poolStringIndex16Limit
+ * bits 15..12 contain poolStringIndexLimit bits 27..24
+ */
+ URES_INDEX_ATTRIBUTES,
+ /**
+ * [6] top of the 16-bit units (UTF-16 string v2 UChars, URES_TABLE16, URES_ARRAY16),
+ * rounded up (new in formatVersion 2.0, ICU 4.4)
+ */
+ URES_INDEX_16BIT_TOP,
+ /** [7] checksum of the pool bundle (new in formatVersion 2.0, ICU 4.4) */
+ URES_INDEX_POOL_CHECKSUM,
URES_INDEX_TOP
};
#define URES_ATT_USES_POOL_BUNDLE 4
/*
- * File format for .res resource bundle files (formatVersion=2, ICU 4.4)
- *
- * New in formatVersion 2 compared with 1.3: -------------
+ * File format for .res resource bundle files
+ *
+ * ICU 56: New in formatVersion 3 compared with 2: -------------
+ *
+ * Resource bundles can optionally use shared string-v2 values
+ * stored in the pool bundle.
+ * If so, then the indexes[] contain two new values
+ * in previously-unused bits of existing indexes[] slots:
+ * - poolStringIndexLimit:
+ * String-v2 offsets (in 32-bit Resource words) below this limit
+ * point to pool bundle string-v2 values.
+ * - poolStringIndex16Limit:
+ * Resource16 string-v2 offsets below this limit
+ * point to pool bundle string-v2 values.
+ * Guarantee: poolStringIndex16Limit <= poolStringIndexLimit
+ *
+ * The local bundle's poolStringIndexLimit is greater than
+ * any pool bundle string index used in the local bundle.
+ * The poolStringIndexLimit should not be greater than
+ * the maximum possible pool bundle string index.
+ *
+ * The maximum possible pool bundle string index is the index to the last non-NUL
+ * pool string character, due to suffix sharing.
+ *
+ * In the pool bundle, there is no structure that lists the strings.
+ * (The root resource is an empty Table.)
+ * If the strings need to be enumerated (as genrb --usePoolBundle does),
+ * then iterate through the pool bundle's 16-bit-units array from the beginning.
+ * Stop at the end of the array, or when an explicit or implicit string length
+ * would lead beyond the end of the array,
+ * or when an apparent string is not NUL-terminated.
+ * (A future genrb version might terminate the strings with
+ * what looks like a large explicit string length.)
+ *
+ * ICU 4.4: New in formatVersion 2 compared with 1.3: -------------
*
* Three new resource types -- String-v2, Table16 and Array16 -- have their
* values stored in a new array of 16-bit units between the table key strings
* at 2-byte offsets from the start of a contiguous 16-bit-unit array between
* the table key strings and the other resources. (new in formatVersion 2/ICU 4.4)
* At offset 0 of that array is a 16-bit zero value for empty 16-bit resources.
+ *
* Resource16 values in Table16 and Array16 are 16-bit offsets to String-v2
* resources, with the offsets relative to the start of the 16-bit-units array.
+ * Starting with formatVersion 3/ICU 56, if offset<poolStringIndex16Limit
+ * then use the pool bundle's 16-bit-units array,
+ * otherwise subtract that limit and use the local 16-bit-units array.
*
* Type Name Memory layout of values
* (in parentheses: scalar, non-offset values)
* - if first<0xdfff then length=((first-0xdfef)<<16) | second UChar
* - if first==0xdfff then length=((second UChar)<<16) | third UChar
* (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
+ *
+ * Starting with formatVersion 3/ICU 56, if offset<poolStringIndexLimit
+ * then use the pool bundle's 16-bit-units array,
+ * otherwise subtract that limit and use the local 16-bit-units array.
+ * (Note different limits for Resource16 vs. Resource.)
+ *
* 7 Integer: (28-bit offset is integer value)
* 8 Array: int32_t count, Resource[count]
* 9 Array16: uint16_t count, Resource16[count]
const char *poolBundleKeys;
Resource rootRes;
int32_t localKeyLimit;
+ const uint16_t *poolBundleStrings;
+ int32_t poolStringIndexLimit;
+ int32_t poolStringIndex16Limit;
UBool noFallback; /* see URES_ATT_NO_FALLBACK */
UBool isPoolBundle;
UBool usesPoolBundle;
*******************************************************************************
*/
+#include <assert.h>
#include "genrb.h"
#include "unicode/localpointer.h"
#include "unicode/uclean.h"
+#include "unicode/utf16.h"
#include "reslist.h"
#include "ucmndata.h" /* TODO: for reading the pool bundle */
#endif
void ResFile::close() {
- uprv_free(fBytes);
+ delete[] fBytes;  // fBytes is allocated with new uint8_t[] when the pool bundle file is read
fBytes = NULL;
+ delete fStrings;  // list of pool bundle strings; NULL when none were collected (delete of NULL is a no-op)
+ fStrings = NULL;  // reset so that the pointer does not dangle after close()
}
enum
}
if(options[FORMAT_VERSION].doesOccur) {
const char *s = options[FORMAT_VERSION].value;
- if(uprv_strlen(s) != 1 || (s[0] != '1' && s[0] != '2')) {
+ /* Accept exactly one digit in '1'..'3'. The range test must use ||: */
+ /* no character is both below '1' and above '3', so && would reject nothing. */
+ if(uprv_strlen(s) != 1 || s[0] < '1' || '3' < s[0]) {
fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s);
argc = -1;
} else if(s[0] == '1' &&
fprintf(stderr, "the pool bundle file %s is too small\n", theCurrentFileName);
return 1;
}
- poolBundle.fBytes = (uint8_t *)uprv_malloc((poolFileSize + 15) & ~15);
+ poolBundle.fBytes = new uint8_t[(poolFileSize + 15) & ~15];
if (poolFileSize > 0 && poolBundle.fBytes == NULL) {
fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", theCurrentFileName);
return U_MEMORY_ALLOCATION_ERROR;
- } else {
- UDataSwapper *ds;
- const DataHeader *header;
- int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize);
- int32_t keysBottom;
- if (bytesRead != poolFileSize) {
- fprintf(stderr, "unable to read the pool bundle file %s\n", theCurrentFileName);
- return 1;
- }
- /*
- * Swap the pool bundle so that a single checked-in file can be used.
- * The swapper functions also test that the data looks like
- * a well-formed .res file.
- */
- ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead,
- U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status);
- if (U_FAILURE(status)) {
- fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n",
- theCurrentFileName, u_errorName(status));
- return status;
- }
- ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status);
- udata_closeSwapper(ds);
- if (U_FAILURE(status)) {
- fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n",
- theCurrentFileName, u_errorName(status));
- return status;
- }
- header = (const DataHeader *)poolBundle.fBytes;
- if (header->info.formatVersion[0]!=2) {
- fprintf(stderr, "invalid format of pool bundle file %s\n", theCurrentFileName);
- return U_INVALID_FORMAT_ERROR;
- }
- poolBundle.fKeys = (const char *)header + header->dataHeader.headerSize;
- poolBundle.fIndexes = (const int32_t *)poolBundle.fKeys + 1;
- indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff;
- if (indexLength <= URES_INDEX_POOL_CHECKSUM) {
- fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", theCurrentFileName);
- return U_INVALID_FORMAT_ERROR;
- }
- keysBottom = (1 + indexLength) * 4;
- poolBundle.fKeys += keysBottom;
- poolBundle.fKeysLength = (poolBundle.fIndexes[URES_INDEX_KEYS_TOP] * 4) - keysBottom;
- poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM];
}
+
+ UDataSwapper *ds;
+ const DataHeader *header;
+ int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize);
+ if (bytesRead != poolFileSize) {
+ fprintf(stderr, "unable to read the pool bundle file %s\n", theCurrentFileName);
+ return 1;
+ }
+ /*
+ * Swap the pool bundle so that a single checked-in file can be used.
+ * The swapper functions also test that the data looks like
+ * a well-formed .res file.
+ */
+ ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead,
+ U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n",
+ theCurrentFileName, u_errorName(status));
+ return status;
+ }
+ ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status);
+ udata_closeSwapper(ds);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n",
+ theCurrentFileName, u_errorName(status));
+ return status;
+ }
+ header = (const DataHeader *)poolBundle.fBytes;
+ if (header->info.formatVersion[0] < 2) {
+ fprintf(stderr, "invalid format of pool bundle file %s\n", theCurrentFileName);
+ return U_INVALID_FORMAT_ERROR;
+ }
+ const int32_t *pRoot = (const int32_t *)(
+ (const char *)header + header->dataHeader.headerSize);
+ poolBundle.fIndexes = pRoot + 1;
+ indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff;
+ if (indexLength <= URES_INDEX_POOL_CHECKSUM) {
+ fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", theCurrentFileName);
+ return U_INVALID_FORMAT_ERROR;
+ }
+ int32_t keysBottom = 1 + indexLength;
+ int32_t keysTop = poolBundle.fIndexes[URES_INDEX_KEYS_TOP];
+ poolBundle.fKeys = (const char *)(pRoot + keysBottom);
+ poolBundle.fKeysLength = (keysTop - keysBottom) * 4;
+ poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM];
+
for (i = 0; i < poolBundle.fKeysLength; ++i) {
if (poolBundle.fKeys[i] == 0) {
++poolBundle.fKeysCount;
}
}
+
+ // 16BitUnits[] begins with strings-v2.
+ // The strings-v2 may optionally be terminated by what looks like
+ // an explicit string length that exceeds the number of remaining 16-bit units.
+ int32_t stringUnitsLength = (poolBundle.fIndexes[URES_INDEX_16BIT_TOP] - keysTop) * 2;
+ if (stringUnitsLength >= 2 && getFormatVersion() >= 3) {
+ poolBundle.fStrings = new PseudoListResource(NULL, status);
+ if (poolBundle.fStrings == NULL) {
+ fprintf(stderr, "unable to allocate memory for the pool bundle strings %s\n", theCurrentFileName);
+ return U_MEMORY_ALLOCATION_ERROR;
+ }
+ // The PseudoListResource constructor call did not allocate further memory.
+ assert(U_SUCCESS(status));
+ const UChar *p = (const UChar *)(pRoot + keysTop);
+ int32_t remaining = stringUnitsLength;
+ do {
+ int32_t first = *p;
+ int8_t numCharsForLength;
+ int32_t length;
+ if (!U16_IS_TRAIL(first)) {
+ // NUL-terminated
+ numCharsForLength = 0;
+ for (length = 0;
+ length < remaining && p[length] != 0;
+ ++length) {}
+ } else if (first < 0xdfef) {
+ numCharsForLength = 1;
+ length = first & 0x3ff;
+ } else if (first < 0xdfff && remaining >= 2) {
+ numCharsForLength = 2;
+ length = ((first - 0xdfef) << 16) | p[1];
+ } else if (first == 0xdfff && remaining >= 3) {
+ numCharsForLength = 3;
+ length = ((int32_t)p[1] << 16) | p[2];
+ } else {
+ break; // overrun
+ }
+ // Check for overrun before changing remaining,
+ // so that it is always accurate after the loop body.
+ if ((numCharsForLength + length) >= remaining ||
+ p[numCharsForLength + length] != 0) {
+ break; // overrun or explicitly terminated
+ }
+ int32_t poolStringIndex = stringUnitsLength - remaining;
+ // Maximum pool string index when suffix-sharing the last character.
+ int32_t maxStringIndex = poolStringIndex + numCharsForLength + length - 1;
+ if (maxStringIndex >= RES_MAX_OFFSET) {
+ // pool string index overrun
+ break;
+ }
+ p += numCharsForLength;
+ remaining -= numCharsForLength;
+ if (length != 0) {
+ StringResource *sr =
+ new StringResource(poolStringIndex, numCharsForLength,
+ p, length, status);
+ if (sr == NULL) {
+ fprintf(stderr, "unable to allocate memory for a pool bundle string %s\n",
+ theCurrentFileName);
+ return U_MEMORY_ALLOCATION_ERROR;
+ }
+ poolBundle.fStrings->add(sr);
+ poolBundle.fStringIndexLimit = maxStringIndex + 1;
+ // The StringResource constructor did not allocate further memory.
+ assert(U_SUCCESS(status));
+ }
+ p += length + 1;
+ remaining -= length + 1;
+ } while (remaining > 0);
+ }
+
T_FileStream_close(poolFile);
setUsePoolBundle(TRUE);
+ if (isVerbose() && poolBundle.fStrings != NULL) {
+ printf("number of shared strings: %d\n", (int)poolBundle.fStrings->fCount);
+ int32_t length = poolBundle.fStringIndexLimit + 1; // incl. last NUL
+ printf("16-bit units for strings: %6d = %6d bytes\n",
+ (int)length, (int)length * 2);
+ }
}
if(options[INCLUDE_UNIHAN_COLL].doesOccur) {
goto finish;
}
if(options[WRITE_POOL_BUNDLE].doesOccur) {
+ data->fWritePoolBundle = newPoolBundle;
data->compactKeys(*status);
int32_t newKeysLength;
const char *newKeys = data->getKeyBytes(&newKeysLength);
*/
#define BIN_ALIGNMENT 16
+// This numeric constant must be at least 1.
+// If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
+// and it makes no sense to move it to the pool bundle.
+// The larger the threshold for fNumUnitsSaved
+// the smaller the savings, and the smaller the pool bundle.
+// We trade some total size reduction to reduce the pool bundle a bit,
+// so that one can reasonably save data size by
+// removing bundle files without rebuilding the pool bundle.
+// This can also help to keep the pool and total (pool+local) string indexes
+// within 16 bits, that is, within range of Table16 and Array16 containers.
+#ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
+# define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
+#endif
+
U_NAMESPACE_USE
static UBool gIncludeCopyright = FALSE;
static UBool gUsePoolBundle = FALSE;
-static int32_t gFormatVersion = 2;
+static int32_t gFormatVersion = 3;
/* How do we store string values? */
enum {
STRINGS_UTF16_V1, /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
- STRINGS_UTF16_V2 /* formatVersion 2: optional length in 1..3 UChars + UChars + NUL */
+ STRINGS_UTF16_V2 /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
};
-enum {
- MAX_IMPLICIT_STRING_LENGTH = 40 /* do not store the length explicitly for such strings */
-};
+static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40; /* do not store the length explicitly for such strings */
static const ResFile kNoPoolBundle;
{1, 4, 0, 0} /* dataVersion take a look at version inside parsed resb*/
};
-static const UVersionInfo gFormatVersions[3] = { /* indexed by a major-formatVersion integer */
+static const UVersionInfo gFormatVersions[4] = { /* indexed by a major-formatVersion integer */
{ 0, 0, 0, 0 },
{ 1, 3, 0, 0 },
- { 2, 0, 0, 0 }
+ { 2, 0, 0, 0 },
+ { 3, 0, 0, 0 }
};
+// Remember to update genrb.h GENRB_VERSION when changing the data format.
+// (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
static uint8_t calcPadding(uint32_t size) {
/* returns space we need to pad */
gFormatVersion = formatVersion;
}
+int32_t getFormatVersion() {  // read accessor for the file-static gFormatVersion
+ return gFormatVersion;  // major formatVersion; the static initializer sets it to 3
+}
+
void setUsePoolBundle(UBool use) {
gUsePoolBundle = use;
}
}
SResource::SResource()
- : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fKey(-1), line(0),
- fNext(NULL) {
+ : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1),
+ line(0), fNext(NULL) {
ustr_init(&fComment);
}
SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment,
UErrorCode &errorCode)
- : fType(type), fWritten(FALSE), fRes(RES_BOGUS),
- fKey(bundle->addTag(tag, errorCode)),
+ : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1),
+ fKey(bundle != NULL ? bundle->addTag(tag, errorCode) : -1), fKey16(-1),
line(0), fNext(NULL) {
ustr_init(&fComment);
if(comment != NULL) {
}
}
+PseudoListResource::~PseudoListResource() {}
+
+void PseudoListResource::add(SResource *res) {
+    // Nothing to link in for a missing resource or for the kNoResource object.
+    if (res == NULL || res == &kNoResource) {
+        return;
+    }
+    // Push the item onto the front of the singly linked list and count it.
+    res->fNext = fFirst;
+    fFirst = res;
+    ++fCount;
+}
+
StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
const UChar *value, int32_t len,
const UString* comment, UErrorCode &errorCode)
}
}
+StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type,
+        const icu::UnicodeString &value, UErrorCode &errorCode)
+        : SResource(bundle, NULL, type, NULL, errorCode), fString(value) {
+    if (value.isEmpty() && gFormatVersion > 1) {
+        // formatVersion 2 and up: an empty value gets the precomputed "empty" resource word
+        // and is marked as already written.
+        fRes = URES_MAKE_EMPTY_RESOURCE(type);
+        fWritten = TRUE;
+    } else {
+        // Some code relies on NUL-termination.
+        fString.getTerminatedBuffer();
+        // A bogus string at this point is reported as an out-of-memory failure,
+        // unless an earlier error is already recorded.
+        if (U_SUCCESS(errorCode) && fString.isBogus()) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+// Pool bundle string: fString aliases the value buffer (no copy); the caller guarantees it is NUL-terminated and not empty.
+StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len,
+ UErrorCode &errorCode)
+ : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) {  // NULL bundle: the base constructor adds no key tag (fKey stays -1)
+ assert(len > 0);  // empty strings are never created through this constructor
+ assert(!fString.isBogus());
+}
+
StringBaseResource::~StringBaseResource() {}
static int32_t U_CALLCONV
const UString* comment, UErrorCode &errorCode)
: SResource(bundle, tag, URES_INT, comment, errorCode) {
fValue = value;
- fRes = URES_MAKE_RESOURCE(URES_INT, value & 0x0FFFFFFF);
+ fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET);
fWritten = TRUE;
}
void
StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
UErrorCode &errorCode) {
+ assert(fSame == NULL);
fSame = static_cast<StringResource *>(uhash_get(stringSet, this));
if (fSame != NULL) {
- return; /* This is a duplicate of an earlier-visited string. */
+ // This is a duplicate of a pool bundle string or of an earlier-visited string.
+ if (++fSame->fNumCopies == 1) {
+ assert(fSame->fWritten);
+ int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes);
+ if (poolStringIndex >= bundle->fPoolStringIndexLimit) {
+ bundle->fPoolStringIndexLimit = poolStringIndex + 1;
+ }
+ }
+ return;
}
/* Put this string into the set for finding duplicates. */
+ fNumCopies = 1;
uhash_put(stringSet, this, this, &errorCode);
if (bundle->fStringsForm != STRINGS_UTF16_V1) {
- const UChar *s = getBuffer();
int32_t len = length();
- if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(s[0]) && len == u_strlen(s)) {
+ if (len <= MAX_IMPLICIT_STRING_LENGTH &&
+ !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) {
/*
* This string will be stored without an explicit length.
* Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
/* Neither a string nor a container. */
}
-static int32_t
-makeRes16(uint32_t resWord) {
- uint32_t type, offset;
+int32_t
+SRBRoot::makeRes16(uint32_t resWord) const {  // returns the Resource16 value for resWord, or -1 if it has no 16-bit form
if (resWord == 0) {
return 0; /* empty string */
}
- type = RES_GET_TYPE(resWord);
- offset = RES_GET_OFFSET(resWord);
- if (type == URES_STRING_V2 && offset <= 0xffff) {
- return (int32_t)offset;
+ uint32_t type = RES_GET_TYPE(resWord);
+ int32_t offset = (int32_t)RES_GET_OFFSET(resWord);  // at most 28 bits, so the cast cannot go negative
+ if (type == URES_STRING_V2) {  // only String-v2 values can be referenced by a Resource16
+ assert(offset > 0);
+ if (offset < fPoolStringIndexLimit) {  // offsets below this limit refer to pool bundle strings
+ if (offset < fPoolStringIndex16Limit) {  // only pool strings below the 16-bit limit are reachable from a Resource16
+ return offset;
+ }
+ } else {  // local string
+ offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit;  // rebase: local Resource16 offsets start at the 16-bit pool limit
+ if (offset <= 0xffff) {  // must fit into a single 16-bit unit
+ return offset;
+ }
+ }
}
return -1;
}
return map[start].newpos;
}
-uint16_t
-SRBRoot::makeKey16(int32_t key) const {
- if (key >= 0) {
- return (uint16_t)key;
- } else {
- return (uint16_t)(key + fLocalKeyLimit); /* offset in the pool bundle */
- }
-}
-
/*
* Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings.
* For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS
* and exits early.
*/
void
-StringResource::handleWrite16(SRBRoot * /*bundle*/, UErrorCode & /*errorCode*/) {
+StringResource::handleWrite16(SRBRoot * /*bundle*/) {
SResource *same;
if ((same = fSame) != NULL) {
/* This is a duplicate. */
}
void
-ArrayResource::handleWrite16(SRBRoot *bundle, UErrorCode &errorCode) {
+ContainerResource::writeAllRes16(SRBRoot *bundle) {  // appends one Resource16 per child to bundle->f16BitUnits
+ for (SResource *current = fFirst; current != NULL; current = current->fNext) {
+ bundle->f16BitUnits.append((UChar)current->fRes16);  // the visible callers check res16 >= 0 first, so no -1 is truncated here
+ }
+ fWritten = TRUE;  // SResource::write() returns early for resources with fWritten set
+}
+
+void
+ArrayResource::handleWrite16(SRBRoot *bundle) {
if (fCount == 0 && gFormatVersion > 1) {
fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY);
fWritten = TRUE;
int32_t res16 = 0;
for (SResource *current = fFirst; current != NULL; current = current->fNext) {
- current->write16(bundle, errorCode);
- res16 |= makeRes16(current->fRes);
+ current->write16(bundle);
+ res16 |= current->fRes16;
}
- if (U_SUCCESS(errorCode) && fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) {
+ if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) {
fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length());
bundle->f16BitUnits.append((UChar)fCount);
- for (SResource *current = fFirst; current != NULL; current = current->fNext) {
- bundle->f16BitUnits.append((UChar)makeRes16(current->fRes));
- }
- fWritten = TRUE;
- if (bundle->f16BitUnits.isBogus()) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- }
+ writeAllRes16(bundle);
}
}
void
-TableResource::handleWrite16(SRBRoot *bundle, UErrorCode &errorCode) {
- int32_t maxKey = 0, maxPoolKey = 0x80000000;
- int32_t res16 = 0;
- UBool hasLocalKeys = FALSE, hasPoolKeys = FALSE;
-
+TableResource::handleWrite16(SRBRoot *bundle) {
if (fCount == 0 && gFormatVersion > 1) {
fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
fWritten = TRUE;
return;
}
/* Find the smallest table type that fits the data. */
+ int32_t key16 = 0;
+ int32_t res16 = 0;
for (SResource *current = fFirst; current != NULL; current = current->fNext) {
- int32_t key;
- current->write16(bundle, errorCode);
- key = current->fKey = bundle->mapKey(current->fKey);
- if (key >= 0) {
- hasLocalKeys = TRUE;
- if (key > maxKey) {
- maxKey = key;
- }
- } else {
- hasPoolKeys = TRUE;
- if (key > maxPoolKey) {
- maxPoolKey = key;
- }
- }
- res16 |= makeRes16(current->fRes);
- }
- if (U_FAILURE(errorCode)) {
- return;
+ current->write16(bundle);
+ key16 |= current->fKey16;
+ res16 |= current->fRes16;
}
if(fCount > (uint32_t)bundle->fMaxTableLength) {
bundle->fMaxTableLength = fCount;
}
- maxPoolKey &= 0x7fffffff;
- if (fCount <= 0xffff &&
- (!hasLocalKeys || maxKey < bundle->fLocalKeyLimit) &&
- (!hasPoolKeys || maxPoolKey < (0x10000 - bundle->fLocalKeyLimit))
- ) {
+ if (fCount <= 0xffff && key16 >= 0) {
if (res16 >= 0 && gFormatVersion > 1) {
/* 16-bit count, key offsets and values */
fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length());
bundle->f16BitUnits.append((UChar)fCount);
for (SResource *current = fFirst; current != NULL; current = current->fNext) {
- bundle->f16BitUnits.append((UChar)bundle->makeKey16(current->fKey));
- }
- for (SResource *current = fFirst; current != NULL; current = current->fNext) {
- bundle->f16BitUnits.append((UChar)makeRes16(current->fRes));
- }
- fWritten = TRUE;
- if (bundle->f16BitUnits.isBogus()) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
+ bundle->f16BitUnits.append((UChar)current->fKey16);
}
+ writeAllRes16(bundle);
} else {
/* 16-bit count, 16-bit key offsets, 32-bit values */
fTableType = URES_TABLE;
}
void
-SResource::write16(SRBRoot *bundle, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return;
+PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) {  // appends nothing to the 16-bit units
+ fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);  // the list itself is represented as an empty table
+ fWritten = TRUE;  // SResource::write() returns early for resources with fWritten set
+}
+
+void
+SResource::write16(SRBRoot *bundle) {  // maps the key, lets the subclass write 16-bit data, then derives fKey16/fRes16
+ if (fKey >= 0) {
+ // A tagged resource has a non-negative key index into the parsed key strings.
+ // compactKeys() built a map from parsed key index to the final key index.
+ // After the mapping, negative key indexes are used for shared pool bundle keys.
+ fKey = bundle->mapKey(fKey);
+ // If the key index fits into a Key16 for a Table or Table16,
+ // then set the fKey16 field accordingly.
+ // Otherwise keep it at -1.
+ if (fKey >= 0) {
+ if (fKey < bundle->fLocalKeyLimit) {
+ fKey16 = fKey;
+ }
+ } else {
+ int32_t poolKeyIndex = fKey & 0x7fffffff;  // clear the sign bit that marks a pool bundle key
+ if (poolKeyIndex <= 0xffff) {
+ poolKeyIndex += bundle->fLocalKeyLimit;  // in a Key16, pool keys are numbered after the local keys
+ if (poolKeyIndex <= 0xffff) {
+ fKey16 = poolKeyIndex;
+ }
+ }
+ }
}
- if (fRes != RES_BOGUS) {
- /*
- * The resource item word was already precomputed, which means
- * no further data needs to be written.
- * This might be an integer, or an empty or UTF-16 v2 string,
- * an empty binary, etc.
- */
- return;
+ /*
+ * fRes != RES_BOGUS:
+ * The resource item word was already precomputed, which means
+ * no further data needs to be written.
+ * This might be an integer, or an empty or UTF-16 v2 string,
+ * an empty binary, etc.
+ */
+ if (fRes == RES_BOGUS) {
+ handleWrite16(bundle);
+ }
- handleWrite16(bundle, errorCode);
+ // Compute fRes16 for precomputed as well as just-computed fRes.
+ fRes16 = bundle->makeRes16(fRes);  // -1 when the resource word has no 16-bit form
}
void
-SResource::handleWrite16(SRBRoot * /*bundle*/, UErrorCode & /*errorCode*/) {
+SResource::handleWrite16(SRBRoot * /*bundle*/) {  // base implementation: intentionally writes nothing
/* Only a few resource types write 16-bit units. */
}
* and exits early.
*/
void
-StringBaseResource::handlePreWrite(uint32_t *byteOffset, SRBRoot * /*bundle*/,
- UErrorCode & /*errorCode*/) {
+StringBaseResource::handlePreWrite(uint32_t *byteOffset) {
/* Write the UTF-16 v1 string. */
fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2);
*byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR;
}
void
-IntVectorResource::handlePreWrite(uint32_t *byteOffset, SRBRoot * /*bundle*/,
- UErrorCode & /*errorCode*/) {
+IntVectorResource::handlePreWrite(uint32_t *byteOffset) {
if (fCount == 0 && gFormatVersion > 1) {
fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR);
fWritten = TRUE;
}
void
-BinaryResource::handlePreWrite(uint32_t *byteOffset, SRBRoot * /*bundle*/,
- UErrorCode & /*errorCode*/) {
+BinaryResource::handlePreWrite(uint32_t *byteOffset) {
uint32_t pad = 0;
uint32_t dataStart = *byteOffset + sizeof(fLength);
}
void
-ArrayResource::handlePreWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode) {
+ContainerResource::preWriteAllRes(uint32_t *byteOffset) {  // shared by ArrayResource and TableResource handlePreWrite()
for (SResource *current = fFirst; current != NULL; current = current->fNext) {
- current->preWrite(byteOffset, bundle, errorCode);
+ current->preWrite(byteOffset);  // preWrite() is a no-op for children whose fRes is already computed
}
+}
+
+void
+ArrayResource::handlePreWrite(uint32_t *byteOffset) {  // assigns this array's resource word and reserves its bytes
+ preWriteAllRes(byteOffset);  // children are placed first; the array's own offset is taken after them
fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2);
*byteOffset += (1 + fCount) * 4;
}
void
-TableResource::handlePreWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode) {
- for (SResource *current = fFirst; current != NULL; current = current->fNext) {
- current->preWrite(byteOffset, bundle, errorCode);
- }
+TableResource::handlePreWrite(uint32_t *byteOffset) {
+ preWriteAllRes(byteOffset);
if (fTableType == URES_TABLE) {
/* 16-bit count, 16-bit key offsets, 32-bit values */
fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2);
}
void
-SResource::preWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return;
- }
+SResource::preWrite(uint32_t *byteOffset) {
if (fRes != RES_BOGUS) {
/*
* The resource item word was already precomputed, which means
*/
return;
}
- handlePreWrite(byteOffset, bundle, errorCode);
+ handlePreWrite(byteOffset);
*byteOffset += calcPadding(*byteOffset);
}
void
-SResource::handlePreWrite(uint32_t * /*byteOffset*/, SRBRoot * /*bundle*/,
- UErrorCode &errorCode) {
- errorCode = U_INTERNAL_PROGRAM_ERROR;
+SResource::handlePreWrite(uint32_t * /*byteOffset*/) {  // base implementation; preWrite() calls it only when fRes is still RES_BOGUS
+ assert(FALSE);  // reaching the base version is a programming error; note that assert is compiled out under NDEBUG
}
/*
* write() sees fWritten and exits early.
*/
void
-StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset,
- SRBRoot * /*bundle*/, UErrorCode & /*errorCode*/) {
+StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
/* Write the UTF-16 v1 string. */
int32_t len = length();
udata_write32(mem, len);
}
void
-ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset,
- SRBRoot *bundle, UErrorCode &errorCode) {
+ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) {  // writes the data of every child, in list order
uint32_t i = 0;
for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) {
- current->write(mem, byteOffset, bundle, errorCode);
+ current->write(mem, byteOffset);  // write() returns early for children with fWritten set
}
assert(i == fCount);
+}
- udata_write32(mem, fCount);
+void
+ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) {  // writes each child's 32-bit resource word (fRes)
for (SResource *current = fFirst; current != NULL; current = current->fNext) {
udata_write32(mem, current->fRes);
}
- *byteOffset += (1 + fCount) * 4;
+ *byteOffset += fCount * 4;  // item words only; callers account for any count word themselves
}
void
-IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset,
- SRBRoot * /*bundle*/, UErrorCode & /*errorCode*/) {
+ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {  // layout: children's data, then int32 count, then Resource[count]
+ writeAllRes(mem, byteOffset);  // children's data precedes the array itself, matching handlePreWrite()
+ udata_write32(mem, fCount);
+ *byteOffset += 4;  // the count word just written
+ writeAllRes32(mem, byteOffset);  // advances *byteOffset by fCount * 4
+}
+
+void
+IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
udata_write32(mem, fCount);
for(uint32_t i = 0; i < fCount; ++i) {
udata_write32(mem, fArray[i]);
}
void
-BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset,
- SRBRoot * /*bundle*/, UErrorCode & /*errorCode*/) {
+BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
uint32_t pad = 0;
uint32_t dataStart = *byteOffset + sizeof(fLength);
}
void
-TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset,
- SRBRoot *bundle, UErrorCode &errorCode) {
- uint32_t i = 0;
- for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) {
- assert(i < fCount);
- current->write(mem, byteOffset, bundle, errorCode);
- }
- assert(i == fCount);
-
+TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
+ writeAllRes(mem, byteOffset);
if(fTableType == URES_TABLE) {
udata_write16(mem, (uint16_t)fCount);
for (SResource *current = fFirst; current != NULL; current = current->fNext) {
- udata_write16(mem, bundle->makeKey16(current->fKey));
+ udata_write16(mem, current->fKey16);
}
*byteOffset += (1 + fCount)* 2;
if ((fCount & 1) == 0) {
}
*byteOffset += (1 + fCount)* 4;
}
- for (SResource *current = fFirst; current != NULL; current = current->fNext) {
- udata_write32(mem, current->fRes);
- }
- *byteOffset += fCount * 4;
+ writeAllRes32(mem, byteOffset);
}
void
-SResource::write(UNewDataMemory *mem, uint32_t *byteOffset,
- SRBRoot *bundle, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return;
- }
+SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) {
if (fWritten) {
assert(fRes != RES_BOGUS);
return;
}
- handleWrite(mem, byteOffset, bundle, errorCode);
+ handleWrite(mem, byteOffset);
uint8_t paddingSize = calcPadding(*byteOffset);
if (paddingSize > 0) {
udata_writePadding(mem, paddingSize);
}
void
-SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/,
- SRBRoot * /*bundle*/, UErrorCode &errorCode) {
- errorCode = U_INTERNAL_PROGRAM_ERROR;
+SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) {  // base implementation; write() calls it only when fWritten is not set
+ assert(FALSE);  // reaching the base version is a programming error; note that assert is compiled out under NDEBUG
}
void SRBRoot::write(const char *outputDir, const char *outputPkg,
UHashtable *stringSet;
if (gFormatVersion > 1) {
stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode);
+ if (U_SUCCESS(errorCode) &&
+ fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) {
+ for (SResource *current = fUsePoolBundle->fStrings->fFirst;
+ current != NULL;
+ current = current->fNext) {
+ StringResource *sr = static_cast<StringResource *>(current);
+ sr->fNumCopies = 0;
+ sr->fNumUnitsSaved = 0;
+ uhash_put(stringSet, sr, sr, &errorCode);
+ }
+ }
fRoot->preflightStrings(this, stringSet, errorCode);
} else {
stringSet = NULL;
return;
}
- fRoot->write16(this, errorCode);
+ if (fPoolStringIndexLimit != 0) {
+ int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit;
+ if ((sum - 1) > RES_MAX_OFFSET) {
+ errorCode = U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
+ if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) {
+ // 16-bit indexes work for all pool + local strings.
+ fPoolStringIndex16Limit = fPoolStringIndexLimit;
+ } else {
+ // Set the pool index threshold so that 16-bit indexes work
+ // for some pool strings and some local strings.
+ fPoolStringIndex16Limit = (int32_t)(
+ ((int64_t)fPoolStringIndexLimit * 0xffff) / sum);
+ }
+ }
+
+ fRoot->write16(this);
+ if (f16BitUnits.isBogus()) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
if (f16BitUnits.length() & 1) {
f16BitUnits.append((UChar)0xaaaa); /* pad to multiple of 4 bytes */
}
fKeyMap = NULL;
byteOffset = fKeysTop + f16BitUnits.length() * 2;
- fRoot->preWrite(&byteOffset, this, errorCode);
+ fRoot->preWrite(&byteOffset);
/* total size including the root item */
top = byteOffset;
- if (U_FAILURE(errorCode)) {
- return;
- }
-
if (writtenFilename && writtenFilenameLen) {
*writtenFilename = 0;
}
/*
* formatVersion 1.1 (ICU 2.8):
- * write int32_t indexes[] after root and before the strings
+ * write int32_t indexes[] after root and before the key strings
* to make it easier to parse resource bundles in icuswap or from Java etc.
*/
uprv_memset(indexes, 0, sizeof(indexes));
if (URES_INDEX_POOL_CHECKSUM < fIndexLength) {
if (fIsPoolBundle) {
indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK;
- indexes[URES_INDEX_POOL_CHECKSUM] =
- (int32_t)computeCRC((char *)(fKeys + fKeysBottom),
- (uint32_t)(fKeysTop - fKeysBottom),
- 0);
+ uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom),
+ (uint32_t)(fKeysTop - fKeysBottom), 0);
+ // Continue the key-string checksum over the 16-bit units, if there are any.
+ if (f16BitUnits.length() <= 1) {
+     // no pool strings to checksum
+ } else if (U_IS_BIG_ENDIAN) {
+     checksum = computeCRC((const char *)f16BitUnits.getBuffer(),
+                           (uint32_t)f16BitUnits.length() * 2, checksum);
+ } else {
+     // Swap to big-endian so we get the same checksum on all platforms
+     // (except for charset family, due to the key strings).
+     UnicodeString s(f16BitUnits);
+     s.append((UChar)1); // Ensure that we own this buffer.
+     assert(!s.isBogus());
+     uint16_t *p = (uint16_t *)s.getBuffer();
+     for (int32_t count = f16BitUnits.length(); count > 0; --count) {
+         uint16_t x = *p;
+         *p++ = (uint16_t)((x << 8) | (x >> 8));
+     }
+     // The loop leaves p one past the last swapped unit, so the checksum
+     // must start at the beginning of the swapped copy, not at p.
+     checksum = computeCRC((const char *)s.getBuffer(),
+                           (uint32_t)f16BitUnits.length() * 2, checksum);
+ }
+ indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum;
} else if (gUsePoolBundle) {
indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE;
indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum;
}
}
+ // formatVersion 3 (ICU 56):
+ // share string values via pool bundle strings
+ indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8; // bits 23..0 -> 31..8
+ indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000; // bits 27..24 -> 15..12
+ indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16;
/* write the indexes[] */
udata_writeBlock(mem, indexes, fIndexLength*4);
/* write all of the bundle contents: the root item and its children */
byteOffset = fKeysTop + f16BitUnits.length() * 2;
- fRoot->write(mem, &byteOffset, this, errorCode);
+ fRoot->write(mem, &byteOffset);
assert(byteOffset == top);
size = udata_finish(mem, &errorCode);
fKeys(NULL), fKeyMap(NULL),
fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0),
f16BitUnits(), f16BitStringsLength(0),
- fUsePoolBundle(&kNoPoolBundle) {
+ fUsePoolBundle(&kNoPoolBundle),
+ fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
+ fWritePoolBundle(NULL) {
if (U_FAILURE(errorCode)) {
return;
}
}
fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
- fRoot = new TableResource(this, NULL, comment, errorCode);
+ if (isPoolBundle) {
+ fRoot = new PseudoListResource(this, errorCode);
+ } else {
+ fRoot = new TableResource(this, NULL, comment, errorCode);
+ }
if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) {
if (U_SUCCESS(errorCode)) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
}
fKeysCapacity = KEY_SPACE_SIZE;
- /* formatVersion 1.1: start fKeysTop after the root item and indexes[] */
+ /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */
if (gUsePoolBundle || isPoolBundle) {
fIndexLength = URES_INDEX_POOL_CHECKSUM + 1;
} else if (gFormatVersion >= 2) {
return diff;
}
/* sort by ascending string length */
- return left->length() - right->length();
+ diff = left->length() - right->length();
+ if (diff != 0) {
+ return diff;
+ }
+ // sort by descending size reduction
+ diff = right->fNumUnitsSaved - left->fNumUnitsSaved;
+ if (diff != 0) {
+ return diff;
+ }
+ // sort lexically
+ return left->fString.compare(right->fString);
}
void
-StringResource::writeUTF16v2(UnicodeString &dest) {
+StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) {
int32_t len = length();
- fRes = URES_MAKE_RESOURCE(URES_STRING_V2, dest.length());
+ fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length());
fWritten = TRUE;
switch(fNumCharsForLength) {
case 0:
* suffixes of this one.
*/
StringResource *res = array[i];
+ res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength();
+ // Whole duplicates of pool strings are already accounted for in fPoolStringIndexLimit,
+ // see StringResource::handlePreflightStrings().
int32_t j;
for (j = i + 1; j < count; ++j) {
StringResource *suffixRes = array[j];
/* Is it a suffix of the earlier, longer string? */
if (res->fString.endsWith(suffixRes->fString)) {
assert(res->length() != suffixRes->length()); // Set strings are unique.
- if (suffixRes->fNumCharsForLength == 0) {
+ if (suffixRes->fWritten) {
+ // Pool string, skip.
+ } else if (suffixRes->fNumCharsForLength == 0) {
/* yes, point to the earlier string */
suffixRes->fSame = res;
suffixRes->fSuffixOffset = res->length() - suffixRes->length();
+ if (res->fWritten) {
+ // Suffix-share res which is a pool string.
+ // Compute the resource word and collect the maximum.
+ suffixRes->fRes =
+ res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset;
+ int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes);
+ if (poolStringIndex >= fPoolStringIndexLimit) {
+ fPoolStringIndexLimit = poolStringIndex + 1;
+ }
+ suffixRes->fWritten = TRUE;
+ }
+ res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength();
} else {
/* write the suffix by itself if we need explicit length */
}
if (U_FAILURE(errorCode)) {
return;
}
- /* Write the non-suffix strings. */
- int32_t i;
- for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
- array[i]->writeUTF16v2(f16BitUnits);
- }
- if (f16BitUnits.isBogus()) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- /* Write the suffix strings. Make each point to the real string. */
- for (; i < count; ++i) {
- StringResource *res = array[i];
- StringResource *same = res->fSame;
- assert(res->length() != same->length()); // Set strings are unique.
- res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
- res->fSame = NULL;
- res->fWritten = TRUE;
+ if (fIsPoolBundle) {
+ // Write strings that are sufficiently shared.
+ // Avoid writing other strings.
+ int32_t numStringsWritten = 0;
+ int32_t numUnitsSaved = 0;
+ int32_t numUnitsNotSaved = 0;
+ for (int32_t i = 0; i < count; ++i) {
+ StringResource *res = array[i];
+ // Maximum pool string index when suffix-sharing the last character.
+ int32_t maxStringIndex =
+ f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1;
+ if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING &&
+ maxStringIndex < RES_MAX_OFFSET) {
+ res->writeUTF16v2(0, f16BitUnits);
+ ++numStringsWritten;
+ numUnitsSaved += res->fNumUnitsSaved;
+ } else {
+ numUnitsNotSaved += res->fNumUnitsSaved;
+ res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING);
+ res->fWritten = TRUE;
+ }
+ }
+ if (f16BitUnits.isBogus()) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ }
+ if (getShowWarning()) { // not quiet
+ printf("number of shared strings: %d\n", (int)numStringsWritten);
+ printf("16-bit units for strings: %6d = %6d bytes\n",
+ (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2);
+ printf("16-bit units saved: %6d = %6d bytes\n",
+ (int)numUnitsSaved, (int)numUnitsSaved * 2);
+ printf("16-bit units not saved: %6d = %6d bytes\n",
+ (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2);
+ }
+ } else {
+ assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit);
+ /* Write the non-suffix strings. */
+ int32_t i;
+ for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
+ StringResource *res = array[i];
+ if (!res->fWritten) {
+ int32_t localStringIndex = f16BitUnits.length();
+ if (localStringIndex >= fLocalStringIndexLimit) {
+ fLocalStringIndexLimit = localStringIndex + 1;
+ }
+ res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits);
+ }
+ }
+ if (f16BitUnits.isBogus()) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ if (fWritePoolBundle != NULL && gFormatVersion >= 3) {
+ PseudoListResource *poolStrings =
+ static_cast<PseudoListResource *>(fWritePoolBundle->fRoot);
+ for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
+ assert(!array[i]->fString.isEmpty());
+ StringResource *poolString =
+ new StringResource(fWritePoolBundle, array[i]->fString, errorCode);
+ if (poolString == NULL) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ break;
+ }
+ poolStrings->add(poolString);
+ }
+ }
+ /* Write the suffix strings. Make each point to the real string. */
+ for (; i < count; ++i) {
+ StringResource *res = array[i];
+ if (res->fWritten) {
+ continue;
+ }
+ StringResource *same = res->fSame;
+ assert(res->length() != same->length()); // Set strings are unique.
+ res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
+ int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit;
+ // Suffixes of pool strings have been set already.
+ assert(localStringIndex >= 0);
+ if (localStringIndex >= fLocalStringIndexLimit) {
+ fLocalStringIndexLimit = localStringIndex + 1;
+ }
+ res->fWritten = TRUE;
+ }
}
// +1 to account for the initial zero in f16BitUnits
assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
U_CDECL_BEGIN
+class PseudoListResource;
+
struct ResFile {
ResFile()
: fBytes(NULL), fIndexes(NULL),
- fKeys(NULL), fKeysLength(0), fKeysCount(0), fChecksum(0) {}
+ fKeys(NULL), fKeysLength(0), fKeysCount(0),
+ fStrings(NULL), fStringIndexLimit(0),
+ fChecksum(0) {}
~ResFile() { close(); }
void close();
const char *fKeys;
int32_t fKeysLength;
int32_t fKeysCount;
+
+ PseudoListResource *fStrings;
+ int32_t fStringIndexLimit;
+
int32_t fChecksum;
};
void compactKeys(UErrorCode &errorCode);
+ int32_t makeRes16(uint32_t resWord) const;
int32_t mapKey(int32_t oldpos) const;
- uint16_t makeKey16(int32_t key) const;
private:
void compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode);
int32_t f16BitStringsLength;
const ResFile *fUsePoolBundle;
+ int32_t fPoolStringIndexLimit;
+ int32_t fPoolStringIndex16Limit;
+ int32_t fLocalStringIndexLimit;
+ SRBRoot *fWritePoolBundle;
};
/* write a java resource file */
* Writes resource values into f16BitUnits
* and determines the resource item word, if possible.
*/
- void write16(SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handleWrite16(SRBRoot *bundle, UErrorCode &errorCode);
+ void write16(SRBRoot *bundle);
+ virtual void handleWrite16(SRBRoot *bundle);
/**
* Calculates ("preflights") and advances the *byteOffset
* before actually writing the bundle contents to the file,
* which is necessary because the root item is stored at the beginning.
*/
- void preWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handlePreWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode);
+ void preWrite(uint32_t *byteOffset);
+ virtual void handlePreWrite(uint32_t *byteOffset);
/**
* Writes the resource's data to mem and updates the byteOffset
* in parallel.
*/
- void write(UNewDataMemory *mem, uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset, SRBRoot *bundle,
- UErrorCode &errorCode);
+ void write(UNewDataMemory *mem, uint32_t *byteOffset);
+ virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
int8_t fType; /* nominal type: fRes (when != 0xffffffff) may use subtype */
UBool fWritten; /* res_write() can exit early */
uint32_t fRes; /* resource item word; RES_BOGUS=0xffffffff if not known yet */
+ int32_t fRes16; /* Res16 version of fRes for Table, Table16, Array16; -1 if it does not fit. */
int32_t fKey; /* Index into bundle->fKeys; -1 if no key. */
+ int32_t fKey16; /* Key16 version of fKey for Table & Table16; -1 if no key or it does not fit. */
int line; /* used internally to report duplicate keys in tables */
SResource *fNext; /* This is for internal chaining while building */
struct UString fComment;
virtual ~ContainerResource();
virtual void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode);
+protected:
+ void writeAllRes16(SRBRoot *bundle);
+ void preWriteAllRes(uint32_t *byteOffset);
+ void writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset);
+ void writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset);
+public:
// TODO: private with getter?
uint32_t fCount;
SResource *fFirst;
void add(SResource *res, int linenumber, UErrorCode &errorCode);
- virtual void handleWrite16(SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handlePreWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset, SRBRoot *bundle,
- UErrorCode &errorCode);
+ virtual void handleWrite16(SRBRoot *bundle);
+ virtual void handlePreWrite(uint32_t *byteOffset);
+ virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
int8_t fTableType; // determined by table_write16() for table_preWrite() & table_write()
SRBRoot *fRoot;
void add(SResource *res);
- virtual void handleWrite16(SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handlePreWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset, SRBRoot *bundle,
- UErrorCode &errorCode);
+ virtual void handleWrite16(SRBRoot *bundle);
+ virtual void handlePreWrite(uint32_t *byteOffset);
+ virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
SResource *fLast;
};
+/**
+ * List of resources for a pool bundle.
+ * Writes an empty table resource, rather than a container structure.
+ *
+ * The constructor forwards to ContainerResource, passing URES_TABLE
+ * and NULL for the second and fourth arguments.
+ * Declares its own add() taking only the resource, and overrides handleWrite16().
+ */
+class PseudoListResource : public ContainerResource {
+public:
+ PseudoListResource(SRBRoot *bundle, UErrorCode &errorCode)
+ : ContainerResource(bundle, NULL, URES_TABLE, NULL, errorCode) {}
+ virtual ~PseudoListResource();
+
+ void add(SResource *res);
+
+ virtual void handleWrite16(SRBRoot *bundle);
+};
+
class StringBaseResource : public SResource {
public:
StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
const UChar *value, int32_t len,
const UString* comment, UErrorCode &errorCode);
+ StringBaseResource(SRBRoot *bundle, int8_t type,
+ const icu::UnicodeString &value, UErrorCode &errorCode);
+ StringBaseResource(int8_t type, const UChar *value, int32_t len, UErrorCode &errorCode);
virtual ~StringBaseResource();
const UChar *getBuffer() const { return fString.getBuffer(); }
int32_t length() const { return fString.length(); }
- virtual void handlePreWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset, SRBRoot *bundle,
- UErrorCode &errorCode);
+ virtual void handlePreWrite(uint32_t *byteOffset);
+ virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
// TODO: private with getter?
icu::UnicodeString fString;
StringResource(SRBRoot *bundle, const char *tag, const UChar *value, int32_t len,
const UString* comment, UErrorCode &errorCode)
: StringBaseResource(bundle, tag, URES_STRING, value, len, comment, errorCode),
- fSame(NULL), fSuffixOffset(0), fNumCharsForLength(0) {}
+ fSame(NULL), fSuffixOffset(0),
+ fNumCopies(0), fNumUnitsSaved(0), fNumCharsForLength(0) {}
+ StringResource(SRBRoot *bundle, const icu::UnicodeString &value, UErrorCode &errorCode)
+ : StringBaseResource(bundle, URES_STRING, value, errorCode),
+ fSame(NULL), fSuffixOffset(0),
+ fNumCopies(0), fNumUnitsSaved(0), fNumCharsForLength(0) {}
+ StringResource(int32_t poolStringIndex, int8_t numCharsForLength,
+ const UChar *value, int32_t length,
+ UErrorCode &errorCode)
+ : StringBaseResource(URES_STRING, value, length, errorCode),
+ fSame(NULL), fSuffixOffset(0),
+ fNumCopies(0), fNumUnitsSaved(0), fNumCharsForLength(numCharsForLength) {
+ // v3 pool string encoded as string-v2 with low offset
+ fRes = URES_MAKE_RESOURCE(URES_STRING_V2, poolStringIndex);
+ fWritten = TRUE;
+ }
virtual ~StringResource();
+ int32_t get16BitStringsLength() const {
+ return fNumCharsForLength + length() + 1; // +1 for the NUL
+ }
+
virtual void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode);
- virtual void handleWrite16(SRBRoot *bundle, UErrorCode &errorCode);
+ virtual void handleWrite16(SRBRoot *bundle);
- void writeUTF16v2(icu::UnicodeString &dest);
+ void writeUTF16v2(int32_t base, icu::UnicodeString &dest);
StringResource *fSame; // used for duplicates
int32_t fSuffixOffset; // this string is a suffix of fSame at this offset
+ int32_t fNumCopies; // number of equal strings represented by one stringSet element
+ int32_t fNumUnitsSaved; // from not writing duplicates and suffixes
int8_t fNumCharsForLength;
};
void add(int32_t value, UErrorCode &errorCode);
- virtual void handlePreWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset, SRBRoot *bundle,
- UErrorCode &errorCode);
+ virtual void handlePreWrite(uint32_t *byteOffset);
+ virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
// TODO: UVector32
uint32_t fCount;
const UString* comment, UErrorCode &errorCode);
virtual ~BinaryResource();
- virtual void handlePreWrite(uint32_t *byteOffset, SRBRoot *bundle, UErrorCode &errorCode);
- virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset, SRBRoot *bundle,
- UErrorCode &errorCode);
+ virtual void handlePreWrite(uint32_t *byteOffset);
+ virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
// TODO: CharString?
uint32_t fLength;
void setFormatVersion(int32_t formatVersion);
+int32_t getFormatVersion();
+
void setUsePoolBundle(UBool use);
/* in wrtxml.cpp */
-uint32_t computeCRC(char *ptr, uint32_t len, uint32_t lastcrc);
+uint32_t computeCRC(const char *ptr, uint32_t len, uint32_t lastcrc);
U_CDECL_END
#endif /* #ifndef RESLIST_H */