static UTrie2 *pTrie=NULL;
-/* -------------------------------------------------------------------------- */
-
-static void
-initStore() {
- UErrorCode errorCode=U_ZERO_ERROR;
- pTrie=utrie2_open(0, 0, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genprops error: corepropswriter utrie2_open() failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
-}
-
-static void
-exitStore() {
- utrie2_close(pTrie);
-}
-
/* store a character's properties ------------------------------------------- */
U_CFUNC uint32_t
}
}
-/* generate output data ----------------------------------------------------- */
+class CorePropsWriter : public PropsWriter {  // PropsWriter subclass whose methods work on the file-static pTrie, indexes[], trieBlock and totalSize
+public:
+ CorePropsWriter(UErrorCode &errorCode);  // opens pTrie; a failure is reported through errorCode
+ virtual ~CorePropsWriter();  // closes pTrie
-U_CFUNC void
-generateData(const char *dataDir, UBool csource) {
- static int32_t indexes[UPROPS_INDEX_COUNT]={
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
- };
- static uint8_t trieBlock[40000];
- static uint8_t additionalProps[120000];
-
- UNewDataMemory *pData;
- UErrorCode errorCode=U_ZERO_ERROR;
- uint32_t size = 0;
- int32_t trieSize, additionalPropsSize, offset;
- long dataLength;
+ virtual void setUnicodeVersion(const UVersionInfo version);  // stores version in dataInfo.dataVersion
+ virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);  // defined with an empty body in this file
+ virtual void finalizeData(UErrorCode &errorCode);  // freezes and serializes pTrie, fills indexes[] and computes totalSize
+ virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);  // writes uchar_props_data.h; path is handed to usrc_createFromGenerator()
+ virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);  // writes uprops.icu; path is handed to udata_create()
+};
+
+CorePropsWriter::CorePropsWriter(UErrorCode &errorCode) {  // opens the file-static pTrie that the other methods fill and serialize
+ pTrie=utrie2_open(0, 0, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr, "genprops error: corepropswriter utrie2_open() failed - %s\n",
+ u_errorName(errorCode));
+ }  // no exit() here: the failure stays in errorCode for the caller to act on
+}
+
+CorePropsWriter::~CorePropsWriter() {  // releases the trie opened by the constructor
+ utrie2_close(pTrie);  // pTrie itself is not reset afterwards
+}
+
+void
+CorePropsWriter::setUnicodeVersion(const UVersionInfo version) {  // records the Unicode version for the output files
+ uprv_memcpy(dataInfo.dataVersion, version, 4);  // dataInfo.dataVersion is later emitted by writeCSourceFile() and passed to udata_create()
+}
+
+void
+CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) {  // stub: none of the three parameters is used
+}  // intentionally empty at this point of the patch
+
+static int32_t indexes[UPROPS_INDEX_COUNT]={
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+
+static uint8_t trieBlock[40000];
+static int32_t trieSize;
+static int32_t totalSize;
+
+void
+CorePropsWriter::finalizeData(UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return; }
utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: utrie2_freeze(main trie)+utrie2_serialize() failed: %s (length %ld)\n",
+ fprintf(stderr, "genprops error: utrie2_freeze(main trie)+utrie2_serialize() failed: %s (length %ld)\n",
u_errorName(errorCode), (long)trieSize);
- exit(errorCode);
+ return;
}
- offset=sizeof(indexes)/4; /* uint32_t offset to the properties trie */
-
- /* round up trie size to 4-alignment */
- while(trieSize&3) {
- trieBlock[trieSize++]=0;
- }
+ int32_t offset=sizeof(indexes)/4; /* uint32_t offset to the properties trie */
offset+=trieSize>>2;
indexes[UPROPS_PROPS32_INDEX]= /* set indexes to the same offsets for empty */
indexes[UPROPS_EXCEPTIONS_INDEX]= /* structures from the old format version 3 */
printf("trie size in bytes: %5u\n", (int)trieSize);
}
- if(csource) {
- /* write .c file for hardcoded data */
- FILE *f=usrc_createFromGenerator(dataDir, "uchar_props_data.h",
- "icu/tools/src/unicode/c/genprops/corepropswriter.cpp");
- if(f!=NULL) {
- fputs("#ifndef INCLUDED_FROM_UCHAR_C\n"
- "# error This file must be #included from uchar.c only.\n"
- "#endif\n\n", f);
- /* unused
- usrc_writeArray(f,
- "static const UVersionInfo formatVersion={",
- dataInfo.formatVersion, 8, 4,
- "};\n\n");
- */
- usrc_writeArray(f,
- "static const UVersionInfo dataVersion={",
- dataInfo.dataVersion, 8, 4,
- "};\n\n");
- usrc_writeUTrie2Arrays(f,
- "static const uint16_t propsTrie_index[%ld]={\n", NULL,
- pTrie,
- "\n};\n\n");
- usrc_writeUTrie2Struct(f,
- "static const UTrie2 propsTrie={\n",
- pTrie, "propsTrie_index", NULL,
- "};\n\n");
-
- additionalPropsSize=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes);
- size=4*offset+additionalPropsSize; /* total size of data */
-
- usrc_writeArray(f,
- "static const int32_t indexes[UPROPS_INDEX_COUNT]={",
- indexes, 32, UPROPS_INDEX_COUNT,
- "};\n\n");
- fclose(f);
- }
- } else {
- /* write the data */
- pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
- haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genprops: udata_create(%s, %s.%s) failed - %s\n",
- dataDir, DATA_NAME, DATA_TYPE,
- u_errorName(errorCode));
- exit(errorCode);
- }
-
- additionalPropsSize=writeAdditionalData(NULL, additionalProps, sizeof(additionalProps), indexes);
- size=4*offset+additionalPropsSize; /* total size of data */
-
- udata_writeBlock(pData, indexes, sizeof(indexes));
- udata_writeBlock(pData, trieBlock, trieSize);
- udata_writeBlock(pData, additionalProps, additionalPropsSize);
-
- /* finish up */
- dataLength=udata_finish(pData, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genprops: error %d writing the output file\n", errorCode);
- exit(errorCode);
- }
-
- if(dataLength!=(long)size) {
- fprintf(stderr, "genprops: data length %ld != calculated size %lu\n",
- dataLength, (unsigned long)size);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
- }
+ totalSize=4*offset+props2FinalizeData(indexes, errorCode);
if(beVerbose) {
- printf("data size: %6lu\n", (unsigned long)size);
+ printf("data size: %6ld\n", (long)totalSize);
}
}
-class CorePropsWriter : public PropsWriter {
-public:
- CorePropsWriter() { initStore(); }
- virtual ~CorePropsWriter() { exitStore(); }
-
- virtual void setUnicodeVersion(const UVersionInfo version);
- virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
-};
-
void
-CorePropsWriter::setUnicodeVersion(const UVersionInfo version) {
- uprv_memcpy(dataInfo.dataVersion, version, 4);
+CorePropsWriter::writeCSourceFile(const char *path, UErrorCode &errorCode) {  // writes uchar_props_data.h with the data version, the main trie, the additional-properties tables and indexes[]
+ if(U_FAILURE(errorCode)) { return; }  // do nothing if an earlier step already failed
+
+ FILE *f=usrc_createFromGenerator(path, "uchar_props_data.h",
+ "icu/tools/src/unicode/c/genprops/corepropswriter.cpp");  // second string names this generator; presumably recorded in the file header - verify in writesrc
+ if(f==NULL) {
+ errorCode=U_FILE_ACCESS_ERROR;  // the file could not be created
+ return;
+ }
+ fputs("#ifndef INCLUDED_FROM_UCHAR_C\n"
+ "# error This file must be #included from uchar.c only.\n"
+ "#endif\n\n", f);  // guard emitted at the top of the generated header
+ usrc_writeArray(f,
+ "static const UVersionInfo dataVersion={",
+ dataInfo.dataVersion, 8, 4,
+ "};\n\n");  // version previously stored by setUnicodeVersion()
+ usrc_writeUTrie2Arrays(f,
+ "static const uint16_t propsTrie_index[%ld]={\n", NULL,
+ pTrie,
+ "\n};\n\n");  // main properties trie: array contents first ...
+ usrc_writeUTrie2Struct(f,
+ "static const UTrie2 propsTrie={\n",
+ pTrie, "propsTrie_index", NULL,
+ "};\n\n");  // ... then the UTrie2 struct that refers to propsTrie_index
+
+ props2AppendToCSourceFile(f, errorCode);  // additional-properties tables; a failure there is left in errorCode and writing continues
+
+ usrc_writeArray(f,
+ "static const int32_t indexes[UPROPS_INDEX_COUNT]={",
+ indexes, 32, UPROPS_INDEX_COUNT,
+ "};\n\n");  // the file-static indexes[] filled in by finalizeData()
+ fclose(f);  // reached on every path after a successful open
}
void
-CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) {
+CorePropsWriter::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {  // writes the binary uprops.icu file and checks its length against totalSize
+ if(U_FAILURE(errorCode)) { return; }  // do nothing if an earlier step already failed
+
+ UNewDataMemory *pData=udata_create(path, "icu", "uprops", &dataInfo,
+ withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);  // copyright string only when requested by the caller
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr, "genprops: udata_create(%s, uprops.icu) failed - %s\n",
+ path, u_errorName(errorCode));
+ return;
+ }
+
+ udata_writeBlock(pData, indexes, sizeof(indexes));  // 1. the indexes[] array
+ udata_writeBlock(pData, trieBlock, trieSize);  // 2. the serialized main trie produced by finalizeData()
+ props2AppendToBinaryFile(pData, errorCode);  // 3. additional-properties trie, vectors and scriptExtensions
+
+ long dataLength=udata_finish(pData, &errorCode);  // number of bytes written, compared with totalSize below
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr, "genprops: error %s writing the output file\n", u_errorName(errorCode));
+ return;
+ }
+
+ if(dataLength!=(long)totalSize) {  // totalSize was computed by finalizeData(); a mismatch means the size bookkeeping is wrong
+ fprintf(stderr, "genprops: data length %ld != calculated size %ld\n",
+ dataLength, (long)totalSize);
+ errorCode=U_INTERNAL_PROGRAM_ERROR;  // reported to the caller instead of calling exit()
+ }
}
PropsWriter *
createCorePropsWriter(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return NULL; }
- PropsWriter *pw=new CorePropsWriter();
+ PropsWriter *pw=new CorePropsWriter(errorCode);
if(pw==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
#include "uparse.h"
#include "uprops.h"
-#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
// TODO: remove
#define USE_NEW 1
U_NAMESPACE_USE
-UBool beVerbose=FALSE, haveCopyright=TRUE;
+UBool beVerbose=FALSE;
PropsWriter::~PropsWriter() {}
-void PropsWriter::setUnicodeVersion(const UVersionInfo version) {}
+void PropsWriter::setUnicodeVersion(const UVersionInfo) {}
void PropsWriter::setProps(const UniProps &, const UnicodeSet &, UErrorCode &) {}
+void PropsWriter::finalizeData(UErrorCode &) {}
+void PropsWriter::writeCSourceFile(const char *, UErrorCode &) {}
+void PropsWriter::writeBinaryData(const char *, UBool, UErrorCode &) {}
/* prototypes --------------------------------------------------------------- */
HELP_QUESTION_MARK,
VERBOSE,
COPYRIGHT,
- DESTDIR,
SOURCEDIR,
- ICUDATADIR,
- CSOURCE
+ ICUDATADIR
};
/* Keep these values in sync with the above enums */
UOPTION_HELP_QUESTION_MARK,
UOPTION_VERBOSE,
UOPTION_COPYRIGHT,
- UOPTION_DESTDIR,
UOPTION_SOURCEDIR,
- UOPTION_ICUDATADIR,
- UOPTION_DEF("csource", 'C', UOPT_NO_ARG)
+ UOPTION_ICUDATADIR
};
extern int
main(int argc, char* argv[]) {
char filename[300];
- const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
+ const char *srcDir=NULL;
char *basename=NULL;
U_MAIN_INIT_ARGS(argc, argv);
/* preset then read command line options */
- options[DESTDIR].value=u_getDataDirectory();
options[SOURCEDIR].value="";
options[ICUDATADIR].value=u_getDataDirectory();
- argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
+ argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
/* error handling, printing usage message */
if(argc<0) {
"error in command line argument \"%s\"\n",
argv[-argc]);
}
- if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
+ if(argc<2 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
/*
- * Broken into chucks because the C89 standard says the minimum
+ * Broken into chunks because the C89 standard says the minimum
* required supported string length is 509 bytes.
*/
fprintf(stderr,
- "Usage: %s [-options] [suffix]\n"
+ "Usage: %s [-options] path/to/ICU/src/root\n"
"\n"
- "read the UnicodeData.txt file and other Unicode properties files and\n"
- "create a binary file " DATA_NAME "." DATA_TYPE " with the character properties\n"
+ "Reads the preparsed UCD file path/to/ICU/src/root/source/data/unidata/ppucd.txt and\n"
+ "writes source and binary data files with the character properties.\n"
+ "(UCD=Unicode Character Database)\n"
"\n",
argv[0]);
fprintf(stderr,
"Options:\n"
"\t-h or -? or --help this usage text\n"
"\t-v or --verbose verbose output\n"
- "\t-c or --copyright include a copyright notice\n"
- "\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"
- "\t-C or --csource generate a .c source file rather than the .icu binary\n");
+ "\t-c or --copyright include a copyright notice\n");
fprintf(stderr,
- "\t-d or --destdir destination directory, followed by the path\n"
"\t-s or --sourcedir source directory, followed by the path\n"
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
- "\t followed by path, defaults to %s\n"
- "\tsuffix suffix that is to be appended with a '-'\n"
- "\t to the source file basenames before opening;\n"
- "\t 'genprops new' will read UnicodeData-new.txt etc.\n",
+ "\t followed by path, defaults to %s\n",
u_getDataDirectory());
- return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
+ return argc<2 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
/* get the options values */
beVerbose=options[VERBOSE].doesOccur;
- haveCopyright=options[COPYRIGHT].doesOccur;
srcDir=options[SOURCEDIR].value;
- destDir=options[DESTDIR].value;
/* initialize */
IcuToolErrorCode errorCode("genprops");
return errorCode.reset();
}
- CharString ppucdPath(srcDir, errorCode);
+ CharString icuSrcRoot(argv[1], errorCode);
+
+ CharString icuSource(icuSrcRoot, errorCode);
+ icuSource.appendPathPart("source", errorCode);
+
+ CharString icuSourceData(icuSource, errorCode);
+ icuSourceData.appendPathPart("data", errorCode);
+
+ CharString ppucdPath(icuSourceData, errorCode);
+ ppucdPath.appendPathPart("unidata", errorCode);
ppucdPath.appendPathPart("ppucd.txt", errorCode);
PreparsedUCD ppucd(ppucdPath.data(), errorCode);
}
}
- if(argc>=2) {
- suffix=argv[1];
- } else {
- suffix=NULL;
- }
-
if (options[ICUDATADIR].doesOccur) {
u_setDataDirectory(options[ICUDATADIR].value);
}
}
/* process UnicodeData.txt */
- writeUCDFilename(basename, "UnicodeData", suffix);
+ writeUCDFilename(basename, "UnicodeData", NULL);
parseDB(filename, errorCode);
/* process additional properties files */
*basename=0;
- generateAdditionalProperties(filename, suffix, errorCode);
+ generateAdditionalProperties(filename, NULL, errorCode);
- /* process parsed data */
- if(U_SUCCESS(errorCode)) {
- /* write the properties data file */
- generateData(destDir, options[CSOURCE].doesOccur);
+ corePropsWriter->finalizeData(errorCode);
+ if(errorCode.isFailure()) {
+ fprintf(stderr, "genprops error: failure finalizing the data - %s\n",
+ errorCode.errorName());
+ return errorCode.reset();
}
+ // Write the files with the generated data.
+ CharString sourceCommon(icuSource, errorCode);
+ sourceCommon.appendPathPart("common", errorCode);
+
+ CharString sourceDataIn(icuSourceData, errorCode);
+ sourceDataIn.appendPathPart("in", errorCode);
+
+ UBool withCopyright=options[COPYRIGHT].doesOccur;
+
+ corePropsWriter->writeCSourceFile(sourceCommon.data(), errorCode);
+ corePropsWriter->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
+
return errorCode;
}
#include "unicode/uniset.h"
#include "ppucd.h"
#include "propsvec.h"
+#include "unewdata.h"
/* file definitions */
#define DATA_NAME "uprops"
virtual ~PropsWriter();
virtual void setUnicodeVersion(const UVersionInfo version);
virtual void setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode);
- // virtual writeCSourceFile(icusrcroot);
- // virtual writeBinaryData(icusrcroot);
+ virtual void finalizeData(UErrorCode &errorCode);
+ virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
+ virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
};
PropsWriter *createCorePropsWriter(UErrorCode &errorCode);
} Props;
/* global flags */
-U_CFUNC UBool beVerbose, haveCopyright;
+U_CFUNC UBool beVerbose;
U_CFUNC const char *const
genCategoryNames[];
U_CFUNC void
repeatProps(uint32_t first, uint32_t last, uint32_t props);
-U_CFUNC void
-generateData(const char *dataDir, UBool csource);
-
U_CFUNC void
generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode);
-U_CFUNC int32_t
-writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[16]);
+int32_t
+props2FinalizeData(int32_t indexes[], UErrorCode &errorCode);
+
+void
+props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode);
+
+void
+props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode);
#endif
#include "uparse.h"
#include "writesrc.h"
#include "genprops.h"
+#include "unewdata.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
/* data --------------------------------------------------------------------- */
-static UTrie2 *newTrie;
-static UPropsVectors *pv;
+static UTrie2 *newTrie=NULL;
+static UPropsVectors *pv=NULL;
-static UnicodeString *scriptExtensions;
+static UnicodeString *scriptExtensions=NULL;
/* miscellaneous ------------------------------------------------------------ */
/* -------------------------------------------------------------------------- */
-static void
-initAdditionalProperties() {
- UErrorCode errorCode=U_ZERO_ERROR;
- pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genprops error: props2writer upvec_open() failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- scriptExtensions=new UnicodeString;
-}
-
-static void
-exitAdditionalProperties() {
- utrie2_close(newTrie);
- upvec_close(pv);
- delete scriptExtensions;
-}
-
U_CFUNC void
generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) {
char *basename;
parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 2, numericLineFn, pErrorCode);
parseTwoFieldFile(filename, basename, "ScriptExtensions", suffix, scriptExtensionsLineFn, pErrorCode);
-
- newTrie=upvec_compactToUTrie2WithRowIndexes(pv, pErrorCode);
-// TODO: remove
-#if 0
-const uint32_t *pvArray;
-int32_t pvRows;
-pvArray=upvec_getArray(pv, &pvRows, NULL);
-for(int32_t c=0; c<=0x10ffff; ++c) {
- uint16_t ri=utrie2_get32(newTrie, c);
- uint32_t v2=pvArray[ri+2];
- int32_t dt=v2&UPROPS_DT_MASK;
- if(dt!=0) {
- printf("%04x %d\n", c, dt);
- }
-}
-#endif
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
- u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
}
/* ScriptExtensions.txt ----------------------------------------------------- */
}
}
-/* data serialization ------------------------------------------------------- */
-
-U_CFUNC int32_t
-writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROPS_INDEX_COUNT]) {
- const uint32_t *pvArray;
- int32_t pvRows, pvCount;
- int32_t length;
- UErrorCode errorCode;
+class Props2Writer : public PropsWriter {  // overrides only setProps(); the other PropsWriter virtuals keep their base-class definitions
+public:
+ Props2Writer(UErrorCode &errorCode);  // opens pv and allocates scriptExtensions
+ virtual ~Props2Writer();  // closes newTrie and pv, deletes scriptExtensions
- pvArray=upvec_getArray(pv, &pvRows, NULL);
- pvCount=pvRows*UPROPS_VECTOR_WORDS;
+ virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);  // definition is not part of this excerpt
+};
- errorCode=U_ZERO_ERROR;
- length=utrie2_serialize(newTrie, p, capacity, &errorCode);
+Props2Writer::Props2Writer(UErrorCode &errorCode) {
+ pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
if(U_FAILURE(errorCode)) {
- fprintf(stderr,
- "genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
+ fprintf(stderr, "genprops error: props2writer upvec_open() failed - %s\n",
u_errorName(errorCode));
- exit(errorCode);
- }
-
- /* round up scriptExtensions to multiple of 4 bytes */
- if(scriptExtensions->length()&1) {
- scriptExtensions->append((UChar)0);
}
-
- /* set indexes */
- indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=
- indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+length/4;
- indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
- indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=
- indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;
- indexes[UPROPS_RESERVED_INDEX_7]=
- indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]+scriptExtensions->length()/2;
- indexes[UPROPS_RESERVED_INDEX_8]=indexes[UPROPS_RESERVED_INDEX_7];
- indexes[UPROPS_DATA_TOP_INDEX]=indexes[UPROPS_RESERVED_INDEX_8];
-
- indexes[UPROPS_MAX_VALUES_INDEX]=
- (((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
- (((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
- (((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
- indexes[UPROPS_MAX_VALUES_2_INDEX]=
- (((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
- (((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
- (((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
- (((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
- ((int32_t)U_DT_COUNT-1);
-
- int32_t additionalPropsSize=4*(indexes[UPROPS_DATA_TOP_INDEX]-indexes[UPROPS_ADDITIONAL_TRIE_INDEX]);
- if(p!=NULL && additionalPropsSize<=capacity) {
- if(beVerbose) {
- printf("size in bytes of additional props trie:%5u\n", (int)length);
- }
- if(f!=NULL) {
- usrc_writeUTrie2Arrays(f,
- "static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
- newTrie,
- "\n};\n\n");
- usrc_writeUTrie2Struct(f,
- "static const UTrie2 propsVectorsTrie={\n",
- newTrie, "propsVectorsTrie_index", NULL,
- "};\n\n");
-
- usrc_writeArray(f,
- "static const uint32_t propsVectors[%ld]={\n",
- pvArray, 32, pvCount,
- "};\n\n");
- fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
- fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]);
-
- usrc_writeArray(f,
- "static const uint16_t scriptExtensions[%ld]={\n",
- scriptExtensions->getBuffer(), 16, scriptExtensions->length(),
- "};\n\n");
- } else {
- p+=length;
- length=pvCount*4;
- uprv_memcpy(p, pvArray, length);
-
- p+=length;
- length=scriptExtensions->length()*2;
- uprv_memcpy(p, scriptExtensions->getBuffer(), length);
- }
- if(beVerbose) {
- printf("number of additional props vectors: %5u\n", (int)pvRows);
- printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
- printf("number of 16-bit scriptExtensions: %5u\n", (int)scriptExtensions->length());
- }
- }
-
- return additionalPropsSize;
+ scriptExtensions=new UnicodeString;
}
-class Props2Writer : public PropsWriter {
-public:
- Props2Writer() { initAdditionalProperties(); }
- virtual ~Props2Writer() { exitAdditionalProperties(); }
-
- virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
-};
+Props2Writer::~Props2Writer() {  // frees the three file-static objects used for the additional properties
+ utrie2_close(newTrie);  // newTrie is assigned in props2FinalizeData() and initialized to NULL before that
+ upvec_close(pv);
+ delete scriptExtensions;
+}
struct PropToBinary {
int32_t prop; // UProperty
}
}
+static uint8_t trieBlock[100000];
+static int32_t trieSize;
+
+int32_t
+props2FinalizeData(int32_t indexes[UPROPS_INDEX_COUNT], UErrorCode &errorCode) {  // builds and serializes the additional-properties trie, fills the related indexes[] slots; returns that data's size in bytes, or 0 on failure
+ if(U_FAILURE(errorCode)) { return 0; }  // do nothing if an earlier step already failed
+
+ newTrie=upvec_compactToUTrie2WithRowIndexes(pv, &errorCode);  // result is kept in the file-static newTrie, which props2AppendToCSourceFile() also reads
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
+ u_errorName(errorCode));
+ return 0;
+ }
+
+ trieSize=utrie2_serialize(newTrie, trieBlock, (int32_t)sizeof(trieBlock), &errorCode);  // trieSize is a byte count; trieBlock is later written out by props2AppendToBinaryFile()
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr,
+ "genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
+ u_errorName(errorCode));
+ return 0;
+ }
+
+ int32_t pvRows;  // filled in by upvec_getArray() below
+ const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);  // pvArray is referenced only inside the disabled #if 0 block below
+ int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;  // total number of 32-bit words in the vectors array
+// TODO: remove
+#if 0
+for(int32_t c=0; c<=0x10ffff; ++c) {
+ uint16_t ri=utrie2_get32(newTrie, c);
+ uint32_t v2=pvArray[ri+2];
+ int32_t dt=v2&UPROPS_DT_MASK;
+ if(dt!=0) {
+ printf("%04x %d\n", c, dt);
+ }
+}
+#endif
+
+ /* round up scriptExtensions to multiple of 4 bytes */
+ if(scriptExtensions->length()&1) {  // odd number of 16-bit units
+ scriptExtensions->append((UChar)0);  // pad with one zero unit
+ }
+
+ /* set indexes */
+ indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=  // NOTE(review): indexes[UPROPS_ADDITIONAL_TRIE_INDEX] is only read here - presumably set by the caller beforehand; verify
+ indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+trieSize/4;  // offsets count 32-bit units, hence bytes/4
+ indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
+ indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=
+ indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;  // scriptExtensions follow the vectors
+ indexes[UPROPS_RESERVED_INDEX_7]=
+ indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]+scriptExtensions->length()/2;  // two 16-bit units per 32-bit unit
+ indexes[UPROPS_RESERVED_INDEX_8]=indexes[UPROPS_RESERVED_INDEX_7];  // same offset: nothing is stored between these two indexes
+ indexes[UPROPS_DATA_TOP_INDEX]=indexes[UPROPS_RESERVED_INDEX_8];  // top of the data coincides with the end of scriptExtensions
+
+ indexes[UPROPS_MAX_VALUES_INDEX]=  // packs (count-1) for East Asian Width and Block, plus the masked script limit-1, into one word
+ (((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
+ (((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
+ (((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
+ indexes[UPROPS_MAX_VALUES_2_INDEX]=  // packs (count-1) for the LB, SB, WB, GCB and DT enums into one word
+ (((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
+ (((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
+ (((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
+ (((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
+ ((int32_t)U_DT_COUNT-1);
+
+ if(beVerbose) {  // size statistics on stdout, only in verbose mode
+ printf("size in bytes of additional props trie:%5u\n", (int)trieSize);
+ printf("number of additional props vectors: %5u\n", (int)pvRows);
+ printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
+ printf("number of 16-bit scriptExtensions: %5u\n", (int)scriptExtensions->length());
+ }
+
+ return 4*(indexes[UPROPS_DATA_TOP_INDEX]-indexes[UPROPS_ADDITIONAL_TRIE_INDEX]);  // 32-bit units converted to bytes
+}
+
+void
+props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode) {  // writes the additional-properties tables as C source into the already-open f; f is not closed here
+ if(U_FAILURE(errorCode)) { return; }  // do nothing if an earlier step already failed
+
+ int32_t pvRows;  // filled in by upvec_getArray() below
+ const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
+ int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;  // total number of 32-bit words in the vectors array
+
+ usrc_writeUTrie2Arrays(f,
+ "static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
+ newTrie,
+ "\n};\n\n");  // newTrie is the trie built by props2FinalizeData(): array contents first ...
+ usrc_writeUTrie2Struct(f,
+ "static const UTrie2 propsVectorsTrie={\n",
+ newTrie, "propsVectorsTrie_index", NULL,
+ "};\n\n");  // ... then the UTrie2 struct that refers to propsVectorsTrie_index
+
+ usrc_writeArray(f,
+ "static const uint32_t propsVectors[%ld]={\n",
+ pvArray, 32, pvCount,
+ "};\n\n");  // all vector words as one array
+ fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
+ fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)UPROPS_VECTOR_WORDS);  // the constant is emitted directly rather than read from indexes[]
+
+ usrc_writeArray(f,
+ "static const uint16_t scriptExtensions[%ld]={\n",
+ scriptExtensions->getBuffer(), 16, scriptExtensions->length(),
+ "};\n\n");  // includes the padding unit if props2FinalizeData() appended one
+}
+
+void
+props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode) {  // appends the additional-properties data to pData; the caller creates and finishes pData
+ if(U_FAILURE(errorCode)) { return; }  // do nothing if an earlier step already failed
+
+ int32_t pvRows;  // filled in by upvec_getArray() below
+ const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
+ int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;  // total number of 32-bit words in the vectors array
+
+ udata_writeBlock(pData, trieBlock, trieSize);  // 1. trie serialized by props2FinalizeData()
+ udata_writeBlock(pData, pvArray, pvCount*4);  // 2. vectors: 4 bytes per 32-bit word
+ udata_writeBlock(pData, scriptExtensions->getBuffer(), scriptExtensions->length()*2);  // 3. scriptExtensions: 2 bytes per 16-bit unit; same order as the offsets set in props2FinalizeData()
+}
+
PropsWriter *
createProps2Writer(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return NULL; }
- PropsWriter *pw=new Props2Writer();
+ PropsWriter *pw=new Props2Writer(errorCode);
if(pw==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}