granicus.if.org Git - icu/commitdiff
ICU-8972 genprops: merge props2writer.cpp (which used to be props2.cpp) into coreprop...
author Markus Scherer <markus.icu@gmail.com>
Sun, 18 Dec 2011 08:17:16 +0000 (08:17 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Sun, 18 Dec 2011 08:17:16 +0000 (08:17 +0000)
X-SVN-Rev: 31150

tools/unicode/c/genprops/CMakeLists.txt
tools/unicode/c/genprops/corepropswriter.cpp
tools/unicode/c/genprops/genprops.cpp
tools/unicode/c/genprops/genprops.h
tools/unicode/c/genprops/props2writer.cpp [deleted file]

index 9bdd48a99f04cf6164398dd60e37f3c772aaf7f7..3310e79721496e1ce2270161f6784130bae9c0bc 100644 (file)
@@ -5,5 +5,5 @@
 # created by: Markus W. Scherer
 # edited on: 2010jul20
 # edited by: Stuart G. Gill
-add_executable(genprops genprops.cpp corepropswriter.cpp props2writer.cpp)
+add_executable(genprops genprops.cpp corepropswriter.cpp)
 target_link_libraries(genprops icuuc icutu)
index e693822203a47d51ddd96b120ce5a82b44b5e57f..f2f8b314bc898751b78719b5069b4a416306dff1 100644 (file)
@@ -5,7 +5,7 @@
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
-*   file name:  corepropswriter.cpp (was store.c)
+*   file name:  corepropswriter.cpp (was store.c && props2.cpp)
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 #include "unicode/utypes.h"
 #include "unicode/uchar.h"
 #include "unicode/udata.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/usetiter.h"
+#include "unicode/uscript.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "genprops.h"
+#include "propsvec.h"
+#include "uassert.h"
 #include "unewdata.h"
 #include "uprops.h"
 #include "utrie2.h"
@@ -230,6 +236,8 @@ Change from UTrie to UTrie2.
 
 ----------------------------------------------------------------------------- */
 
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))  // element count of an array, as int32_t
+
 U_NAMESPACE_USE
 
 /* UDataInfo cf. udata.h */
@@ -247,8 +255,6 @@ static UDataInfo dataInfo={
     { 6, 0, 0, 0 }                              /* dataVersion */
 };
 
-static UTrie2 *pTrie=NULL;
-
 class CorePropsWriter : public PropsWriter {
 public:
     CorePropsWriter(UErrorCode &errorCode);
@@ -259,18 +265,34 @@ public:
     virtual void finalizeData(UErrorCode &errorCode);
     virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
     virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
+
+private:
+    void setGcAndNumeric(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
+
+    UTrie2 *pTrie;
+    UTrie2 *props2Trie;
+    UPropsVectors *pv;
+    UnicodeString scriptExtensions;
 };
 
-CorePropsWriter::CorePropsWriter(UErrorCode &errorCode) {
+CorePropsWriter::CorePropsWriter(UErrorCode &errorCode)
+        : pTrie(NULL), props2Trie(NULL), pv(NULL) {  // props2Trie stays NULL until finalizeData() assigns it
     pTrie=utrie2_open(0, 0, &errorCode);
     if(U_FAILURE(errorCode)) {
         fprintf(stderr, "genprops error: corepropswriter utrie2_open() failed - %s\n",
                 u_errorName(errorCode));
     }
+    pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);  // properties vectors, UPROPS_VECTOR_WORDS words per row
+    if(U_FAILURE(errorCode)) {  // also true when utrie2_open() above already failed (errorCode is not reset)
+        fprintf(stderr, "genprops error: corepropswriter upvec_open() failed - %s\n",
+                u_errorName(errorCode));
+    }
 }
 
 CorePropsWriter::~CorePropsWriter() {
     utrie2_close(pTrie);
+    utrie2_close(props2Trie);  // still NULL if finalizeData() never assigned it; NOTE(review): confirm utrie2_close() accepts NULL
+    upvec_close(pv);  // releases the vectors opened in the constructor
 }
 
 void
@@ -361,7 +383,8 @@ encodeNumericValue(UChar32 start, const char *s, UErrorCode &errorCode) {
 }
 
 void
-CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) {
+CorePropsWriter::setGcAndNumeric(const UniProps &props, const UnicodeSet &newValues,
+                                 UErrorCode &errorCode) {
     if(U_FAILURE(errorCode)) { return; }
     UChar32 start=props.start;
     UChar32 end=props.end;
@@ -428,6 +451,177 @@ CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues, UE
     }
 }
 
+struct PropToBinary {  // One table row: where a binary property's bit is stored in the properties vectors.
+    int32_t prop;  // UProperty constant; setProps() uses it to index UniProps::binProps
+    int32_t vecWord, vecShift;  // vector column passed to upvec_setValue(), and bit number given to U_MASK()
+};
+
+static const PropToBinary  // table scanned by setProps() for each binary property that is new on a line
+propToBinaries[]={  // rows are { property, vector word, bit number }; every row here uses vector word 1
+    { UCHAR_WHITE_SPACE,                    1, UPROPS_WHITE_SPACE },
+    { UCHAR_DASH,                           1, UPROPS_DASH },
+    // Note: The Hyphen property is stabilized since Unicode 4.0
+    // and deprecated since Unicode 6.0.
+    { UCHAR_HYPHEN,                         1, UPROPS_HYPHEN },
+    { UCHAR_QUOTATION_MARK,                 1, UPROPS_QUOTATION_MARK },
+    { UCHAR_TERMINAL_PUNCTUATION,           1, UPROPS_TERMINAL_PUNCTUATION },
+    // Note: The Hex_Digit and ASCII_Hex_Digit properties are probably stable enough
+    // so that they could be hardcoded.
+    { UCHAR_HEX_DIGIT,                      1, UPROPS_HEX_DIGIT },
+    { UCHAR_ASCII_HEX_DIGIT,                1, UPROPS_ASCII_HEX_DIGIT },
+    { UCHAR_IDEOGRAPHIC,                    1, UPROPS_IDEOGRAPHIC },
+    { UCHAR_DIACRITIC,                      1, UPROPS_DIACRITIC },
+    { UCHAR_EXTENDER,                       1, UPROPS_EXTENDER },
+    // Note: The Noncharacter_Code_Point property is probably stable enough
+    // so that it could be hardcoded.
+    { UCHAR_NONCHARACTER_CODE_POINT,        1, UPROPS_NONCHARACTER_CODE_POINT },
+    // Note: The Grapheme_Link property is deprecated since Unicode 5.0
+    // because it is a "Duplication of ccc=9" (UAX #44).
+    { UCHAR_GRAPHEME_LINK,                  1, UPROPS_GRAPHEME_LINK },
+    { UCHAR_IDS_BINARY_OPERATOR,            1, UPROPS_IDS_BINARY_OPERATOR },
+    { UCHAR_IDS_TRINARY_OPERATOR,           1, UPROPS_IDS_TRINARY_OPERATOR },
+    { UCHAR_RADICAL,                        1, UPROPS_RADICAL },
+    { UCHAR_UNIFIED_IDEOGRAPH,              1, UPROPS_UNIFIED_IDEOGRAPH },
+    { UCHAR_DEPRECATED,                     1, UPROPS_DEPRECATED },
+    { UCHAR_LOGICAL_ORDER_EXCEPTION,        1, UPROPS_LOGICAL_ORDER_EXCEPTION },
+    { UCHAR_S_TERM,                         1, UPROPS_S_TERM },
+    { UCHAR_VARIATION_SELECTOR,             1, UPROPS_VARIATION_SELECTOR },
+    // Note: Pattern_Syntax & Pattern_White_Space are available via
+    // the internal PatternProps class and need not be stored here any more.
+    { UCHAR_PATTERN_SYNTAX,                 1, UPROPS_PATTERN_SYNTAX },
+    { UCHAR_PATTERN_WHITE_SPACE,            1, UPROPS_PATTERN_WHITE_SPACE },
+    { UCHAR_XID_START,                      1, UPROPS_XID_START },
+    { UCHAR_XID_CONTINUE,                   1, UPROPS_XID_CONTINUE },
+    { UCHAR_MATH,                           1, UPROPS_MATH },
+    { UCHAR_ALPHABETIC,                     1, UPROPS_ALPHABETIC },
+    { UCHAR_GRAPHEME_EXTEND,                1, UPROPS_GRAPHEME_EXTEND },
+    { UCHAR_DEFAULT_IGNORABLE_CODE_POINT,   1, UPROPS_DEFAULT_IGNORABLE_CODE_POINT },
+    { UCHAR_ID_START,                       1, UPROPS_ID_START },
+    { UCHAR_ID_CONTINUE,                    1, UPROPS_ID_CONTINUE },
+    { UCHAR_GRAPHEME_BASE,                  1, UPROPS_GRAPHEME_BASE },
+};
+
+struct PropToEnum {  // One table row: the bit field that stores an enumerated (int-valued) property.
+    int32_t prop;  // UProperty constant; setProps() passes it to UniProps::getIntProp()
+    int32_t vecWord, vecShift;  // vector column passed to upvec_setValue(), and left-shift applied to the value
+    uint32_t vecMask;  // mask passed to upvec_setValue(); the shifted value must fit inside it (asserted in setProps)
+};
+
+static const PropToEnum  // table scanned by setProps() for each enumerated property that is new on a line
+propToEnums[]={  // rows are { property, vector word, shift, mask }
+    // Use UPROPS_SCRIPT_X_MASK not UPROPS_SCRIPT_MASK:
+    // When writing a Script code, remove Script_Extensions bits as well.
+    // If needed, they will get written again.
+    { UCHAR_SCRIPT,                     0, 0, UPROPS_SCRIPT_X_MASK },
+    { UCHAR_BLOCK,                      0, UPROPS_BLOCK_SHIFT, UPROPS_BLOCK_MASK },
+    { UCHAR_EAST_ASIAN_WIDTH,           0, UPROPS_EA_SHIFT, UPROPS_EA_MASK },
+    { UCHAR_DECOMPOSITION_TYPE,         2, 0, UPROPS_DT_MASK },
+    { UCHAR_GRAPHEME_CLUSTER_BREAK,     2, UPROPS_GCB_SHIFT, UPROPS_GCB_MASK },
+    { UCHAR_WORD_BREAK,                 2, UPROPS_WB_SHIFT, UPROPS_WB_MASK },
+    { UCHAR_SENTENCE_BREAK,             2, UPROPS_SB_SHIFT, UPROPS_SB_MASK },
+    { UCHAR_LINE_BREAK,                 2, UPROPS_LB_SHIFT, UPROPS_LB_MASK },
+};
+
+void
+CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues,
+                          UErrorCode &errorCode) {  // stores the values that are new on this line for props.start..props.end
+    setGcAndNumeric(props, newValues, errorCode);  // runs first; it returns at once if errorCode already failed
+    if(U_FAILURE(errorCode)) { return; }
+
+    UChar32 start=props.start;
+    UChar32 end=props.end;
+    if(start==0 && end==0x10ffff) {
+        // Also set bits for initialValue and errorValue.
+        end=UPVEC_MAX_CP;
+    }
+    if(newValues.containsSome(0, UCHAR_BINARY_LIMIT-1)) {  // scan the table only if a property below UCHAR_BINARY_LIMIT is new
+        for(int32_t i=0; i<LENGTHOF(propToBinaries); ++i) {
+            const PropToBinary &p2b=propToBinaries[i];
+            U_ASSERT(p2b.vecShift<32);
+            if(newValues.contains(p2b.prop)) {
+                uint32_t mask=U_MASK(p2b.vecShift);
+                uint32_t value= props.binProps[p2b.prop] ? mask : 0;  // the property's single bit, or 0 when it is false
+                upvec_setValue(pv, start, end, p2b.vecWord, value, mask, &errorCode);
+            }
+        }
+    }
+    if(newValues.containsSome(UCHAR_INT_START, UCHAR_INT_LIMIT-1)) {  // same gate for the UCHAR_INT_START..UCHAR_INT_LIMIT-1 range
+        for(int32_t i=0; i<LENGTHOF(propToEnums); ++i) {
+            const PropToEnum &p2e=propToEnums[i];
+            U_ASSERT(p2e.vecShift<32);
+            if(newValues.contains(p2e.prop)) {
+                uint32_t mask=p2e.vecMask;
+                uint32_t value=(uint32_t)(props.getIntProp(p2e.prop)<<p2e.vecShift);
+                U_ASSERT((value&mask)==value);  // the shifted value must not spill outside its mask
+                upvec_setValue(pv, start, end, p2e.vecWord, value, mask, &errorCode);
+            }
+        }
+    }
+    if(newValues.contains(UCHAR_AGE)) {
+        if(props.age[0]>15 || props.age[1]>15 || props.age[2]!=0 || props.age[3]!=0) {  // age[0], age[1] get 4 bits each below; age[2], age[3] are not stored
+            char buffer[U_MAX_VERSION_STRING_LENGTH];
+            u_versionToString(props.age, buffer);
+            fprintf(stderr, "genprops error: age %s cannot be encoded\n", buffer);
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        uint32_t version=(props.age[0]<<4)|props.age[1];  // age[0] in the high nibble, age[1] in the low nibble
+        upvec_setValue(pv, start, end,
+                       0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK,
+                       &errorCode);
+    }
+    // Write a new (Script, Script_Extensions) value if there are Script_Extensions
+    // and either Script or Script_Extensions are new on the current line.
+    // (If only Script is new, then it just clobbered the relevant bits.)
+    if( !props.scx.isEmpty() &&
+        (newValues.contains(UCHAR_SCRIPT) || newValues.contains(UCHAR_SCRIPT_EXTENSIONS))
+    ) {
+        UnicodeString codes;  // vector of 16-bit UScriptCode values
+        UnicodeSetIterator iter(props.scx);
+        while(iter.next()) { codes.append((UChar)iter.getCodepoint()); }
+
+        // Set bit 15 on the last script code, for termination.
+        int32_t length=codes.length();
+        codes.setCharAt(length-1, (UChar)(codes[length-1]|0x8000));
+        // Find this list of codes in the Script_Extensions data so far, or add this list.
+        int32_t index=scriptExtensions.indexOf(codes);
+        if(index<0) {
+            index=scriptExtensions.length();
+            scriptExtensions.append(codes);
+        }
+
+        // Encode the (Script, Script_Extensions index) pair.
+        int32_t script=props.getIntProp(UCHAR_SCRIPT);
+        uint32_t scriptX;
+        if(script==USCRIPT_COMMON) {
+            scriptX=UPROPS_SCRIPT_X_WITH_COMMON|(uint32_t)index;
+        } else if(script==USCRIPT_INHERITED) {
+            scriptX=UPROPS_SCRIPT_X_WITH_INHERITED|(uint32_t)index;
+        } else {
+            // Store an additional pair of 16-bit units for an unusual main Script code
+            // together with the Script_Extensions index.
+            UnicodeString codeIndexPair;
+            codeIndexPair.append((UChar)script).append((UChar)index);  // NOTE(review): index is narrowed to 16 bits here without a range check
+            index=scriptExtensions.indexOf(codeIndexPair);  // from here on, index refers to the pair, not to the code list
+            if(index<0) {
+                index=scriptExtensions.length();
+                scriptExtensions.append(codeIndexPair);
+            }
+            scriptX=UPROPS_SCRIPT_X_WITH_OTHER|(uint32_t)index;
+        }
+        if(index>UPROPS_SCRIPT_MASK) {  // checks the index that was OR-ed into scriptX above
+            fprintf(stderr, "genprops: Script_Extensions indexes overflow bit field\n");
+            errorCode=U_BUFFER_OVERFLOW_ERROR;
+            return;
+        }
+        upvec_setValue(pv, start, end, 0, scriptX, UPROPS_SCRIPT_X_MASK, &errorCode);
+    }
+    if(U_FAILURE(errorCode)) {  // reached only for failures from the upvec_setValue() calls; the other error paths return earlier
+        fprintf(stderr, "genprops error: unable to set props2 values for %04lX..%04lX: %s\n",
+                (long)start, (long)end, u_errorName(errorCode));
+    }
+}
+
 static int32_t indexes[UPROPS_INDEX_COUNT]={
     0, 0, 0, 0,
     0, 0, 0, 0,
@@ -437,6 +631,9 @@ static int32_t indexes[UPROPS_INDEX_COUNT]={
 
 static uint8_t trieBlock[40000];
 static int32_t trieSize;
+static uint8_t props2TrieBlock[100000];  // buffer that finalizeData() serializes props2Trie into
+static int32_t props2TrieSize;  // result of utrie2_serialize() for props2Trie, set in finalizeData()
+
 static int32_t totalSize;
 
 void
@@ -446,11 +643,40 @@ CorePropsWriter::finalizeData(UErrorCode &errorCode) {
     utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
     trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
     if(U_FAILURE(errorCode)) {
-        fprintf(stderr, "genprops error: utrie2_freeze(main trie)+utrie2_serialize() failed: %s (length %ld)\n",
+        fprintf(stderr,
+                "genprops error: utrie2_freeze(main trie)+utrie2_serialize() "
+                "failed: %s (length %ld)\n",
                 u_errorName(errorCode), (long)trieSize);
         return;
     }
 
+    props2Trie=upvec_compactToUTrie2WithRowIndexes(pv, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
+                u_errorName(errorCode));
+        return;
+    }
+
+    props2TrieSize=utrie2_serialize(props2Trie,
+                                    props2TrieBlock, (int32_t)sizeof(props2TrieBlock),
+                                    &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr,
+                "genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
+                u_errorName(errorCode));
+        return;
+    }
+
+    int32_t pvRows;
+    const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
+    int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
+
+    /* round up scriptExtensions to multiple of 4 bytes */
+    if(scriptExtensions.length()&1) {
+        scriptExtensions.append((UChar)0);
+    }
+
+    /* set indexes */
     int32_t offset=sizeof(indexes)/4;       /* uint32_t offset to the properties trie */
     offset+=trieSize>>2;
     indexes[UPROPS_PROPS32_INDEX]=          /* set indexes to the same offsets for empty */
@@ -458,13 +684,34 @@ CorePropsWriter::finalizeData(UErrorCode &errorCode) {
     indexes[UPROPS_EXCEPTIONS_TOP_INDEX]=   /* so that less runtime code has to be changed */
     indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset;
 
-    if(beVerbose) {
-        printf("trie size in bytes:                    %5u\n", (int)trieSize);
-    }
-
-    totalSize=4*offset+props2FinalizeData(indexes, errorCode);
+    offset+=props2TrieSize/4;
+    indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=offset;
+    indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
+    offset+=pvCount;
+    indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=offset;
+    offset+=scriptExtensions.length()/2;
+    indexes[UPROPS_RESERVED_INDEX_7]=offset;
+    indexes[UPROPS_RESERVED_INDEX_8]=offset;
+    indexes[UPROPS_DATA_TOP_INDEX]=offset;
+    totalSize=4*offset;
+
+    indexes[UPROPS_MAX_VALUES_INDEX]=
+        (((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
+        (((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
+        (((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
+    indexes[UPROPS_MAX_VALUES_2_INDEX]=
+        (((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
+        (((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
+        (((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
+        (((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
+        ((int32_t)U_DT_COUNT-1);
 
     if(beVerbose) {
+        printf("trie size in bytes:                    %5u\n", (int)trieSize);
+        printf("size in bytes of additional props trie:%5u\n", (int)props2TrieSize);
+        printf("number of additional props vectors:    %5u\n", (int)pvRows);
+        printf("number of 32-bit words per vector:     %5u\n", UPROPS_VECTOR_WORDS);
+        printf("number of 16-bit scriptExtensions:     %5u\n", (int)scriptExtensions.length());
         printf("data size:                            %6ld\n", (long)totalSize);
     }
 }
@@ -473,6 +720,10 @@ void
 CorePropsWriter::writeCSourceFile(const char *path, UErrorCode &errorCode) {
     if(U_FAILURE(errorCode)) { return; }
 
+    int32_t pvRows;
+    const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
+    int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
+
     FILE *f=usrc_createFromGenerator(path, "uchar_props_data.h",
                                      "icu/tools/src/unicode/c/genprops/corepropswriter.cpp");
     if(f==NULL) {
@@ -495,7 +746,26 @@ CorePropsWriter::writeCSourceFile(const char *path, UErrorCode &errorCode) {
         pTrie, "propsTrie_index", NULL,
         "};\n\n");
 
-    props2AppendToCSourceFile(f, errorCode);
+    usrc_writeUTrie2Arrays(f,
+        "static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
+        props2Trie,
+        "\n};\n\n");
+    usrc_writeUTrie2Struct(f,
+        "static const UTrie2 propsVectorsTrie={\n",
+        props2Trie, "propsVectorsTrie_index", NULL,
+        "};\n\n");
+
+    usrc_writeArray(f,
+        "static const uint32_t propsVectors[%ld]={\n",
+        pvArray, 32, pvCount,
+        "};\n\n");
+    fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
+    fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)UPROPS_VECTOR_WORDS);
+
+    usrc_writeArray(f,
+        "static const uint16_t scriptExtensions[%ld]={\n",
+        scriptExtensions.getBuffer(), 16, scriptExtensions.length(),
+        "};\n\n");
 
     usrc_writeArray(f,
         "static const int32_t indexes[UPROPS_INDEX_COUNT]={",
@@ -508,6 +778,10 @@ void
 CorePropsWriter::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
     if(U_FAILURE(errorCode)) { return; }
 
+    int32_t pvRows;
+    const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
+    int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
+
     UNewDataMemory *pData=udata_create(path, "icu", "uprops", &dataInfo,
                                        withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
     if(U_FAILURE(errorCode)) {
@@ -518,7 +792,9 @@ CorePropsWriter::writeBinaryData(const char *path, UBool withCopyright, UErrorCo
 
     udata_writeBlock(pData, indexes, sizeof(indexes));
     udata_writeBlock(pData, trieBlock, trieSize);
-    props2AppendToBinaryFile(pData, errorCode);
+    udata_writeBlock(pData, props2TrieBlock, props2TrieSize);
+    udata_writeBlock(pData, pvArray, pvCount*4);
+    udata_writeBlock(pData, scriptExtensions.getBuffer(), scriptExtensions.length()*2);
 
     long dataLength=udata_finish(pData, &errorCode);
     if(U_FAILURE(errorCode)) {
index 550e0bbbf672b16d08e9e0d036aaf2b6053bec2e..f90492a1e51ad6180418930b72aba932206d1f24 100644 (file)
@@ -95,7 +95,6 @@ main(int argc, char* argv[]) {
     /* initialize */
     IcuToolErrorCode errorCode("genprops");
     LocalPointer<PropsWriter> corePropsWriter(createCorePropsWriter(errorCode));
-    LocalPointer<PropsWriter> props2Writer(createProps2Writer(errorCode));
     if(errorCode.isFailure()) {
         fprintf(stderr, "genprops: unable to create PropsWriters - %s\n", errorCode.errorName());
         return errorCode.reset();
@@ -125,7 +124,6 @@ main(int argc, char* argv[]) {
         if(ppucd.lineHasPropertyValues()) {
             const UniProps *props=ppucd.getProps(newValues, errorCode);
             corePropsWriter->setProps(*props, newValues, errorCode);
-            props2Writer->setProps(*props, newValues, errorCode);
         } else if(lineType==PreparsedUCD::UNICODE_VERSION_LINE) {
             const UVersionInfo &version=ppucd.getUnicodeVersion();
             corePropsWriter->setUnicodeVersion(version);
index b0357586339a134197610e9338239b4ca3a9075c..002f2ef1afcd072e3e51eb57a24ac5e994ae2141 100644 (file)
@@ -34,19 +34,8 @@ public:
 };
 
 PropsWriter *createCorePropsWriter(UErrorCode &errorCode);
-PropsWriter *createProps2Writer(UErrorCode &errorCode);
 
 /* global flags */
 U_CFUNC UBool beVerbose;
 
-/* prototypes */
-int32_t
-props2FinalizeData(int32_t indexes[], UErrorCode &errorCode);
-
-void
-props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode);
-
-void
-props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode);
-
 #endif
diff --git a/tools/unicode/c/genprops/props2writer.cpp b/tools/unicode/c/genprops/props2writer.cpp
deleted file mode 100644 (file)
index f2bb75b..0000000
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2002-2011, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  props2writer.cpp (was props2.cpp)
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2002feb24
-*   created by: Markus W. Scherer
-*
-*   Parse more Unicode Character Database files and store
-*   additional Unicode character properties in bit set vectors.
-*/
-
-#include <stdio.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/uniset.h"
-#include "unicode/unistr.h"
-#include "unicode/usetiter.h"
-#include "unicode/uscript.h"
-#include "cstring.h"
-#include "genprops.h"
-#include "propsvec.h"
-#include "uassert.h"
-#include "unewdata.h"
-#include "uprops.h"
-#include "utrie2.h"
-#include "writesrc.h"
-
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
-U_NAMESPACE_USE
-
-static UTrie2 *newTrie=NULL;
-static UPropsVectors *pv=NULL;
-
-static UnicodeString *scriptExtensions=NULL;
-
-class Props2Writer : public PropsWriter {
-public:
-    Props2Writer(UErrorCode &errorCode);
-    virtual ~Props2Writer();
-
-    virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
-};
-
-Props2Writer::Props2Writer(UErrorCode &errorCode) {
-    pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
-    if(U_FAILURE(errorCode)) {
-        fprintf(stderr, "genprops error: props2writer upvec_open() failed - %s\n",
-                u_errorName(errorCode));
-    }
-    scriptExtensions=new UnicodeString();
-}
-
-Props2Writer::~Props2Writer() {
-    utrie2_close(newTrie);
-    upvec_close(pv);
-    delete scriptExtensions;
-}
-
-struct PropToBinary {
-    int32_t prop;  // UProperty
-    int32_t vecWord, vecShift;
-};
-
-static const PropToBinary
-propToBinaries[]={
-    { UCHAR_WHITE_SPACE,                    1, UPROPS_WHITE_SPACE },
-    { UCHAR_DASH,                           1, UPROPS_DASH },
-    // Note: The Hyphen property is stabilized since Unicode 4.0
-    // and deprecated since Unicode 6.0.
-    { UCHAR_HYPHEN,                         1, UPROPS_HYPHEN },
-    { UCHAR_QUOTATION_MARK,                 1, UPROPS_QUOTATION_MARK },
-    { UCHAR_TERMINAL_PUNCTUATION,           1, UPROPS_TERMINAL_PUNCTUATION },
-    // Note: The Hex_Digit and ASCII_Hex_Digit properties are probably stable enough
-    // so that they could be hardcoded.
-    { UCHAR_HEX_DIGIT,                      1, UPROPS_HEX_DIGIT },
-    { UCHAR_ASCII_HEX_DIGIT,                1, UPROPS_ASCII_HEX_DIGIT },
-    { UCHAR_IDEOGRAPHIC,                    1, UPROPS_IDEOGRAPHIC },
-    { UCHAR_DIACRITIC,                      1, UPROPS_DIACRITIC },
-    { UCHAR_EXTENDER,                       1, UPROPS_EXTENDER },
-    // Note: The Noncharacter_Code_Point property is probably stable enough
-    // so that it could be hardcoded.
-    { UCHAR_NONCHARACTER_CODE_POINT,        1, UPROPS_NONCHARACTER_CODE_POINT },
-    // Note: The Grapheme_Link property is deprecated since Unicode 5.0
-    // because it is a "Duplication of ccc=9" (UAX #44).
-    { UCHAR_GRAPHEME_LINK,                  1, UPROPS_GRAPHEME_LINK },
-    { UCHAR_IDS_BINARY_OPERATOR,            1, UPROPS_IDS_BINARY_OPERATOR },
-    { UCHAR_IDS_TRINARY_OPERATOR,           1, UPROPS_IDS_TRINARY_OPERATOR },
-    { UCHAR_RADICAL,                        1, UPROPS_RADICAL },
-    { UCHAR_UNIFIED_IDEOGRAPH,              1, UPROPS_UNIFIED_IDEOGRAPH },
-    { UCHAR_DEPRECATED,                     1, UPROPS_DEPRECATED },
-    { UCHAR_LOGICAL_ORDER_EXCEPTION,        1, UPROPS_LOGICAL_ORDER_EXCEPTION },
-    { UCHAR_S_TERM,                         1, UPROPS_S_TERM },
-    { UCHAR_VARIATION_SELECTOR,             1, UPROPS_VARIATION_SELECTOR },
-    // Note: Pattern_Syntax & Pattern_White_Space are available via
-    // the internal PatternProps class and need not be stored here any more.
-    { UCHAR_PATTERN_SYNTAX,                 1, UPROPS_PATTERN_SYNTAX },
-    { UCHAR_PATTERN_WHITE_SPACE,            1, UPROPS_PATTERN_WHITE_SPACE },
-    { UCHAR_XID_START,                      1, UPROPS_XID_START },
-    { UCHAR_XID_CONTINUE,                   1, UPROPS_XID_CONTINUE },
-    { UCHAR_MATH,                           1, UPROPS_MATH },
-    { UCHAR_ALPHABETIC,                     1, UPROPS_ALPHABETIC },
-    { UCHAR_GRAPHEME_EXTEND,                1, UPROPS_GRAPHEME_EXTEND },
-    { UCHAR_DEFAULT_IGNORABLE_CODE_POINT,   1, UPROPS_DEFAULT_IGNORABLE_CODE_POINT },
-    { UCHAR_ID_START,                       1, UPROPS_ID_START },
-    { UCHAR_ID_CONTINUE,                    1, UPROPS_ID_CONTINUE },
-    { UCHAR_GRAPHEME_BASE,                  1, UPROPS_GRAPHEME_BASE },
-};
-
-struct PropToEnum {
-    int32_t prop;  // UProperty
-    int32_t vecWord, vecShift;
-    uint32_t vecMask;
-};
-
-static const PropToEnum
-propToEnums[]={
-    // Use UPROPS_SCRIPT_X_MASK not UPROPS_SCRIPT_MASK:
-    // When writing a Script code, remove Script_Extensions bits as well.
-    // If needed, they will get written again.
-    { UCHAR_SCRIPT,                     0, 0, UPROPS_SCRIPT_X_MASK },
-    { UCHAR_BLOCK,                      0, UPROPS_BLOCK_SHIFT, UPROPS_BLOCK_MASK },
-    { UCHAR_EAST_ASIAN_WIDTH,           0, UPROPS_EA_SHIFT, UPROPS_EA_MASK },
-    { UCHAR_DECOMPOSITION_TYPE,         2, 0, UPROPS_DT_MASK },
-    { UCHAR_GRAPHEME_CLUSTER_BREAK,     2, UPROPS_GCB_SHIFT, UPROPS_GCB_MASK },
-    { UCHAR_WORD_BREAK,                 2, UPROPS_WB_SHIFT, UPROPS_WB_MASK },
-    { UCHAR_SENTENCE_BREAK,             2, UPROPS_SB_SHIFT, UPROPS_SB_MASK },
-    { UCHAR_LINE_BREAK,                 2, UPROPS_LB_SHIFT, UPROPS_LB_MASK },
-};
-
-void
-Props2Writer::setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) { return; }
-    UChar32 start=props.start;
-    UChar32 end=props.end;
-    if(start==0 && end==0x10ffff) {
-        // Also set bits for initialValue and errorValue.
-        end=UPVEC_MAX_CP;
-    }
-    if(newValues.containsSome(0, UCHAR_BINARY_LIMIT-1)) {
-        for(int32_t i=0; i<LENGTHOF(propToBinaries); ++i) {
-            const PropToBinary &p2b=propToBinaries[i];
-            U_ASSERT(p2b.vecShift<32);
-            if(newValues.contains(p2b.prop)) {
-                uint32_t mask=U_MASK(p2b.vecShift);
-                uint32_t value= props.binProps[p2b.prop] ? mask : 0;
-                upvec_setValue(pv, start, end, p2b.vecWord, value, mask, &errorCode);
-            }
-        }
-    }
-    if(newValues.containsSome(UCHAR_INT_START, UCHAR_INT_LIMIT-1)) {
-        for(int32_t i=0; i<LENGTHOF(propToEnums); ++i) {
-            const PropToEnum &p2e=propToEnums[i];
-            U_ASSERT(p2e.vecShift<32);
-            if(newValues.contains(p2e.prop)) {
-                uint32_t mask=p2e.vecMask;
-                uint32_t value=(uint32_t)(props.getIntProp(p2e.prop)<<p2e.vecShift);
-                U_ASSERT((value&mask)==value);
-                upvec_setValue(pv, start, end, p2e.vecWord, value, mask, &errorCode);
-            }
-        }
-    }
-    if(newValues.contains(UCHAR_AGE)) {
-        if(props.age[0]>15 || props.age[1]>15 || props.age[2]!=0 || props.age[3]!=0) {
-            char buffer[U_MAX_VERSION_STRING_LENGTH];
-            u_versionToString(props.age, buffer);
-            fprintf(stderr, "genprops error: age %s cannot be encoded\n", buffer);
-            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
-            return;
-        }
-        uint32_t version=(props.age[0]<<4)|props.age[1];
-        upvec_setValue(pv, start, end,
-                       0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK,
-                       &errorCode);
-    }
-    // Write a new (Script, Script_Extensions) value if there are Script_Extensions
-    // and either Script or Script_Extensions are new on the current line.
-    // (If only Script is new, then it just clobbered the relevant bits.)
-    if( !props.scx.isEmpty() &&
-        (newValues.contains(UCHAR_SCRIPT) || newValues.contains(UCHAR_SCRIPT_EXTENSIONS))
-    ) {
-        UnicodeString codes;  // vector of 16-bit UScriptCode values
-        UnicodeSetIterator iter(props.scx);
-        while(iter.next()) { codes.append((UChar)iter.getCodepoint()); }
-
-        // Set bit 15 on the last script code, for termination.
-        int32_t length=codes.length();
-        codes.setCharAt(length-1, (UChar)(codes[length-1]|0x8000));
-        // Find this list of codes in the Script_Extensions data so far, or add this list.
-        int32_t index=scriptExtensions->indexOf(codes);
-        if(index<0) {
-            index=scriptExtensions->length();
-            scriptExtensions->append(codes);
-        }
-
-        // Encode the (Script, Script_Extensions index) pair.
-        int32_t script=props.getIntProp(UCHAR_SCRIPT);
-        uint32_t scriptX;
-        if(script==USCRIPT_COMMON) {
-            scriptX=UPROPS_SCRIPT_X_WITH_COMMON|(uint32_t)index;
-        } else if(script==USCRIPT_INHERITED) {
-            scriptX=UPROPS_SCRIPT_X_WITH_INHERITED|(uint32_t)index;
-        } else {
-            // Store an additional pair of 16-bit units for an unusual main Script code
-            // together with the Script_Extensions index.
-            UnicodeString codeIndexPair;
-            codeIndexPair.append((UChar)script).append((UChar)index);
-            index=scriptExtensions->indexOf(codeIndexPair);
-            if(index<0) {
-                index=scriptExtensions->length();
-                scriptExtensions->append(codeIndexPair);
-            }
-            scriptX=UPROPS_SCRIPT_X_WITH_OTHER|(uint32_t)index;
-        }
-        if(index>UPROPS_SCRIPT_MASK) {
-            fprintf(stderr, "genprops: Script_Extensions indexes overflow bit field\n");
-            errorCode=U_BUFFER_OVERFLOW_ERROR;
-            return;
-        }
-        upvec_setValue(pv, start, end, 0, scriptX, UPROPS_SCRIPT_X_MASK, &errorCode);
-    }
-    if(U_FAILURE(errorCode)) {
-        fprintf(stderr, "genprops error: unable to set props2 values for %04lX..%04lX: %s\n",
-                (long)start, (long)end, u_errorName(errorCode));
-    }
-}
-
-static uint8_t trieBlock[100000];
-static int32_t trieSize;
-
-int32_t
-props2FinalizeData(int32_t indexes[UPROPS_INDEX_COUNT], UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) { return 0; }
-
-    newTrie=upvec_compactToUTrie2WithRowIndexes(pv, &errorCode);
-    if(U_FAILURE(errorCode)) {
-        fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
-                u_errorName(errorCode));
-        return 0;
-    }
-
-    trieSize=utrie2_serialize(newTrie, trieBlock, (int32_t)sizeof(trieBlock), &errorCode);
-    if(U_FAILURE(errorCode)) {
-        fprintf(stderr,
-                "genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
-                u_errorName(errorCode));
-        return 0;
-    }
-
-    int32_t pvRows;
-    const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
-    int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
-
-    /* round up scriptExtensions to multiple of 4 bytes */
-    if(scriptExtensions->length()&1) {
-        scriptExtensions->append((UChar)0);
-    }
-
-    /* set indexes */
-    indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=
-        indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+trieSize/4;
-    indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
-    indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=
-        indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;
-    indexes[UPROPS_RESERVED_INDEX_7]=
-        indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]+scriptExtensions->length()/2;
-    indexes[UPROPS_RESERVED_INDEX_8]=indexes[UPROPS_RESERVED_INDEX_7];
-    indexes[UPROPS_DATA_TOP_INDEX]=indexes[UPROPS_RESERVED_INDEX_8];
-
-    indexes[UPROPS_MAX_VALUES_INDEX]=
-        (((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
-        (((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
-        (((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
-    indexes[UPROPS_MAX_VALUES_2_INDEX]=
-        (((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
-        (((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
-        (((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
-        (((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
-        ((int32_t)U_DT_COUNT-1);
-
-    if(beVerbose) {
-        printf("size in bytes of additional props trie:%5u\n", (int)trieSize);
-        printf("number of additional props vectors:    %5u\n", (int)pvRows);
-        printf("number of 32-bit words per vector:     %5u\n", UPROPS_VECTOR_WORDS);
-        printf("number of 16-bit scriptExtensions:     %5u\n", (int)scriptExtensions->length());
-    }
-
-    return 4*(indexes[UPROPS_DATA_TOP_INDEX]-indexes[UPROPS_ADDITIONAL_TRIE_INDEX]);
-}
-
-void
-props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) { return; }
-
-    int32_t pvRows;
-    const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
-    int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
-
-    usrc_writeUTrie2Arrays(f,
-        "static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
-        newTrie,
-        "\n};\n\n");
-    usrc_writeUTrie2Struct(f,
-        "static const UTrie2 propsVectorsTrie={\n",
-        newTrie, "propsVectorsTrie_index", NULL,
-        "};\n\n");
-
-    usrc_writeArray(f,
-        "static const uint32_t propsVectors[%ld]={\n",
-        pvArray, 32, pvCount,
-        "};\n\n");
-    fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
-    fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)UPROPS_VECTOR_WORDS);
-
-    usrc_writeArray(f,
-        "static const uint16_t scriptExtensions[%ld]={\n",
-        scriptExtensions->getBuffer(), 16, scriptExtensions->length(),
-        "};\n\n");
-}
-
-void
-props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) { return; }
-
-    int32_t pvRows;
-    const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
-    int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
-
-    udata_writeBlock(pData, trieBlock, trieSize);
-    udata_writeBlock(pData, pvArray, pvCount*4);
-    udata_writeBlock(pData, scriptExtensions->getBuffer(), scriptExtensions->length()*2);
-}
-
-PropsWriter *
-createProps2Writer(UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) { return NULL; }
-    PropsWriter *pw=new Props2Writer(errorCode);
-    if(pw==NULL) {
-        errorCode=U_MEMORY_ALLOCATION_ERROR;
-    }
-    return pw;
-}