dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk "
dh->info.dataFormat[1] == 0x72 &&
dh->info.dataFormat[2] == 0x6b &&
- dh->info.dataFormat[3] == 0x20)
- // Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is
- // validated when checking that.
+ dh->info.dataFormat[3] == 0x20 &&
+ isDataVersionAcceptable(dh->info.formatVersion))
) {
status = U_INVALID_FORMAT_ERROR;
return;
fUDataMem = udm;
}
+// Reports whether `version` is exactly the RBBI data format version this code
+// supports. Every byte of RBBI_DATA_FORMAT_VERSION is compared against the byte
+// at the same index in `version`, so the caller must supply at least that many
+// bytes. Returns true when all of them match, false on the first difference.
+UBool RBBIDataWrapper::isDataVersionAcceptable(const uint8_t version[]) {
+    const int count = UPRV_LENGTHOF(RBBI_DATA_FORMAT_VERSION);
+    int idx = 0;
+    // Walk forward while the bytes agree; stop at the first mismatch.
+    while (idx < count && version[idx] == RBBI_DATA_FORMAT_VERSION[idx]) {
+        ++idx;
+    }
+    // Reaching the end of the reference array means no byte differed.
+    if (idx == count) {
+        return true;
+    }
+    return false;
+}
+
+
//-----------------------------------------------------------------------------
//
// init(). Does most of the work of construction, shared between the
return;
}
fHeader = data;
- if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3)
- {
+ if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
status = U_INVALID_FORMAT_ERROR;
return;
}
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6b &&
pInfo->dataFormat[3]==0x20 &&
- pInfo->formatVersion[0]==3 )) {
+ RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],
pInfo->dataFormat[2], pInfo->dataFormat[3],
//
// Get the RBBI Data Header, and check that it appears to be OK.
//
- // Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
- // an int32_t with a value of 1. Starting with ICU 3.4,
- // RBBI's fDataFormat matches the dataFormat field from the
- // UDataInfo header, four int8_t bytes. The value is {3,1,0,0}
- //
const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
- rbbiDH->fFormatVersion[0] != 3 ||
- ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader))
- {
+ !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
+ ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
*status=U_UNSUPPORTED_ERROR;
return 0;
U_NAMESPACE_BEGIN
+// The current RBBI data format version. RBBIDataWrapper::isDataVersionAcceptable() compares data versions against every byte of this array.
+static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0};
+
/*
* The following structs map exactly onto the raw data from ICU common data file.
*/
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
~RBBIDataWrapper();
+ static UBool isDataVersionAcceptable(const uint8_t version[]);
+
void init0();
void init(const RBBIDataHeader *data, UErrorCode &status);
RBBIDataWrapper *addReference();
data->fMagic = 0xb1a0;
- data->fFormatVersion[0] = 3;
- data->fFormatVersion[1] = 1;
- data->fFormatVersion[2] = 0;
- data->fFormatVersion[3] = 0;
+ data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
+ data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
+ data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
+ data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
data->fLength = totalSize;
data->fCatCount = fSetBuilder->getNumCharCategories();
private boolean isBigEndian;
- static final int DATA_FORMAT = 0x42726b20; // "Brk "
- static final int FORMAT_VERSION = 0x03010000; // 3.1
+ static final int DATA_FORMAT = 0x42726b20; // "Brk "
+ static final int FORMAT_VERSION = 0x04000000; // 4.0.0.0
private static final class IsAcceptable implements Authenticate {
- // @Override when we switch to Java 6
+ /**
+  * Accepts only data whose four version bytes, packed most-significant first,
+  * equal FORMAT_VERSION.
+  *
+  * @param version the four format-version bytes read from the data header
+  * @return true if the packed version equals FORMAT_VERSION
+  */
@Override
public boolean isDataVersionAcceptable(byte version[]) {
- return version[0] == (FORMAT_VERSION >>> 24);
+ // Java bytes are signed: mask each one to 0..255 before shifting so that a
+ // byte >= 0x80 cannot sign-extend into the higher-order positions.
+ int intVersion = ((version[0] & 0xff) << 24)
+         | ((version[1] & 0xff) << 16)
+         | ((version[2] & 0xff) << 8)
+         | (version[3] & 0xff);
+ return intVersion == FORMAT_VERSION;
}
}
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
*/
final static class RBBIDataHeader {
int fMagic; // == 0xb1a0
- int fVersion; // == 1 (for ICU 3.2 and earlier.
byte[] fFormatVersion; // For ICU 3.4 and later.
int fLength; // Total length in bytes of this RBBI Data,
// including all sections, not just the header.
// Read in the RBBI data header...
This.fHeader = new RBBIDataHeader();
This.fHeader.fMagic = bytes.getInt();
- // Read the same 4 bytes as an int and as a byte array: The data format could be
- // the old fVersion=1 (TODO: probably not with a real ICU data header?)
- // or the new fFormatVersion=3.x.
- This.fHeader.fVersion = bytes.getInt(bytes.position());
This.fHeader.fFormatVersion[0] = bytes.get();
This.fHeader.fFormatVersion[1] = bytes.get();
This.fHeader.fFormatVersion[2] = bytes.get();
ICUBinary.skipBytes(bytes, 6 * 4); // uint32_t fReserved[6];
- if (This.fHeader.fMagic != 0xb1a0 ||
- ! (This.fHeader.fVersion == 1 || // ICU 3.2 and earlier
- This.fHeader.fFormatVersion[0] == 3) // ICU 3.4
- ) {
+ if (This.fHeader.fMagic != 0xb1a0 || !IS_ACCEPTABLE.isDataVersionAcceptable(This.fHeader.fFormatVersion)) {
throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
}
// This is the main class for building (compiling) break rules into the tables
// required by the runtime RBBI engine.
//
-
+
String fDebugEnv; // controls debug trace output
String fRules; // The rule string that we are compiling
RBBIRuleScanner fScanner; // The scanner.
-
+
//
// There are four separate parse trees generated, one for each of the
// forward rules, reverse rules, safe forward rules and safe reverse rules.
// This array references the root of each of the trees.
- //
+ //
RBBINode[] fTreeRoots = new RBBINode[4];
static final int fForwardTree = 0; // Indexes into the above fTreeRoots array
static final int fReverseTree = 1; // for each of the trees.
// Map Value is the runtime array index.
List<Integer> fRuleStatusVals; // List of Integer objects. Has same layout as the
- // runtime array of status (tag) values -
+ // runtime array of status (tag) values -
// number of values in group 1
// first status value in group 1
// 2nd status value in group 1
//
static final int U_BRK_ERROR_START = 0x10200;
/**< Start of codes indicating Break Iterator failures */
-
+
static final int U_BRK_INTERNAL_ERROR = 0x10201;
/**< An internal error (bug) was detected. */
-
+
static final int U_BRK_HEX_DIGITS_EXPECTED = 0x10202;
/**< Hex digits expected as part of an escaped char in a rule. */
-
+
static final int U_BRK_SEMICOLON_EXPECTED = 0x10203;
/**< Missing ';' at the end of a RBBI rule. */
-
+
static final int U_BRK_RULE_SYNTAX = 0x10204;
/**< Syntax error in RBBI rule. */
-
+
static final int U_BRK_UNCLOSED_SET = 0x10205;
/**< UnicodeSet within an RBBI rule missing a closing ']'. */
-
+
static final int U_BRK_ASSIGN_ERROR = 0x10206;
/**< Syntax error in RBBI rule assignment statement. */
-
+
static final int U_BRK_VARIABLE_REDFINITION = 0x10207;
/**< RBBI rule $Variable redefined. */
-
+
static final int U_BRK_MISMATCHED_PAREN = 0x10208;
/**< Mis-matched parentheses in an RBBI rule. */
-
+
static final int U_BRK_NEW_LINE_IN_QUOTED_STRING = 0x10209;
/**< Missing closing quote in an RBBI rule. */
-
+
static final int U_BRK_UNDEFINED_VARIABLE = 0x1020a;
/**< Use of an undefined $Variable in an RBBI rule. */
-
+
static final int U_BRK_INIT_ERROR = 0x1020b;
/**< Initialization failure. Probable missing ICU Data. */
-
+
static final int U_BRK_RULE_EMPTY_SET = 0x1020c;
/**< Rule contains an empty Unicode Set. */
-
+
static final int U_BRK_UNRECOGNIZED_OPTION = 0x1020d;
/**< !!option in RBBI rules not recognized. */
-
+
static final int U_BRK_MALFORMED_RULE_TAG = 0x1020e;
/**< The {nnn} tag on a rule is malformed */
static final int U_BRK_MALFORMED_SET = 0x1020f;
-
+
static final int U_BRK_ERROR_LIMIT = 0x10210;
/**< This must always be the last value to indicate the limit for Break Iterator failures */
//
int[] header = new int[RBBIDataWrapper.DH_SIZE]; // sizeof struct RBBIDataHeader
header[RBBIDataWrapper.DH_MAGIC] = 0xb1a0;
- header[RBBIDataWrapper.DH_FORMATVERSION] = 0x03010000; // uint8_t fFormatVersion[4];
+ header[RBBIDataWrapper.DH_FORMATVERSION] = RBBIDataWrapper.FORMAT_VERSION;
header[RBBIDataWrapper.DH_LENGTH] = totalSize; // fLength, the total size of all rule sections.
header[RBBIDataWrapper.DH_CATCOUNT] = fSetBuilder.getNumCharCategories(); // fCatCount.
header[RBBIDataWrapper.DH_FTABLE] = headerSize; // fFTable
mainLoop: for (;;) {
if (c == DONE32) {
// Reached end of input string.
- if (mode == RBBI_END || fRData.fHeader.fVersion == 1) {
- // Either this is the old (ICU 3.2 and earlier) format data which
- // does not support explicit support for matching {eof}, or
- // we have already done the {eof} iteration. Now is the time
+ if (mode == RBBI_END) {
+ // We have already done the {eof} iteration. Now is the time
// to unconditionally bail out.
if (result == initialPosition) {
// Ran off start, no match found.
version https://git-lfs.github.com/spec/v1
-oid sha256:d315546f344483688e78322304130697164e0d0363b20ed00880598630632341
-size 12128031
+oid sha256:29b73bb7468ec529b2ad200e0e5e14a16b53d12cc8ba5ac29b9da9de8968adc0
+size 12128017
version https://git-lfs.github.com/spec/v1
-oid sha256:17fb194e1234c73ab09442acf76f1b872d77d8aa7494a06f5964f1342616d69e
+oid sha256:644e4eaa7dfdeb72c639d20160274994b0709da05f2b009a306bbc68f440bb87
size 92448