{ 0x078C, USCRIPT_THAANA },
{ 0x07CA, USCRIPT_NKO },
{ 0x07D8, USCRIPT_NKO },
+ { 0x2D30, USCRIPT_TIFINAGH },
{ 0x2D5E, USCRIPT_TIFINAGH },
{ 0x12A0, USCRIPT_ETHIOPIC },
{ 0x0905, USCRIPT_DEVANAGARI },
{ 0x112BE, USCRIPT_KHUDAWADI },
{ 0x1128F, USCRIPT_MULTANI },
{ 0x11315, USCRIPT_GRANTHA },
+ { 0x11412, USCRIPT_NEWA },
{ 0x11484, USCRIPT_TIRHUTA },
{ 0x1158E, USCRIPT_SIDDHAM },
{ 0x1160E, USCRIPT_MODI },
{ 0x1B83, USCRIPT_SUNDANESE },
{ 0x11005, USCRIPT_BRAHMI },
{ 0x10A00, USCRIPT_KHAROSHTHI },
+ { 0x11C0E, USCRIPT_BHAIKSUKI },
{ 0x0E17, USCRIPT_THAI },
{ 0x0EA5, USCRIPT_LAO },
{ 0xAA80, USCRIPT_TAI_VIET },
{ 0x0F40, USCRIPT_TIBETAN },
+ { 0x11C72, USCRIPT_MARCHEN },
{ 0x1C00, USCRIPT_LEPCHA },
{ 0xA840, USCRIPT_PHAGS_PA },
{ 0x1900, USCRIPT_LIMBU },
{ 0x1826, USCRIPT_MONGOLIAN },
{ 0x1C5A, USCRIPT_OL_CHIKI },
{ 0x13C4, USCRIPT_CHEROKEE },
+ { 0x104B5, USCRIPT_OSAGE },
{ 0x14C0, USCRIPT_CANADIAN_ABORIGINAL },
{ 0x168F, USCRIPT_OGHAM },
{ 0x16A0, USCRIPT_RUNIC },
{ 0xA6A0, USCRIPT_BAMUM },
{ 0x16AE6, USCRIPT_BASSA_VAH },
{ 0x1E802, USCRIPT_MENDE },
+ { 0x1E909, USCRIPT_ADLAM, },
{ 0xAC00, USCRIPT_HANGUL },
{ 0x304B, USCRIPT_HIRAGANA },
{ 0x30AB, USCRIPT_KATAKANA },
{ 0x109A0, USCRIPT_MEROITIC_CURSIVE },
{ 0x10980, USCRIPT_MEROITIC_HIEROGLYPHS },
{ 0x14400, USCRIPT_ANATOLIAN_HIEROGLYPHS },
+ { 0x18229, USCRIPT_TANGUT },
{ 0x5B57, USCRIPT_HAN },
{ 0xFDD0, USCRIPT_UNKNOWN } // unassigned-implicit primary weights
};
}
static UBool
-readAnElement(FILE *data,
+readAnElement(char *line,
CollationBaseDataBuilder &builder,
UnicodeString &prefix, UnicodeString &s,
int64_t ces[32], int32_t &cesLength,
if(U_FAILURE(*status)) {
return FALSE;
}
- char buffer[30000];
- char *result = fgets(buffer, sizeof(buffer), data);
- if(result == NULL) {
- if(feof(data)) {
- return FALSE;
- } else {
- fprintf(stderr, "empty line but no EOF!\n");
- *status = U_INVALID_FORMAT_ERROR;
- return FALSE;
- }
- }
- int32_t buflen = (int32_t)uprv_strlen(buffer);
- while(buflen>0 && (buffer[buflen-1] == '\r' || buffer[buflen-1] == '\n')) {
- buffer[--buflen] = 0;
+ int32_t lineLength = (int32_t)uprv_strlen(line);
+ while(lineLength>0 && (line[lineLength-1] == '\r' || line[lineLength-1] == '\n')) {
+ line[--lineLength] = 0;
}
- if(buflen >= 3 && buffer[0] == (char)0xef &&
- buffer[1] == (char)0xbb && buffer[2] == (char)0xbf) {
+ if(lineLength >= 3 && line[0] == (char)0xef &&
+ line[1] == (char)0xbb && line[2] == (char)0xbf) {
// U+FEFF UTF-8 signature byte sequence.
// Ignore, assuming it is at the start of the file.
- buflen -= 3;
- uprv_memmove(buffer, buffer + 3, buflen + 1); // +1: including NUL terminator
+ line += 3;
+ lineLength -= 3;
}
- if(buffer[0] == 0 || buffer[0] == '#') {
+ if(line[0] == 0 || line[0] == '#') {
return FALSE; // just a comment, skip whole line
}
// Directives.
- if(buffer[0] == '[') {
- readAnOption(builder, buffer, status);
+ if(line[0] == '[') {
+ readAnOption(builder, line, status);
return FALSE;
}
- char *startCodePoint = buffer;
+ CharString input;
+ char *startCodePoint = line;
char *endCodePoint = strchr(startCodePoint, ';');
if(endCodePoint == NULL) {
- fprintf(stderr, "error - line with no code point!\n");
+ fprintf(stderr, "error - line with no code point:\n%s\n", line);
*status = U_INVALID_FORMAT_ERROR; /* No code point - could be an error, but probably only an empty line */
return FALSE;
- } else {
- *endCodePoint = 0;
}
- char *pipePointer = strchr(buffer, '|');
+ // Look for the prefix separator. The ';' terminator is no longer overwritten
+ // with NUL, so strchr() scans the whole line; a '|' that occurs only after
+ // the ';' must not be mistaken for a prefix separator, otherwise
+ // startCodePoint would move past endCodePoint and the length passed to
+ // input.append() below would become negative.
+ char *pipePointer = strchr(line, '|');
- if (pipePointer != NULL) {
+ if (pipePointer != NULL && pipePointer < endCodePoint) {
// Read the prefix string which precedes the actual string.
- *pipePointer = 0;
+ // Copy just the prefix text so that the input line itself stays unmodified.
+ input.append(startCodePoint, (int32_t)(pipePointer - startCodePoint), *status);
UChar *prefixChars = prefix.getBuffer(32);
int32_t prefixSize =
- u_parseString(startCodePoint,
+ u_parseString(input.data(),
prefixChars, prefix.getCapacity(),
NULL, status);
if(U_FAILURE(*status)) {
prefix.releaseBuffer(0);
- fprintf(stderr, "error - parsing of prefix \"%s\" failed: %s\n",
- startCodePoint, u_errorName(*status));
+ // Arguments in format order: parsed text, error name, then the whole line.
+ fprintf(stderr, "error - parsing of prefix \"%s\" failed: %s\n%s\n",
+ input.data(), u_errorName(*status), line);
*status = U_INVALID_FORMAT_ERROR;
return FALSE;
}
prefix.releaseBuffer(prefixSize);
startCodePoint = pipePointer + 1;
+ input.clear();
}
// Read the string which gets the CE(s) assigned.
+ // Copy the text between the (possibly advanced) start and the ';' terminator.
+ input.append(startCodePoint, (int32_t)(endCodePoint - startCodePoint), *status);
UChar *uchars = s.getBuffer(32);
int32_t cSize =
- u_parseString(startCodePoint,
+ u_parseString(input.data(),
uchars, s.getCapacity(),
NULL, status);
if(U_FAILURE(*status)) {
s.releaseBuffer(0);
- fprintf(stderr, "error - parsing of code point(s) \"%s\" failed: %s\n",
- startCodePoint, u_errorName(*status));
+ // Arguments in format order: parsed text, error name, then the whole line.
+ fprintf(stderr, "error - parsing of code point(s) \"%s\" failed: %s\n%s\n",
+ input.data(), u_errorName(*status), line);
*status = U_INVALID_FORMAT_ERROR;
return FALSE;
}
break;
}
if(cesLength >= 31) {
- fprintf(stderr, "Error: Too many CEs on line '%s'\n", buffer);
+ fprintf(stderr, "Error: Too many CEs on line '%s'\n", line);
*status = U_INVALID_FORMAT_ERROR;
return FALSE;
}
ces[cesLength++] = parseCE(builder, pointer, *status);
if(U_FAILURE(*status)) {
fprintf(stderr, "Syntax error parsing CE from line '%s' - %s\n",
- buffer, u_errorName(*status));
+ line, u_errorName(*status));
return FALSE;
}
}
uint8_t b = (uint8_t)(ce >> (j * 8));
if(j <= 1) { b &= 0x3f; } // tertiary bytes use 6 bits
if (b == 1) {
- fprintf(stderr, "Warning: invalid UCA weight byte 01 for %s\n", buffer);
+ fprintf(stderr, "Warning: invalid UCA weight byte 01 for %s\n", line);
return FALSE;
}
if (j == 7 && b == 2) {
- fprintf(stderr, "Warning: invalid UCA primary weight lead byte 02 for %s\n", buffer);
+ fprintf(stderr, "Warning: invalid UCA primary weight lead byte 02 for %s\n", line);
return FALSE;
}
if (j == 7) {
// 02 is unusable and 03 is the low compression terminator when the lead byte is compressible.
if (isCompressible && (b <= 3 || b == 0xff)) {
fprintf(stderr, "Warning: invalid UCA primary second weight byte %02X for %s\n",
- b, buffer);
+ b, line);
return FALSE;
}
}
*status = U_FILE_ACCESS_ERROR;
return;
}
- uint32_t line = 0;
+ int32_t lineNumber = 0;
+ char buffer[30000];
UChar32 maxCodePoint = 0;
while(!feof(data)) {
if(U_FAILURE(*status)) {
fprintf(stderr, "Something returned an error %i (%s) while processing line %u of %s. Exiting...\n",
- *status, u_errorName(*status), (int)line, filename);
+ *status, u_errorName(*status), (int)lineNumber, filename);
exit(*status);
}
- line++;
+ lineNumber++;
+ char *line = fgets(buffer, sizeof(buffer), data);
+ if(line == NULL) {
+ if(feof(data)) {
+ break;
+ } else {
+ fprintf(stderr, "no more input line and also no EOF!\n");
+ *status = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ }
UnicodeString prefix;
UnicodeString s;
int64_t ces[32];
int32_t cesLength = 0;
- if(readAnElement(data, builder, prefix, s, ces, cesLength, status)) {
+ if(readAnElement(line, builder, prefix, s, ces, cesLength, status)) {
// we have read the line, now do something sensible with the read data!
uint32_t p = (uint32_t)(ces[0] >> 32);
if(script < 0) {
fprintf(stderr,
"Error: Unknown script for first-primary sample character "
- "U+%04x on line %u of %s\n"
+ "U+%04X on line %u of %s:\n"
+ "%s\n"
" (add the character to genuca.cpp sampleCharsToScripts[])\n",
- c2, (int)line, filename);
+ c2, (int)lineNumber, filename, line);
exit(U_INVALID_FORMAT_ERROR);
}
if(script == USCRIPT_UNKNOWN) {
if(0xe0000000 <= p && p < 0xf0000000) {
fprintf(stderr,
"Error: Unexpected mapping to an implicit or trailing primary"
- " on line %u of %s.\n",
- (int)line, filename);
+ " on line %u of %s:\n"
+ "%s\n",
+ (int)lineNumber, filename, line);
exit(U_INVALID_FORMAT_ERROR);
}
}
if (beVerbose) {
- printf("\nLines read: %u\n", (int)line);
+ printf("\nLines read: %u\n", (int)lineNumber);
}
fclose(data);