precedes the actual data. It contains platform properties values and the
file format version.
-The following is a description of format version 7 .
+The following is a description of format version 7.1.
Data contents:
0xb0..0x1df fraction ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16
0x1e0..0x2ff large int ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
(only one significant decimal digit)
+ 0x300..0x323 base-60 (sexagesimal) integer (new in format version 7.1)
+ ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
--- Additional properties (new in format version 2.1) ---
Change from UTrie to UTrie2.
+--- Changes in format version 7.1 ---
+
+Unicode 6.2 adds sexagesimal (base-60) numeric values:
+ cp;12432;na=CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH;nv=216000
+ cp;12433;na=CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN;nv=432000
+
+The encoding of numeric values was extended to handle such values.
+
----------------------------------------------------------------------------- */
U_NAMESPACE_USE
0,
{ 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */
- { 7, 0, 0, 0 }, /* formatVersion */
- { 6, 0, 0, 0 } /* dataVersion */
+ { 7, 1, 0, 0 }, /* formatVersion */
+ { 6, 2, 0, 0 } /* dataVersion */
};
class CorePropsBuilder : public PropsBuilder {
// For nt=U_NT_NUMERIC.
static int32_t
encodeNumericValue(UChar32 start, const char *s, UErrorCode &errorCode) {
+ const char *original;
/* get a possible minus sign */
UBool isNegative;
if(*s=='-') {
} else {
/* normal number parsing */
unsigned long ul=uprv_strtoul(s, &numberLimit, 10);
- if(ul>0x7fffffff) {
+ if(s==numberLimit || (*numberLimit!=0 && *numberLimit!='/') || ul>0x7fffffff) {
ntv=-1;
} else {
value=(int32_t)ul;
}
- if(s<numberLimit && *numberLimit=='/') {
+ if(ntv>=0 && *numberLimit=='/') {
/* fractional value, get the denominator */
- ul=uprv_strtoul(numberLimit+1, &numberLimit, 10);
- if(ul==0 || ul>0x7fffffff) {
+ s=numberLimit+1;
+ ul=uprv_strtoul(s, &numberLimit, 10);
+ if(s==numberLimit || *numberLimit!=0 || ul==0 || ul>0x7fffffff) {
ntv=-1;
} else {
den=(int32_t)ul;
mant/=10;
++exp;
}
+ // Note: value<=0x7fffffff guarantees exp<=33
if(mant<=9) {
ntv=((mant+14)<<5)+(exp-2);
+ } else {
+ // Try sexagesimal (base 60) numbers.
+ mant=value;
+ exp=0;
+ while((mant%60)==0) {
+ mant/=60;
+ ++exp;
+ }
+ if(mant<=9 && exp<=4) {
+ ntv=((mant+0xbf)<<2)+(exp-1);
+ } else {
+ ntv=-1;
+ }
}
}
} else if(2<=exp && exp<=33 && 1<=value && value<=9) {
/* large, single-significant-digit integer */
ntv=((value+14)<<5)+(exp-2);
+ } else {
+ ntv=-1;
}
- } else if(exp==0) {
- if(-1<=value && value<=17 && 1<=den && den<=16) {
- /* fraction */
- ntv=((value+12)<<4)+(den-1);
- }
+ } else if(exp==0 && -1<=value && value<=17 && 1<=den && den<=16) {
+ /* fraction */
+ ntv=((value+12)<<4)+(den-1);
+ } else if(exp==0 && value==-1 && den==0) {
+ /* -1 parsed with den=0, encoded as pseudo-fraction -1/1 */
+ ntv=((value+12)<<4);
+ } else {
+ ntv=-1;
}
if(ntv<0 || *numberLimit!=0) {
- fprintf(stderr, "genprops error: unable to encode numeric value nv=%s\n", s);
+ fprintf(stderr, "genprops error: unable to encode numeric value nv=%s\n", original);
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
}
return ntv;
Value(UBLOCK_TAKRI, "Takri Takri"),
};
-static const Value VALUES_ccc[56] = {
+static const Value VALUES_ccc[57] = {
Value(0, "NR Not_Reordered"),
Value(1, "OV Overlay"),
Value(7, "NK Nukta"),
Value(122, "CCC122 CCC122"),
Value(129, "CCC129 CCC129"),
Value(130, "CCC130 CCC130"),
- Value(132, "CCC133 CCC133"),
+ Value(132, "CCC132 CCC132"),
+ Value(133, "CCC133 CCC133"),
Value(200, "ATBL Attached_Below_Left"),
Value(202, "ATB Attached_Below"),
Value(214, "ATA Attached_Above"),
Value(U_LB_CLOSE_PARENTHESIS, "CP Close_Parenthesis"),
Value(U_LB_CONDITIONAL_JAPANESE_STARTER, "CJ Conditional_Japanese_Starter"),
Value(U_LB_HEBREW_LETTER, "HL Hebrew_Letter"),
- Value(U_LB_ZERO_WIDTH_JOINER, "ZJ Zero_Width_Joiner"),
+ Value(U_LB_REGIONAL_INDICATOR, "RI Regional_Indicator"),
};
static const Value VALUES_nt[4] = {
Value(UNORM_MAYBE, "M Maybe"),
};
-static const Value VALUES_lccc[56] = {
+static const Value VALUES_lccc[57] = {
Value(0, "NR Not_Reordered"),
Value(1, "OV Overlay"),
Value(7, "NK Nukta"),
Value(122, "CCC122 CCC122"),
Value(129, "CCC129 CCC129"),
Value(130, "CCC130 CCC130"),
- Value(132, "CCC133 CCC133"),
+ Value(132, "CCC132 CCC132"),
+ Value(133, "CCC133 CCC133"),
Value(200, "ATBL Attached_Below_Left"),
Value(202, "ATB Attached_Below"),
Value(214, "ATA Attached_Above"),
Value(240, "IS Iota_Subscript"),
};
-static const Value VALUES_tccc[56] = {
+static const Value VALUES_tccc[57] = {
Value(0, "NR Not_Reordered"),
Value(1, "OV Overlay"),
Value(7, "NK Nukta"),
Value(122, "CCC122 CCC122"),
Value(129, "CCC129 CCC129"),
Value(130, "CCC130 CCC130"),
- Value(132, "CCC133 CCC133"),
+ Value(132, "CCC132 CCC132"),
+ Value(133, "CCC133 CCC133"),
Value(200, "ATBL Attached_Below_Left"),
Value(202, "ATB Attached_Below"),
Value(214, "ATA Attached_Above"),
Value(240, "IS Iota_Subscript"),
};
-static const Value VALUES_GCB[14] = {
+static const Value VALUES_GCB[13] = {
Value(U_GCB_OTHER, "XX Other"),
Value(U_GCB_CONTROL, "CN Control"),
Value(U_GCB_CR, "CR CR"),
Value(U_GCB_V, "V V"),
Value(U_GCB_SPACING_MARK, "SM SpacingMark"),
Value(U_GCB_PREPEND, "PP Prepend"),
- Value(U_GCB_AFTER_JOINER, "AJ After_Joiner"),
- Value(U_GCB_JOINER, "J Joiner"),
+ Value(U_GCB_REGIONAL_INDICATOR, "RI Regional_Indicator"),
};
static const Value VALUES_SB[15] = {
Value(U_SB_SCONTINUE, "SC SContinue"),
};
-static const Value VALUES_WB[15] = {
+static const Value VALUES_WB[14] = {
Value(U_WB_OTHER, "XX Other"),
Value(U_WB_ALETTER, "LE ALetter"),
Value(U_WB_FORMAT, "FO Format"),
Value(U_WB_LF, "LF LF"),
Value(U_WB_MIDNUMLET, "MB MidNumLet"),
Value(U_WB_NEWLINE, "NL Newline"),
- Value(U_WB_AFTER_JOINER, "AJ After_Joiner"),
- Value(U_WB_JOINER, "J Joiner"),
+ Value(U_WB_REGIONAL_INDICATOR, "RI Regional_Indicator"),
};
static const Value VALUES_gcm[38] = {
Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded"),
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 19),
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 221),
- Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 56),
+ Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 57),
Property(UCHAR_DECOMPOSITION_TYPE, "dt Decomposition_Type", VALUES_dt, 18),
Property(UCHAR_EAST_ASIAN_WIDTH, "ea East_Asian_Width", VALUES_ea, 6),
Property(UCHAR_GENERAL_CATEGORY, "gc General_Category", VALUES_gc, 30),
Property(UCHAR_NFKD_QUICK_CHECK, "NFKD_QC NFKD_Quick_Check", VALUES_NFKD_QC, 2),
Property(UCHAR_NFC_QUICK_CHECK, "NFC_QC NFC_Quick_Check", VALUES_NFC_QC, 3),
Property(UCHAR_NFKC_QUICK_CHECK, "NFKC_QC NFKC_Quick_Check", VALUES_NFKC_QC, 3),
- Property(UCHAR_LEAD_CANONICAL_COMBINING_CLASS, "lccc Lead_Canonical_Combining_Class", VALUES_lccc, 56),
- Property(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, "tccc Trail_Canonical_Combining_Class", VALUES_tccc, 56),
- Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 14),
+ Property(UCHAR_LEAD_CANONICAL_COMBINING_CLASS, "lccc Lead_Canonical_Combining_Class", VALUES_lccc, 57),
+ Property(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, "tccc Trail_Canonical_Combining_Class", VALUES_tccc, 57),
+ Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 13),
Property(UCHAR_SENTENCE_BREAK, "SB Sentence_Break", VALUES_SB, 15),
- Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 15),
+ Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 14),
Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, 38),
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"),
Property(UCHAR_AGE, "age Age"),