From: kosako Date: Thu, 7 Apr 2016 09:36:30 +0000 (+0900) Subject: remove NameCtypeTable in unicode.c X-Git-Tag: v6.0.0^2~65 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b7da55ac9f4186dfce4b3307ecbc3e642a5cf6f4;p=onig remove NameCtypeTable in unicode.c --- diff --git a/src/Makefile.am b/src/Makefile.am index 2154cfc..2cd421b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -11,7 +11,8 @@ libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \ regenc.c regsyntax.c regtrav.c regversion.c st.c \ regposix.c regposerr.c \ - unicode.c ascii.c utf8.c \ + unicode.c unicode_prop.c \ + ascii.c utf8.c \ utf16_be.c utf16_le.c \ utf32_be.c utf32_le.c \ euc_jp.c euc_jp_prop.c \ diff --git a/src/Makefile.in b/src/Makefile.in index 1a2cffc..1f05710 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -126,13 +126,14 @@ libonig_la_LIBADD = am_libonig_la_OBJECTS = regerror.lo regparse.lo regext.lo regcomp.lo \ regexec.lo reggnu.lo regenc.lo regsyntax.lo regtrav.lo \ regversion.lo st.lo regposix.lo regposerr.lo unicode.lo \ - ascii.lo utf8.lo utf16_be.lo utf16_le.lo utf32_be.lo \ - utf32_le.lo euc_jp.lo euc_jp_prop.lo sjis.lo sjis_prop.lo \ - iso8859_1.lo iso8859_2.lo iso8859_3.lo iso8859_4.lo \ - iso8859_5.lo iso8859_6.lo iso8859_7.lo iso8859_8.lo \ - iso8859_9.lo iso8859_10.lo iso8859_11.lo iso8859_13.lo \ - iso8859_14.lo iso8859_15.lo iso8859_16.lo euc_tw.lo euc_kr.lo \ - big5.lo gb18030.lo koi8_r.lo cp1251.lo onig_init.lo + unicode_prop.lo ascii.lo utf8.lo utf16_be.lo utf16_le.lo \ + utf32_be.lo utf32_le.lo euc_jp.lo euc_jp_prop.lo sjis.lo \ + sjis_prop.lo iso8859_1.lo iso8859_2.lo iso8859_3.lo \ + iso8859_4.lo iso8859_5.lo iso8859_6.lo iso8859_7.lo \ + iso8859_8.lo iso8859_9.lo iso8859_10.lo iso8859_11.lo \ + iso8859_13.lo iso8859_14.lo iso8859_15.lo iso8859_16.lo \ + euc_tw.lo euc_kr.lo big5.lo gb18030.lo koi8_r.lo cp1251.lo \ + onig_init.lo libonig_la_OBJECTS = $(am_libonig_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -330,7 +331,8 @@ libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \ regenc.c regsyntax.c regtrav.c regversion.c st.c \ regposix.c regposerr.c \ - unicode.c ascii.c utf8.c \ + unicode.c unicode_prop.c \ + ascii.c utf8.c \ utf16_be.c utf16_le.c \ utf32_be.c utf32_le.c \ euc_jp.c euc_jp_prop.c \ @@ -485,6 +487,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sjis_prop.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/st.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unicode.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unicode_prop.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf16_be.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf16_le.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf32_be.Plo@am__quote@ diff --git a/src/regenc.h b/src/regenc.h index 659e961..2016b5a 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -147,6 +147,7 @@ ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); ONIG_EXTERN struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len)); ONIG_EXTERN struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len)); +ONIG_EXTERN struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len)); /* in enc/unicode.c */ ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); diff --git a/src/unicode.c b/src/unicode.c index 0972139..0f7607c 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -10487,128 +10487,6 @@ static const CaseUnfold_13_Type CaseUnfold_13[] = { }; -static PosixBracketEntryType HashEntryData[] = { - { (UChar* )"NEWLINE", 0, 7 }, - { (UChar* )"Alpha", 1, 5 }, - { (UChar* )"Blank", 2, 5 }, - { (UChar* )"Cntrl", 3, 5 }, - { (UChar* )"Digit", 4, 5 }, - { (UChar* )"Graph", 5, 5 }, - { (UChar* )"Lower", 6, 5 }, - { (UChar* )"Print", 7, 5 }, - { (UChar* )"Punct", 8, 5 }, - { (UChar* )"Space", 9, 5 }, - { (UChar* )"Upper", 10, 5 }, - { (UChar* )"XDigit", 11, 6 }, - { (UChar* )"Word", 12, 4 }, - { (UChar* )"Alnum", 13, 5 }, - { (UChar* )"ASCII", 14, 5 }, - -#ifdef USE_UNICODE_PROPERTIES - { (UChar* )"Any", 15, 3 }, - { (UChar* )"Assigned", 16, 8 }, - { (UChar* )"C", 17, 1 }, - { (UChar* )"Cc", 18, 2 }, - { (UChar* )"Cf", 19, 2 }, - { (UChar* )"Cn", 20, 2 }, - { (UChar* )"Co", 21, 2 }, - { (UChar* )"Cs", 22, 2 }, - { (UChar* )"L", 23, 1 }, - { (UChar* )"Ll", 24, 2 }, - { (UChar* )"Lm", 25, 2 }, - { (UChar* )"Lo", 26, 2 }, - { (UChar* )"Lt", 27, 2 }, - { (UChar* )"Lu", 28, 2 }, - { (UChar* )"M", 29, 1 }, - { (UChar* )"Mc", 30, 2 }, - { (UChar* )"Me", 31, 2 }, - { (UChar* )"Mn", 32, 2 }, - { (UChar* )"N", 33, 1 }, - { (UChar* )"Nd", 34, 2 }, - { (UChar* )"Nl", 35, 2 }, - { (UChar* )"No", 36, 2 }, - { (UChar* )"P", 37, 1 }, - { (UChar* )"Pc", 38, 2 }, - { (UChar* )"Pd", 39, 2 }, - { (UChar* )"Pe", 40, 2 }, - { (UChar* )"Pf", 41, 2 }, - { (UChar* )"Pi", 42, 2 }, - { (UChar* )"Po", 43, 2 }, - { (UChar* )"Ps", 44, 2 }, - { (UChar* )"S", 45, 1 }, - { (UChar* )"Sc", 46, 2 }, - { (UChar* )"Sk", 47, 2 }, - { (UChar* )"Sm", 48, 2 }, - { (UChar* )"So", 49, 2 }, - { (UChar* )"Z", 50, 1 }, - { (UChar* )"Zl", 51, 2 }, - { (UChar* )"Zp", 52, 2 }, - { (UChar* )"Zs", 53, 2 }, - { (UChar* )"Arabic", 54, 6 }, - { (UChar* )"Armenian", 55, 8 }, - { (UChar* )"Bengali", 56, 7 }, - { (UChar* )"Bopomofo", 57, 8 }, - { (UChar* )"Braille", 58, 7 }, - { (UChar* )"Buginese", 59, 8 }, - { (UChar* )"Buhid", 60, 5 }, - { (UChar* )"Canadian_Aboriginal", 61, 19 }, - { (UChar* )"Cherokee", 62, 8 }, - { (UChar* )"Common", 63, 6 }, - { (UChar* )"Coptic", 64, 6 }, - { (UChar* )"Cypriot", 65, 7 }, - { (UChar* )"Cyrillic", 66, 8 }, - { (UChar* )"Deseret", 67, 7 }, - { (UChar* )"Devanagari", 68, 10 }, - { (UChar* )"Ethiopic", 69, 8 }, - { (UChar* )"Georgian", 70, 8 }, - { (UChar* )"Glagolitic", 71, 10 }, - { (UChar* )"Gothic", 72, 6 }, - { (UChar* )"Greek", 73, 5 }, - { (UChar* )"Gujarati", 74, 8 }, - { (UChar* )"Gurmukhi", 75, 8 }, - { (UChar* )"Han", 76, 3 }, - { (UChar* )"Hangul", 77, 6 }, - { (UChar* )"Hanunoo", 78, 7 }, - { (UChar* )"Hebrew", 79, 6 }, - { (UChar* )"Hiragana", 80, 8 }, - { (UChar* )"Inherited", 81, 9 }, - { (UChar* )"Kannada", 82, 7 }, - { (UChar* )"Katakana", 83, 8 }, - { (UChar* )"Kharoshthi", 84, 10 }, - { (UChar* )"Khmer", 85, 5 }, - { (UChar* )"Lao", 86, 3 }, - { (UChar* )"Latin", 87, 5 }, - { (UChar* )"Limbu", 88, 5 }, - { (UChar* )"Linear_B", 89, 8 }, - { (UChar* )"Malayalam", 90, 9 }, - { (UChar* )"Mongolian", 91, 9 }, - { (UChar* )"Myanmar", 92, 7 }, - { (UChar* )"New_Tai_Lue", 93, 11 }, - { (UChar* )"Ogham", 94, 5 }, - { (UChar* )"Old_Italic", 95, 10 }, - { (UChar* )"Old_Persian", 96, 11 }, - { (UChar* )"Oriya", 97, 5 }, - { (UChar* )"Osmanya", 98, 7 }, - { (UChar* )"Runic", 99, 5 }, - { (UChar* )"Shavian", 100, 7 }, - { (UChar* )"Sinhala", 101, 7 }, - { (UChar* )"Syloti_Nagri", 102, 12 }, - { (UChar* )"Syriac", 103, 6 }, - { (UChar* )"Tagalog", 104, 7 }, - { (UChar* )"Tagbanwa", 105, 8 }, - { (UChar* )"Tai_Le", 106, 6 }, - { (UChar* )"Tamil", 107, 5 }, - { (UChar* )"Telugu", 108, 6 }, - { (UChar* )"Thaana", 109, 6 }, - { (UChar* )"Thai", 110, 4 }, - { (UChar* )"Tibetan", 111, 7 }, - { (UChar* )"Tifinagh", 112, 8 }, - { (UChar* )"Ugaritic", 113, 8 }, - { (UChar* )"Yi", 114, 2 }, -#endif /* USE_UNICODE_PROPERTIES */ - { (UChar* )NULL, -1, 0 } -}; - #ifdef USE_UNICODE_PROPERTIES #define CODE_RANGES_NUM 115 #else @@ -10780,32 +10658,14 @@ onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, #define PROPERTY_NAME_MAX_SIZE 20 -static st_table* NameCtypeTable; - -static int -init_name_ctype_table(void) -{ - PosixBracketEntryType *pb; - - NameCtypeTable = onig_st_init_strend_table_with_size(100); - if (ONIG_IS_NULL(NameCtypeTable)) return ONIGERR_MEMORY; - - for (pb = HashEntryData; ONIG_IS_NOT_NULL(pb->name); pb++) { - onig_st_insert_strend(NameCtypeTable, pb->name, pb->name + pb->len, - (st_data_t )pb->ctype); - } - - return 0; -} - extern int onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) { int len; - hash_data_type ctype; - UChar buf[PROPERTY_NAME_MAX_SIZE]; UChar *p; OnigCodePoint code; + struct PropertyNameCtype* pc; + char buf[PROPERTY_NAME_MAX_SIZE]; p = name; len = 0; @@ -10814,7 +10674,7 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end if (code >= 0x80) return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - buf[len++] = (UChar )code; + buf[len++] = (char )code; if (len >= PROPERTY_NAME_MAX_SIZE) return ONIGERR_INVALID_CHAR_PROPERTY_NAME; @@ -10823,11 +10683,17 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end buf[len] = 0; - if (onig_st_lookup_strend(NameCtypeTable, buf, buf + len, &ctype) == 0) { - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + pc = unicode_lookup_property_name(buf, len); + if (pc != 0) { +#ifndef USE_UNICODE_PROPERTIES + if (pc->ctype > ONIGENC_MAX_STD_CTYPE) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +#endif + + return pc->ctype; } - return (int )ctype; + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; } @@ -10877,8 +10743,6 @@ static st_table* Unfold3Table; extern void onigenc_end_unicode(void) { - if (NameCtypeTable != 0) st_free_table(NameCtypeTable); - if (FoldTable != 0) st_free_table(FoldTable); if (Unfold1Table != 0) st_free_table(Unfold1Table); if (Unfold2Table != 0) st_free_table(Unfold2Table); @@ -11350,10 +11214,6 @@ onigenc_unicode_initialize(void) if (unicode_inited != 0) return 0; - r = init_name_ctype_table(); - if (r != 0) - return r; - r = init_case_fold_table(); if (r != 0) return r; diff --git a/src/unicode_prop.c b/src/unicode_prop.c new file mode 100644 index 0000000..d1d9052 --- /dev/null +++ b/src/unicode_prop.c @@ -0,0 +1,414 @@ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -pt -T -L ANSI-C -N unicode_lookup_property_name --output-file unicode_prop.c unicode_prop.gperf */ +/* Computed positions: -k'1,3,$' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to ." +#endif + +#line 1 "unicode_prop.gperf" + +#include +#include "regenc.h" + +#define TOTAL_KEYWORDS 115 +#define MIN_WORD_LENGTH 1 +#define MAX_WORD_LENGTH 19 +#define MIN_HASH_VALUE 1 +#define MAX_HASH_VALUE 251 +/* maximum key range = 251, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (register const char *str, register unsigned int len) +{ + static unsigned char asso_values[] = + { + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 20, 5, 5, 100, 0, + 252, 0, 105, 0, 252, 85, 50, 20, 120, 65, + 0, 252, 5, 10, 0, 60, 252, 10, 0, 120, + 125, 252, 252, 252, 252, 252, 252, 60, 252, 15, + 2, 70, 47, 100, 0, 40, 7, 90, 65, 100, + 25, 0, 30, 5, 35, 7, 15, 75, 5, 5, + 15, 0, 252, 0, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 252, 252, 252 + }; + register int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[2]+2]; + /*FALLTHROUGH*/ + case 2: + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; +} + +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +struct PropertyNameCtype * +unicode_lookup_property_name (register const char *str, register unsigned int len) +{ + static struct PropertyNameCtype wordlist[] = + { + {""}, +#line 49 "unicode_prop.gperf" + {"P", 37}, +#line 55 "unicode_prop.gperf" + {"Po", 43}, + {""}, +#line 51 "unicode_prop.gperf" + {"Pd", 39}, + {""}, {""}, +#line 33 "unicode_prop.gperf" + {"Co", 21}, +#line 124 "unicode_prop.gperf" + {"Tifinagh", 112}, +#line 56 "unicode_prop.gperf" + {"Ps", 44}, + {""}, +#line 29 "unicode_prop.gperf" + {"C", 17}, +#line 61 "unicode_prop.gperf" + {"So", 49}, + {""}, +#line 34 "unicode_prop.gperf" + {"Cs", 22}, + {""}, {""}, +#line 50 "unicode_prop.gperf" + {"Pc", 38}, +#line 93 "unicode_prop.gperf" + {"Inherited", 81}, +#line 72 "unicode_prop.gperf" + {"Buhid", 60}, +#line 17 "unicode_prop.gperf" + {"Graph", 5}, +#line 57 "unicode_prop.gperf" + {"S", 45}, +#line 30 "unicode_prop.gperf" + {"Cc", 18}, +#line 27 "unicode_prop.gperf" + {"Any", 15}, + {""}, +#line 26 "unicode_prop.gperf" + {"ASCII", 14}, +#line 84 "unicode_prop.gperf" + {"Gothic", 72}, +#line 58 "unicode_prop.gperf" + {"Sc", 46}, + {""}, {""}, +#line 81 "unicode_prop.gperf" + {"Ethiopic", 69}, +#line 24 "unicode_prop.gperf" + {"Word", 12}, +#line 32 "unicode_prop.gperf" + {"Cn", 20}, + {""}, +#line 123 "unicode_prop.gperf" + {"Tibetan", 111}, + {""}, +#line 75 "unicode_prop.gperf" + {"Common", 63}, +#line 42 "unicode_prop.gperf" + {"Mc", 30}, +#line 82 "unicode_prop.gperf" + {"Georgian", 70}, + {""}, +#line 83 "unicode_prop.gperf" + {"Glagolitic", 71}, +#line 41 "unicode_prop.gperf" + {"M", 29}, +#line 54 "unicode_prop.gperf" + {"Pi", 42}, +#line 78 "unicode_prop.gperf" + {"Cyrillic", 66}, + {""}, {""}, +#line 115 "unicode_prop.gperf" + {"Syriac", 103}, +#line 44 "unicode_prop.gperf" + {"Mn", 32}, +#line 69 "unicode_prop.gperf" + {"Bopomofo", 57}, +#line 53 "unicode_prop.gperf" + {"Pf", 41}, +#line 20 "unicode_prop.gperf" + {"Punct", 8}, + {""}, +#line 38 "unicode_prop.gperf" + {"Lo", 26}, +#line 67 "unicode_prop.gperf" + {"Armenian", 55}, +#line 31 "unicode_prop.gperf" + {"Cf", 19}, +#line 111 "unicode_prop.gperf" + {"Runic", 99}, +#line 66 "unicode_prop.gperf" + {"Arabic", 54}, +#line 112 "unicode_prop.gperf" + {"Shavian", 100}, +#line 98 "unicode_prop.gperf" + {"Lao", 86}, +#line 122 "unicode_prop.gperf" + {"Thai", 110}, + {""}, +#line 76 "unicode_prop.gperf" + {"Coptic", 64}, +#line 77 "unicode_prop.gperf" + {"Cypriot", 65}, +#line 87 "unicode_prop.gperf" + {"Gurmukhi", 75}, + {""}, {""}, {""}, +#line 39 "unicode_prop.gperf" + {"Lt", 27}, + {""}, {""}, +#line 119 "unicode_prop.gperf" + {"Tamil", 107}, + {""}, +#line 52 "unicode_prop.gperf" + {"Pe", 40}, + {""}, {""}, {""}, {""}, +#line 104 "unicode_prop.gperf" + {"Myanmar", 92}, + {""}, {""}, +#line 15 "unicode_prop.gperf" + {"Cntrl", 3}, +#line 121 "unicode_prop.gperf" + {"Thaana", 109}, +#line 68 "unicode_prop.gperf" + {"Bengali", 56}, + {""}, +#line 103 "unicode_prop.gperf" + {"Mongolian", 91}, +#line 99 "unicode_prop.gperf" + {"Latin", 87}, + {""}, +#line 114 "unicode_prop.gperf" + {"Syloti_Nagri", 102}, + {""}, {""}, +#line 18 "unicode_prop.gperf" + {"Lower", 6}, + {""}, +#line 43 "unicode_prop.gperf" + {"Me", 31}, +#line 101 "unicode_prop.gperf" + {"Linear_B", 89}, + {""}, {""}, {""}, +#line 70 "unicode_prop.gperf" + {"Braille", 58}, +#line 125 "unicode_prop.gperf" + {"Ugaritic", 113}, + {""}, +#line 21 "unicode_prop.gperf" + {"Space", 9}, +#line 35 "unicode_prop.gperf" + {"L", 23}, +#line 59 "unicode_prop.gperf" + {"Sk", 47}, + {""}, {""}, +#line 28 "unicode_prop.gperf" + {"Assigned", 16}, +#line 120 "unicode_prop.gperf" + {"Telugu", 108}, +#line 113 "unicode_prop.gperf" + {"Sinhala", 101}, +#line 117 "unicode_prop.gperf" + {"Tagbanwa", 105}, + {""}, +#line 19 "unicode_prop.gperf" + {"Print", 7}, +#line 23 "unicode_prop.gperf" + {"XDigit", 11}, +#line 60 "unicode_prop.gperf" + {"Sm", 48}, +#line 86 "unicode_prop.gperf" + {"Gujarati", 74}, + {""}, +#line 14 "unicode_prop.gperf" + {"Blank", 2}, + {""}, +#line 36 "unicode_prop.gperf" + {"Ll", 24}, +#line 91 "unicode_prop.gperf" + {"Hebrew", 79}, +#line 73 "unicode_prop.gperf" + {"Canadian_Aboriginal", 61}, +#line 13 "unicode_prop.gperf" + {"Alpha", 1}, + {""}, +#line 48 "unicode_prop.gperf" + {"No", 36}, +#line 71 "unicode_prop.gperf" + {"Buginese", 59}, +#line 46 "unicode_prop.gperf" + {"Nd", 34}, +#line 97 "unicode_prop.gperf" + {"Khmer", 85}, + {""}, +#line 40 "unicode_prop.gperf" + {"Lu", 28}, + {""}, {""}, +#line 100 "unicode_prop.gperf" + {"Limbu", 88}, + {""}, +#line 110 "unicode_prop.gperf" + {"Osmanya", 98}, + {""}, +#line 65 "unicode_prop.gperf" + {"Zs", 53}, +#line 22 "unicode_prop.gperf" + {"Upper", 10}, + {""}, +#line 107 "unicode_prop.gperf" + {"Old_Italic", 95}, + {""}, {""}, {""}, {""}, +#line 90 "unicode_prop.gperf" + {"Hanunoo", 78}, + {""}, {""}, {""}, {""}, +#line 116 "unicode_prop.gperf" + {"Tagalog", 104}, +#line 108 "unicode_prop.gperf" + {"Old_Persian", 96}, + {""}, +#line 96 "unicode_prop.gperf" + {"Kharoshthi", 84}, + {""}, +#line 37 "unicode_prop.gperf" + {"Lm", 25}, + {""}, +#line 102 "unicode_prop.gperf" + {"Malayalam", 90}, +#line 25 "unicode_prop.gperf" + {"Alnum", 13}, + {""}, +#line 64 "unicode_prop.gperf" + {"Zp", 52}, +#line 95 "unicode_prop.gperf" + {"Katakana", 83}, + {""}, +#line 16 "unicode_prop.gperf" + {"Digit", 4}, + {""}, +#line 126 "unicode_prop.gperf" + {"Yi", 114}, +#line 88 "unicode_prop.gperf" + {"Han", 76}, + {""}, +#line 80 "unicode_prop.gperf" + {"Devanagari", 68}, +#line 118 "unicode_prop.gperf" + {"Tai_Le", 106}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 106 "unicode_prop.gperf" + {"Ogham", 94}, + {""}, {""}, {""}, {""}, +#line 94 "unicode_prop.gperf" + {"Kannada", 82}, +#line 74 "unicode_prop.gperf" + {"Cherokee", 62}, + {""}, {""}, {""}, +#line 47 "unicode_prop.gperf" + {"Nl", 35}, +#line 92 "unicode_prop.gperf" + {"Hiragana", 80}, + {""}, {""}, {""}, +#line 63 "unicode_prop.gperf" + {"Zl", 51}, + {""}, {""}, +#line 85 "unicode_prop.gperf" + {"Greek", 73}, + {""}, +#line 79 "unicode_prop.gperf" + {"Deseret", 67}, + {""}, {""}, {""}, +#line 105 "unicode_prop.gperf" + {"New_Tai_Lue", 93}, + {""}, {""}, {""}, {""}, +#line 89 "unicode_prop.gperf" + {"Hangul", 77}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 109 "unicode_prop.gperf" + {"Oriya", 97}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 45 "unicode_prop.gperf" + {"N", 33}, + {""}, {""}, {""}, {""}, {""}, +#line 12 "unicode_prop.gperf" + {"NEWLINE", 0}, + {""}, {""}, {""}, +#line 62 "unicode_prop.gperf" + {"Z", 50} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &wordlist[key]; + } + } + return 0; +} diff --git a/src/unicode_prop.gperf b/src/unicode_prop.gperf new file mode 100644 index 0000000..3b36a02 --- /dev/null +++ b/src/unicode_prop.gperf @@ -0,0 +1,126 @@ +%{ +#include +#include "regenc.h" +%} + +struct PropertyNameCtype { + char *name; + int ctype; +}; + +%% +NEWLINE, 0 +Alpha, 1 +Blank, 2 +Cntrl, 3 +Digit, 4 +Graph, 5 +Lower, 6 +Print, 7 +Punct, 8 +Space, 9 +Upper, 10 +XDigit, 11 +Word, 12 +Alnum, 13 +ASCII, 14 +Any, 15 +Assigned, 16 +C, 17 +Cc, 18 +Cf, 19 +Cn, 20 +Co, 21 +Cs, 22 +L, 23 +Ll, 24 +Lm, 25 +Lo, 26 +Lt, 27 +Lu, 28 +M, 29 +Mc, 30 +Me, 31 +Mn, 32 +N, 33 +Nd, 34 +Nl, 35 +No, 36 +P, 37 +Pc, 38 +Pd, 39 +Pe, 40 +Pf, 41 +Pi, 42 +Po, 43 +Ps, 44 +S, 45 +Sc, 46 +Sk, 47 +Sm, 48 +So, 49 +Z, 50 +Zl, 51 +Zp, 52 +Zs, 53 +Arabic, 54 +Armenian, 55 +Bengali, 56 +Bopomofo, 57 +Braille, 58 +Buginese, 59 +Buhid, 60 +Canadian_Aboriginal, 61 +Cherokee, 62 +Common, 63 +Coptic, 64 +Cypriot, 65 +Cyrillic, 66 +Deseret, 67 +Devanagari, 68 +Ethiopic, 69 +Georgian, 70 +Glagolitic, 71 +Gothic, 72 +Greek, 73 +Gujarati, 74 +Gurmukhi, 75 +Han, 76 +Hangul, 77 +Hanunoo, 78 +Hebrew, 79 +Hiragana, 80 +Inherited, 81 +Kannada, 82 +Katakana, 83 +Kharoshthi, 84 +Khmer, 85 +Lao, 86 +Latin, 87 +Limbu, 88 +Linear_B, 89 +Malayalam, 90 +Mongolian, 91 +Myanmar, 92 +New_Tai_Lue, 93 +Ogham, 94 +Old_Italic, 95 +Old_Persian, 96 +Oriya, 97 +Osmanya, 98 +Runic, 99 +Shavian, 100 +Sinhala, 101 +Syloti_Nagri, 102 +Syriac, 103 +Tagalog, 104 +Tagbanwa, 105 +Tai_Le, 106 +Tamil, 107 +Telugu, 108 +Thaana, 109 +Thai, 110 +Tibetan, 111 +Tifinagh, 112 +Ugaritic, 113 +Yi, 114