From: kosako Date: Thu, 7 Apr 2016 04:53:07 +0000 (+0900) Subject: remove initialize functions from EUC_JP and Shift_JIS by using gperf X-Git-Tag: v6.0.0^2~68 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=51afc7270be83eb594064b7a287254a564de8014;p=onig remove initialize functions from EUC_JP and Shift_JIS by using gperf --- diff --git a/src/Makefile.am b/src/Makefile.am index 68eb4b6..2154cfc 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -14,7 +14,9 @@ libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ unicode.c ascii.c utf8.c \ utf16_be.c utf16_le.c \ utf32_be.c utf32_le.c \ - euc_jp.c sjis.c iso8859_1.c \ + euc_jp.c euc_jp_prop.c \ + sjis.c sjis_prop.c \ + iso8859_1.c \ iso8859_2.c iso8859_3.c \ iso8859_4.c iso8859_5.c \ iso8859_6.c iso8859_7.c \ diff --git a/src/Makefile.in b/src/Makefile.in index 86d2c6f..1a2cffc 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -127,12 +127,12 @@ am_libonig_la_OBJECTS = regerror.lo regparse.lo regext.lo regcomp.lo \ regexec.lo reggnu.lo regenc.lo regsyntax.lo regtrav.lo \ regversion.lo st.lo regposix.lo regposerr.lo unicode.lo \ ascii.lo utf8.lo utf16_be.lo utf16_le.lo utf32_be.lo \ - utf32_le.lo euc_jp.lo sjis.lo iso8859_1.lo iso8859_2.lo \ - iso8859_3.lo iso8859_4.lo iso8859_5.lo iso8859_6.lo \ - iso8859_7.lo iso8859_8.lo iso8859_9.lo iso8859_10.lo \ - iso8859_11.lo iso8859_13.lo iso8859_14.lo iso8859_15.lo \ - iso8859_16.lo euc_tw.lo euc_kr.lo big5.lo gb18030.lo koi8_r.lo \ - cp1251.lo onig_init.lo + utf32_le.lo euc_jp.lo euc_jp_prop.lo sjis.lo sjis_prop.lo \ + iso8859_1.lo iso8859_2.lo iso8859_3.lo iso8859_4.lo \ + iso8859_5.lo iso8859_6.lo iso8859_7.lo iso8859_8.lo \ + iso8859_9.lo iso8859_10.lo iso8859_11.lo iso8859_13.lo \ + iso8859_14.lo iso8859_15.lo iso8859_16.lo euc_tw.lo euc_kr.lo \ + big5.lo gb18030.lo koi8_r.lo cp1251.lo onig_init.lo libonig_la_OBJECTS = $(am_libonig_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -333,7 +333,9 @@ 
libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ unicode.c ascii.c utf8.c \ utf16_be.c utf16_le.c \ utf32_be.c utf32_le.c \ - euc_jp.c sjis.c iso8859_1.c \ + euc_jp.c euc_jp_prop.c \ + sjis.c sjis_prop.c \ + iso8859_1.c \ iso8859_2.c iso8859_3.c \ iso8859_4.c iso8859_5.c \ iso8859_6.c iso8859_7.c \ @@ -446,6 +448,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/big5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cp1251.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/euc_jp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/euc_jp_prop.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/euc_kr.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/euc_tw.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gb18030.Plo@am__quote@ @@ -479,6 +482,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regtrav.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regversion.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sjis.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sjis_prop.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/st.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unicode.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf16_be.Plo@am__quote@ diff --git a/src/euc_jp.c b/src/euc_jp.c index 0fb2176..f0a09d9 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -173,11 +173,6 @@ is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) } -static const OnigCodePoint** PropertyList; -static int PropertyListNum; -static int PropertyListSize; -static hash_table_type* PropertyNameTable; - static const OnigCodePoint CR_Hiragana[] = { 1, 0xa4a1, 0xa4f3 @@ -190,37 +185,27 @@ static const OnigCodePoint CR_Katakana[] = { 0xaab1, 0xaadd }; /* CR_Katakana */ -static int -init_property_list(void) -{ - int r; - - PROPERTY_LIST_ADD_PROP("Hiragana", 
CR_Hiragana); - PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); - - end: - return r; -} - -static int initialize(void) -{ - int r; - - /* fprintf(stderr, "euc_jp: initialize called.\n"); */ - r = init_property_list(); - return r; -} +static const OnigCodePoint* PropertyList[] = { + CR_Hiragana, + CR_Katakana +}; static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { - hash_data_type ctype; - - if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { - return onigenc_minimum_property_name_to_ctype(enc, p, end); + struct PropertyNameCtype* pc; + int len = end - p; + char q[32]; /* NUL-terminated copy of the name: the gperf lookup compares with strcmp() */ + + if (len < sizeof(q) - 1) { + xmemcpy(q, p, (size_t )len); + q[len] = '\0'; + pc = euc_jp_lookup_property_name(q, len); + if (pc != 0) + return pc->ctype; } - return (int )ctype; + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; } static int @@ -237,7 +222,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype) } else { ctype -= (ONIGENC_MAX_STD_CTYPE + 1); - if (ctype >= (unsigned int )PropertyListNum) + if (ctype >= (unsigned int )(sizeof(PropertyList)/sizeof(PropertyList[0]))) return ONIGERR_TYPE_BUG; return onig_is_in_code_range((UChar* )PropertyList[ctype], code); @@ -257,7 +242,7 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, *sb_out = 0x80; ctype -= (ONIGENC_MAX_STD_CTYPE + 1); - if (ctype >= (OnigCtype )PropertyListNum) + if (ctype >= (OnigCtype )(sizeof(PropertyList)/sizeof(PropertyList[0]))) return ONIGERR_TYPE_BUG; *ranges = PropertyList[ctype]; @@ -283,5 +268,5 @@ OnigEncodingType OnigEncodingEUC_JP = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, - initialize /* init */ + NULL /* init */ }; diff --git a/src/euc_jp_prop.c b/src/euc_jp_prop.c new file mode 100644 index 0000000..8436fa2 --- /dev/null +++ b/src/euc_jp_prop.c @@ -0,0 +1,158 @@ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -pt -T -L ANSI-C -N euc_jp_lookup_property_name --output-file euc_jp_prop.c euc_jp_prop.gperf 
*/ +/* Computed positions: -k'1,3' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." 
+#endif + +#line 1 "euc_jp_prop.gperf" + +#include <string.h> +#include "regenc.h" + +#define TOTAL_KEYWORDS 16 +#define MIN_WORD_LENGTH 4 +#define MAX_WORD_LENGTH 8 +#define MIN_HASH_VALUE 4 +#define MAX_HASH_VALUE 55 +/* maximum key range = 52, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (register const char *str, register unsigned int len) +{ + static unsigned char asso_values[] = + { + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 0, 3, 10, 25, 56, + 56, 30, 0, 56, 56, 0, 20, 56, 56, 56, + 15, 56, 56, 20, 56, 30, 56, 0, 0, 56, + 56, 56, 56, 56, 56, 56, 56, 15, 56, 56, + 56, 56, 56, 25, 56, 10, 56, 56, 56, 56, + 5, 56, 0, 56, 0, 56, 5, 56, 56, 20, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56 + }; + return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]]; +} + +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +struct PropertyNameCtype * +euc_jp_lookup_property_name (register const char *str, register unsigned int len) +{ + static struct PropertyNameCtype wordlist[] = + { + {""}, {""}, {""}, {""}, +#line 23 "euc_jp_prop.gperf" + {"Word", 
12}, +#line 12 "euc_jp_prop.gperf" + {"Alpha", 1}, + {""}, {""}, +#line 26 "euc_jp_prop.gperf" + {"Hiragana", 15}, + {""}, +#line 24 "euc_jp_prop.gperf" + {"Alnum", 13}, + {""}, {""}, +#line 27 "euc_jp_prop.gperf" + {"Katakana", 16}, + {""}, +#line 25 "euc_jp_prop.gperf" + {"ASCII", 14}, +#line 22 "euc_jp_prop.gperf" + {"XDigit", 11}, + {""}, {""}, {""}, +#line 14 "euc_jp_prop.gperf" + {"Cntrl", 3}, + {""}, {""}, +#line 13 "euc_jp_prop.gperf" + {"Blank", 2}, + {""}, +#line 19 "euc_jp_prop.gperf" + {"Punct", 8}, + {""}, {""}, {""}, {""}, +#line 18 "euc_jp_prop.gperf" + {"Print", 7}, + {""}, {""}, {""}, {""}, +#line 21 "euc_jp_prop.gperf" + {"Upper", 10}, + {""}, {""}, {""}, {""}, +#line 20 "euc_jp_prop.gperf" + {"Space", 9}, + {""}, {""}, {""}, {""}, +#line 17 "euc_jp_prop.gperf" + {"Lower", 6}, + {""}, {""}, {""}, {""}, +#line 16 "euc_jp_prop.gperf" + {"Graph", 5}, + {""}, {""}, {""}, {""}, +#line 15 "euc_jp_prop.gperf" + {"Digit", 4} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &wordlist[key]; + } + } + return 0; +} diff --git a/src/euc_jp_prop.gperf b/src/euc_jp_prop.gperf new file mode 100644 index 0000000..2cec8cf --- /dev/null +++ b/src/euc_jp_prop.gperf @@ -0,0 +1,27 @@ +%{ +#include <string.h> +#include "regenc.h" +%} + +struct PropertyNameCtype { + char *name; + int ctype; +}; + +%% +Alpha, 1 +Blank, 2 +Cntrl, 3 +Digit, 4 +Graph, 5 +Lower, 6 +Print, 7 +Punct, 8 +Space, 9 +Upper, 10 +XDigit, 11 +Word, 12 +Alnum, 13 +ASCII, 14 +Hiragana, 15 +Katakana, 16 diff --git a/src/regenc.c b/src/regenc.c index e4224a2..e48010c 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -836,54 +836,3 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, } return 0; } - -/* Property management */ -static int -resize_property_list(int new_size, const 
OnigCodePoint*** plist, int* psize) -{ - int size; - const OnigCodePoint **list = *plist; - - size = sizeof(OnigCodePoint*) * new_size; - if (IS_NULL(list)) { - list = (const OnigCodePoint** )xmalloc(size); - } - else { - list = (const OnigCodePoint** )xrealloc((void* )list, size); - } - - if (IS_NULL(list)) return ONIGERR_MEMORY; - - *plist = list; - *psize = new_size; - - return 0; -} - -extern int -onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, - hash_table_type **table, const OnigCodePoint*** plist, int *pnum, - int *psize) -{ -#define PROP_INIT_SIZE 16 - - int r; - - if (*psize <= *pnum) { - int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2); - r = resize_property_list(new_size, plist, psize); - if (r != 0) return r; - } - - (*plist)[*pnum] = prop; - - if (ONIG_IS_NULL(*table)) { - *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); - if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; - } - - *pnum = *pnum + 1; - onig_st_insert_strend(*table, name, name + strlen((char* )name), - (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); - return 0; -} diff --git a/src/regenc.h b/src/regenc.h index ced7661..659e961 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -102,6 +102,10 @@ typedef struct { short int len; } PosixBracketEntryType; +struct PropertyNameCtype { + char *name; + int ctype; +}; /* #define USE_CRNL_AS_LINE_TERMINATOR */ #define USE_UNICODE_PROPERTIES @@ -141,7 +145,8 @@ ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint co ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); - +ONIG_EXTERN struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len)); +ONIG_EXTERN struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len)); /* in enc/unicode.c */ ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); diff --git a/src/regint.h b/src/regint.h index 69c6e88..7a39839 100644 --- a/src/regint.h +++ b/src/regint.h @@ -767,15 +767,6 @@ extern hash_table_type* onig_st_init_strend_table_with_size P_((int size)); extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value)); -/* encoding property management */ -#define PROPERTY_LIST_ADD_PROP(Name, CR) \ - r = onigenc_property_list_add_property((UChar* )Name, CR,\ - &PropertyNameTable, &PropertyList, &PropertyListNum,\ - &PropertyListSize);\ - if (r != 0) goto end - -extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize)); - typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); #endif /* REGINT_H */ diff --git 
a/src/sjis.c b/src/sjis.c index 58d216b..bda8247 100644 --- a/src/sjis.c +++ b/src/sjis.c @@ -206,11 +206,6 @@ is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) } -static const OnigCodePoint** PropertyList; -static int PropertyListNum; -static int PropertyListSize; -static hash_table_type* PropertyNameTable; - static const OnigCodePoint CR_Hiragana[] = { 1, 0x829f, 0x82f1 @@ -224,38 +219,28 @@ static const OnigCodePoint CR_Katakana[] = { 0x8380, 0x8396, }; /* CR_Katakana */ -static int -init_property_list(void) -{ - int r; - - PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); - PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); - - end: - return r; -} - -static int initialize(void) -{ - int r; - - /* fprintf(stderr, "sjis: initialize called.\n"); */ - r = init_property_list(); - return r; -} +static const OnigCodePoint* PropertyList[] = { + CR_Hiragana, + CR_Katakana +}; static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { - hash_data_type ctype; - - if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { - return onigenc_minimum_property_name_to_ctype(enc, p, end); + struct PropertyNameCtype* pc; + int len = end - p; + char q[32]; /* NUL-terminated copy of the name: the gperf lookup compares with strcmp() */ + + if (len < sizeof(q) - 1) { + xmemcpy(q, p, (size_t )len); + q[len] = '\0'; + pc = sjis_lookup_property_name(q, len); /* use the Shift_JIS table from sjis_prop.c, not the EUC-JP one */ + if (pc != 0) + return pc->ctype; } - return (int )ctype; + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; } static int @@ -272,7 +257,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype) } else { ctype -= (ONIGENC_MAX_STD_CTYPE + 1); - if (ctype >= (unsigned int )PropertyListNum) + if (ctype >= (unsigned int )(sizeof(PropertyList)/sizeof(PropertyList[0]))) return ONIGERR_TYPE_BUG; return onig_is_in_code_range((UChar* )PropertyList[ctype], code); @@ -292,7 +277,7 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, *sb_out = 0x80; ctype -= (ONIGENC_MAX_STD_CTYPE + 1); - if (ctype >= (OnigCtype )PropertyListNum) + if (ctype >= (OnigCtype 
)(sizeof(PropertyList)/sizeof(PropertyList[0]))) return ONIGERR_TYPE_BUG; *ranges = PropertyList[ctype]; @@ -317,5 +302,5 @@ OnigEncodingType OnigEncodingSJIS = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, - initialize /* init */ + NULL /* init */ }; diff --git a/src/sjis_prop.c b/src/sjis_prop.c new file mode 100644 index 0000000..83a7b45 --- /dev/null +++ b/src/sjis_prop.c @@ -0,0 +1,158 @@ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -pt -T -L ANSI-C -N sjis_lookup_property_name --output-file sjis_prop.c sjis_prop.gperf */ +/* Computed positions: -k'1,3' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." 
+#endif + +#line 1 "sjis_prop.gperf" + +#include <string.h> +#include "regenc.h" + +#define TOTAL_KEYWORDS 16 +#define MIN_WORD_LENGTH 4 +#define MAX_WORD_LENGTH 8 +#define MIN_HASH_VALUE 4 +#define MAX_HASH_VALUE 55 +/* maximum key range = 52, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (register const char *str, register unsigned int len) +{ + static unsigned char asso_values[] = + { + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 0, 3, 10, 25, 56, + 56, 30, 0, 56, 56, 0, 20, 56, 56, 56, + 15, 56, 56, 20, 56, 30, 56, 0, 0, 56, + 56, 56, 56, 56, 56, 56, 56, 15, 56, 56, + 56, 56, 56, 25, 56, 10, 56, 56, 56, 56, + 5, 56, 0, 56, 0, 56, 5, 56, 56, 20, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56 + }; + return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]]; +} + +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +struct PropertyNameCtype * +sjis_lookup_property_name (register const char *str, register unsigned int len) +{ + static struct PropertyNameCtype wordlist[] = + { + {""}, {""}, {""}, {""}, +#line 23 "sjis_prop.gperf" + {"Word", 12}, 
+#line 12 "sjis_prop.gperf" + {"Alpha", 1}, + {""}, {""}, +#line 26 "sjis_prop.gperf" + {"Hiragana", 15}, + {""}, +#line 24 "sjis_prop.gperf" + {"Alnum", 13}, + {""}, {""}, +#line 27 "sjis_prop.gperf" + {"Katakana", 16}, + {""}, +#line 25 "sjis_prop.gperf" + {"ASCII", 14}, +#line 22 "sjis_prop.gperf" + {"XDigit", 11}, + {""}, {""}, {""}, +#line 14 "sjis_prop.gperf" + {"Cntrl", 3}, + {""}, {""}, +#line 13 "sjis_prop.gperf" + {"Blank", 2}, + {""}, +#line 19 "sjis_prop.gperf" + {"Punct", 8}, + {""}, {""}, {""}, {""}, +#line 18 "sjis_prop.gperf" + {"Print", 7}, + {""}, {""}, {""}, {""}, +#line 21 "sjis_prop.gperf" + {"Upper", 10}, + {""}, {""}, {""}, {""}, +#line 20 "sjis_prop.gperf" + {"Space", 9}, + {""}, {""}, {""}, {""}, +#line 17 "sjis_prop.gperf" + {"Lower", 6}, + {""}, {""}, {""}, {""}, +#line 16 "sjis_prop.gperf" + {"Graph", 5}, + {""}, {""}, {""}, {""}, +#line 15 "sjis_prop.gperf" + {"Digit", 4} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &wordlist[key]; + } + } + return 0; +} diff --git a/src/sjis_prop.gperf b/src/sjis_prop.gperf new file mode 100644 index 0000000..2cec8cf --- /dev/null +++ b/src/sjis_prop.gperf @@ -0,0 +1,27 @@ +%{ +#include <string.h> +#include "regenc.h" +%} + +struct PropertyNameCtype { + char *name; + int ctype; +}; + +%% +Alpha, 1 +Blank, 2 +Cntrl, 3 +Digit, 4 +Graph, 5 +Lower, 6 +Print, 7 +Punct, 8 +Space, 9 +Upper, 10 +XDigit, 11 +Word, 12 +Alnum, 13 +ASCII, 14 +Hiragana, 15 +Katakana, 16