From: Dmitry Stogov Date: Wed, 11 Mar 2015 12:02:47 +0000 (+0300) Subject: Improved table search X-Git-Tag: PRE_PHP7_NSAPI_REMOVAL~704 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=781d1c400c195e9e32edd23504b34069ea0e50cb;p=php Improved table search --- diff --git a/ext/standard/html.c b/ext/standard/html.c index c8e36b8d45..cce1b73be8 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -451,8 +451,9 @@ det_charset: int found = 0; /* now walk the charset map and look for the codeset */ - for (i = 0; charset_map[i].codeset; i++) { - if (len == strlen(charset_map[i].codeset) && strncasecmp(charset_hint, charset_map[i].codeset, len) == 0) { + for (i = 0; i < sizeof(charset_map)/sizeof(charset_map[0]); i++) { + if (len == charset_map[i].codeset_len && + zend_binary_strcasecmp(charset_hint, len, charset_map[i].codeset, len) == 0) { charset = charset_map[i].charset; found = 1; break; diff --git a/ext/standard/html_tables.h b/ext/standard/html_tables.h index 151cc21b6f..378a801f7d 100644 --- a/ext/standard/html_tables.h +++ b/ext/standard/html_tables.h @@ -39,42 +39,42 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251, static const struct { const char *codeset; + uint32_t codeset_len; enum entity_charset charset; } charset_map[] = { - { "ISO-8859-1", cs_8859_1 }, - { "ISO8859-1", cs_8859_1 }, - { "ISO-8859-15", cs_8859_15 }, - { "ISO8859-15", cs_8859_15 }, - { "utf-8", cs_utf_8 }, - { "cp1252", cs_cp1252 }, - { "Windows-1252", cs_cp1252 }, - { "1252", cs_cp1252 }, - { "BIG5", cs_big5 }, - { "950", cs_big5 }, - { "GB2312", cs_gb2312 }, - { "936", cs_gb2312 }, - { "BIG5-HKSCS", cs_big5hkscs }, - { "Shift_JIS", cs_sjis }, - { "SJIS", cs_sjis }, - { "932", cs_sjis }, - { "SJIS-win", cs_sjis }, - { "CP932", cs_sjis }, - { "EUCJP", cs_eucjp }, - { "EUC-JP", cs_eucjp }, - { "eucJP-win", cs_eucjp }, - { "KOI8-R", cs_koi8r }, - { "koi8-ru", cs_koi8r }, - { "koi8r", cs_koi8r }, - { "cp1251", cs_cp1251 }, - { "Windows-1251", cs_cp1251 }, - { "win-1251", cs_cp1251 }, - { "iso8859-5", cs_8859_5 }, - { "iso-8859-5", cs_8859_5 }, - { "cp866", cs_cp866 }, - { "866", cs_cp866 }, - { "ibm866", cs_cp866 }, - { "MacRoman", cs_macroman }, - { NULL } + { "ISO-8859-1", sizeof("ISO-8859-1")-1, cs_8859_1 }, + { "ISO8859-1", sizeof("ISO8859-1")-1, cs_8859_1 }, + { "ISO-8859-15", sizeof("ISO-8859-15")-1, cs_8859_15 }, + { "ISO8859-15", sizeof("ISO8859-15")-1, cs_8859_15 }, + { "utf-8", sizeof("utf-8")-1, cs_utf_8 }, + { "cp1252", sizeof("cp1252")-1, cs_cp1252 }, + { "Windows-1252", sizeof("Windows-1252")-1, cs_cp1252 }, + { "1252", sizeof("1252")-1, cs_cp1252 }, + { "BIG5", sizeof("BIG5")-1, cs_big5 }, + { "950", sizeof("950")-1, cs_big5 }, + { "GB2312", sizeof("GB2312")-1, cs_gb2312 }, + { "936", sizeof("936")-1, cs_gb2312 }, + { "BIG5-HKSCS", sizeof("BIG5-HKSCS")-1, cs_big5hkscs }, + { "Shift_JIS", sizeof("Shift_JIS")-1, cs_sjis }, + { "SJIS", sizeof("SJIS")-1, cs_sjis }, + { "932", sizeof("932")-1, cs_sjis }, + { "SJIS-win", sizeof("SJIS-win")-1, cs_sjis }, + { "CP932", sizeof("CP932")-1, cs_sjis }, + { "EUCJP", sizeof("EUCJP")-1, cs_eucjp }, + { "EUC-JP", sizeof("EUC-JP")-1, cs_eucjp }, + { "eucJP-win", sizeof("eucJP-win")-1, cs_eucjp }, + { "KOI8-R", sizeof("KOI8-R")-1, cs_koi8r }, + { "koi8-ru", sizeof("koi8-ru")-1, cs_koi8r }, + { "koi8r", sizeof("koi8r")-1, cs_koi8r }, + { "cp1251", sizeof("cp1251")-1, cs_cp1251 }, + { "Windows-1251", sizeof("Windows-1251")-1, cs_cp1251 }, + { "win-1251", sizeof("win-1251")-1, cs_cp1251 }, + { "iso8859-5", sizeof("iso8859-5")-1, cs_8859_5 }, + { "iso-8859-5", sizeof("iso-8859-5")-1, cs_8859_5 }, + { "cp866", sizeof("cp866")-1, cs_cp866 }, + { "866", sizeof("866")-1, cs_cp866 }, + { "ibm866", sizeof("ibm866")-1, cs_cp866 }, + { "MacRoman", sizeof("MacRoman")-1, cs_macroman } }; /* longest entity name length excluding & and ; */ diff --git a/ext/standard/html_tables/html_table_gen.php b/ext/standard/html_tables/html_table_gen.php index 45ae5f832b..d047b9df13 100644 --- a/ext/standard/html_tables/html_table_gen.php +++ b/ext/standard/html_tables/html_table_gen.php @@ -62,42 +62,42 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251, static const struct { const char *codeset; + uint32_t codeset_len; enum entity_charset charset; } charset_map[] = { - { "ISO-8859-1", cs_8859_1 }, - { "ISO8859-1", cs_8859_1 }, - { "ISO-8859-15", cs_8859_15 }, - { "ISO8859-15", cs_8859_15 }, - { "utf-8", cs_utf_8 }, - { "cp1252", cs_cp1252 }, - { "Windows-1252", cs_cp1252 }, - { "1252", cs_cp1252 }, - { "BIG5", cs_big5 }, - { "950", cs_big5 }, - { "GB2312", cs_gb2312 }, - { "936", cs_gb2312 }, - { "BIG5-HKSCS", cs_big5hkscs }, - { "Shift_JIS", cs_sjis }, - { "SJIS", cs_sjis }, - { "932", cs_sjis }, - { "SJIS-win", cs_sjis }, - { "CP932", cs_sjis }, - { "EUCJP", cs_eucjp }, - { "EUC-JP", cs_eucjp }, - { "eucJP-win", cs_eucjp }, - { "KOI8-R", cs_koi8r }, - { "koi8-ru", cs_koi8r }, - { "koi8r", cs_koi8r }, - { "cp1251", cs_cp1251 }, - { "Windows-1251", cs_cp1251 }, - { "win-1251", cs_cp1251 }, - { "iso8859-5", cs_8859_5 }, - { "iso-8859-5", cs_8859_5 }, - { "cp866", cs_cp866 }, - { "866", cs_cp866 }, - { "ibm866", cs_cp866 }, - { "MacRoman", cs_macroman }, - { NULL } + { "ISO-8859-1", sizeof("ISO-8859-1")-1, cs_8859_1 }, + { "ISO8859-1", sizeof("ISO8859-1")-1, cs_8859_1 }, + { "ISO-8859-15", sizeof("ISO-8859-15")-1, cs_8859_15 }, + { "ISO8859-15", sizeof("ISO8859-15")-1, cs_8859_15 }, + { "utf-8", sizeof("utf-8")-1, cs_utf_8 }, + { "cp1252", sizeof("cp1252")-1, cs_cp1252 }, + { "Windows-1252", sizeof("Windows-1252")-1, cs_cp1252 }, + { "1252", sizeof("1252")-1, cs_cp1252 }, + { "BIG5", sizeof("BIG5")-1, cs_big5 }, + { "950", sizeof("950")-1, cs_big5 }, + { "GB2312", sizeof("GB2312")-1, cs_gb2312 }, + { "936", sizeof("936")-1, cs_gb2312 }, + { "BIG5-HKSCS", sizeof("BIG5-HKSCS")-1, cs_big5hkscs }, + { "Shift_JIS", sizeof("Shift_JIS")-1, cs_sjis }, + { "SJIS", sizeof("SJIS")-1, cs_sjis }, + { "932", sizeof("932")-1, cs_sjis }, + { "SJIS-win", sizeof("SJIS-win")-1, cs_sjis }, + { "CP932", sizeof("CP932")-1, cs_sjis }, + { "EUCJP", sizeof("EUCJP")-1, cs_eucjp }, + { "EUC-JP", sizeof("EUC-JP")-1, cs_eucjp }, + { "eucJP-win", sizeof("eucJP-win")-1, cs_eucjp }, + { "KOI8-R", sizeof("KOI8-R")-1, cs_koi8r }, + { "koi8-ru", sizeof("koi8-ru")-1, cs_koi8r }, + { "koi8r", sizeof("koi8r")-1, cs_koi8r }, + { "cp1251", sizeof("cp1251")-1, cs_cp1251 }, + { "Windows-1251", sizeof("Windows-1251")-1, cs_cp1251 }, + { "win-1251", sizeof("win-1251")-1, cs_cp1251 }, + { "iso8859-5", sizeof("iso8859-5")-1, cs_8859_5 }, + { "iso-8859-5", sizeof("iso-8859-5")-1, cs_8859_5 }, + { "cp866", sizeof("cp866")-1, cs_cp866 }, + { "866", sizeof("866")-1, cs_cp866 }, + { "ibm866", sizeof("ibm866")-1, cs_cp866 }, + { "MacRoman", sizeof("MacRoman")-1, cs_macroman } }; /* longest entity name length excluding & and ; */ @@ -406,7 +406,7 @@ typedef const entity_stage3_row *const *entity_stage1_row; /* 64 elements */ /* For stage 1, Calculate k & 0xFFF000 >> 3*4. * If larger than 1D, we have no mapping. Otherwise lookup that index */ - + typedef struct { const entity_stage1_row *ms_table; /* for tables with only basic entities, this member is to be accessed