From 5945ba1c6dce1068f13517a30075c35eff3c00e2 Mon Sep 17 00:00:00 2001
From: Moriyoshi Koizumi
Date: Mon, 24 Mar 2003 19:15:16 +0000
Subject: [PATCH] MFH(r-1.76): added koi8-r, cp866, and cp1251 support for htmlentities()

Patch by Antony Dovgal
---
Review notes (text here is ignored when the patch is applied):
 * ent_cp_1251: bytes 0x93/0x94 are the double quotation marks
   U+201C/U+201D (#8220/#8221); the table previously held #8219/#8220.
 * ent_iso_8859_5: byte 0xf0 is the numero sign U+2116 (#8470) and byte
   0xfd is the section sign U+00A7 (sect); the table previously continued
   the Cyrillic run with #1104/#1117.
 * The entity_map rows for 8859-5 and cp866 start at 0xc0, so bytes below
   0xc0 in those charsets are not covered by the tables added here.
 * Leading whitespace was reconstructed from a flattened copy of this
   patch; use a whitespace-tolerant apply if the context does not match.

 ext/standard/html.c | 98 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 1 deletion(-)

diff --git a/ext/standard/html.c b/ext/standard/html.c
index 8080698055..c94a209074 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -44,7 +44,9 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring)
 
 enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252,
 					  cs_8859_15, cs_utf_8, cs_big5, cs_gb2312,
-					  cs_big5hkscs, cs_sjis, cs_eucjp};
+					  cs_big5hkscs, cs_sjis, cs_eucjp, cs_koi8r,
+					  cs_cp1251, cs_8859_5, cs_cp866
+					};
 typedef const char *entity_table_t;
 
 /* codepage 1252 is a Windows extension to iso-8859-1. */
@@ -253,6 +255,73 @@ static entity_table_t ent_uni_9824_9830[] = {
 	"spades", NULL, NULL, "clubs", NULL, "hearts", "diams"
 };
 
+static entity_table_t ent_koi8r[] = { /* indexed from byte 0xa3 (see entity_map) */
+	"#1105", /* 0xa3: "jo" */
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, "#1025", /* 0xb3: "JO" */
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	"#1102", "#1072", "#1073", "#1094", "#1076", "#1077", "#1092", /* 0xc0.. */
+	"#1075", "#1093", "#1080", "#1081", "#1082", "#1083", "#1084",
+	"#1085", "#1086", "#1087", "#1103", "#1088", "#1089", "#1090",
+	"#1091", "#1078", "#1074", "#1100", "#1099", "#1079", "#1096",
+	"#1101", "#1097", "#1095", "#1098", "#1070", "#1040", "#1041",
+	"#1062", "#1044", "#1045", "#1060", "#1043", "#1061", "#1048",
+	"#1049", "#1050", "#1051", "#1052", "#1053", "#1054", "#1055",
+	"#1071", "#1056", "#1057", "#1058", "#1059", "#1046", "#1042",
+	"#1068", "#1067", "#1047", "#1064", "#1069", "#1065", "#1063",
+	"#1066"
+};
+
+static entity_table_t ent_cp_1251[] = { /* indexed from byte 0x80 (see entity_map) */
+	"#1026", "#1027", "#8218", "#1107", "#8222", "hellip", "dagger",
+	"Dagger", "euro", "permil", "#1033", "#8249", "#1034", "#1036",
+	"#1035", "#1039", "#1106", "#8216", "#8217", "#8220", "#8221", /* 0x93, 0x94: U+201C, U+201D */
+	"bull", "ndash", "mdash", NULL, "trade", "#1113", "#8250",
+	"#1114", "#1116", "#1115", "#1119", "nbsp", "#1038", "#1118",
+	"#1032", "curren", "#1168", "brvbar", "sect", "#1025", "copy",
+	"#1028", "laquo", "not", "shy", "reg", "#1031", "deg", "plusmn",
+	"#1030", "#1110", "#1169", "micro", "para", "middot", "#1105",
+	"#8470", "#1108", "raquo", "#1112", "#1029", "#1109", "#1111",
+	"#1040", "#1041", "#1042", "#1043", "#1044", "#1045", "#1046", /* 0xc0.. */
+	"#1047", "#1048", "#1049", "#1050", "#1051", "#1052", "#1053",
+	"#1054", "#1055", "#1056", "#1057", "#1058", "#1059", "#1060",
+	"#1061", "#1062", "#1063", "#1064", "#1065", "#1066", "#1067",
+	"#1068", "#1069", "#1070", "#1071", "#1072", "#1073", "#1074",
+	"#1075", "#1076", "#1077", "#1078", "#1079", "#1080", "#1081",
+	"#1082", "#1083", "#1084", "#1085", "#1086", "#1087", "#1088",
+	"#1089", "#1090", "#1091", "#1092", "#1093", "#1094", "#1095",
+	"#1096", "#1097", "#1098", "#1099", "#1100", "#1101", "#1102",
+	"#1103"
+};
+
+static entity_table_t ent_iso_8859_5[] = { /* indexed from byte 0xc0 (see entity_map) */
+	"#1056", "#1057", "#1058", "#1059", "#1060", "#1061", "#1062",
+	"#1063", "#1064", "#1065", "#1066", "#1067", "#1068", "#1069",
+	"#1070", "#1071", "#1072", "#1073", "#1074", "#1075", "#1076",
+	"#1077", "#1078", "#1079", "#1080", "#1081", "#1082", "#1083",
+	"#1084", "#1085", "#1086", "#1087", "#1088", "#1089", "#1090",
+	"#1091", "#1092", "#1093", "#1094", "#1095", "#1096", "#1097",
+	"#1098", "#1099", "#1100", "#1101", "#1102", "#1103", "#8470", /* 0xf0: U+2116 numero sign */
+	"#1105", "#1106", "#1107", "#1108", "#1109", "#1110", "#1111",
+	"#1112", "#1113", "#1114", "#1115", "#1116", "sect", "#1118", /* 0xfd: U+00A7 section sign */
+	"#1119"
+};
+
+static entity_table_t ent_cp_866[] = {
+	/* indexed from byte 0xc0 (see entity_map): box drawing, then Cyrillic */
+	"#9492", "#9524", "#9516", "#9500", "#9472", "#9532", "#9566",
+	"#9567", "#9562", "#9556", "#9577", "#9574", "#9568", "#9552",
+	"#9580", "#9575", "#9576", "#9572", "#9573", "#9561", "#9560",
+	"#9554", "#9555", "#9579", "#9578", "#9496", "#9484", "#9608",
+	"#9604", "#9612", "#9616", "#9600", "#1088", "#1089", "#1090",
+	"#1091", "#1092", "#1093", "#1094", "#1095", "#1096", "#1097",
+	"#1098", "#1099", "#1100", "#1101", "#1102", "#1103", "#1025",
+	"#1105", "#1028", "#1108", "#1031", "#1111", "#1038", "#1118",
+	"#176", "#8729", "#183", "#8730", "#8470", "#164", "#9632",
+	"#160"
+};
+
+
 struct html_entity_map {
 	enum entity_charset charset;	/* charset identifier */
 	unsigned short basechar;			/* char code at start of table */
@@ -279,6 +348,10 @@ static const struct html_entity_map entity_map[] = {
 	{ cs_big5hkscs, 0xa0, 0xff, ent_iso_8859_1 },
 	{ cs_sjis, 0xa0, 0xff, ent_iso_8859_1 },
 	{ cs_eucjp, 0xa0, 0xff, ent_iso_8859_1 },
+	{ cs_koi8r, 0xa3, 0xff, ent_koi8r },
+	{ cs_cp1251, 0x80, 0xff, ent_cp_1251 },
+	{ cs_8859_5, 0xc0, 0xff, ent_iso_8859_5 },
+	{ cs_cp866, 0xc0, 0xff, ent_cp_866 },
 	{ cs_terminator }
 };
 
@@ -304,6 +377,17 @@ static const struct {
 	{ "932", cs_sjis },
 	{ "EUCJP", cs_eucjp },
 	{ "EUC-JP", cs_eucjp },
+	{ "KOI8-R", cs_koi8r },
+	{ "koi8-ru", cs_koi8r },
+	{ "koi8r", cs_koi8r },
+	{ "cp1251", cs_cp1251 },
+	{ "Windows-1251", cs_cp1251 },
+	{ "win-1251", cs_cp1251 },
+	{ "iso8859-5", cs_8859_5 },
+	{ "iso-8859-5", cs_8859_5 },
+	{ "cp866", cs_cp866 },
+	{ "866", cs_cp866 },
+	{ "ibm866", cs_cp866 },
 	{ NULL }
 };
 
@@ -578,6 +662,18 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
 			case mbfl_no_encoding_hz:
 			case mbfl_no_encoding_cp936:
 				return cs_gb2312;
+
+			case mbfl_no_encoding_koi8r:
+				return cs_koi8r;
+
+			case mbfl_no_encoding_cp866:
+				return cs_cp866;
+
+			case mbfl_no_encoding_cp1251:
+				return cs_cp1251;
+
+			case mbfl_no_encoding_8859_5:
+				return cs_8859_5;
 		}
 #else
 	{
-- 
2.50.1