enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252,
cs_8859_15, cs_utf_8, cs_big5, cs_gb2312,
- cs_big5hkscs, cs_sjis, cs_eucjp};
+ cs_big5hkscs, cs_sjis, cs_eucjp, cs_koi8r,
+ cs_cp1251, cs_8859_5, cs_cp866 /* cs_koi8r onward: single-byte Cyrillic charsets */
+ };
typedef const char *entity_table_t;
/* codepage 1252 is a Windows extension to iso-8859-1. */
"spades", NULL, NULL, "clubs", NULL, "hearts", "diams"
};
+/*
+ * KOI8-R entity table. The entity map pairs it with bytes 0xa3..0xff, so
+ * entry 0 belongs to byte 0xa3; the offsets on each row follow from that.
+ * Entries are numeric character references for Cyrillic letters; NULL
+ * entries presumably mean "no entity for this byte" (confirm in the lookup).
+ */
+static entity_table_t ent_koi8r[] = {
+ /* 0xa3 */ "#1105", /* small "jo" */
+ /* 0xa4 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ /* 0xac */ NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ /* 0xb3 */ "#1025", /* capital "JO" */
+ /* 0xb4 */ NULL, NULL, NULL, NULL, NULL, NULL,
+ /* 0xba */ NULL, NULL, NULL, NULL, NULL, NULL,
+ /* 0xc0 */ "#1102", "#1072", "#1073", "#1094", "#1076", "#1077", "#1092", "#1075",
+ /* 0xc8 */ "#1093", "#1080", "#1081", "#1082", "#1083", "#1084", "#1085", "#1086",
+ /* 0xd0 */ "#1087", "#1103", "#1088", "#1089", "#1090", "#1091", "#1078", "#1074",
+ /* 0xd8 */ "#1100", "#1099", "#1079", "#1096", "#1101", "#1097", "#1095", "#1098",
+ /* 0xe0 */ "#1070", "#1040", "#1041", "#1062", "#1044", "#1045", "#1060", "#1043",
+ /* 0xe8 */ "#1061", "#1048", "#1049", "#1050", "#1051", "#1052", "#1053", "#1054",
+ /* 0xf0 */ "#1055", "#1071", "#1056", "#1057", "#1058", "#1059", "#1046", "#1042",
+ /* 0xf8 */ "#1068", "#1067", "#1047", "#1064", "#1069", "#1065", "#1063", "#1066"
+};
+
+/*
+ * Windows-1251 entity table. The entity map pairs it with bytes 0x80..0xff,
+ * so entry 0 belongs to byte 0x80; the offsets on each row follow from that.
+ * Entries are entity names or numeric character references; the single NULL
+ * sits at 0x98, which Windows-1251 leaves undefined.
+ */
+static entity_table_t ent_cp_1251[] = {
+ /* 0x80 */ "#1026", "#1027", "#8218", "#1107", "#8222", "hellip", "dagger", "Dagger",
+ /* 0x88 */ "euro", "permil", "#1033", "#8249", "#1034", "#1036", "#1035", "#1039",
+ /* 0x91-0x92: curly single quotes U+2018/U+2019;
+    0x93-0x94: curly double quotes U+201C/U+201D */
+ /* 0x90 */ "#1106", "#8216", "#8217", "#8220", "#8221", "bull", "ndash", "mdash",
+ /* 0x98 */ NULL, "trade", "#1113", "#8250", "#1114", "#1116", "#1115", "#1119",
+ /* 0xa0 */ "nbsp", "#1038", "#1118", "#1032", "curren", "#1168", "brvbar", "sect",
+ /* 0xa8 */ "#1025", "copy", "#1028", "laquo", "not", "shy", "reg", "#1031",
+ /* 0xb0 */ "deg", "plusmn", "#1030", "#1110", "#1169", "micro", "para", "middot",
+ /* 0xb8 */ "#1105", "#8470", "#1108", "raquo", "#1112", "#1029", "#1109", "#1111",
+ /* 0xc0-0xff: the contiguous run U+0410..U+044F */
+ /* 0xc0 */ "#1040", "#1041", "#1042", "#1043", "#1044", "#1045", "#1046", "#1047",
+ /* 0xc8 */ "#1048", "#1049", "#1050", "#1051", "#1052", "#1053", "#1054", "#1055",
+ /* 0xd0 */ "#1056", "#1057", "#1058", "#1059", "#1060", "#1061", "#1062", "#1063",
+ /* 0xd8 */ "#1064", "#1065", "#1066", "#1067", "#1068", "#1069", "#1070", "#1071",
+ /* 0xe0 */ "#1072", "#1073", "#1074", "#1075", "#1076", "#1077", "#1078", "#1079",
+ /* 0xe8 */ "#1080", "#1081", "#1082", "#1083", "#1084", "#1085", "#1086", "#1087",
+ /* 0xf0 */ "#1088", "#1089", "#1090", "#1091", "#1092", "#1093", "#1094", "#1095",
+ /* 0xf8 */ "#1096", "#1097", "#1098", "#1099", "#1100", "#1101", "#1102", "#1103"
+};
+
+/*
+ * ISO-8859-5 entity table. The entity map pairs it with bytes 0xc0..0xff,
+ * so entry 0 belongs to byte 0xc0. The bytes map to consecutive Cyrillic
+ * code points starting at U+0420, with two exceptions that are not letters:
+ * 0xf0 is the numero sign (U+2116) and 0xfd is the section sign (U+00A7).
+ * NOTE(review): ISO-8859-5 also places Cyrillic letters at 0xa1..0xbf;
+ * those bytes lie outside this table's range and get no entity from it.
+ */
+static entity_table_t ent_iso_8859_5[] = {
+ /* 0xc0 */ "#1056", "#1057", "#1058", "#1059", "#1060", "#1061", "#1062", "#1063",
+ /* 0xc8 */ "#1064", "#1065", "#1066", "#1067", "#1068", "#1069", "#1070", "#1071",
+ /* 0xd0 */ "#1072", "#1073", "#1074", "#1075", "#1076", "#1077", "#1078", "#1079",
+ /* 0xd8 */ "#1080", "#1081", "#1082", "#1083", "#1084", "#1085", "#1086", "#1087",
+ /* 0xe0 */ "#1088", "#1089", "#1090", "#1091", "#1092", "#1093", "#1094", "#1095",
+ /* 0xe8 */ "#1096", "#1097", "#1098", "#1099", "#1100", "#1101", "#1102", "#1103",
+ /* 0xf0 */ "#8470", "#1105", "#1106", "#1107", "#1108", "#1109", "#1110", "#1111",
+ /* 0xf8 */ "#1112", "#1113", "#1114", "#1115", "#1116", "sect", "#1118", "#1119"
+};
+
+/*
+ * CP866 entity table. The entity map pairs it with bytes 0xc0..0xff, so
+ * entry 0 belongs to byte 0xc0; the offsets on each row follow from that.
+ * All entries are numeric character references.
+ */
+static entity_table_t ent_cp_866[] = {
+ /* 0xc0-0xdf: box-drawing and block elements */
+ /* 0xc0 */ "#9492", "#9524", "#9516", "#9500", "#9472", "#9532", "#9566", "#9567",
+ /* 0xc8 */ "#9562", "#9556", "#9577", "#9574", "#9568", "#9552", "#9580", "#9575",
+ /* 0xd0 */ "#9576", "#9572", "#9573", "#9561", "#9560", "#9554", "#9555", "#9579",
+ /* 0xd8 */ "#9578", "#9496", "#9484", "#9608", "#9604", "#9612", "#9616", "#9600",
+ /* 0xe0-0xef: the contiguous run U+0440..U+044F */
+ /* 0xe0 */ "#1088", "#1089", "#1090", "#1091", "#1092", "#1093", "#1094", "#1095",
+ /* 0xe8 */ "#1096", "#1097", "#1098", "#1099", "#1100", "#1101", "#1102", "#1103",
+ /* 0xf0-0xff: extra Cyrillic letters and symbols */
+ /* 0xf0 */ "#1025", "#1105", "#1028", "#1108", "#1031", "#1111", "#1038", "#1118",
+ /* 0xf8 */ "#176", "#8729", "#183", "#8730", "#8470", "#164", "#9632", "#160"
+};
+
+
struct html_entity_map {
enum entity_charset charset; /* charset identifier */
unsigned short basechar; /* char code at start of table */
{ cs_big5hkscs, 0xa0, 0xff, ent_iso_8859_1 },
{ cs_sjis, 0xa0, 0xff, ent_iso_8859_1 },
{ cs_eucjp, 0xa0, 0xff, ent_iso_8859_1 },
+ { cs_koi8r, 0xa3, 0xff, ent_koi8r },
+ { cs_cp1251, 0x80, 0xff, ent_cp_1251 },
+ { cs_8859_5, 0xc0, 0xff, ent_iso_8859_5 },
+ { cs_cp866, 0xc0, 0xff, ent_cp_866 },
{ cs_terminator }
};
{ "932", cs_sjis },
{ "EUCJP", cs_eucjp },
{ "EUC-JP", cs_eucjp },
+ { "KOI8-R", cs_koi8r },
+ { "koi8-ru", cs_koi8r },
+ { "koi8r", cs_koi8r },
+ { "cp1251", cs_cp1251 },
+ { "Windows-1251", cs_cp1251 },
+ { "win-1251", cs_cp1251 },
+ { "iso8859-5", cs_8859_5 },
+ { "iso-8859-5", cs_8859_5 },
+ { "cp866", cs_cp866 },
+ { "866", cs_cp866 },
+ { "ibm866", cs_cp866 },
{ NULL }
};
case mbfl_no_encoding_hz:
case mbfl_no_encoding_cp936:
return cs_gb2312;
+
+ case mbfl_no_encoding_koi8r:
+ return cs_koi8r;
+
+ case mbfl_no_encoding_cp866:
+ return cs_cp866;
+
+ case mbfl_no_encoding_cp1251:
+ return cs_cp1251;
+
+ case mbfl_no_encoding_8859_5:
+ return cs_8859_5;
}
#else
{