libmbfl/filters/mbfilter_utf7.c
libmbfl/filters/mbfilter_utf7imap.c
libmbfl/filters/mbfilter_utf8.c
+ libmbfl/filters/mbfilter_utf8_mobile.c
libmbfl/filters/mbfilter_uuencode.c
libmbfl/filters/mbfilter_koi8u.c
libmbfl/mbfl/mbfilter.c
mbfilter_iso2022_jp_ms.c \
mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
- mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
+ mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c mbfilter_utf8_mobile.c \
mbfilter_koi8u.c mbfilter_cp1254.c \
mbfilter_uuencode.c mbfilter_armscii8.c mbfilter_cp850.c \
mbfilter_cp5022x.c mbfilter_sjis_open.c mbfilter_sjis_mobile.c mbfilter_sjis_mac.c \
mbfilter_utf32.c \
mbfilter_utf16.c \
mbfilter_utf8.c \
+ mbfilter_utf8_mobile.c \
mbfilter_utf7.c \
mbfilter_utf7imap.c \
mbfilter_euc_jp_win.c \
mbfilter_utf7.h \
mbfilter_utf7imap.h \
mbfilter_utf8.h \
+ mbfilter_utf8_mobile.h \
mbfilter_uuencode.h \
mbfilter_cp5022x.h \
mbfilter_cp51932.h \
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
-static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
+int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
return c;
}
-static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter)
+int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter)
{
if (filter->status) { /* kanji second char */
if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */
#include "emoji2uni.h"
-static int mbfl_filt_ident_sjis_mobile(int c, mbfl_identify_filter *filter);
+extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
+/*
+ * Identify-filter vtables for the carrier-specific Shift_JIS encodings
+ * (docomo, kddi, sb). The three tables differ only in their encoding id:
+ * each one reuses the common ctor/dtor and the shared mbfl_filt_ident_sjis
+ * checker instead of a mobile-specific copy of it.
+ */
+const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo = {
+ mbfl_no_encoding_sjis_docomo,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_sjis
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = {
+ mbfl_no_encoding_sjis_kddi,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_sjis
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = {
+ mbfl_no_encoding_sjis_sb,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_sjis
+};
+
const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
mbfl_no_encoding_sjis_docomo,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_flush
};
+/*
+ * Country-flag lookup tables. The three arrays are index-aligned: when the
+ * two letters of nflags_s[i] (each shifted by NFLAGS()) match a cached and a
+ * current code point, nflags_code_kddi[i] / nflags_code_sb[i] is the carrier
+ * code that is emitted. Each nflags_s entry holds exactly two letters and no
+ * terminating NUL.
+ */
+static const char nflags_s[10][2] = {"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"};
+static const int nflags_code_kddi[10] = {0x2549, 0x2546, 0x24c0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7};
+static const int nflags_code_sb[10] = {0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07, 0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03};
+
+/*
+ * Range tables consumed by mbfilter_conv_map_tbl() and
+ * mbfilter_conv_r_map_tbl(). Every row is
+ *   { first source code, last source code, first mapped code }
+ * so a code c inside [row[0], row[1]] maps to c - row[0] + row[2], and the
+ * reverse lookup undoes that offset. Callers pass the row count explicitly
+ * (4, 6, 6 and 8 respectively).
+ * The tables are not static because mbfilter_utf8_mobile.c refers to them
+ * through extern declarations.
+ */
+const int mbfl_docomo2uni_pua[4][3] = {
+ {0x28c2, 0x2929, 0xe63e},
+ {0x2930, 0x2932, 0xe6ac},
+ {0x2935, 0x293e, 0xe6b1},
+ {0x2952, 0x29db, 0xe6ce},
+};
+
+const int mbfl_kddi2uni_pua[6][3] = {
+ {0x26ec, 0x2838, 0xe468},
+ {0x284c, 0x2863, 0xe5b5},
+ {0x24b8, 0x24ca, 0xe5cd},
+ {0x24cb, 0x2545, 0xea80},
+ {0x2839, 0x284b, 0xeafb},
+ {0x2546, 0x25c0, 0xeb0e},
+};
+
+const int mbfl_sb2uni_pua[6][3] = {
+ {0x27a9, 0x2802, 0xe101},
+ {0x2808, 0x285a, 0xe201},
+ {0x2921, 0x297a, 0xe001},
+ {0x2980, 0x29cc, 0xe301},
+ {0x2a99, 0x2ae4, 0xe401},
+ {0x2af8, 0x2b2e, 0xe501},
+};
+
+/* NOTE(review): covers the same source ranges as mbfl_kddi2uni_pua but with
+ * different targets (0xec40..0xf0fc); presumably the "unofficial" KDDI
+ * layout mentioned by the code this table replaces - confirm. */
+const int mbfl_kddi2uni_pua_b[8][3] = {
+ {0x24b8, 0x24f6, 0xec40},
+ {0x24f7, 0x2573, 0xec80},
+ {0x2574, 0x25b2, 0xed40},
+ {0x25b3, 0x25c0, 0xed80},
+ {0x26ec, 0x272a, 0xef40},
+ {0x272b, 0x27a7, 0xef80},
+ {0x27a8, 0x27e6, 0xf040},
+ {0x27e7, 0x2863, 0xf080},
+};
+
+/* Shift an ASCII capital letter into the flag-letter code points:
+ * NFLAGS('A') == 0x1F1E6 and NFLAGS('Z') == 0x1F1FF. */
+#define NFLAGS(c) (0x1F1A5+(int)(c))
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
} \
} while (0)
+/*
+ * Turn the linear code held in s1 into a two-byte row/cell value:
+ * c1 = s1 / 94 + 0x21, c2 = s1 % 94 + 0x21, then s1 = (c1 << 8) | c2 and
+ * s2 = 1. All four arguments must be assignable lvalues; s1 is both input
+ * and output.
+ *
+ * Wrapped in do { } while (0) so that "CODE2JIS(...);" is one statement and
+ * stays correct under an unbraced if/else, like the other macros here.
+ */
+#define CODE2JIS(c1,c2,s1,s2) \
+	do { \
+		(c1) = (s1)/94+0x21; \
+		(c2) = (s1)-94*((c1)-0x21)+0x21; \
+		(s1) = ((c1) << 8) | (c2); \
+		(s2) = 1; \
+	} while (0)
+
+/*
+ * Forward range lookup.
+ *
+ * Scans the first n rows of map in order; for the first row whose closed
+ * interval [row[0], row[1]] contains c, stores c - row[0] + row[2] in *w and
+ * returns 1. Returns 0 and leaves *w untouched when no row matches (this
+ * includes n <= 0).
+ */
+int
+mbfilter_conv_map_tbl(int c, int *w, const int map[][3], int n)
+{
+	int idx = 0;
+
+	while (idx < n) {
+		const int *row = map[idx];
+
+		if (c >= row[0] && c <= row[1]) {
+			*w = c - row[0] + row[2];
+			return 1;
+		}
+		idx++;
+	}
+	return 0;
+}
+
+/*
+ * Reverse range lookup: the inverse of mbfilter_conv_map_tbl().
+ *
+ * For the first of the n rows whose mapped interval
+ * [row[2], row[2] - row[0] + row[1]] contains c, stores
+ * c + row[0] - row[2] in *w and returns 1. Returns 0 and leaves *w
+ * untouched when no row matches.
+ */
+int
+mbfilter_conv_r_map_tbl(int c, int *w, const int map[][3], int n)
+{
+	int idx;
+
+	for (idx = 0; idx < n; idx++) {
+		const int *row = map[idx];
+
+		if (c < row[2]) {
+			continue;
+		}
+		if (c > row[2] - row[0] + row[1]) {
+			continue;
+		}
+		*w = c + row[0] - row[2];
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Linear search over two parallel arrays of length n.
+ *
+ * Finds the first position at which key[] equals c, stores the matching
+ * val[] entry in *w and returns 1. Returns 0 and leaves *w untouched when c
+ * is not present (or n <= 0).
+ */
+int
+mbfilter_conv_seq_tbl(int c, int *w, const int *key, const int *val, int n)
+{
+	int pos = 0;
+
+	while (pos < n && key[pos] != c) {
+		pos++;
+	}
+	if (pos >= n) {
+		return 0;
+	}
+	*w = val[pos];
+	return 1;
+}
+
+
int
-mbfiler_sjis_emoji_docomo2unicode(int s, int *snd)
+mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
{
int w = s;
if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
}
int
-mbfiler_sjis_emoji_kddi2unicode(int s, int *snd)
+mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
{
- int w = s, si;
- const char flags[][2] = {"FR","DE","IT","GB","CN","KR","ES","RU","JP","US"};
-
+ int w = s, si, c;
+ const int nflags_order_kddi[] = {3, 1, 5, 4, 0, 7};
+
*snd = 0;
- if (s >= mb_tbl_code2uni_kddi1_min &&
- s <= mb_tbl_code2uni_kddi1_max) {
+ if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
si = s - mb_tbl_code2uni_kddi1_min;
- if (si == 0x0008) {
- *snd = 0x1F1A5 + flags[6][0]; w = 0x1F1A5 + flags[6][1];
- } else if (si == 0x0009) {
- *snd = 0x1F1A5 + flags[7][0]; w = 0x1F1A5 + flags[7][1];
+ if (si == 0x0008) { /* ES */
+ *snd = NFLAGS(nflags_s[2][0]); w = NFLAGS(nflags_s[2][1]);
+ } else if (si == 0x0009) { /* RU */
+ *snd = NFLAGS(nflags_s[8][0]); w = NFLAGS(nflags_s[8][1]);
} else if (si >= 0x008d && si <= 0x0092) {
- *snd = 0x1F1A5 + flags[si - 0x8d][0];
- w = 0x1F1A5 + flags[si - 0x8d][1];
+ c = nflags_order_kddi[si-0x008d];
+ *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
} else if (si == 0x0104) {
*snd = 0x0023; w = 0x20E3;
} else {
w = mb_tbl_code2uni_kddi1[si];
}
- } else if (s >= mb_tbl_code2uni_kddi2_min &&
- s <= mb_tbl_code2uni_kddi2_max) {
+ } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
si = s - mb_tbl_code2uni_kddi2_min;
- if (si == 100) {
- *snd = 0x1F1A5 + flags[8][0]; w = 0x1F1A5 + flags[8][1];
+ if (si == 100) { /* JP */
+ *snd = NFLAGS(nflags_s[6][0]); w = NFLAGS(nflags_s[6][1]);
} else if (si >= 0x00ba && si <= 0x00c2) {
*snd = si-0x00ba+0x0031; w = 0x20E3;
- } else if (si == 0x010b) {
- *snd = 0x1F1A5 + flags[9][0]; w = 0x1F1A5 + flags[9][1];
+ } else if (si == 0x010b) { /* US */
+ *snd = NFLAGS(nflags_s[9][0]); w = NFLAGS(nflags_s[9][1]);
} else if (si == 0x0144) {
*snd = 0x0030; w = 0x20E3;
} else {
}
int
-mbfiler_sjis_emoji_sb2unicode(int s, int *snd)
+mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
{
- int w = s, si;
- const char flags[][2] = {"JP","US","FR","DE","IT","GB","ES","RU","CN","KR"};
-
+ int w = s, si, c;
+ const int nflags_order_sb[10] = {6, 9, 3, 1, 5, 4, 2, 8, 0, 7};
+
*snd = 0;
- if (s >= mb_tbl_code2uni_sb1_min &&
- s <= mb_tbl_code2uni_sb1_max) {
+ if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) {
si = s - mb_tbl_code2uni_sb1_min;
if (si == 0x006e || (si >= 0x007a && si <= 0x0083)) {
*snd = mb_tbl_code2uni_sb1[si];
} else {
w = mb_tbl_code2uni_sb1[si];
}
- } else if (s >= mb_tbl_code2uni_sb2_min &&
- s <= mb_tbl_code2uni_sb2_max) {
+ } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
si = s - mb_tbl_code2uni_sb2_min;
w = mb_tbl_code2uni_sb2[si];
- } else if (s >= mb_tbl_code2uni_sb3_min &&
- s <= mb_tbl_code2uni_sb3_max) {
+ } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
si = s - mb_tbl_code2uni_sb3_min;
if (si >= 0x0069 && si <= 0x0072) {
- *snd = 0x1F1A5 + flags[si - 0x0069][0];
- w = 0x1F1A5 + flags[si - 0x0069][1];
+ c = nflags_order_sb[si-0x0069];
+ *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
} else {
w = mb_tbl_code2uni_sb3[si];
}
}
int
-mbfiler_sjis_emoji_kddi2unicode_pua(int s, int *w)
-{
-
- if (s >= 0x26ec && s <= 0x2838) {
- *w = s - 0x26ec + 0xe468;
- } else if (s >= 0x284c && s <= 0x2863) {
- *w = s - 0x284c + 0xe5b5;
- } else if (s >= 0x24b8 && s <= 0x24ca) {
- *w = s - 0x24b8 + 0xe5cd;
- } else if (s >= 0x24cb && s <= 0x2545) {
- *w = s - 0x24cb + 0xea80;
- } else if (s >= 0x2839 && s <= 0x284b) {
- *w = s - 0x2839 + 0xeafb;
- } else if (s >= 0x2546 && s <= 0x25c0) {
- *w = s - 0x2546 + 0xeb0e;
- }
-
- return *w;
-}
-
-int
-mbfiler_sjis_emoji_kddi2unicode_pua_b(int s, int c1, int c, int *w)
-{
- if ((c1 == 0xf3 && (c >= 0x40 && c<=0xfc && c != 0x7f)) ||
- (c1 == 0xf4 && (c >= 0x40 && c<=0x93 && c != 0x7f)) ||
- ((c1 == 0xf6 || c1 == 0xf7) && (c >= 0x40 && c<=0xfc && c != 0x7f))) {
- *w = (c1 << 8) + c - 0x0700; /* KDDI unofficial UTF-8 encoding */
- }
- return *w;
-}
-
-int
-mbfiler_sjis_emoji2unicode_pua(int s, int *w)
-{
- if (s >= 0x27a9 && s <= 0x2802) {
- *w = s - 0x27a9 + 0xe101;
- } else if (s >= 0x2808 && s <= 0x285a) {
- *w = s - 0x2808 + 0xe201;
- } else if (s >= 0x2921 && s <= 0x297a) {
- *w = s - 0x2921 + 0xe001;
- } else if (s >= 0x2980 && s <= 0x29cc) {
- *w = s - 0x2980 + 0xe301;
- } else if (s >= 0x2a99 && s <= 0x2ae4) {
- *w = s - 0x2a99 + 0xe401;
- } else if (s >= 0x2af8 && s <= 0x2b2e) {
- *w = s - 0x2af8 + 0xe501;
- }
- return *w;
-}
-
-int
-mbfiler_unicode_pua2sjis_emoji_kddi(int c, int *s1, int *s2, int *c1, int *c2)
-{
- int match = 1;
-
- if (c >= 0xe468 && c <= 0xe5b4) {
- *s1 = c - 0xe468 + 0x26ec;
- } else if (c >= 0xe5b5 && c <= 0xe5cc) {
- *s1 = c - 0xe5b5 + 0x284c;
- } else if (c >= 0xe5cd && c <= 0xe5df) {
- *s1 = c - 0xe5cd + 0x24b8;
- } else if (c >= 0xea80 && c <= 0xeafa) {
- *s1 = c - 0xea80 + 0x24cb;
- } else if (c >= 0xeafb && c <= 0xeb0d) {
- *s1 = c - 0xeafb + 0x2839;
- } else if (c >= 0xeb0e && c <= 0xeb88) {
- *s1 = c - 0xeb0e + 0x2546;
- } else {
- match = 0;
- }
- if (match && *s1 >0) {
- *c1 = *s1/94+0x21;
- *c2 = *s1-94*(*c1-0x21)+0x21;
- *s1 = (*c1 << 8) | *c2;
- *s2 = 1;
- }
-
- return 1;
-}
-
-
-
-int
-mbfiler_unicode_pua2sjis_emoji(int c, int *s1, int *s2, int *c1, int *c2, int *sjis_encoded)
-{
- int match = 1;
-
- if (c >= 0xe001 && c <= 0xe05a) {
- *s1 = c - 0xe001 + 0x2921;
- } else if (c >= 0xe101 && c <= 0xe15a) {
- *s1 = c - 0xe101 + 0x27a9;
- } else if (c >= 0xe201 && c <= 0xe253) {
- *s1 = c - 0xe201 + 0x2808;
- } else if (c >= 0xe301 && c <= 0xe34d) {
- *s1 = c - 0xe301 + 0x2980;
- } else if (c >= 0xe401 && c <= 0xe44c) {
- *s1 = c - 0xe401 + 0x2a99;
- } else if (c >= 0xe501 && c <= 0xe537) {
- *s1 = c - 0xe501 + 0x2af8;
- } else if (
- ((c >= 0xec40 && c<=0xecfc) && c != 0xec7f) ||
- ((c >= 0xed40 && c<=0xed93) && c != 0xed7f) ||
- ((c >= 0xef40 && c<=0xeffc) && c != 0xef7f) ||
- ((c >= 0xf040 && c<=0xf0fc) && c != 0xf07f)) {
- /* KDDI unofficial UTF-8 encoding */
- *s1 = c + 0x0700;
- *sjis_encoded = 1;
- } else {
- match = 0;
- }
- if (match && *sjis_encoded == 0 && *s1 >0) {
- *c1 = *s1/94+0x21;
- *c2 = *s1-94*(*c1-0x21)+0x21;
- *s1 = (*c1 << 8) | *c2;
- *s2 = 1;
- }
-
- return 1;
-}
-
-int
-mbfiler_unicode2sjis_emoji_docomo(int c, int *s1, int *s2, int *c1, int *c2, mbfl_convert_filter *filter)
+mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
{
int i, match = 0, c1s;
match = 1;
}
} else {
- if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
- c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
- CK((*filter->output_function)(c1s, filter->data));
- }
+ CK((*filter->output_function)(c1s, filter->data));
}
} else {
if (c == 0x0023 || (c >= 0x0030 && c<=0x0039)) {
filter->status = 1;
filter->cache = c;
*s1 = -1;
- return 0;
+ return match;
}
if (c == 0x00A9) {
} else if (c == 0x00AE) {
*s1 = 0x29ba; match = 1;
} else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
- for (i=0; i < sizeof(mb_tbl_uni_docomo2code2_key)/sizeof(int); i++) {
- if (c >0 && mb_tbl_uni_docomo2code2_key[i] == c) {
- *s1 = mb_tbl_uni_docomo2code2_value[i];
- match = 1;
- break;
- }
- }
+ match = mbfilter_conv_seq_tbl(c, s1, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_value,
+ sizeof(mb_tbl_uni_docomo2code2_key)/sizeof(int));
} else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
- for (i=0; i < sizeof(mb_tbl_uni_docomo2code3_key)/sizeof(int); i++) {
- if (c >0 && mb_tbl_uni_docomo2code3_key[i] == c) {
- *s1 = mb_tbl_uni_docomo2code3_value[i];
- match = 1;
- break;
- }
- }
+ match = mbfilter_conv_seq_tbl(c, s1, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_value,
+ sizeof(mb_tbl_uni_docomo2code3_key)/sizeof(int));
}
}
- if (match && *s1 >0) {
- *c1 = *s1/94+0x21;
- *c2 = *s1-94*(*c1-0x21)+0x21;
- *s1 = (*c1 << 8) | *c2;
- *s2 = 1;
- }
-
- return 1;
+ return match;
}
-const char nflags_s[][2] = {"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"};
-const int nflags_code_kddi[] = {0x2549, 0x2546, 0x24c0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7};
-const int nflags_code_sb[] = {0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07, 0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03};
-
int
-mbfiler_unicode2sjis_emoji_kddi(int c, int *s1, int *s2, int *c1, int *c2, mbfl_convert_filter *filter)
+mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
{
int i, match = 0, c1s;
*s1 = 0x27a6 + (c1s - 0x0031);
match = 1;
}
- } else if ((c >= 0x1F1A5 + 0x41 && c <= 0x1F1A5 + 0x5A) &&
- (c1s >= 0x1F1A5 + 0x41 && c1s <= 0x1F1A5 + 0x5A)) {
- for (i=0;i<10;i++) {
- if (c1s == 0x1F1A5 + nflags_s[i][0] && c == 0x1F1A5 + nflags_s[i][1]) {
+ } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
+ for (i=0; i<10; i++) {
+ if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
*s1 = nflags_code_kddi[i];
match = 1;
break;
}
} else {
if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) ||
- (c >= 0x1F1A5 + 0x41 && c<= 0x1F1A5 + 0x5A)) {
+ (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
filter->status = 1;
filter->cache = c;
*s1 = -1;
- return 0;
+ return match;
}
if (c == 0x00A9) {
} else if (c == 0x00AE) {
*s1 = 0x27dd; match = 1;
} else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
- for (i=0; i < sizeof(mb_tbl_uni_kddi2code2_key)/sizeof(int); i++) {
- if (c >0 && mb_tbl_uni_kddi2code2_key[i] == c) {
- *s1 = mb_tbl_uni_kddi2code2_value[i];
- match = 1;
- break;
- }
- }
+ match = mbfilter_conv_seq_tbl(c, s1, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_value,
+ sizeof(mb_tbl_uni_kddi2code2_key)/sizeof(int));
} else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
- for (i=0; i < sizeof(mb_tbl_uni_kddi2code3_key)/sizeof(int); i++) {
- if (c >0 && mb_tbl_uni_kddi2code3_key[i] == c) {
- *s1 = mb_tbl_uni_kddi2code3_value[i];
- match = 1;
- break;
- }
- }
+ match = mbfilter_conv_seq_tbl(c, s1, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_value,
+ sizeof(mb_tbl_uni_kddi2code3_key)/sizeof(int));
}
}
- if (match && *s1 >0) {
- *c1 = *s1/94+0x21;
- *c2 = *s1-94*(*c1-0x21)+0x21;
- *s1 = (*c1 << 8) | *c2;
- *s2 = 1;
- }
-
- return 1;
+ return match;
}
int
-mbfiler_unicode2sjis_emoji_sb(int c, int *s1, int *s2, int *c1, int *c2, mbfl_convert_filter *filter)
+mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
{
int i, match = 0, c1s;
*s1 = 0x2823 + (c1s - 0x0031);
match = 1;
}
- } else if ((c >= 0x1F1A5 + 0x41 && c <= 0x1F1A5 + 0x5A) &&
- (c1s >= 0x1F1A5 + 0x41 && c1s <= 0x1F1A5 + 0x5A)) {
- for (i=0;i<10;i++) {
- if (c1s == 0x1F1A5 + nflags_s[i][0] && c == 0x1F1A5 + nflags_s[i][1]) {
+ } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
+ for (i=0; i<10; i++) {
+ if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
*s1 = nflags_code_sb[i];
match = 1;
break;
}
}
} else {
- if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) ||
- (c >= 0x1F1A5 + 0x41 && c<= 0x1F1A5 + 0x5A)) {
+ if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) || (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
filter->status = 1;
filter->cache = c;
*s1 = -1;
- return 0;
+ return match;
}
if (c == 0x00A9) {
} else if (c == 0x00AE) {
*s1 = 0x2856; match = 1;
} else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
- for (i=0; i < sizeof(mb_tbl_uni_sb2code2_key)/sizeof(int); i++) {
- if (c >0 && mb_tbl_uni_sb2code2_key[i] == c) {
- *s1 = mb_tbl_uni_sb2code2_value[i];
- match = 1;
- break;
- }
- }
+ match = mbfilter_conv_seq_tbl(c, s1, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_value,
+ sizeof(mb_tbl_uni_sb2code2_key)/sizeof(int));
} else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
- for (i=0; i < sizeof(mb_tbl_uni_sb2code3_key)/sizeof(int); i++) {
- if (c >0 && mb_tbl_uni_sb2code3_key[i] == c) {
- *s1 = mb_tbl_uni_sb2code3_value[i];
- match = 1;
- break;
- }
- }
+ match = mbfilter_conv_seq_tbl(c, s1, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_value,
+ sizeof(mb_tbl_uni_sb2code3_key)/sizeof(int));
}
}
- if (match && *s1 >0) {
- *c1 = *s1/94+0x21;
- *c2 = *s1-94*(*c1-0x21)+0x21;
- *s1 = (*c1 << 8) | *c2;
- *s2 = 1;
- }
-
- return 1;
+ return match;
}
/*
if (s >= (94*94) && s < 119*94) {
if (filter->from->no_encoding == mbfl_no_encoding_sjis_docomo) {
- w = mbfiler_sjis_emoji_docomo2unicode(s, &snd);
+ w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi) {
- w = mbfiler_sjis_emoji_kddi2unicode(s, &snd);
+ w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
- w = mbfiler_sjis_emoji_sb2unicode(s, &snd);
+ w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi_pua) {
- mbfiler_sjis_emoji_kddi2unicode_pua(s, &w);
+ mbfilter_conv_map_tbl(s, &w, mbfl_kddi2uni_pua, 6);
} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi_pua_b) {
- mbfiler_sjis_emoji_kddi2unicode_pua_b(s, c1, c, &w);
+ mbfilter_conv_map_tbl(s, &w, mbfl_kddi2uni_pua_b, 8);
} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb_pua) {
- mbfiler_sjis_emoji2unicode_pua(s, &w);
+ mbfilter_conv_map_tbl(s, &w, mbfl_sb2uni_pua, 6);
+ } else if (filter->from->no_encoding == mbfl_no_encoding_sjis_docomo_pua) {
+ mbfilter_conv_map_tbl(s, &w, mbfl_docomo2uni_pua, 4);
}
+
if (w > 0 && snd > 0) {
CK((*filter->output_function)(snd, filter->data));
}
s1 = 0x96; s2 = c;
}
s = (s1 - 0x21)*94 + s2 - 0x21;
- w = mbfiler_sjis_emoji_sb2unicode(s, &snd);
+ w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
if (w > 0 && snd > 0) {
CK((*filter->output_function)(snd, filter->data));
}
mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
{
int c1, c2, s1, s2;
- int sjis_encoded = 0;
s1 = 0;
s2 = 0;
}
}
- if (filter->to->no_encoding == mbfl_no_encoding_sjis_docomo) {
- mbfiler_unicode2sjis_emoji_docomo(c, &s1, &s2, &c1, &c2, filter);
- } else if (filter->to->no_encoding == mbfl_no_encoding_sjis_kddi) {
- mbfiler_unicode2sjis_emoji_kddi(c, &s1, &s2, &c1, &c2, filter);
- } else if (filter->to->no_encoding == mbfl_no_encoding_sjis_sb) {
- mbfiler_unicode2sjis_emoji_sb(c, &s1, &s2, &c1, &c2, filter);
- } else if (filter->to->no_encoding == mbfl_no_encoding_sjis_kddi_pua) {
- mbfiler_unicode_pua2sjis_emoji_kddi(c, &s1, &s2, &c1, &c2);
- } else if (filter->to->no_encoding == mbfl_no_encoding_sjis_docomo_pua ||
- filter->to->no_encoding == mbfl_no_encoding_sjis_kddi_pua_b ||
- filter->to->no_encoding == mbfl_no_encoding_sjis_sb_pua) {
- mbfiler_unicode_pua2sjis_emoji(c, &s1, &s2, &c1, &c2, &sjis_encoded);
+ if ((filter->to->no_encoding == mbfl_no_encoding_sjis_docomo &&
+ mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
+ (filter->to->no_encoding == mbfl_no_encoding_sjis_kddi &&
+ mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
+ (filter->to->no_encoding == mbfl_no_encoding_sjis_sb &&
+ mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 ) ||
+ (filter->to->no_encoding == mbfl_no_encoding_sjis_kddi_pua &&
+ mbfilter_conv_r_map_tbl(c, &s1, mbfl_kddi2uni_pua, 6) > 0) ||
+ (filter->to->no_encoding == mbfl_no_encoding_sjis_docomo_pua &&
+ mbfilter_conv_r_map_tbl(c, &s1, mbfl_docomo2uni_pua, 4) > 0) ||
+ (filter->to->no_encoding == mbfl_no_encoding_sjis_kddi_pua_b &&
+ mbfilter_conv_r_map_tbl(c, &s1, mbfl_kddi2uni_pua_b, 8) > 0) ||
+ (filter->to->no_encoding == mbfl_no_encoding_sjis_sb_pua &&
+ mbfilter_conv_r_map_tbl(c, &s1, mbfl_sb2uni_pua, 6) > 0)) {
+ CODE2JIS(c1,c2,s1,s2);
}
if (filter->status == 1 && filter->cache > 0) {
if (s1 < 0x100) { /* latin or kana */
CK((*filter->output_function)(s1, filter->data));
} else { /* kanji */
- if (sjis_encoded) {
- s2 = s1 & 0xff;
- s1 = (s1 >> 8) & 0xff;
- } else {
- c1 = (s1 >> 8) & 0xff;
- c2 = s1 & 0xff;
- SJIS_ENCODE(c1, c2, s1, s2);
- }
+ c1 = (s1 >> 8) & 0xff;
+ c2 = s1 & 0xff;
+ SJIS_ENCODE(c1, c2, s1, s2);
CK((*filter->output_function)(s1, filter->data));
CK((*filter->output_function)(s2, filter->data));
}
return 0;
}
-static int mbfl_filt_ident_sjis_mobile(int c, mbfl_identify_filter *filter)
-{
- if (filter->status) { /* kanji second char */
- if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */
- filter->flag = 1;
- }
- filter->status = 0;
- } else if (c >= 0 && c < 0x80) { /* latin ok */
- ;
- } else if (c > 0xa0 && c < 0xe0) { /* kana ok */
- ;
- } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
- filter->status = 1;
- } else { /* bad */
- filter->flag = 1;
- }
-
- return c;
-}
-
-
extern const mbfl_encoding mbfl_encoding_sjis_kddi_pua_b;
extern const mbfl_encoding mbfl_encoding_sjis_sb_pua;
-extern const struct mbfl_identify_vtbl vtbl_identify_sjis_mobile;
+extern const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo;
+extern const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi;
+extern const struct mbfl_identify_vtbl vtbl_identify_sjis_sb;
extern const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo;
int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter);
+/* Carrier emoji code -> Unicode. The return value is the converted code
+ * point; *snd, when set to a non-zero value, is a code point the callers
+ * emit before the returned one. */
+int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd);
+int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd);
+int mbfilter_sjis_emoji_sb2unicode(int s, int *snd);
+
+/* Unicode -> carrier emoji code, stored in *s1. These may cache c in the
+ * filter (status/cache) while waiting for the second half of a pair. */
+int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter);
+int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter);
+int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter);
+
+/* NOTE(review): no definition of the three functions below is visible in
+ * this change (the old mbfiler_unicode_pua2sjis_* helpers are deleted and
+ * their callers now use mbfilter_conv_r_map_tbl); confirm whether these
+ * prototypes are still needed. */
+int mbfilter_unicode_pua2sjis_emoji_docomo( int c, int *s1, int *s2, int *c1, int *c2);
+int mbfilter_unicode_pua2sjis_emoji_kddi(int c, int *s1, int *s2, int *c1, int *c2);
+int mbfilter_unicode_pua2sjis_emoji(int c, int *s1, int *s2, int *c1, int *c2, int *sjis_encoded);
+
+/* Table lookup helpers: each returns 1 and stores the result in *w on a
+ * match, or returns 0 without touching *w. */
+int mbfilter_conv_map_tbl(int c, int *w, const int map[][3], int n);
+int mbfilter_conv_r_map_tbl(int c, int *w, const int map[][3], int n);
+int mbfilter_conv_seq_tbl(int c, int *w, const int *key, const int *val, int n);
+
#endif /* MBFL_MBFILTER_SJIS_MOBILE_H */
#include "mbfilter.h"
#include "mbfilter_utf8.h"
-static int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter);
+int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter);
-static const unsigned char mblen_table_utf8[] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1
+/*
+ * 256-entry length table. Entries 0x00-0xBF are 1, 0xC0-0xDF are 2,
+ * 0xE0-0xEF are 3, 0xF0-0xF7 are 4 and 0xF8-0xFF are 1.
+ * NOTE(review): presumably indexed by the first byte of a UTF-8 sequence -
+ * confirm against the users of mbfl_encoding's length table.
+ * No longer static so that mbfilter_utf8_mobile.c can share it through an
+ * extern declaration.
+ */
+const unsigned char mblen_table_utf8[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1
};
static const char *mbfl_encoding_utf8_aliases[] = {"utf8", NULL};
return c;
}
-static int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter)
+int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter)
{
int c1;
--- /dev/null
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter.c
+ * by Rui Hirokawa <hirokawa@php.net> on 8 Aug 2011.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+
+#include "mbfilter_utf8_mobile.h"
+#include "mbfilter_sjis_mobile.h"
+
+extern int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter);
+extern const int mbfl_docomo2uni_pua[4][3];
+extern const int mbfl_kddi2uni_pua[6][3];
+extern const int mbfl_sb2uni_pua[6][3];
+extern const int mbfl_kddi2uni_pua_b[8][3];
+
+extern const unsigned char mblen_table_utf8[];
+
+/* NULL-terminated alias lists, one per carrier-specific UTF-8 variant. */
+static const char *mbfl_encoding_utf8_docomo_aliases[] = {"utf8-mobile#docomo", NULL};
+static const char *mbfl_encoding_utf8_kddi_aliases[] = {"utf8-mobile#kddi", NULL};
+static const char *mbfl_encoding_utf8_kddi_b_aliases[] = {"utf8-mobile#kddi-b", NULL};
+static const char *mbfl_encoding_utf8_sb_aliases[] = {"utf8-mobile#softbank", NULL};
+
+/*
+ * Encoding descriptors for the four carrier-specific UTF-8 variants. They
+ * share the "UTF-8" string, mblen_table_utf8 and the MBFL_ENCTYPE_MBCS
+ * flag, and differ only in encoding id, name and alias list.
+ */
+const mbfl_encoding mbfl_encoding_utf8_docomo = {
+ mbfl_no_encoding_utf8_docomo,
+ "UTF-8-Mobile#DOCOMO",
+ "UTF-8",
+ (const char *(*)[])&mbfl_encoding_utf8_docomo_aliases,
+ mblen_table_utf8,
+ MBFL_ENCTYPE_MBCS
+};
+
+const mbfl_encoding mbfl_encoding_utf8_kddi = {
+ mbfl_no_encoding_utf8_kddi,
+ "UTF-8-Mobile#KDDI",
+ "UTF-8",
+ (const char *(*)[])&mbfl_encoding_utf8_kddi_aliases,
+ mblen_table_utf8,
+ MBFL_ENCTYPE_MBCS
+};
+
+const mbfl_encoding mbfl_encoding_utf8_kddi_b = {
+ mbfl_no_encoding_utf8_kddi_b,
+ "UTF-8-Mobile#KDDI-B",
+ "UTF-8",
+ (const char *(*)[])&mbfl_encoding_utf8_kddi_b_aliases,
+ mblen_table_utf8,
+ MBFL_ENCTYPE_MBCS
+};
+
+const mbfl_encoding mbfl_encoding_utf8_sb = {
+ mbfl_no_encoding_utf8_sb,
+ "UTF-8-Mobile#SOFTBANK",
+ "UTF-8",
+ (const char *(*)[])&mbfl_encoding_utf8_sb_aliases,
+ mblen_table_utf8,
+ MBFL_ENCTYPE_MBCS
+};
+
+/*
+ * Identify-filter vtables for the carrier-specific UTF-8 variants. All four
+ * use the common ctor/dtor and the plain mbfl_filt_ident_utf8 checker; only
+ * the encoding id differs.
+ */
+const struct mbfl_identify_vtbl vtbl_identify_utf8_docomo = {
+ mbfl_no_encoding_utf8_docomo,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_utf8
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_utf8_kddi = {
+ mbfl_no_encoding_utf8_kddi,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_utf8
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_utf8_kddi_b = {
+ mbfl_no_encoding_utf8_kddi_b,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_utf8
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_utf8_sb = {
+ mbfl_no_encoding_utf8_sb,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_utf8
+};
+
+/*
+ * Convert-filter vtables, one decode/encode pair per carrier variant.
+ * Every *_wchar table uses mbfl_filt_conv_utf8_mobile_wchar and every
+ * wchar_* table uses mbfl_filt_conv_wchar_utf8_mobile, with the common
+ * ctor/dtor/flush; the pairs differ only in the encoding ids they name.
+ * The decode function tells the carriers apart at run time by inspecting
+ * filter->from->no_encoding.
+ */
+const struct mbfl_convert_vtbl vtbl_utf8_docomo_wchar = {
+ mbfl_no_encoding_utf8_docomo,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_utf8_mobile_wchar,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_utf8_docomo = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_utf8_docomo,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_utf8_mobile,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_utf8_kddi_wchar = {
+ mbfl_no_encoding_utf8_kddi,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_utf8_mobile_wchar,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_utf8_kddi = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_utf8_kddi,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_utf8_mobile,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_utf8_kddi_b_wchar = {
+ mbfl_no_encoding_utf8_kddi_b,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_utf8_mobile_wchar,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_utf8_kddi_b = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_utf8_kddi_b,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_utf8_mobile,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_utf8_sb_wchar = {
+ mbfl_no_encoding_utf8_sb,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_utf8_mobile_wchar,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_utf8_sb = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_utf8_sb,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_utf8_mobile,
+ mbfl_filt_conv_common_flush
+};
+
+#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
+
+/*
+ * UTF-8 => wchar
+ *
+ * Consumes one input byte (c) per call and decodes UTF-8 incrementally.
+ * filter->status records the position inside a multi-byte sequence
+ * (0x10: 2-byte, expecting 2nd; 0x20/0x21: 3-byte, expecting 2nd/3rd;
+ * 0x30/0x31/0x32: 4-byte, expecting 2nd/3rd/4th; 0: idle) and
+ * filter->cache accumulates the code point bits gathered so far.
+ *
+ * When a sequence completes with a valid value, the value is looked up in
+ * the PUA table that matches filter->from->no_encoding (docomo, kddi,
+ * kddi_b or sb). On a hit it is replaced by the result of the matching
+ * mbfilter_sjis_emoji_*2unicode() call, which may also hand back a second
+ * value in snd; snd (if > 0) is written before the main value.
+ *
+ * Bytes that cannot continue or start a sequence are written as
+ * (value & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH and the decoder
+ * state is reset.
+ *
+ * Returns c, or -1 as soon as the output function reports a negative
+ * result (see CK).
+ */
+int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
+{
+ int s, w = 0, flag = 0;
+ int s1 = 0, c1 = 0, snd = 0;
+
+ if (c < 0x80) {
+ /* single-byte value: passed through unchanged; negative c is not output */
+ if (c >= 0) {
+ CK((*filter->output_function)(c, filter->data));
+ }
+ /* any partially collected sequence is abandoned here without output */
+ filter->status = 0;
+ } else if (c < 0xc0) {
+ /* continuation byte 0x80-0xbf: meaning depends on the current state */
+ int status = filter->status & 0xff;
+ switch (status) {
+ case 0x10: /* 2byte code 2nd char: 0x80-0xbf */
+ case 0x21: /* 3byte code 3rd char: 0x80-0xbf */
+ case 0x32: /* 4byte code 4th char: 0x80-0xbf */
+ /* final byte of the sequence: assemble the value and reset state */
+ filter->status = 0;
+ s = filter->cache | (c & 0x3f);
+ filter->cache = 0;
+ /* accept only values in the range allowed for the sequence length; 3-byte form excludes 0xd800-0xdfff */
+ if ((status == 0x10 && s >= 0x80) ||
+ (status == 0x21 && s >= 0x800 && (s < 0xd800 || s > 0xdfff)) ||
+ (status == 0x32 && s >= 0x10000 && s < 0x200000)) {
+
+ /* carrier-specific remap: only the table matching the source encoding is consulted */
+ if (filter->from->no_encoding == mbfl_no_encoding_utf8_docomo &&
+ mbfilter_conv_r_map_tbl(s, &s1, mbfl_docomo2uni_pua, 4) > 0) {
+ s = mbfilter_sjis_emoji_docomo2unicode(s1, &snd);
+ } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi &&
+ mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua, 6) > 0) {
+ s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd);
+ } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_b &&
+ mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua_b, 8) > 0) {
+ s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd);
+ } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_sb &&
+ mbfilter_conv_r_map_tbl(s, &s1, mbfl_sb2uni_pua, 6) > 0) {
+ s = mbfilter_sjis_emoji_sb2unicode(s1, &snd);
+ }
+
+ /* a second value reported by the emoji helper is emitted ahead of the main one */
+ if (snd > 0) {
+ CK((*filter->output_function)(snd, filter->data));
+ }
+ CK((*filter->output_function)(s, filter->data));
+ } else {
+ /* out-of-range value for this sequence length: report as invalid */
+ w = s & MBFL_WCSGROUP_MASK;
+ flag = 1;
+ }
+ break;
+ case 0x20: /* 3byte code 2nd char: 0:0xa0-0xbf,D:0x80-0x9f,1-C,E-F:0x80-0xbf */
+ s = filter->cache | ((c & 0x3f) << 6);
+ /* c1 = low nibble of the lead byte (top 4 bits of the 16-bit value) */
+ c1 = (s >> 12) & 0xf;
+ if ((c1 == 0x0 && c >= 0xa0) ||
+ (c1 == 0xd && c < 0xa0) ||
+ (c1 > 0x0 && c1 != 0xd)) {
+ filter->cache = s;
+ filter->status++;
+ } else {
+ w = s & MBFL_WCSGROUP_MASK;
+ flag = 1;
+ }
+ break;
+ case 0x31: /* 4byte code 3rd char: 0x80-0xbf */
+ filter->cache |= ((c & 0x3f) << 6);
+ filter->status++;
+ break;
+ case 0x30: /* 4byte code 2nd char: 0:0x90-0xbf,1-3:0x80-0xbf,4:0x80-0x8f */
+ s = filter->cache | ((c & 0x3f) << 12);
+ /* c1 = low 3 bits of the lead byte */
+ c1 = (s >> 18) & 0x7;
+ if ((c1 == 0x0 && c >= 0x90) ||
+ (c1 > 0x0 && c1 < 0x4) ||
+ (c1 == 0x4 && c < 0x90)) {
+ filter->cache = s;
+ filter->status++;
+ } else {
+ w = s & MBFL_WCSGROUP_MASK;
+ flag = 1;
+ }
+ break;
+ default:
+ /* continuation byte with no sequence in progress */
+ w = c & MBFL_WCSGROUP_MASK;
+ flag = 1;
+ break;
+ }
+ } else if (c < 0xc2) { /* invalid: 0xc0,0xc1 */
+ w = c & MBFL_WCSGROUP_MASK;
+ flag = 1;
+ } else if (c < 0xe0) { /* 2byte code first char: 0xc2-0xdf */
+ /* a lead byte is only accepted while idle; otherwise it is reported as invalid */
+ if (filter->status == 0x0) {
+ filter->status = 0x10;
+ filter->cache = (c & 0x1f) << 6;
+ } else {
+ w = c & MBFL_WCSGROUP_MASK;
+ flag = 1;
+ }
+ } else if (c < 0xf0) { /* 3byte code first char: 0xe0-0xef */
+ if (filter->status == 0x0) {
+ filter->status = 0x20;
+ filter->cache = (c & 0xf) << 12;
+ } else {
+ w = c & MBFL_WCSGROUP_MASK;
+ flag = 1;
+ }
+ } else if (c < 0xf5) { /* 4byte code first char: 0xf0-0xf4 */
+ if (filter->status == 0x0) {
+ filter->status = 0x30;
+ filter->cache = (c & 0x7) << 18;
+ } else {
+ w = c & MBFL_WCSGROUP_MASK;
+ flag = 1;
+ }
+ } else {
+ /* 0xf5 and above never start a sequence */
+ w = c & MBFL_WCSGROUP_MASK;
+ flag = 1;
+ }
+
+ /* invalid input: emit the tagged value and return the decoder to idle */
+ if (flag) {
+ w |= MBFL_WCSGROUP_THROUGH;
+ CK((*filter->output_function)(w, filter->data));
+ filter->status = 0;
+ filter->cache = 0;
+ }
+
+ return c;
+}
+
+/*
+ * wchar => UTF-8
+ *
+ * Encodes one code point (c) as UTF-8 through filter->output_function.
+ *
+ * For the carrier-specific targets (docomo, kddi, kddi_b, sb) the code
+ * point is first offered to the matching mbfilter_unicode2sjis_emoji_*()
+ * helper; when that succeeds and the result is found in the carrier's PUA
+ * table via mbfilter_conv_map_tbl(), the mapped value is encoded instead.
+ * If, after that step, filter->status is 1 and filter->cache is positive,
+ * nothing is written for this call.
+ * NOTE(review): presumably the emoji helper is holding the code point back
+ * while waiting for a follow-up one; confirm in the helper.
+ *
+ * Values outside 0..0x10ffff are handed to mbfl_filt_conv_illegal_output()
+ * unless illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
+ *
+ * Returns c (the remapped value when a PUA mapping was applied), or -1 as
+ * soon as an output call reports a negative result (see CK).
+ */
+int mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter)
+{
+	int sjis_code, pua_code;
+	int remapped = 0;
+	int lead, trail, shift;
+
+	/* Not encodable at all: delegate to the illegal-character handler. */
+	if (c < 0 || c >= 0x110000) {
+		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+			CK(mbfl_filt_conv_illegal_output(c, filter));
+		}
+		return c;
+	}
+
+	/* Only the helper and table belonging to the target encoding are tried. */
+	if (filter->to->no_encoding == mbfl_no_encoding_utf8_docomo) {
+		remapped = mbfilter_unicode2sjis_emoji_docomo(c, &sjis_code, filter) > 0 &&
+			mbfilter_conv_map_tbl(sjis_code, &pua_code, mbfl_docomo2uni_pua, 4) > 0;
+	} else if (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi) {
+		remapped = mbfilter_unicode2sjis_emoji_kddi(c, &sjis_code, filter) > 0 &&
+			mbfilter_conv_map_tbl(sjis_code, &pua_code, mbfl_kddi2uni_pua, 6) > 0;
+	} else if (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b) {
+		remapped = mbfilter_unicode2sjis_emoji_kddi(c, &sjis_code, filter) > 0 &&
+			mbfilter_conv_map_tbl(sjis_code, &pua_code, mbfl_kddi2uni_pua_b, 8) > 0;
+	} else if (filter->to->no_encoding == mbfl_no_encoding_utf8_sb) {
+		remapped = mbfilter_unicode2sjis_emoji_sb(c, &sjis_code, filter) > 0 &&
+			mbfilter_conv_map_tbl(sjis_code, &pua_code, mbfl_sb2uni_pua, 6) > 0;
+	}
+
+	if (remapped) {
+		c = pua_code;
+	}
+
+	/* State left behind by the emoji helper says: emit nothing this time. */
+	if (filter->status == 1 && filter->cache > 0) {
+		return c;
+	}
+
+	/* One-byte form. */
+	if (c < 0x80) {
+		CK((*filter->output_function)(c, filter->data));
+		return c;
+	}
+
+	/* Choose the lead byte and the number of continuation bytes. */
+	if (c < 0x800) {
+		lead = ((c >> 6) & 0x1f) | 0xc0;
+		trail = 1;
+	} else if (c < 0x10000) {
+		lead = ((c >> 12) & 0x0f) | 0xe0;
+		trail = 2;
+	} else {
+		lead = ((c >> 18) & 0x07) | 0xf0;
+		trail = 3;
+	}
+
+	CK((*filter->output_function)(lead, filter->data));
+
+	/* Continuation bytes, most significant 6-bit group first. */
+	for (shift = 6 * (trail - 1); shift >= 0; shift -= 6) {
+		CK((*filter->output_function)(((c >> shift) & 0x3f) | 0x80, filter->data));
+	}
+
+	return c;
+}
+
--- /dev/null
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter.c
+ * by Rui Hirokawa <hirokawa@php.net> on 8 Aug 2011.
+ *
+ */
+
+#ifndef MBFL_MBFILTER_UTF8_MOBILE_H
+#define MBFL_MBFILTER_UTF8_MOBILE_H
+
+/* Encoding descriptors for the four UTF-8 mobile variants (docomo, kddi, kddi_b, sb). */
+extern const mbfl_encoding mbfl_encoding_utf8_docomo;
+extern const mbfl_encoding mbfl_encoding_utf8_kddi;
+extern const mbfl_encoding mbfl_encoding_utf8_kddi_b;
+extern const mbfl_encoding mbfl_encoding_utf8_sb;
+
+/* Identify-filter tables, one per variant. */
+extern const struct mbfl_identify_vtbl vtbl_identify_utf8_docomo;
+extern const struct mbfl_identify_vtbl vtbl_identify_utf8_kddi;
+extern const struct mbfl_identify_vtbl vtbl_identify_utf8_kddi_b;
+extern const struct mbfl_identify_vtbl vtbl_identify_utf8_sb;
+
+/* Conversion tables: for each variant, one to wchar and one from wchar. */
+extern const struct mbfl_convert_vtbl vtbl_utf8_docomo_wchar;
+extern const struct mbfl_convert_vtbl vtbl_wchar_utf8_docomo;
+extern const struct mbfl_convert_vtbl vtbl_utf8_kddi_wchar;
+extern const struct mbfl_convert_vtbl vtbl_wchar_utf8_kddi;
+extern const struct mbfl_convert_vtbl vtbl_utf8_kddi_b_wchar;
+extern const struct mbfl_convert_vtbl vtbl_wchar_utf8_kddi_b;
+extern const struct mbfl_convert_vtbl vtbl_utf8_sb_wchar;
+extern const struct mbfl_convert_vtbl vtbl_wchar_utf8_sb;
+
+
+
+/* Per-character filter functions shared by all four variants (UTF-8 => wchar, wchar => UTF-8). */
+int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_UTF8_MOBILE_H */
#include "filters/mbfilter_utf7.h"
#include "filters/mbfilter_utf7imap.h"
#include "filters/mbfilter_utf8.h"
+#include "filters/mbfilter_utf8_mobile.h"
#include "filters/mbfilter_utf16.h"
#include "filters/mbfilter_utf32.h"
#include "filters/mbfilter_byte2.h"
&vtbl_wchar_sjis_sb_pua,
&vtbl_sjis_mac_wchar,
&vtbl_wchar_sjis_mac,
+ &vtbl_utf8_docomo_wchar,
+ &vtbl_wchar_utf8_docomo,
+ &vtbl_utf8_kddi_wchar,
+ &vtbl_wchar_utf8_kddi,
+ &vtbl_utf8_kddi_b_wchar,
+ &vtbl_wchar_utf8_kddi_b,
+ &vtbl_utf8_sb_wchar,
+ &vtbl_wchar_utf8_sb,
&vtbl_euccn_wchar,
&vtbl_wchar_euccn,
&vtbl_cp936_wchar,
#include "filters/mbfilter_utf7.h"
#include "filters/mbfilter_utf7imap.h"
#include "filters/mbfilter_utf8.h"
+#include "filters/mbfilter_utf8_mobile.h"
#include "filters/mbfilter_utf16.h"
#include "filters/mbfilter_utf32.h"
#include "filters/mbfilter_byte2.h"
&mbfl_encoding_sjis_kddi_pua_b,
&mbfl_encoding_sjis_sb_pua,
&mbfl_encoding_sjis_mac,
+ &mbfl_encoding_utf8_docomo,
+ &mbfl_encoding_utf8_kddi,
+ &mbfl_encoding_utf8_kddi_b,
+ &mbfl_encoding_utf8_sb,
&mbfl_encoding_cp932,
&mbfl_encoding_cp51932,
&mbfl_encoding_jis,
mbfl_no_encoding_utf16be,
mbfl_no_encoding_utf16le,
mbfl_no_encoding_utf8,
+ mbfl_no_encoding_utf8_docomo,
+ mbfl_no_encoding_utf8_kddi,
+ mbfl_no_encoding_utf8_kddi_b,
+ mbfl_no_encoding_utf8_sb,
mbfl_no_encoding_utf7,
mbfl_no_encoding_utf7imap,
mbfl_no_encoding_ascii,
#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_euc_jp.h"
#include "filters/mbfilter_euc_jp_win.h"
+#include "filters/mbfilter_utf8_mobile.h"
#include "filters/mbfilter_ascii.h"
#include "filters/mbfilter_koi8r.h"
#include "filters/mbfilter_koi8u.h"
&vtbl_identify_2022jp,
&vtbl_identify_2022jpms,
&vtbl_identify_cp51932,
+ &vtbl_identify_sjis_docomo,
+ &vtbl_identify_sjis_kddi,
+ &vtbl_identify_sjis_sb,
+ &vtbl_identify_utf8_docomo,
+ &vtbl_identify_utf8_kddi,
+ &vtbl_identify_utf8_kddi_b,
+ &vtbl_identify_utf8_sb,
&vtbl_identify_euccn,
&vtbl_identify_cp936,
&vtbl_identify_hz,