AC_FUNC_REALLOC
AC_CHECK_FUNCS([strcasecmp strchr])
+dnl Look for a command-line downloader. FETCH_VIA_FTP is set to the first
+dnl of wget, curl, ncftpget that is found; it is left empty if none is.
+AC_CHECK_PROGS([FETCH_VIA_FTP], [wget curl ncftpget])
+
+dnl A bare "curl" gets -O appended. NOTE(review): presumably so the download
+dnl is saved under its remote file name rather than written to stdout, as the
+dnl other two tools are expected to do by default -- confirm.
+if test "$FETCH_VIA_FTP" = "curl"; then
+ FETCH_VIA_FTP="curl -O"
+fi
+
AC_CONFIG_FILES([Makefile mbfl/Makefile filters/Makefile nls/Makefile])
AC_OUTPUT
-EXTRA_DIST=Makefile.bcc32
+EXTRA_DIST=Makefile.bcc32 mk_eaw_tbl.awk
lib_LTLIBRARIES=libmbfl.la
-libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c
+libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c eaw_table.h
libmbfl_filters_la=../filters/libmbfl_filters.la
libmbfl_nls_la=../nls/libmbfl_nls.la
libmbfl_la_LIBADD=$(libmbfl_filters_la) $(libmbfl_nls_la)
libmbfl_includedir=$(includedir)/mbfl
libmbfl_include_HEADERS=mbfilter.h mbfl_consts.h mbfl_encoding.h mbfl_language.h mbfl_string.h mbfl_convert.h mbfl_ident.h mbfl_memory_device.h mbfl_allocators.h mbfl_defs.h mbfl_filter_output.h mbfilter_pass.h mbfilter_wchar.h mbfilter_8bit.h
+# mbfilter.c includes eaw_table.h, so the table must exist before it is compiled.
+mbfilter.c: eaw_table.h
+
+# Regenerate the width table whenever the generator script changes.
+# The output goes to a temporary file that is renamed only on success: writing
+# straight to $@ would leave an empty or truncated header behind when awk
+# fails, and make would then consider that broken header up to date.
+# NOTE(review): EastAsianWidth.txt is read here but is not a prerequisite, so
+# it has to be fetched beforehand ("make EastAsianWidth.txt"). Presumably that
+# is deliberate, to keep ordinary builds off the network -- confirm.
+eaw_table.h: mk_eaw_tbl.awk
+	$(AWK) -v TABLE_NAME=mbfl_eaw_table -f mk_eaw_tbl.awk EastAsianWidth.txt > $@.tmp || { rm -f $@.tmp; exit 1; }
+	mv $@.tmp $@
+
+# Download the Unicode data file with the tool chosen by configure.
+EastAsianWidth.txt:
+ $(FETCH_VIA_FTP) ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
+
$(libmbfl_filters_la):
$(MAKE) -C `dirname $(libmbfl_filters_la)`
--- /dev/null
+/*
+ * Code point ranges that is_fullwidth() reports as double width.
+ * Both begin and end are inclusive. Entries are listed in ascending order.
+ *
+ * The build rule for eaw_table.h produces this file by running
+ * mk_eaw_tbl.awk (TABLE_NAME=mbfl_eaw_table) over EastAsianWidth.txt, so
+ * manual changes made here are overwritten when the table is regenerated.
+ */
+static const struct {
+ int begin;
+ int end;
+} mbfl_eaw_table[] = {
+ { 0x1100, 0x1159 },
+ { 0x115f, 0x115f },
+ { 0x2329, 0x232a },
+ { 0x2e80, 0x2e99 },
+ { 0x2e9b, 0x2ef3 },
+ { 0x2f00, 0x2fd5 },
+ { 0x2ff0, 0x2ffb },
+ { 0x3000, 0x303e },
+ { 0x3041, 0x3096 },
+ { 0x3099, 0x30ff },
+ { 0x3105, 0x312c },
+ { 0x3131, 0x318e },
+ { 0x3190, 0x31b7 },
+ { 0x31f0, 0x321e },
+ { 0x3220, 0x3243 },
+ { 0x3250, 0x327d },
+ { 0x327f, 0x32fe },
+ { 0x3300, 0x4db5 },
+ { 0x4e00, 0x9fa5 },
+ { 0xa000, 0xa48c },
+ { 0xa490, 0xa4c6 },
+ { 0xac00, 0xd7a3 },
+ { 0xf900, 0xfa2d },
+ { 0xfa30, 0xfa6a },
+ { 0xfe30, 0xfe52 },
+ { 0xfe54, 0xfe66 },
+ { 0xfe68, 0xfe6b },
+ { 0xff01, 0xff60 },
+ { 0xffe0, 0xffe6 },
+ { 0x20000, 0x2fffd },
+ { 0x30000, 0x3fffd }
+};
#include "mbfl_filter_output.h"
#include "mbfilter_pass.h"
+#include "eaw_table.h"
+
/* hex character table "0123456789ABCDEF" */
static char mbfl_hexchar_table[] = {
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46
}
+#include <stdio.h>
/*
* strwidth
*/
-static int
-filter_count_width(int c, void* data)
+/*
+ * Return 1 when code point c falls inside any [begin, end] range of
+ * mbfl_eaw_table (bounds inclusive), 0 otherwise. The table is scanned
+ * linearly from its first entry.
+ */
+static int is_fullwidth(int c)
{
- if (c >= 0x20) {
- if (c < 0x2000 || (c > 0xff60 && c < 0xffa0)) {
- (*(int *)data)++;
- } else {
- (*(int *)data) += 2;
+ int i;
+
+ /* Shortcut for values below the first entry; relies on that entry having the smallest begin. */
+ if (c < mbfl_eaw_table[0].begin) {
+ return 0;
+ }
+
+ for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
+ if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
+ return 1;
}
}
+ return 0;
+}
+
+/*
+ * Output callback for width counting: adds 2 to the int counter that data
+ * points to when c is full width, 1 otherwise, and returns c unchanged.
+ *
+ * NOTE(review): the code this replaces added nothing for c < 0x20; every
+ * such value now counts as width 1 -- confirm that this change is intended.
+ */
+static int
+filter_count_width(int c, void* data)
+{
+ (*(int *)data) += (is_fullwidth(c) ? 2: 1);
return c;
}
break;
default:
if (pc->outchar >= pc->from) {
- if (c >= 0x20) {
- if (c < 0x2000 || (c > 0xff60 && c < 0xffa0)) {
- pc->outwidth++;
- } else {
- pc->outwidth += 2;
- }
- }
+ pc->outwidth += (is_fullwidth(c) ? 2: 1);
if (pc->outwidth > pc->width) {
if (pc->status == 0) {
pc->endpos = pc->device.pos;
--- /dev/null
+#!/usr/bin/awk -f
+#
+# $Id$
+#
+# Description: a script to generate east asian width table.
+#
+# Usage: awk -v TABLE_NAME=<identifier> -f mk_eaw_tbl.awk EastAsianWidth.txt
+#
+# Input records have the form "CODE;PROP" or "FIRST..LAST;PROP", optionally
+# followed by a "#" comment; blanks around the ";" are tolerated. The output
+# is a C table of inclusive { begin, end } ranges covering every code point
+# whose property is W or F. Consecutive W/F records whose code points are
+# contiguous are merged into one range; any other record, or a gap between
+# code points, closes the pending range.
+#
+
+# Convert a string of hexadecimal digits to a number.
+# This is done by hand because evaluating an "0x..." string numerically is
+# not portable: some awk implementations (gawk, for one) yield 0 for it.
+function hex2num(s,    i, n) {
+	n = 0
+	s = toupper(s)
+	for (i = 1; i <= length(s); i++) {
+		n = n * 16 + index("0123456789ABCDEF", substr(s, i, 1)) - 1
+	}
+	return n
+}
+
+# Write out the pending range, if there is one, and mark it as closed.
+function flush_range() {
+	if (prev < 0) {
+		return
+	}
+	if (comma) {
+		print ",\n\t"
+	}
+	printf("{ 0x%04x, 0x%04x }", first, prev)
+	comma = 1
+	prev = -1
+}
+
+# Record the inclusive span [lo, hi]: extend the pending range when lo
+# directly follows it, otherwise close that range and open a new one.
+function add_span(lo, hi) {
+	if (prev >= 0 && lo - prev > 1) {
+		flush_range()
+	}
+	if (prev < 0) {
+		first = lo
+	}
+	prev = hi
+}
+
+BEGIN {
+	# prev < 0 means "no range is currently open".
+	prev = -1
+	comma = 0
+	# All line breaks are written explicitly.
+	ORS = ""
+	print "static const struct {\n\tint begin;\n\tint end;\n} " TABLE_NAME "[] = {\n\t"
+}
+
+{
+	# Drop the trailing comment, then split "range;property[;...]".
+	rec = $0
+	sub(/#.*/, "", rec)
+	if (split(rec, fld, ";") < 2) {
+		next
+	}
+	span = fld[1]
+	prop = fld[2]
+	gsub(/[ \t\r]/, "", span)
+	gsub(/[ \t\r]/, "", prop)
+
+	# Ignore anything that is not "HEX" or "HEX..HEX".
+	if (span !~ /^[0-9a-fA-F]+(\.\.[0-9a-fA-F]+)?$/) {
+		next
+	}
+
+	if (prop != "W" && prop != "F") {
+		flush_range()
+		next
+	}
+
+	sep = index(span, "..")
+	if (sep > 0) {
+		add_span(hex2num(substr(span, 1, sep - 1)), hex2num(substr(span, sep + 2)))
+	} else {
+		add_span(hex2num(span), hex2num(span))
+	}
+}
+
+END {
+	flush_range()
+	print "\n};\n"
+}