#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.2 2008/11/06 22:49:08 rrt Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
#include "magic.h"
private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
+#ifdef DEBUG_ENCODING /* encoding-detection tracing is opt-in at compile time */
+#define DPRINTF(a) printf a /* call as DPRINTF(("fmt", args)); the inner parentheses become printf's argument list */
+#else
+#define DPRINTF(a) /* expands to nothing, so trace calls disappear when DEBUG_ENCODING is not defined */
+#endif
+
/*
* Try to determine whether text is in some character code we can
* identify. Each of these tests, if it succeeds, will leave
*type = "text";
if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("ascii %zu\n", *ulen));
*code = "ASCII";
*code_mime = "us-ascii";
} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
+ DPRINTF(("utf8/bom %zu\n", *ulen));
*code = "UTF-8 Unicode (with BOM)";
*code_mime = "utf-8";
} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
+ DPRINTF(("utf8 %zu\n", *ulen));
+ *code = "UTF-8 Unicode (with BOM)";
*code = "UTF-8 Unicode";
*code_mime = "utf-8";
} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
*code = "Big-endian UTF-16 Unicode";
*code_mime = "utf-16be";
}
+ DPRINTF(("ucs16 %zu\n", *ulen));
} else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("latin1 %zu\n", *ulen));
*code = "ISO-8859";
*code_mime = "iso-8859-1";
} else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("extended %zu\n", *ulen));
*code = "Non-ISO extended-ASCII";
*code_mime = "unknown-8bit";
} else {
from_ebcdic(buf, nbytes, nbuf);
if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("ebcdic %zu\n", *ulen));
*code = "EBCDIC";
*code_mime = "ebcdic";
} else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("ebcdic/international %zu\n", *ulen));
*code = "International EBCDIC";
*code_mime = "ebcdic";
} else { /* Doesn't look like text at all */
+ DPRINTF(("binary\n"));
rv = 0;
*type = "binary";
}