From 4cfdd9db417a636b91c90c876e82a0ce1c5b280a Mon Sep 17 00:00:00 2001
From: Christos Zoulas
Date: Sun, 13 Sep 2009 19:02:22 +0000
Subject: [PATCH] add debug for encoding.

---
 src/encoding.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/encoding.c b/src/encoding.c
index dbffd42a..5f32dfa3 100644
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.2 2008/11/06 22:49:08 rrt Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
 #endif /* lint */
 
 #include "magic.h"
@@ -52,6 +52,12 @@ private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
 private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
 
+#ifdef DEBUG_ENCODING
+#define DPRINTF(a) printf a
+#else
+#define DPRINTF(a)
+#endif
+
 /*
  * Try to determine whether text is in some character code we can
  * identify.  Each of these tests, if it succeeds, will leave
@@ -78,12 +84,16 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
 
 	*type = "text";
 	if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
+		DPRINTF(("ascii %zu\n", *ulen));
 		*code = "ASCII";
 		*code_mime = "us-ascii";
 	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
+		DPRINTF(("utf8/bom %zu\n", *ulen));
 		*code = "UTF-8 Unicode (with BOM)";
 		*code_mime = "utf-8";
 	} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
+		DPRINTF(("utf8 %zu\n", *ulen));
+		*code = "UTF-8 Unicode (with BOM)";
 		*code = "UTF-8 Unicode";
 		*code_mime = "utf-8";
 	} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
@@ -94,22 +104,28 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
 			*code = "Big-endian UTF-16 Unicode";
 			*code_mime = "utf-16be";
 		}
+		DPRINTF(("ucs16 %zu\n", *ulen));
 	} else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
+		DPRINTF(("latin1 %zu\n", *ulen));
 		*code = "ISO-8859";
 		*code_mime = "iso-8859-1";
 	} else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
+		DPRINTF(("extended %zu\n", *ulen));
 		*code = "Non-ISO extended-ASCII";
 		*code_mime = "unknown-8bit";
 	} else {
 		from_ebcdic(buf, nbytes, nbuf);
 		if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
+			DPRINTF(("ebcdic %zu\n", *ulen));
 			*code = "EBCDIC";
 			*code_mime = "ebcdic";
 		} else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
+			DPRINTF(("ebcdic/international %zu\n", *ulen));
 			*code = "International EBCDIC";
 			*code_mime = "ebcdic";
 		} else {
 			/* Doesn't look like text at all */
+			DPRINTF(("binary\n"));
 			rv = 0;
 			*type = "binary";
 		}
-- 
2.40.0