granicus.if.org Git - file/commitdiff
add debug for encoding.
author Christos Zoulas <christos@zoulas.com>
Sun, 13 Sep 2009 19:02:22 +0000 (19:02 +0000)
committer Christos Zoulas <christos@zoulas.com>
Sun, 13 Sep 2009 19:02:22 +0000 (19:02 +0000)
src/encoding.c

index dbffd42a4914947f15ea9ab9503d27c872d2dc48..5f32dfa3759b0b59cf0a231145189ba6d1350ab0 100644 (file)
@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef        lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.2 2008/11/06 22:49:08 rrt Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
 #endif /* lint */
 
 #include "magic.h"
@@ -52,6 +52,12 @@ private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
 private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
 
+#ifdef DEBUG_ENCODING
+#define DPRINTF(a) printf a
+#else
+#define DPRINTF(a)
+#endif
+
 /*
  * Try to determine whether text is in some character code we can
  * identify.  Each of these tests, if it succeeds, will leave
@@ -78,12 +84,16 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
 
        *type = "text";
        if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
+               DPRINTF(("ascii %zu\n", *ulen));
                *code = "ASCII";
                *code_mime = "us-ascii";
        } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
+               DPRINTF(("utf8/bom %zu\n", *ulen));
                *code = "UTF-8 Unicode (with BOM)";
                *code_mime = "utf-8";
        } else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
+               DPRINTF(("utf8 %zu\n", *ulen));
+               *code = "UTF-8 Unicode (with BOM)";
                *code = "UTF-8 Unicode";
                *code_mime = "utf-8";
        } else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
@@ -94,22 +104,28 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
                        *code = "Big-endian UTF-16 Unicode";
                        *code_mime = "utf-16be";
                }
+               DPRINTF(("ucs16 %zu\n", *ulen));
        } else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
+               DPRINTF(("latin1 %zu\n", *ulen));
                *code = "ISO-8859";
                *code_mime = "iso-8859-1";
        } else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
+               DPRINTF(("extended %zu\n", *ulen));
                *code = "Non-ISO extended-ASCII";
                *code_mime = "unknown-8bit";
        } else {
                from_ebcdic(buf, nbytes, nbuf);
 
                if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
+                       DPRINTF(("ebcdic %zu\n", *ulen));
                        *code = "EBCDIC";
                        *code_mime = "ebcdic";
                } else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
+                       DPRINTF(("ebcdic/international %zu\n", *ulen));
                        *code = "International EBCDIC";
                        *code_mime = "ebcdic";
                } else { /* Doesn't look like text at all */
+                       DPRINTF(("binary\n"));
                        rv = 0;
                        *type = "binary";
                }