add debug for encoding.

author Christos Zoulas <christos@zoulas.com>

Sun, 13 Sep 2009 19:02:22 +0000 (19:02 +0000)

committer Christos Zoulas <christos@zoulas.com>

Sun, 13 Sep 2009 19:02:22 +0000 (19:02 +0000)
author Christos Zoulas <christos@zoulas.com>
Sun, 13 Sep 2009 19:02:22 +0000 (19:02 +0000)
committer Christos Zoulas <christos@zoulas.com>
Sun, 13 Sep 2009 19:02:22 +0000 (19:02 +0000)
diff --git a/src/encoding.c b/src/encoding.c

index dbffd42a4914947f15ea9ab9503d27c872d2dc48..5f32dfa3759b0b59cf0a231145189ba6d1350ab0 100644 (file)
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -35,7 +35,7 @@
  #include "file.h"
  
  #ifndef        lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.2 2008/11/06 22:49:08 rrt Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
  #endif /* lint */
  
  #include "magic.h"
@@ -52,6 +52,12 @@ private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
  private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
  private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
  
+#ifdef DEBUG_ENCODING
+#define DPRINTF(a) printf a /* call as DPRINTF(("fmt", args)); the inner parentheses become printf's argument list */
+#else
+#define DPRINTF(a) /* expands to nothing when DEBUG_ENCODING is not defined */
+#endif /* DEBUG_ENCODING */
+
  /*
   * Try to determine whether text is in some character code we can
   * identify.  Each of these tests, if it succeeds, will leave
@@ -78,12 +84,16 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
  
         *type = "text";
         if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
+               DPRINTF(("ascii %zu\n", *ulen));
                 *code = "ASCII";
                 *code_mime = "us-ascii";
         } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
+               DPRINTF(("utf8/bom %zu\n", *ulen));
                 *code = "UTF-8 Unicode (with BOM)";
                 *code_mime = "utf-8";
         } else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
+               DPRINTF(("utf8 %zu\n", *ulen));
+               *code = "UTF-8 Unicode (with BOM)";
                 *code = "UTF-8 Unicode";
                 *code_mime = "utf-8";
         } else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
@@ -94,22 +104,28 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
                         *code = "Big-endian UTF-16 Unicode";
                         *code_mime = "utf-16be";
                 }
+               DPRINTF(("ucs16 %zu\n", *ulen));
         } else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
+               DPRINTF(("latin1 %zu\n", *ulen));
                 *code = "ISO-8859";
                 *code_mime = "iso-8859-1";
         } else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
+               DPRINTF(("extended %zu\n", *ulen));
                 *code = "Non-ISO extended-ASCII";
                 *code_mime = "unknown-8bit";
         } else {
                 from_ebcdic(buf, nbytes, nbuf);
  
                 if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
+                       DPRINTF(("ebcdic %zu\n", *ulen));
                         *code = "EBCDIC";
                         *code_mime = "ebcdic";
                 } else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
+                       DPRINTF(("ebcdic/international %zu\n", *ulen));
                         *code = "International EBCDIC";
                         *code_mime = "ebcdic";
                 } else { /* Doesn't look like text at all */
+                       DPRINTF(("binary\n"));
                         rv = 0;
                         *type = "binary";
                 }
author	Christos Zoulas <christos@zoulas.com>
	Sun, 13 Sep 2009 19:02:22 +0000 (19:02 +0000)
committer	Christos Zoulas <christos@zoulas.com>
	Sun, 13 Sep 2009 19:02:22 +0000 (19:02 +0000)