-.\" $File: file.man,v 1.115 2015/05/29 14:27:31 christos Exp $
+.\" $File: file.man,v 1.116 2015/06/03 18:21:24 christos Exp $
.Dd June 3, 2015
.Dt FILE __CSECTION__
.Os
and printing \e012- between entries is clumsy and complicated; refactor
and centralize.
.Pp
+Some of the encoding logic is hard-coded in encoding.c and can be moved
+to the magic files if we had a !:charset annotation
+.Pp
Continue to squash all magic bugs.
See Debian BTS for a good source.
.Pp
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.10 2014/09/11 12:08:52 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.11 2015/01/24 23:22:25 christos Exp $")
#endif /* lint */
#include "magic.h"
private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
size_t *);
+private int looks_utf7(const unsigned char *, size_t, unichar *, size_t *);
private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
}
if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
- DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
- *code = "ASCII";
- *code_mime = "us-ascii";
+ if (looks_utf7(buf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("utf-7 %" SIZE_T_FORMAT "u\n", *ulen));
+ *code = "UTF-7 Unicode";
+ *code_mime = "utf-7";
+ } else {
+ DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
+ *code = "ASCII";
+ *code_mime = "us-ascii";
+ }
} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
*code = "UTF-8 Unicode (with BOM)";
return -1;
}
+private int
+looks_utf7(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+{
+ if (ubuf)
+ *ulen = 0;
+ if (nbytes > 4 && buf[0] == '+' && buf[1] == '/' && buf[2] == 'v')
+ switch (buf[3]) {
+ case '8':
+ case '9':
+ case '+':
+ case '/':
+ return 1;
+ default:
+ return -1;
+ }
+ else
+ return -1;
+}
+
private int
looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
size_t *ulen)