#include "names.h"
#ifndef lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.59 2008/02/11 00:19:29 rrt Exp $")
+FILE_RCSID("@(#)$File: ascmagic.c,v 1.60 2008/02/24 01:16:08 rrt Exp $")
#endif /* lint */
-typedef unsigned long unichar;
-
#define MAXLINELEN 300 /* longest sane line length */
#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
|| (x) == 0x85 || (x) == '\f')
private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
size_t *);
-private int looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
+protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
code = "UTF-8 Unicode (with BOM)";
code_mime = "utf-8";
type = "text";
- } else if (looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
+ } else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
code = "UTF-8 Unicode";
code_mime = "utf-8";
type = "text";
* 0: uses odd control characters, so doesn't look like text
* 1: 7-bit text
* 2: definitely UTF-8 text (valid high-bit set bytes)
+ *
+ * If ubuf is non-NULL on entry, the decoded text is stored in ubuf and its
+ * length in *ulen; the caller must ensure ubuf is big enough!
*/
-private int
-looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+protected int
+file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
{
size_t i;
int n;
unichar c;
int gotone = 0, ctrl = 0;
- *ulen = 0;
+ if (ubuf)
+ *ulen = 0;
for (i = 0; i < nbytes; i++) {
if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
if (text_chars[buf[i]] != T)
ctrl = 1;
- ubuf[(*ulen)++] = buf[i];
+ if (ubuf)
+ ubuf[(*ulen)++] = buf[i];
} else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
return -1;
} else { /* 11xxxxxx begins UTF-8 */
c = (c << 6) + (buf[i] & 0x3f);
}
- ubuf[(*ulen)++] = c;
+ if (ubuf)
+ ubuf[(*ulen)++] = c;
gotone = 1;
}
}
size_t *ulen)
{
if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
- return looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
+ return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
else
return -1;
}
*/
/*
* file.h - definitions for file(1) program
- * @(#)$File: file.h,v 1.100 2008/02/24 01:13:13 rrt Exp $
+ * @(#)$File: file.h,v 1.101 2008/02/24 01:16:08 rrt Exp $
*/
#ifndef __file_h__
union VALUETYPE ms_value; /* either number or string */
};
+/* Type for Unicode characters */
+typedef unsigned long unichar;
+
struct stat;
protected const char *file_fmttime(uint32_t, int);
protected int file_buffer(struct magic_set *, int, const char *, const void *,
protected const char *file_getbuffer(struct magic_set *);
protected ssize_t sread(int, void *, size_t, int);
protected int file_check_mem(struct magic_set *, unsigned int);
+protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
#ifndef COMPILE_ONLY
extern const char *file_names[];