From b4a223333c38a5e21b6e287a803d010b67d0881f Mon Sep 17 00:00:00 2001 From: Reuben Thomas Date: Sun, 24 Feb 2008 01:16:08 +0000 Subject: [PATCH] Make looks_utf8 a global function, file_looks_utf8 --- src/ascmagic.c | 26 +++++++++++++++----------- src/file.h | 6 +++++- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/ascmagic.c b/src/ascmagic.c index d7b0108c..d8e2eaa7 100644 --- a/src/ascmagic.c +++ b/src/ascmagic.c @@ -49,11 +49,9 @@ #include "names.h" #ifndef lint -FILE_RCSID("@(#)$File: ascmagic.c,v 1.59 2008/02/11 00:19:29 rrt Exp $") +FILE_RCSID("@(#)$File: ascmagic.c,v 1.60 2008/02/24 01:16:08 rrt Exp $") #endif /* lint */ -typedef unsigned long unichar; - #define MAXLINELEN 300 /* longest sane line length */ #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \ || (x) == 0x85 || (x) == '\f') @@ -61,7 +59,7 @@ typedef unsigned long unichar; private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *); private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *, size_t *); -private int looks_utf8(const unsigned char *, size_t, unichar *, size_t *); +protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *); private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *); private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *); private int looks_extended(const unsigned char *, size_t, unichar *, size_t *); @@ -124,7 +122,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) code = "UTF-8 Unicode (with BOM)"; code_mime = "utf-8"; type = "text"; - } else if (looks_utf8(buf, nbytes, ubuf, &ulen) > 1) { + } else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) { code = "UTF-8 Unicode"; code_mime = "utf-8"; type = "text"; @@ -508,16 +506,20 @@ looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf, * 0: uses odd control characters, so doesn't look like text * 1: 7-bit text * 2: definitely UTF-8 text (valid high-bit set bytes) + * + * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen; + * ubuf must be big enough! */ -private int -looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) +protected int +file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) { size_t i; int n; unichar c; int gotone = 0, ctrl = 0; - *ulen = 0; + if (ubuf) + *ulen = 0; for (i = 0; i < nbytes; i++) { if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ @@ -529,7 +531,8 @@ looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) if (text_chars[buf[i]] != T) ctrl = 1; - ubuf[(*ulen)++] = buf[i]; + if (ubuf) + ubuf[(*ulen)++] = buf[i]; } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */ return -1; } else { /* 11xxxxxx begins UTF-8 */ @@ -564,7 +567,8 @@ looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) c = (c << 6) + (buf[i] & 0x3f); } - ubuf[(*ulen)++] = c; + if (ubuf) + ubuf[(*ulen)++] = c; gotone = 1; } } @@ -582,7 +586,7 @@ looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) { if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf) - return looks_utf8(buf + 3, nbytes - 3, ubuf, ulen); + return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen); else return -1; } diff --git a/src/file.h b/src/file.h index 4fafffa8..c4c64a5f 100644 --- a/src/file.h +++ b/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.100 2008/02/24 01:13:13 rrt Exp $ + * @(#)$File: file.h,v 1.101 2008/02/24 01:16:08 rrt Exp $ */ #ifndef __file_h__ @@ -312,6 +312,9 @@ struct magic_set { union VALUETYPE ms_value; /* either number or string */ }; +/* Type for Unicode characters */ +typedef unsigned long unichar; + struct stat; protected const char *file_fmttime(uint32_t, int); protected int file_buffer(struct magic_set *, int, const char *, const void *, @@ -343,6 +346,7 @@ protected size_t file_mbswidth(const char *); protected const char *file_getbuffer(struct magic_set *); protected ssize_t sread(int, void *, size_t, int); protected int file_check_mem(struct magic_set *, unsigned int); +protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *); #ifndef COMPILE_ONLY extern const char *file_names[]; -- 2.50.1