-.\" $File: magic.man,v 1.75 2012/10/30 23:22:57 christos Exp $
-.Dd November 7, 2012
+.\" $File: magic.man,v 1.76 2012/11/07 20:29:27 christos Exp $
+.Dd January 7, 2013
.Dt MAGIC __FSECTION__
.Os
.\" install as magic.4 on USG, magic.5 on V7, Berkeley and Linux systems.
no other matches.
.El
.Pp
+For compatibility with the Single
+.Ux
+Standard, the type specifiers
+.Dv dC
+and
+.Dv d1
+are equivalent to
+.Dv byte ,
+the type specifiers
+.Dv uC
+and
+.Dv u1
+are equivalent to
+.Dv ubyte ,
+the type specifiers
+.Dv dS
+and
+.Dv d2
+are equivalent to
+.Dv short ,
+the type specifiers
+.Dv uS
+and
+.Dv u2
+are equivalent to
+.Dv ushort ,
+the type specifiers
+.Dv dI ,
+.Dv dL ,
+and
+.Dv d4
+are equivalent to
+.Dv long ,
+the type specifiers
+.Dv uI ,
+.Dv uL ,
+and
+.Dv u4
+are equivalent to
+.Dv ulong ,
+the type specifier
+.Dv d8
+is equivalent to
+.Dv quad ,
+the type specifier
+.Dv u8
+is equivalent to
+.Dv uquad ,
+and the type specifier
+.Dv s
+is equivalent to
+.Dv string .
+.Pp
Each top-level magic pattern (see below for an explanation of levels)
is classified as text or binary according to the types used.
Types
.Dv melong ,
.Dv short ,
.Dv beshort ,
-.Dv leshort ,
-.Dv date ,
-.Dv bedate ,
-.Dv medate ,
-.Dv ledate ,
-.Dv beldate ,
-.Dv leldate ,
and
-.Dv meldate
-are system-dependent; perhaps they should be specified as a number
-of bytes (2B, 4B, etc),
-since the files being recognized typically come from
-a system on which the lengths are invariant.
+.Dv leshort
+do not depend on the length of the C data types
+.Dv short
+and
+.Dv long
+on the platform, even though the Single
+.Ux
+Specification implies that they do. However, as OS X Mountain Lion has
+passed the Single
+.Ux
+Specification validation suite, and supplies a version of
+.Xr file __CSECTION__
+in which they do not depend on the sizes of the C data types and that is
+built for a 64-bit environment in which
+.Dv long
+is 8 bytes rather than 4 bytes, presumably the validation suite does not
+test whether, for example
+.Dv long
+refers to an item with the same size as the C data type
+.Dv long .
+There should probably be
+.Dv type
+names
+.Dv int8 ,
+.Dv uint8 ,
+.Dv int16 ,
+.Dv uint16 ,
+.Dv int32 ,
+.Dv uint32 ,
+.Dv int64 ,
+and
+.Dv uint64 ,
+and specified-byte-order variants of them,
+to make it clearer that those types have specified widths.
.\"
.\" From: guy@sun.uucp (Guy Harris)
.\" Newsgroups: net.bugs.usg
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.184 2013/01/07 18:06:40 christos Exp $")
+FILE_RCSID("@(#)$File: apprentice.c,v 1.185 2013/01/07 18:15:15 christos Exp $")
#endif /* lint */
#include "magic.h"
}
#endif /* COMPILE_ONLY */
+/*
+ * XXX - the actual Single UNIX Specification says that "long" means "long",
+ * as in the C data type, but we treat it as meaning "4-byte integer".
+ * Given that the OS X version of file 5.04 did the same, I guess that passes
+ * the actual test; having "long" be dependent on how big a "long" is on
+ * the machine running "file" is silly.
+ */
static const struct type_tbl_s {
const char name[16];
const size_t len;
{ XX("qwdate"), FILE_QWDATE, FILE_FMT_STR },
{ XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR },
{ XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR },
+ { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
+};
+
+/*
+ * These are not types, and cannot be preceded by "u" to make them
+ * unsigned.
+ */
+static const struct type_tbl_s special_tbl[] = {
{ XX("name"), FILE_NAME, FILE_FMT_STR },
{ XX("use"), FILE_USE, FILE_FMT_STR },
{ XX_NULL, FILE_INVALID, FILE_FMT_NONE },
+};
# undef XX
# undef XX_NULL
-};
private int
-get_type(const char *l, const char **t)
+get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
{
const struct type_tbl_s *p;
- for (p = type_tbl; p->len; p++) {
+ for (p = tbl; p->len; p++) {
if (strncmp(l, p->name, p->len) == 0) {
if (t)
*t = l + p->len;
return p->type;
}
+private int
+get_standard_integer_type(const char *l, const char **t)
+{
+ int type;
+ unsigned long length;
+ char *end;
+
+ if (isalpha((unsigned char)l[1])) {
+ switch (l[1]) {
+ case 'C':
+ /* "dC" and "uC" */
+ type = FILE_BYTE;
+ l += 2;
+ break;
+ case 'S':
+ /* "dS" and "uS" */
+ type = FILE_SHORT;
+ l += 2;
+ break;
+ case 'I':
+ case 'L':
+ /*
+ * "dI", "dL", "uI", and "uL".
+ *
+ * XXX - the actual Single UNIX Specification says
+ * that "L" means "long", as in the C data type,
+ * but we treat it as meaning "4-byte integer".
+ * Given that the OS X version of file 5.04 did
+ * the same, I guess that passes the actual SUS
+ * validation suite; having "dL" be dependent on
+ * how big a "long" is on the machine running
+ * "file" is silly.
+ */
+ type = FILE_LONG;
+ l += 2;
+ break;
+ case 'Q':
+ /* "dQ" and "uQ" */
+ type = FILE_QUAD;
+ l += 2;
+ break;
+ default:
+ /* "d{anything else}", "u{anything else}" */
+ return FILE_INVALID;
+ }
+ } else if (isdigit((unsigned char)l[1])) {
+ /* "d{num}" and "u{num}" */
+ length = strtoul(l + 1, &end, 10);
+ if (end != l + 2 || (*end && !isspace((unsigned char)*end)))
+ return FILE_INVALID;
+ l += 2;
+ switch (length) {
+ case 1:
+ type = FILE_BYTE;
+ break;
+ case 2:
+ type = FILE_SHORT;
+ break;
+ case 4:
+ type = FILE_LONG;
+ break;
+ case 8:
+ type = FILE_QUAD;
+ break;
+ default:
+ /* XXX - what about 3, 5, 6, or 7? */
+ return FILE_INVALID;
+ }
+ } else {
+ /*
+ * "d" or "u" by itself.
+ */
+ type = FILE_LONG;
+ ++l;
+ }
+ if (t)
+ *t = l;
+ return type;
+}
+
private void
init_file_tables(void)
{
mlist_free(ms->mlist[i]);
ms->mlist[i] = NULL;
}
- file_error(ms, 0, "could not find any magic files!");
+ file_error(ms, 0, "could not find any valid magic files!");
return -1;
}
cont_level = 0;
+ /*
+ * Parse the offset.
+ */
while (*l == '>') {
++l; /* step over */
cont_level++;
EATAB;
#endif
- if (*l == 'u' && (l[1] != 's' || l[2] != 'e')) {
- ++l;
+ /*
+ * Parse the type.
+ */
+ if (*l == 'u') {
+ /*
+ * Try it as a keyword type prefixed by "u"; match what
+ * follows the "u". If that fails, try it as an SUS
+ * integer type. In either case, it's unsigned.
+ */
m->flag |= UNSIGNED;
+ m->type = get_type(type_tbl, l + 1, &l);
+ if (m->type == FILE_INVALID) {
+ /*
+ * Not a keyword type; parse it as an SUS type,
+ * 'u' possibly followed by a number or C/S/L.
+ */
+ m->type = get_standard_integer_type(l, &l);
+ }
+ } else {
+ /*
+ * Try it as a keyword type. If that fails, try it as
+ * an SUS integer type if it begins with "d" or as an
+ * SUS string type if it begins with "s". In any case,
+ * it's not unsigned.
+ */
+ m->type = get_type(type_tbl, l, &l);
+ if (m->type == FILE_INVALID) {
+ /*
+ * Not a keyword type; parse it as an SUS type,
+ * either 'd' possibly followed by a number or
+ * C/S/L, or just 's'.
+ */
+ if (*l == 'd')
+ m->type = get_standard_integer_type(l, &l);
+ else if (*l == 's' && !isalpha((unsigned char)l[1])) {
+ m->type = FILE_STRING;
+ ++l;
+ }
+ }
}
- m->type = get_type(l, &l);
+ if (m->type == FILE_INVALID) {
+ /* Not found - try it as a special keyword. */
+ m->type = get_type(special_tbl, l, &l);
+ }
+
if (m->type == FILE_INVALID) {
if (ms->flags & MAGIC_CHECK)
file_magwarn(ms, "type `%s' invalid", l);