From: Christos Zoulas Date: Tue, 8 Jan 2013 01:37:01 +0000 (+0000) Subject: Implement [du]{1,2,4,8} (Guy Harris) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=953709bb84439b59a250efb3b7b141d6d692cb92;p=file Implement [du]{1,2,4,8} (Guy Harris) --- diff --git a/ChangeLog b/ChangeLog index b34d1b1e..6d0cf969 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2013-01-07 11:20 Christos Zoulas + + * Add SuS d{,1,2,4,8}, u{,1,2,4,8} and document + what long, int, short, etc is (Guy Harris) + 2013-01-06 11:20 Christos Zoulas * add magic_version function and constant diff --git a/doc/magic.man b/doc/magic.man index ee4282ca..f7595cb6 100644 --- a/doc/magic.man +++ b/doc/magic.man @@ -1,5 +1,5 @@ -.\" $File: magic.man,v 1.76 2012/11/07 20:29:27 christos Exp $ -.Dd November 7, 2012 +.\" $File: magic.man,v 1.77 2013/01/08 01:37:01 christos Exp $ +.Dd January 7, 2013 .Dt MAGIC __FSECTION__ .Os .\" install as magic.4 on USG, magic.5 on V7, Berkeley and Linux systems. @@ -265,6 +265,59 @@ This is intended to be used with the test no other matches. .El .Pp +For compatibility with the Single +.Ux +Standard, the type specifiers +.Dv dC +and +.Dv d1 +are equivalent to +.Dv byte , +the type specifiers +.Dv uC +and +.Dv u1 +are equivalent to +.Dv ubyte , +the type specifiers +.Dv dS +and +.Dv d2 +are equivalent to +.Dv short , +the type specifiers +.Dv uS +and +.Dv u2 +are equivalent to +.Dv ushort , +the type specifiers +.Dv dI , +.Dv dL , +and +.Dv d4 +are equivalent to +.Dv long , +the type specifiers +.Dv uI , +.Dv uL , +and +.Dv u4 +are equivalent to +.Dv ulong , +the type specifier +.Dv d8 +is equivalent to +.Dv quad , +the type specifier +.Dv u8 +is equivalent to +.Dv uquad , +and the type specifier +.Dv s +is equivalent to +.Dv string . +.Pp Each top-level magic pattern (see below for an explanation of levels) is classified as text or binary according to the types used. Types @@ -570,19 +623,41 @@ The formats .Dv melong , .Dv short , .Dv beshort , -.Dv leshort , -.Dv date , -.Dv bedate , -.Dv medate , -.Dv ledate , -.Dv beldate , -.Dv leldate , and -.Dv meldate -are system-dependent; perhaps they should be specified as a number -of bytes (2B, 4B, etc), -since the files being recognized typically come from -a system on which the lengths are invariant. +.Dv leshort +do not depend on the length of the C data types +.Dv short +and +.Dv long +on the platform, even though the Single +.Ux +Specification implies that they do. However, as OS X Mountain Lion has +passed the Single +.Ux +Specification validation suite, and supplies a version of +.Xr file __CSECTION__ +in which they do not depend on the sizes of the C data types and that is +built for a 64-bit environment in which +.Dv long +is 8 bytes rather than 4 bytes, presumably the validation suite does not +test whether, for example +.Dv long +refers to an item with the same size as the C data type +.Dv long . +There should probably be +.Dv type +names +.Dv int8 , +.Dv uint8 , +.Dv int16 , +.Dv uint16 , +.Dv int32 , +.Dv uint32 , +.Dv int64 , +and +.Dv uint64 , +and specified-byte-order variants of them, +to make it clearer that those types have specified widths. .\" .\" From: guy@sun.uucp (Guy Harris) .\" Newsgroups: net.bugs.usg diff --git a/src/apprentice.c b/src/apprentice.c index e431a676..2a22f87f 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: apprentice.c,v 1.185 2013/01/07 18:15:15 christos Exp $") +FILE_RCSID("@(#)$File: apprentice.c,v 1.186 2013/01/08 01:37:01 christos Exp $") #endif /* lint */ #include "magic.h" @@ -176,6 +176,13 @@ main(int argc, char *argv[]) } #endif /* COMPILE_ONLY */ +/* + * XXX - the actual Single UNIX Specification says that "long" means "long", + * as in the C data type, but we treat it as meaning "4-byte integer". + * Given that the OS X version of file 5.04 did the same, I guess that passes + * the actual test; having "long" be dependent on how big a "long" is on + * the machine running "file" is silly. + */ static const struct type_tbl_s { const char name[16]; const size_t len; @@ -228,19 +235,27 @@ static const struct type_tbl_s { { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, + { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, +}; + +/* + * These are not types, and cannot be preceded by "u" to make them + * unsigned. + */ +static const struct type_tbl_s special_tbl[] = { { XX("name"), FILE_NAME, FILE_FMT_STR }, { XX("use"), FILE_USE, FILE_FMT_STR }, { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, +}; # undef XX # undef XX_NULL -}; private int -get_type(const char *l, const char **t) +get_type(const struct type_tbl_s *tbl, const char *l, const char **t) { const struct type_tbl_s *p; - for (p = type_tbl; p->len; p++) { + for (p = tbl; p->len; p++) { if (strncmp(l, p->name, p->len) == 0) { if (t) *t = l + p->len; @@ -250,6 +265,86 @@ get_type(const char *l, const char **t) return p->type; } +private int +get_standard_integer_type(const char *l, const char **t) +{ + int type; + unsigned long length; + char *end; + + if (isalpha((unsigned char)l[1])) { + switch (l[1]) { + case 'C': + /* "dC" and "uC" */ + type = FILE_BYTE; + l += 2; + break; + case 'S': + /* "dS" and "uS" */ + type = FILE_SHORT; + l += 2; + break; + case 'I': + case 'L': + /* + * "dI", "dL", "uI", and "uL". + * + * XXX - the actual Single UNIX Specification says + * that "L" means "long", as in the C data type, + * but we treat it as meaning "4-byte integer". + * Given that the OS X version of file 5.04 did + * the same, I guess that passes the actual SUS + * validation suite; having "dL" be dependent on + * how big a "long" is on the machine running + * "file" is silly. + */ + type = FILE_LONG; + l += 2; + break; + case 'Q': + /* "dQ" and "uQ" */ + type = FILE_QUAD; + l += 2; + break; + default: + /* "d{anything else}", "u{anything else}" */ + return FILE_INVALID; + } + } else if (isdigit((unsigned char)l[1])) { + /* "d{num}" and "u{num}" */ + length = strtoul(l + 1, &end, 10); + if (end != l + 2 || (*end && !isspace((unsigned char)*end))) + return FILE_INVALID; + l += 2; + switch (length) { + case 1: + type = FILE_BYTE; + break; + case 2: + type = FILE_SHORT; + break; + case 4: + type = FILE_LONG; + break; + case 8: + type = FILE_QUAD; + break; + default: + /* XXX - what about 3, 5, 6, or 7? */ + return FILE_INVALID; + } + } else { + /* + * "d" or "u" by itself. + */ + type = FILE_LONG; + ++l; + } + if (t) + *t = l; + return type; +} + private void init_file_tables(void) { @@ -485,7 +580,7 @@ file_apprentice(struct magic_set *ms, const char *fn, int action) mlist_free(ms->mlist[i]); ms->mlist[i] = NULL; } - file_error(ms, 0, "could not find any magic files!"); + file_error(ms, 0, "could not find any valid magic files!"); return -1; } @@ -1362,6 +1457,9 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line, cont_level = 0; + /* + * Parse the offset. + */ while (*l == '>') { ++l; /* step over */ cont_level++; @@ -1530,12 +1628,52 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line, EATAB; #endif - if (*l == 'u' && (l[1] != 's' || l[2] != 'e')) { - ++l; + /* + * Parse the type. + */ + if (*l == 'u') { + /* + * Try it as a keyword type prefixed by "u"; match what + * follows the "u". If that fails, try it as an SUS + * integer type. In either case, it's unsigned. + */ m->flag |= UNSIGNED; + m->type = get_type(type_tbl, l + 1, &l); + if (m->type == FILE_INVALID) { + /* + * Not a keyword type; parse it as an SUS type, + * 'u' possibly followed by a number or C/S/L. + */ + m->type = get_standard_integer_type(l, &l); + } + } else { + /* + * Try it as a keyword type. If that fails, try it as + * an SUS integer type if it begins with "d" or as an + * SUS string type if it begins with "s". In any case, + * it's not unsigned. + */ + m->type = get_type(type_tbl, l, &l); + if (m->type == FILE_INVALID) { + /* + * Not a keyword type; parse it as an SUS type, + * either 'd' possibly followed by a number or + * C/S/L, or just 's'. + */ + if (*l == 'd') + m->type = get_standard_integer_type(l, &l); + else if (*l == 's' && !isalpha((unsigned char)l[1])) { + m->type = FILE_STRING; + ++l; + } + } } - m->type = get_type(l, &l); + if (m->type == FILE_INVALID) { + /* Not found - try it as a special keyword. */ + m->type = get_type(special_tbl, l, &l); + } + if (m->type == FILE_INVALID) { if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "type `%s' invalid", l);