From 65c81154fcf085c1560577f6dc69a6b840336844 Mon Sep 17 00:00:00 2001 From: Anon Ymous Date: Thu, 18 Jan 2007 05:29:33 +0000 Subject: [PATCH] 1) Move the "type" detection code from parse() into its own table driven routine. This avoids maintaining multiple lists in file.h. 2) Add an optional conditional field (just before the type field). This code is wrapped in "#ifdef ENABLE_CONDITIONALS" as it is likely to go away. --- src/apprentice.c | 189 +++++++++++++++++++++++++++++++++++++++++++---- src/file.h | 125 ++++++++++--------------------- src/softmagic.c | 75 ++++++++++++------- 3 files changed, 257 insertions(+), 132 deletions(-) diff --git a/src/apprentice.c b/src/apprentice.c index e80e7dde..934133a2 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -46,7 +46,7 @@ #endif #ifndef lint -FILE_RCSID("@(#)$File: apprentice.c,v 1.101 2007/01/12 17:38:27 christos Exp $") +FILE_RCSID("@(#)$File: apprentice.c,v 1.102 2007/01/16 14:58:48 ljt Exp $") #endif /* lint */ #define EATAB {while (isascii((unsigned char) *l) && \ @@ -81,10 +81,10 @@ struct magic_entry { uint32_t max_count; }; -const int file_formats[] = { FILE_FORMAT_STRING }; -const size_t file_nformats = sizeof(file_formats) / sizeof(file_formats[0]); -const char *file_names[] = { FILE_FORMAT_NAME }; -const size_t file_nnames = sizeof(file_names) / sizeof(file_names[0]); +int file_formats[FILE_NAMES_SIZE]; +const size_t file_nformats = FILE_NAMES_SIZE; +const char *file_names[FILE_NAMES_SIZE]; +const size_t file_nnames = FILE_NAMES_SIZE; private int getvalue(struct magic_set *ms, struct magic *, const char **, int); private int hextoint(int); @@ -148,6 +148,82 @@ main(int argc, char *argv[]) } #endif /* COMPILE_ONLY */ +static const struct type_tbl_s { + const char *name; + const size_t len; + const int type; + const int format; +} type_tbl[] = { +# define XX(s) s, (sizeof(s) - 1) +# define XX_NULL NULL, 0 + { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, + { XX("short"), FILE_SHORT, FILE_FMT_NUM }, + { 
XX("default"), FILE_DEFAULT, FILE_FMT_STR }, + { XX("long"), FILE_LONG, FILE_FMT_NUM }, + { XX("string"), FILE_STRING, FILE_FMT_STR }, + { XX("date"), FILE_DATE, FILE_FMT_STR }, + { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, + { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, + { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, + { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, + { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, + { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, + { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, + { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, + { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, + { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, + { XX("regex"), FILE_REGEX, FILE_FMT_STR }, + { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, + { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, + { XX("search"), FILE_SEARCH, FILE_FMT_STR }, + { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, + { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, + { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, + { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, + { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, + { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, + { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, + { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, + { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, + { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, + { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, + { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, + { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, +# undef XX +# undef XX_NULL +}; + +private int +get_type(const char *l, const char **t) +{ + const struct type_tbl_s *p; + + for (p = type_tbl; p->name; p++) { + if (strncmp(l, p->name, p->len) == 0) { + if (t) + *t = l + p->len; + break; + } + } + return p->type; +} + +private void +init_file_tables(void) +{ + static int done = 0; + const struct type_tbl_s *p; + + if (done) + return; + done++; + + for (p = type_tbl; p->name; p++) { + assert(p->type < FILE_NAMES_SIZE); + file_names[p->type] = p->name; + 
file_formats[p->type] = p->format; + } +} /* * Handle one file. @@ -235,7 +311,6 @@ file_delmagic(struct magic *p, int type, size_t entries) } } - /* const char *fn: list of magic files */ protected struct mlist * file_apprentice(struct magic_set *ms, const char *fn, int action) @@ -245,6 +320,8 @@ file_apprentice(struct magic_set *ms, const char *fn, int action) struct mlist *mlist; static const char mime[] = ".mime"; + init_file_tables(); + if (fn == NULL) fn = getenv("MAGIC"); if (fn == NULL) @@ -668,6 +745,77 @@ get_op(char c) } } +#ifdef ENABLE_CONDITIONALS +private int +get_cond(const char *l, const char **t) +{ + static struct cond_tbl_s { + const char *name; + const size_t len; + const int cond; + } cond_tbl[] = { + { "if", 2, COND_IF }, + { "elif", 4, COND_ELIF }, + { "else", 4, COND_ELSE }, + { NULL, 0, COND_NONE }, + }; + struct cond_tbl_s *p; + + for (p = cond_tbl; p->name; p++) { + if (strncmp(l, p->name, p->len) == 0 && + isspace((unsigned char)l[p->len])) { + if (t) + *t = l + p->len; + break; + } + } + return p->cond; +} + +private int +check_cond(struct magic_set *ms, int cond, uint32_t cont_level) +{ + int last_cond; + last_cond = ms->c.li[cont_level].last_cond; + + switch (cond) { + case COND_IF: + if (last_cond != COND_NONE && last_cond != COND_ELIF) { + if (ms->flags & MAGIC_CHECK) + file_magwarn(ms, "syntax error: `if'"); + return -1; + } + last_cond = COND_IF; + break; + + case COND_ELIF: + if (last_cond != COND_IF && last_cond != COND_ELIF) { + if (ms->flags & MAGIC_CHECK) + file_magwarn(ms, "syntax error: `elif'"); + return -1; + } + last_cond = COND_ELIF; + break; + + case COND_ELSE: + if (last_cond != COND_IF && last_cond != COND_ELIF) { + if (ms->flags & MAGIC_CHECK) + file_magwarn(ms, "syntax error: `else'"); + return -1; + } + last_cond = COND_NONE; + break; + + case COND_NONE: + last_cond = COND_NONE; + break; + } + + ms->c.li[cont_level].last_cond = last_cond; + return 0; +} +#endif /* ENABLE_CONDITIONALS */ + /* * parse one line 
from magic file, put into magic[index++] if valid */ @@ -675,6 +823,9 @@ private int parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, const char *line, size_t lineno, int action) { +#ifdef ENABLE_CONDITIONALS + static uint32_t last_cont_level = 0; +#endif size_t i; struct magic_entry *me; struct magic *m; @@ -689,6 +840,12 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, ++l; /* step over */ cont_level++; } +#ifdef ENABLE_CONDITIONALS + if (cont_level == 0 || cont_level > last_cont_level) + if (file_check_mem(ms, cont_level) == -1) + return -1; + last_cont_level = cont_level; +#endif #define ALLOC_CHUNK (size_t)10 #define ALLOC_INCR (size_t)200 @@ -840,21 +997,21 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, } EATAB; +#ifdef ENABLE_CONDITIONALS + m->cond = get_cond(l, &l); + if (check_cond(ms, m->cond, cont_level) == -1) + return -1; + + EATAB; +#endif + if (*l == 'u') { ++l; m->flag |= UNSIGNED; } - /* get type, skip it */ - for (i = 0; i < file_nnames; i++) { - size_t len = strlen(file_names[i]); - if (strncmp(l, file_names[i], len) == 0) { - m->type = i; - l+= len; - break; - } - } - if (i == file_nnames) { + m->type = get_type(l, &l); + if (m->type == FILE_INVALID) { if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "type `%s' invalid", l); return -1; diff --git a/src/file.h b/src/file.h index 53f815a7..0608fe88 100644 --- a/src/file.h +++ b/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.84 2007/01/12 17:38:28 christos Exp $ + * @(#)$File: file.h,v 1.85 2007/01/16 14:58:48 ljt Exp $ */ #ifndef __file_h__ @@ -51,6 +51,8 @@ /* Do this here and now, because struct stat gets re-defined on solaris */ #include +#define ENABLE_CONDITIONALS + #ifndef MAGIC #define MAGIC "/etc/magic" #endif @@ -113,11 +115,13 @@ struct magic { #define OFFADD 2 /* if '>&' or '>...(&' appears */ #define INDIROFFADD 4 /* if '>&(' appears */ 
#define UNSIGNED 8 /* comparison is unsigned */ + /* Word 2 */ uint8_t reln; /* relation (0=eq, '>'=gt, etc) */ uint8_t vallen; /* length of string value, if any */ uint8_t type; /* int, short, long or string. */ uint8_t in_type; /* type of indirrection */ +#define FILE_INVALID 0 #define FILE_BYTE 1 #define FILE_SHORT 2 #define FILE_DEFAULT 3 @@ -150,99 +154,32 @@ struct magic { #define FILE_QLDATE 30 #define FILE_LEQLDATE 31 #define FILE_BEQLDATE 32 +#define FILE_NAMES_SIZE 33/* size of array to contain all names */ -#define IS_PLAINSTRING(t) \ +#define IS_STRING(t) \ ((t) == FILE_STRING || \ (t) == FILE_PSTRING || \ (t) == FILE_BESTRING16 || \ - (t) == FILE_LESTRING16) - -#define IS_STRING(t) \ - (IS_PLAINSTRING(t) || \ - (t) == FILE_REGEX || \ - (t) == FILE_SEARCH || \ - (t) == FILE_DEFAULT) - -#define FILE_FORMAT_NAME \ -/* 0 */ "invalid 0", \ -/* 1 */ "byte", \ -/* 2 */ "short", \ -/* 3 */ "default", \ -/* 4 */ "long", \ -/* 5 */ "string", \ -/* 6 */ "date", \ -/* 7 */ "beshort", \ -/* 8 */ "belong", \ -/* 9 */ "bedate", \ -/* 10 */ "leshort", \ -/* 11 */ "lelong", \ -/* 12 */ "ledate", \ -/* 13 */ "pstring", \ -/* 14 */ "ldate", \ -/* 15 */ "beldate", \ -/* 16 */ "leldate", \ -/* 17 */ "regex", \ -/* 18 */ "bestring16", \ -/* 19 */ "lestring16", \ -/* 20 */ "search", \ -/* 21 */ "medate", \ -/* 22 */ "meldate", \ -/* 23 */ "melong", \ -/* 24 */ "quad", \ -/* 25 */ "lequad", \ -/* 26 */ "bequad", \ -/* 27 */ "qdate", \ -/* 28 */ "leqdate", \ -/* 29 */ "beqdate", \ -/* 30 */ "qldate", \ -/* 31 */ "leqldate", \ -/* 32 */ "beqldate", + (t) == FILE_LESTRING16 || \ + (t) == FILE_REGEX || \ + (t) == FILE_SEARCH || \ + (t) == FILE_DEFAULT) #define FILE_FMT_NONE 0 #define FILE_FMT_NUM 1 /* "cduxXi" */ #define FILE_FMT_STR 2 /* "s" */ #define FILE_FMT_QUAD 3 /* "ll" */ -#define FILE_FORMAT_STRING \ -/* 0 */ FILE_FMT_NONE, \ -/* 1 */ FILE_FMT_NUM, \ -/* 2 */ FILE_FMT_NUM, \ -/* 3 */ FILE_FMT_STR, \ -/* 4 */ FILE_FMT_NUM, \ -/* 5 */ FILE_FMT_STR, \ -/* 6 */ FILE_FMT_STR, 
\ -/* 7 */ FILE_FMT_NUM, \ -/* 8 */ FILE_FMT_NUM, \ -/* 9 */ FILE_FMT_STR, \ -/* 10 */ FILE_FMT_NUM, \ -/* 11 */ FILE_FMT_NUM, \ -/* 12 */ FILE_FMT_STR, \ -/* 13 */ FILE_FMT_STR, \ -/* 14 */ FILE_FMT_STR, \ -/* 15 */ FILE_FMT_STR, \ -/* 16 */ FILE_FMT_STR, \ -/* 17 */ FILE_FMT_STR, \ -/* 18 */ FILE_FMT_STR, \ -/* 19 */ FILE_FMT_STR, \ -/* 20 */ FILE_FMT_STR, \ -/* 21 */ FILE_FMT_STR, \ -/* 22 */ FILE_FMT_STR, \ -/* 23 */ FILE_FMT_NUM, \ -/* 24 */ FILE_FMT_QUAD, \ -/* 25 */ FILE_FMT_QUAD, \ -/* 26 */ FILE_FMT_QUAD, \ -/* 27 */ FILE_FMT_STR, \ -/* 28 */ FILE_FMT_STR, \ -/* 29 */ FILE_FMT_STR, \ -/* 30 */ FILE_FMT_STR, \ -/* 31 */ FILE_FMT_STR, \ -/* 32 */ FILE_FMT_STR, - /* Word 3 */ uint8_t in_op; /* operator for indirection */ uint8_t mask_op; /* operator for mask */ +#ifdef ENABLE_CONDITIONALS + uint8_t cond; /* conditional type */ + uint8_t dummy1; +#else uint8_t dummy1; uint8_t dummy2; +#endif #define FILE_OPS "&|^+-*/%" #define FILE_OPAND 0 @@ -260,6 +197,13 @@ struct magic { #define FILE_OPINVERSE 0x40 #define FILE_OPINDIRECT 0x80 +#ifdef ENABLE_CONDITIONALS +#define COND_NONE 0 +#define COND_IF 1 +#define COND_ELIF 2 +#define COND_ELSE 3 +#endif /* ENABLE_CONDITIONALS */ + /* Word 4 */ uint32_t offset; /* offset to magic number */ /* Word 5 */ @@ -280,15 +224,13 @@ struct magic { /* Words 9-16 */ union VALUETYPE { -// union NUMTYPE { - uint8_t b; - uint16_t h; - uint32_t l; - uint64_t q; - uint8_t hs[2]; /* 2 bytes of a fixed-endian "short" */ - uint8_t hl[4]; /* 4 bytes of a fixed-endian "long" */ - uint8_t hq[8]; /* 8 bytes of a fixed-endian "quad" */ -// } n; + uint8_t b; + uint16_t h; + uint32_t l; + uint64_t q; + uint8_t hs[2]; /* 2 bytes of a fixed-endian "short" */ + uint8_t hl[4]; /* 4 bytes of a fixed-endian "long" */ + uint8_t hq[8]; /* 8 bytes of a fixed-endian "quad" */ char s[MAXstring]; /* the search string or regex pattern */ } value; /* either number or string */ /* Words 17..31 */ @@ -326,6 +268,10 @@ struct magic_set { struct level_info { 
int32_t off; int got_match; +#ifdef ENABLE_CONDITIONALS + int last_match; + int last_cond; /* used for error checking by parse() */ +#endif } *li; } c; struct out { @@ -382,6 +328,9 @@ protected void file_showstr(FILE *, const char *, size_t); protected size_t file_mbswidth(const char *); protected const char *file_getbuffer(struct magic_set *); protected ssize_t sread(int, void *, size_t); +#ifdef ENABLE_CONDITIONALS +protected int file_check_mem(struct magic_set *, unsigned int); +#endif #ifndef COMPILE_ONLY extern const char *file_names[]; diff --git a/src/softmagic.c b/src/softmagic.c index 95f56f94..dd80d5e9 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -51,7 +51,6 @@ private void mdebug(uint32_t, const char *, size_t); private int mcopy(struct magic_set *, union VALUETYPE *, int, int, const unsigned char *, uint32_t, size_t, size_t); private int mconvert(struct magic_set *, struct magic *); -private int check_mem(struct magic_set *, unsigned int); private int print_sep(struct magic_set *, int); private void cvt_8(union VALUETYPE *, const struct magic *); private void cvt_16(union VALUETYPE *, const struct magic *); @@ -75,6 +74,32 @@ file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) return 0; } +#ifdef ENABLE_CONDITIONALS +protected int +#else +private int +#endif +file_check_mem(struct magic_set *ms, unsigned int level) +{ + size_t len; + + if (level >= ms->c.len) { + len = (ms->c.len += 20) * sizeof(*ms->c.li); + ms->c.li = (ms->c.li == NULL) ? malloc(len) : + realloc(ms->c.li, len); + if (ms->c.li == NULL) { + file_oomem(ms, len); + return -1; + } + } + ms->c.li[level].got_match = 0; +#ifdef ENABLE_CONDITIONALS + ms->c.li[level].last_match = 0; + ms->c.li[level].last_cond = COND_NONE; +#endif /* ENABLE_CONDITIONALS */ + return 0; +} + /* * Go through the whole list, stopping if you find a match. Process all * the continuations of that match before returning. 
@@ -113,7 +138,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, int firstline = 1; /* a flag to print X\n X\n- X */ int printed_something = 0; - if (check_mem(ms, cont_level) == -1) + if (file_check_mem(ms, cont_level) == -1) return -1; for (magindex = 0; magindex < nmagic; magindex++) { @@ -165,7 +190,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, return -1; /* and any continuations that match */ - if (check_mem(ms, ++cont_level) == -1) + if (file_check_mem(ms, ++cont_level) == -1) return -1; while (magic[magindex+1].cont_level != 0 && @@ -187,6 +212,13 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, ms->c.li[cont_level - 1].off; } +#ifdef ENABLE_CONDITIONALS + if (magic[magindex].cond == COND_ELSE || + magic[magindex].cond == COND_ELIF) { + if (ms->c.li[cont_level].last_match == 1) + continue; + } +#endif flush = !mget(ms, s, &magic[magindex], nbytes, cont_level); if (flush && magic[magindex].reln != '!') @@ -196,8 +228,14 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, case -1: return -1; case 0: +#ifdef ENABLE_CONDITIONALS + ms->c.li[cont_level].last_match = 0; +#endif break; default: +#ifdef ENABLE_CONDITIONALS + ms->c.li[cont_level].last_match = 1; +#endif if (magic[magindex].type != FILE_DEFAULT) ms->c.li[cont_level].got_match = 1; else if (ms->c.li[cont_level].got_match) { @@ -214,16 +252,15 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, return -1; } /* - * This continuation matched. - * Print its message, with - * a blank before it if - * the previous item printed - * and this item isn't empty. + * This continuation matched. Print + * its message, with a blank before it + * if the previous item printed and + * this item isn't empty. 
*/ /* space if previous printed */ if (need_separator && (magic[magindex].nospflag == 0) - && (magic[magindex].desc[0] != '\0')) { + && (magic[magindex].desc[0] != '\0')) { if (file_printf(ms, " ") == -1) return -1; need_separator = 0; @@ -238,7 +275,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, * at a higher level, * process them. */ - if (check_mem(ms, ++cont_level) == -1) + if (file_check_mem(ms, ++cont_level) == -1) return -1; break; } @@ -253,24 +290,6 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, return returnval; /* This is hit if -k is set or there is no match */ } -private int -check_mem(struct magic_set *ms, unsigned int level) -{ - size_t len; - - if (level >= ms->c.len) { - len = (ms->c.len += 20) * sizeof(*ms->c.li); - ms->c.li = (ms->c.li == NULL) ? malloc(len) : - realloc(ms->c.li, len); - if (ms->c.li == NULL) { - file_oomem(ms, len); - return -1; - } - } - ms->c.li[level].got_match = 0; - return 0; -} - private int check_fmt(struct magic_set *ms, struct magic *m) { -- 2.40.0