From: Christos Zoulas Date: Tue, 3 Jun 2014 19:01:34 +0000 (+0000) Subject: * Enforce limit of 8K on regex searches that have no limits X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5b4668364ab2540dd24fc4418db38208c1fa087e;p=file * Enforce limit of 8K on regex searches that have no limits * Allow the l modifier for regex to mean line count. Default to byte count. If line count is specified, assume a max of 80 characters per line to limit the byte count. * Don't allow conversions to be used for dates, allowing the mask field to be used as an offset. * Bump the version of the magic format so that regex changes are visible. --- diff --git a/ChangeLog b/ChangeLog index 976dbc4d..ad244c13 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2014-06-02 14:50 Christos Zoulas + + * Enforce limit of 8K on regex searches that have no limits + * Allow the l modifier for regex to mean line count. Default + to byte count. If line count is specified, assume a max + of 80 characters per line to limit the byte count. + * Don't allow conversions to be used for dates, allowing + the mask field to be used as an offset. + 2014-05-30 12:51 Christos Zoulas * Make the range operator limit the length of the diff --git a/doc/magic.man b/doc/magic.man index 2cd60d42..762da481 100644 --- a/doc/magic.man +++ b/doc/magic.man @@ -1,4 +1,4 @@ -.\" $File: magic.man,v 1.83 2014/06/03 17:36:13 christos Exp $ +.\" $File: magic.man,v 1.84 2014/06/03 19:01:34 christos Exp $ .Dd June 3, 2014 .Dt MAGIC __FSECTION__ .Os @@ -232,12 +232,21 @@ The size of the string to search should also be limited by specifying .Dv / , to avoid performance issues scanning long files. The type specification can also be optionally followed by -.Dv /[c][s] . +.Dv /[c][s][l] . The .Dq c flag makes the match case insensitive, while the .Dq s flag update the offset to the start offset of the match, rather than the end. 
+The +.Dq l +modifier changes the limit of length to mean number of lines instead of a +byte count. +Lines are delimited by the platform's native line delimiter. +When a line count is specified, an implicit byte count is also computed assuming +each line is 80 characters long. +If neither a byte nor a line count is specified, the search is limited automatically +to 8KiB. .Dv ^ and .Dv $ @@ -406,6 +415,9 @@ is octal, and .Dv 0x13 is hexadecimal. .Pp +Numeric operations are not performed on date types; instead the numeric +value is interpreted as an offset. +.Pp For string values, the string from the file must match the specified string. The operators diff --git a/magic/Magdir/android b/magic/Magdir/android index cae0cb7d..4a4c3feb 100644 --- a/magic/Magdir/android +++ b/magic/Magdir/android @@ -1,6 +1,6 @@ #------------------------------------------------------------ -# $File: android,v 1.3 2013/11/08 01:24:22 christos Exp $ +# $File: android,v 1.4 2014/06/03 19:01:34 christos Exp $ # Various android related magic entries #------------------------------------------------------------ @@ -89,12 +89,12 @@ >17 string 0\n \b, Not-Compressed >17 string 1\n \b, Compressed # any string as long as it's not the word none (which is matched below) ->>19 regex/1 \^([^n\n]|n[^o]|no[^n]|non[^e]|none.+).* \b, Encrypted (%s) +>>19 regex/1l \^([^n\n]|n[^o]|no[^n]|non[^e]|none.+).* \b, Encrypted (%s) >>19 string none\n \b, Not-Encrypted # Commented out because they don't seem useful to print # (but they are part of the header - the tar file comes after them): -#>>>&1 regex/1 .* \b, Password salt: %s -#>>>>&1 regex/1 .* \b, Master salt: %s -#>>>>>&1 regex/1 .* \b, PBKDF2 rounds: %s -#>>>>>>&1 regex/1 .* \b, IV: %s -#>>>>>>>&1 regex/1 .* \b, Key: %s +#>>>&1 regex/1l .* \b, Password salt: %s +#>>>>&1 regex/1l .* \b, Master salt: %s +#>>>>>&1 regex/1l .* \b, PBKDF2 rounds: %s +#>>>>>>&1 regex/1l .* \b, IV: %s +#>>>>>>>&1 regex/1l .* \b, Key: %s diff --git a/magic/Magdir/fortran 
b/magic/Magdir/fortran index 0604c259..921beec3 100644 --- a/magic/Magdir/fortran +++ b/magic/Magdir/fortran @@ -1,7 +1,7 @@ #------------------------------------------------------------------------------ -# $File: fortran,v 1.7 2012/06/21 01:55:02 christos Exp $ +# $File: fortran,v 1.8 2014/06/03 19:01:34 christos Exp $ # FORTRAN source -0 regex/100 \^[Cc][\ \t] FORTRAN program +0 regex/100l \^[Cc][\ \t] FORTRAN program !:mime text/x-fortran !:strength - 5 diff --git a/magic/Magdir/graphviz b/magic/Magdir/graphviz index b944d463..cddc1164 100644 --- a/magic/Magdir/graphviz +++ b/magic/Magdir/graphviz @@ -1,12 +1,12 @@ #------------------------------------------------------------------------------ -# $File: graphviz,v 1.7 2009/09/19 16:28:09 christos Exp $ +# $File: graphviz,v 1.8 2014/06/03 19:01:34 christos Exp $ # graphviz: file(1) magic for http://www.graphviz.org/ # FIXME: These patterns match too generally. For example, the first # line matches a LaTeX file containing the word "graph" (with a { # following later) and the second line matches this file. 
-#0 regex/100 [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text +#0 regex/100l [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text #!:mime text/vnd.graphviz -#0 regex/100 [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text +#0 regex/100l [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text #!:mime text/vnd.graphviz diff --git a/magic/Magdir/marc21 b/magic/Magdir/marc21 index 83f7959e..7e859a38 100644 --- a/magic/Magdir/marc21 +++ b/magic/Magdir/marc21 @@ -12,17 +12,17 @@ 20 string 45 # leader starts with 5 digits, followed by codes specific to MARC format ->0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic +>0 regex/1l (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic !:mime application/marc ->0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority +>0 regex/1l (^[0-9]{5})[acdnosx][z] MARC21 Authority !:mime application/marc ->0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings +>0 regex/1l (^[0-9]{5})[cdn][uvxy] MARC21 Holdings !:mime application/marc -0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification +0 regex/1l (^[0-9]{5})[acdn][w] MARC21 Classification !:mime application/marc ->0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community +>0 regex/1l (^[0-9]{5})[cdn][q] MARC21 Community !:mime application/marc # leader position 22-23, should be "00" but is it? ->0 regex/1 (^.{21})([^0]{2}) (non-conforming) +>0 regex/1l (^.{21})([^0]{2}) (non-conforming) !:mime application/marc diff --git a/magic/Magdir/scientific b/magic/Magdir/scientific index 66d3c598..f780743c 100644 --- a/magic/Magdir/scientific +++ b/magic/Magdir/scientific @@ -1,6 +1,6 @@ #------------------------------------------------------------------------------ -# $File: scientific,v 1.8 2014/01/06 17:46:23 rrt Exp $ +# $File: scientific,v 1.9 2014/06/03 19:01:34 christos Exp $ # scientific: file(1) magic for scientific formats # # From: Joe Krahn @@ -91,12 +91,12 @@ # uppercase letters. However, examples have been seen without the date string, # e.g., the example on the chemime site. 
0 string HEADER\ \ \ \ ->&0 regex/1 \^.{40} ->>&0 regex/1 [0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3} ->>>&0 regex/1s [A-Z0-9]{4}.{14}$ ->>>>&0 regex/1 [A-Z0-9]{4} Protein Data Bank data, ID Code %s +>&0 regex/1l \^.{40} +>>&0 regex/1l [0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3} +>>>&0 regex/1ls [A-Z0-9]{4}.{14}$ +>>>>&0 regex/1l [A-Z0-9]{4} Protein Data Bank data, ID Code %s !:mime chemical/x-pdb ->>>>0 regex/1 [0-9]{2}-[A-Z]{3}-[0-9]{2} \b, %s +>>>>0 regex/1l [0-9]{2}-[A-Z]{3}-[0-9]{2} \b, %s # Type: GDSII Stream file 0 belong 0x00060002 GDSII Stream file diff --git a/magic/Magdir/troff b/magic/Magdir/troff index b24ea0a4..cb6bc00e 100644 --- a/magic/Magdir/troff +++ b/magic/Magdir/troff @@ -1,6 +1,6 @@ #------------------------------------------------------------------------------ -# $File: troff,v 1.10 2009/09/19 16:28:12 christos Exp $ +# $File: troff,v 1.11 2014/06/03 19:01:34 christos Exp $ # troff: file(1) magic for *roff # # updated by Daniel Quinlan (quinlan@yggdrasil.com) @@ -16,9 +16,9 @@ !:mime text/troff 0 search/1 ''' troff or preprocessor input text !:mime text/troff -0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text +0 regex/20l \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text !:mime text/troff -0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text +0 regex/20l \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text !:mime text/troff # ditroff intermediate output text diff --git a/src/apprentice.c b/src/apprentice.c index 48a60e44..d2fad660 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: apprentice.c,v 1.210 2014/05/14 23:15:42 christos Exp $") +FILE_RCSID("@(#)$File: apprentice.c,v 1.211 2014/06/03 19:01:34 christos Exp $") #endif /* lint */ #include "magic.h" @@ -1382,7 +1382,8 @@ string_modifier_check(struct magic_set *ms, struct magic *m) if ((ms->flags & MAGIC_CHECK) == 0) return 0; - if (m->type != FILE_PSTRING && 
(m->str_flags & PSTRING_LEN) != 0) { + if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) && + (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) { file_magwarn(ms, "'/BHhLl' modifiers are only allowed for pascal strings\n"); return -1; @@ -1875,8 +1876,13 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line, m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; break; case CHAR_PSTRING_4_LE: - if (m->type != FILE_PSTRING) + switch (m->type) { + case FILE_PSTRING: + case FILE_REGEX: + break; + default: goto bad; + } m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; break; case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: diff --git a/src/file.h b/src/file.h index 4f5d68da..67a25680 100644 --- a/src/file.h +++ b/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.151 2014/05/14 23:15:42 christos Exp $ + * @(#)$File: file.h,v 1.152 2014/06/03 19:01:34 christos Exp $ */ #ifndef __file_h__ @@ -133,7 +133,7 @@ #define MAXstring 64 /* max len of "string" types */ #define MAGICNO 0xF11E041C -#define VERSIONNO 11 +#define VERSIONNO 12 #define FILE_MAGICSIZE 248 #define FILE_LOAD 0 @@ -321,6 +321,7 @@ struct magic { #define PSTRING_2_LE BIT(9) #define PSTRING_4_BE BIT(10) #define PSTRING_4_LE BIT(11) +#define REGEX_LINE_COUNT BIT(11) #define PSTRING_LEN \ (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE) #define PSTRING_LENGTH_INCLUDES_ITSELF BIT(12) diff --git a/src/softmagic.c b/src/softmagic.c index 0a5b2286..7acb21c1 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: softmagic.c,v 1.189 2014/05/30 16:47:44 christos Exp $") +FILE_RCSID("@(#)$File: softmagic.c,v 1.190 2014/06/03 19:01:34 christos Exp $") #endif /* lint */ #include "magic.h" @@ -57,7 +57,7 @@ private int32_t mprint(struct magic_set *, struct magic *); private int32_t moffset(struct magic_set *, struct magic 
*); private void mdebug(uint32_t, const char *, size_t); private int mcopy(struct magic_set *, union VALUETYPE *, int, int, - const unsigned char *, uint32_t, size_t, size_t); + const unsigned char *, uint32_t, size_t, struct magic *); private int mconvert(struct magic_set *, struct magic *, int); private int print_sep(struct magic_set *, int); private int handle_annotation(struct magic_set *, struct magic *); @@ -540,7 +540,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_LEDATE: case FILE_MEDATE: if (file_printf(ms, F(ms, m, "%s"), - file_fmttime(p->l, FILE_T_LOCAL, tbuf)) == -1) + file_fmttime(p->l + m->num_mask, FILE_T_LOCAL, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint32_t); break; @@ -550,7 +550,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_LELDATE: case FILE_MELDATE: if (file_printf(ms, F(ms, m, "%s"), - file_fmttime(p->l, 0, tbuf)) == -1) + file_fmttime(p->l + m->num_mask, 0, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint32_t); break; @@ -559,7 +559,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEQDATE: case FILE_LEQDATE: if (file_printf(ms, F(ms, m, "%s"), - file_fmttime(p->q, FILE_T_LOCAL, tbuf)) == -1) + file_fmttime(p->q + m->num_mask, FILE_T_LOCAL, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint64_t); break; @@ -568,7 +568,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEQLDATE: case FILE_LEQLDATE: if (file_printf(ms, F(ms, m, "%s"), - file_fmttime(p->q, 0, tbuf)) == -1) + file_fmttime(p->q + m->num_mask, 0, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint64_t); break; @@ -577,7 +577,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEQWDATE: case FILE_LEQWDATE: if (file_printf(ms, F(ms, m, "%s"), - file_fmttime(p->q, FILE_T_WINDOWS, tbuf)) == -1) + file_fmttime(p->q + m->num_mask, FILE_T_WINDOWS, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint64_t); break; @@ -912,8 +912,9 @@ private int mconvert(struct magic_set *ms, struct magic *m, int flip) { union 
VALUETYPE *p = &ms->ms_value; + uint8_t type; - switch (cvt_flip(m->type, flip)) { + switch (type = cvt_flip(m->type, flip)) { case FILE_BYTE: cvt_8(p, m); return 1; @@ -957,7 +958,8 @@ mconvert(struct magic_set *ms, struct magic *m, int flip) case FILE_BELDATE: p->l = (int32_t) ((p->hl[0]<<24)|(p->hl[1]<<16)|(p->hl[2]<<8)|(p->hl[3])); - cvt_32(p, m); + if (type == FILE_BELONG) + cvt_32(p, m); return 1; case FILE_BEQUAD: case FILE_BEQDATE: @@ -968,7 +970,8 @@ mconvert(struct magic_set *ms, struct magic *m, int flip) ((uint64_t)p->hq[2]<<40)|((uint64_t)p->hq[3]<<32)| ((uint64_t)p->hq[4]<<24)|((uint64_t)p->hq[5]<<16)| ((uint64_t)p->hq[6]<<8)|((uint64_t)p->hq[7])); - cvt_64(p, m); + if (type == FILE_BEQUAD) + cvt_64(p, m); return 1; case FILE_LESHORT: p->h = (short)((p->hs[1]<<8)|(p->hs[0])); @@ -979,7 +982,8 @@ mconvert(struct magic_set *ms, struct magic *m, int flip) case FILE_LELDATE: p->l = (int32_t) ((p->hl[3]<<24)|(p->hl[2]<<16)|(p->hl[1]<<8)|(p->hl[0])); - cvt_32(p, m); + if (type == FILE_LELONG) + cvt_32(p, m); return 1; case FILE_LEQUAD: case FILE_LEQDATE: @@ -990,14 +994,16 @@ mconvert(struct magic_set *ms, struct magic *m, int flip) ((uint64_t)p->hq[5]<<40)|((uint64_t)p->hq[4]<<32)| ((uint64_t)p->hq[3]<<24)|((uint64_t)p->hq[2]<<16)| ((uint64_t)p->hq[1]<<8)|((uint64_t)p->hq[0])); - cvt_64(p, m); + if (type == FILE_LEQUAD) + cvt_64(p, m); return 1; case FILE_MELONG: case FILE_MEDATE: case FILE_MELDATE: p->l = (int32_t) ((p->hl[1]<<24)|(p->hl[0]<<16)|(p->hl[3]<<8)|(p->hl[2])); - cvt_32(p, m); + if (type == FILE_MELONG) + cvt_32(p, m); return 1; case FILE_FLOAT: cvt_float(p, m); @@ -1054,7 +1060,7 @@ mdebug(uint32_t offset, const char *str, size_t len) private int mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, - const unsigned char *s, uint32_t offset, size_t nbytes, size_t linecnt) + const unsigned char *s, uint32_t offset, size_t nbytes, struct magic *m) { /* * Note: FILE_SEARCH and FILE_REGEX do not actually copy @@ -1074,15 +1080,29 @@ 
mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, const char *last; /* end of search region */ const char *buf; /* start of search region */ const char *end; - size_t lines; + size_t lines, linecnt, bytecnt; if (s == NULL) { ms->search.s_len = 0; ms->search.s = NULL; return 0; } + + if (m->str_flags & REGEX_LINE_COUNT) { + linecnt = m->str_range; + bytecnt = linecnt * 80; + } else { + linecnt = 0; + bytecnt = m->str_range; + } + + if (bytecnt == 0) + bytecnt = 8192; + if (bytecnt > nbytes) + bytecnt = nbytes; + buf = RCAST(const char *, s) + offset; - end = last = RCAST(const char *, s) + nbytes; + end = last = RCAST(const char *, s) + bytecnt; /* mget() guarantees buf <= last */ for (lines = linecnt, b = buf; lines && b < end && ((b = CAST(const char *, @@ -1095,7 +1115,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, b++; } if (lines) - last = RCAST(const char *, s) + nbytes; + last = RCAST(const char *, s) + bytecnt; ms->search.s = buf; ms->search.s_len = last - buf; @@ -1166,7 +1186,6 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m, int *need_separator, int *returnval) { uint32_t soffset, offset = ms->offset; - uint32_t count = m->str_range; uint32_t lhs; int rv, oneed_separator, in_type; char *sbuf, *rbuf; @@ -1179,13 +1198,12 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m, } if (mcopy(ms, p, m->type, m->flag & INDIR, s, (uint32_t)(offset + o), - (uint32_t)nbytes, count) == -1) + (uint32_t)nbytes, m) == -1) return -1; if ((ms->flags & MAGIC_DEBUG) != 0) { fprintf(stderr, "mget(type=%d, flag=%x, offset=%u, o=%zu, " - "nbytes=%zu, count=%u)\n", m->type, m->flag, offset, o, - nbytes, count); + "nbytes=%zu)\n", m->type, m->flag, offset, o, nbytes); mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE)); #ifndef COMPILE_ONLY file_mdump(m); @@ -1550,7 +1568,7 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m, if ((ms->flags & MAGIC_DEBUG) != 0) 
fprintf(stderr, "indirect +offs=%u\n", offset); } - if (mcopy(ms, p, m->type, 0, s, offset, nbytes, count) == -1) + if (mcopy(ms, p, m->type, 0, s, offset, nbytes, m) == -1) return -1; ms->offset = offset; @@ -1906,7 +1924,8 @@ magiccheck(struct magic_set *ms, struct magic *m) if (slen + idx > ms->search.s_len) break; - v = file_strncmp(m->value.s, ms->search.s + idx, slen, m->str_flags); + v = file_strncmp(m->value.s, ms->search.s + idx, slen, + m->str_flags); if (v == 0) { /* found match */ ms->search.offset += idx; break; @@ -1929,16 +1948,11 @@ magiccheck(struct magic_set *ms, struct magic *m) file_regerror(&rx, rc, ms); v = (uint64_t)-1; } else { -#ifndef REG_STARTEND - char c; -#endif regmatch_t pmatch[1]; size_t slen = ms->search.s_len; - /* Limit by offset if requested */ - if (m->str_range > 0) - slen = MIN(slen, m->str_range); #ifndef REG_STARTEND #define REG_STARTEND 0 + char c; if (slen != 0) slen--; c = ms->search.s[slen];