From 2f0eeb07ba633f1d915f78a50b22808123b38ea0 Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Wed, 22 Dec 2010 18:14:05 +0000 Subject: [PATCH] support for various formats of pascal strings. --- ChangeLog | 8 ++++++ doc/magic.man | 18 ++++++++++-- src/apprentice.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++-- src/file.h | 18 +++++++++++- src/softmagic.c | 18 ++++++------ 5 files changed, 121 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index eefdc712..29e8724a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,20 @@ +2010-12-22 13:12 Christos Zoulas + + * Add pstring/BHhLl to specify the type of the length of pascal + strings. + 2010-11-26 18:39 Reuben Thomas + * Fix "-e soft": it was ignored when softmagic was called during asciimagic. * Improve comments and use "unsigned char" in tar.h/is_tar.c. 2010-11-05 17:26 Reuben Thomas + * Make bug reporting addresses more visible. 2010-11-01 18:35 Reuben Thomas + * Add tcl magic from Gustaf Neumann 2010-10-24 10:42 Christos Zoulas diff --git a/doc/magic.man b/doc/magic.man index 84866451..299bb8d1 100644 --- a/doc/magic.man +++ b/doc/magic.man @@ -1,4 +1,4 @@ -.\" $File: magic.man,v 1.62 2010/09/20 20:15:36 rrt Exp $ +.\" $File: magic.man,v 1.63 2010/10/21 22:08:34 christos Exp $ .Dd August 30, 2008 .Dt MAGIC __FSECTION__ .Os @@ -71,8 +71,22 @@ characters in the magic match both lower and upper case characters in the target, whereas upper case characters in the magic only match uppercase characters in the target. .It Dv pstring -A Pascal-style string where the first byte is interpreted as the an +A Pascal-style string where the first byte/short/int is interpreted as an unsigned length. +The length defaults to byte and can be specified as a modifier. +The following modifiers are supported: +.Bl -tag -compact -width B +.It B +A byte length (default). +.It H +A 2 byte big endian length. +.It h +A 2 byte little endian length. +.It L +A 4 byte big endian length. 
+.It l +A 4 byte little endian length. +.El The string is not NUL terminated. .It Dv date A four-byte value interpreted as a UNIX date. diff --git a/src/apprentice.c b/src/apprentice.c index 40d547bc..1120a696 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: apprentice.c,v 1.160 2010/09/20 14:14:49 christos Exp $") +FILE_RCSID("@(#)$File: apprentice.c,v 1.161 2010/10/07 00:33:20 christos Exp $") #endif /* lint */ #include "magic.h" @@ -981,6 +981,11 @@ string_modifier_check(struct magic_set *ms, struct magic *m) if ((ms->flags & MAGIC_CHECK) == 0) return 0; + if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { + file_magwarn(ms, + "'/BHhLl' modifiers are only allowed for pascal strings\n"); + return -1; + } switch (m->type) { case FILE_BESTRING16: case FILE_LESTRING16: @@ -1357,7 +1362,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, ++l; } m->str_range = 0; - m->str_flags = 0; + m->str_flags = m->type == FILE_PSTRING ? 
PSTRING_1_LE : 0; m->num_mask = 0; if ((op = get_op(*l)) != -1) { if (!IS_STRING(m->type)) { @@ -1412,6 +1417,37 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, case CHAR_TEXTTEST: m->str_flags |= STRING_TEXTTEST; break; + /* + * The pstring length modifiers are mutually exclusive: + * each one replaces the width bit set so far (including + * the one byte default) instead of being OR-ed into it. + */ + case CHAR_PSTRING_1_LE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; + break; + case CHAR_PSTRING_2_BE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; + break; + case CHAR_PSTRING_2_LE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; + break; + case CHAR_PSTRING_4_BE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; + break; + case CHAR_PSTRING_4_LE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; + break; + bad: default: if (ms->flags & MAGIC_CHECK) file_magwarn(ms, @@ -2045,7 +2081,7 @@ out: *p = '\0'; m->vallen = CAST(unsigned char, (p - origp)); if (m->type == FILE_PSTRING) - m->vallen++; + m->vallen += file_pstring_length_size(m); return s; } @@ -2434,3 +2470,55 @@ bs1(struct magic *m) m->num_mask = swap8(m->num_mask); } } + +/* + * Return the number of bytes occupied by the length prefix of a pascal + * string, as selected by the PSTRING_* bits of m->str_flags (1, 2 or 4). + * Aborts if those bits do not name exactly one known format. + */ +protected size_t +file_pstring_length_size(const struct magic *m) +{ + switch (m->str_flags & PSTRING_LEN) { + case PSTRING_1_LE: + return 1; + case PSTRING_2_LE: + case PSTRING_2_BE: + return 2; + case PSTRING_4_LE: + case PSTRING_4_BE: + return 4; + default: + abort(); /* Impossible */ + return 1; + } +} + +/* + * Decode the length prefix stored at s, using the width and byte order + * selected by the PSTRING_* bits of m->str_flags. The bytes are read as + * unsigned char so that values >= 0x80 are not sign-extended where char is signed. + */ +protected size_t +file_pstring_get_length(const struct magic *m, const char *s) +{ + const unsigned char *us = (const unsigned char *)s; + + switch (m->str_flags & PSTRING_LEN) { + case PSTRING_1_LE: + return us[0]; + case PSTRING_2_LE: + return ((size_t)us[1] << 8) | us[0]; + case PSTRING_2_BE: + return ((size_t)us[0] << 8) | us[1]; + case PSTRING_4_LE: + return ((size_t)us[3] << 24) | ((size_t)us[2] << 16) | + ((size_t)us[1] << 8) | us[0]; + case PSTRING_4_BE: + return ((size_t)us[0] << 24) | ((size_t)us[1] << 16) | + ((size_t)us[2] << 8) | us[3]; + default: + abort(); /* Impossible */ + return 1; + } +} diff --git a/src/file.h b/src/file.h index 25cd3a5b..c84749f7 100644 --- 
a/src/file.h +++ b/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.127 2010/10/24 14:35:45 christos Exp $ + * @(#)$File: file.h,v 1.128 2010/11/23 18:02:08 christos Exp $ */ #ifndef __file_h__ @@ -294,6 +294,14 @@ struct magic { #define REGEX_OFFSET_START BIT(4) #define STRING_TEXTTEST BIT(5) #define STRING_BINTEST BIT(6) +#define PSTRING_1_BE BIT(7) +#define PSTRING_1_LE BIT(7) +#define PSTRING_2_BE BIT(8) +#define PSTRING_2_LE BIT(9) +#define PSTRING_4_BE BIT(10) +#define PSTRING_4_LE BIT(11) +#define PSTRING_LEN \ + (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE) #define CHAR_COMPACT_WHITESPACE 'W' #define CHAR_COMPACT_OPTIONAL_WHITESPACE 'w' #define CHAR_IGNORE_LOWERCASE 'c' @@ -301,6 +309,12 @@ struct magic { #define CHAR_REGEX_OFFSET_START 's' #define CHAR_TEXTTEST 't' #define CHAR_BINTEST 'b' +#define CHAR_PSTRING_1_BE 'B' +#define CHAR_PSTRING_1_LE 'B' +#define CHAR_PSTRING_2_BE 'H' +#define CHAR_PSTRING_2_LE 'h' +#define CHAR_PSTRING_4_BE 'L' +#define CHAR_PSTRING_4_LE 'l' #define STRING_IGNORE_CASE (STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE) #define STRING_DEFAULT_RANGE 100 @@ -411,6 +425,8 @@ protected ssize_t sread(int, void *, size_t, int); protected int file_check_mem(struct magic_set *, unsigned int); protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *); +protected size_t file_pstring_length_size(const struct magic *); +protected size_t file_pstring_get_length(const struct magic *, const char *); #ifdef __EMX__ protected int file_os2_apptype(struct magic_set *, const char *, const void *, size_t); diff --git a/src/softmagic.c b/src/softmagic.c index a5659896..0f152273 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: softmagic.c,v 1.141 2010/09/20 14:24:01 rrt Exp $") +FILE_RCSID("@(#)$File: softmagic.c,v 1.142 2010/10/24 14:42:07 christos Exp $") #endif /* lint */ 
#include "magic.h" @@ -168,6 +168,8 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, continue; } + if ((e = handle_annotation(ms, m)) != 0) + return e; /* * If we are going to print something, we'll need to print * a blank before we print something else. @@ -175,8 +177,6 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, if (*m->desc) { need_separator = 1; printed_something = 1; - if ((e = handle_annotation(ms, m)) != 0) - return e; if (print_sep(ms, firstline) == -1) return -1; } @@ -251,13 +251,13 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, ms->c.li[cont_level].got_match = 0; break; } + if ((e = handle_annotation(ms, m)) != 0) + return e; /* * If we are going to print something, * make sure that we have a separator first. */ if (*m->desc) { - if ((e = handle_annotation(ms, m)) != 0) - return e; if (!printed_something) { printed_something = 1; if (print_sep(ms, firstline) @@ -449,7 +449,7 @@ mprint(struct magic_set *ms, struct magic *m) return -1; t = ms->offset + strlen(p->s); if (m->type == FILE_PSTRING) - t++; + t += file_pstring_length_size(m); } break; @@ -614,7 +614,7 @@ moffset(struct magic_set *ms, struct magic *m) p->s[strcspn(p->s, "\n")] = '\0'; t = CAST(uint32_t, (ms->offset + strlen(p->s))); if (m->type == FILE_PSTRING) - t++; + t += file_pstring_length_size(m); return t; } @@ -799,8 +799,8 @@ mconvert(struct magic_set *ms, struct magic *m) return 1; } case FILE_PSTRING: { - char *ptr1 = p->s, *ptr2 = ptr1 + 1; - size_t len = *p->s; + char *ptr1 = p->s, *ptr2 = ptr1 + file_pstring_length_size(m); + size_t len = file_pstring_get_length(m, ptr1); if (len >= sizeof(p->s)) len = sizeof(p->s) - 1; while (len--) -- 2.40.0