From: Christos Zoulas Date: Sat, 20 Nov 2004 23:50:12 +0000 (+0000) Subject: Add support for hackish ucs16 strings. X-Git-Tag: FILE4_11~4 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=83c7f6ec5ebc396e31d898f5a712b0cd4fe64b75;p=file Add support for hackish ucs16 strings. --- diff --git a/src/apprentice.c b/src/apprentice.c index 38f9d51e..08b68636 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -45,7 +45,7 @@ #endif #ifndef lint -FILE_RCSID("@(#)$Id: apprentice.c,v 1.80 2004/11/13 08:11:39 christos Exp $") +FILE_RCSID("@(#)$Id: apprentice.c,v 1.81 2004/11/20 23:50:12 christos Exp $") #endif /* lint */ #define EATAB {while (isascii((unsigned char) *l) && \ @@ -74,6 +74,9 @@ FILE_RCSID("@(#)$Id: apprentice.c,v 1.80 2004/11/13 08:11:39 christos Exp $") #define MAXPATHLEN 1024 #endif +#define IS_STRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \ + (t) == FILE_BESTRING16 || (t) == FILE_LESTRING16) + private int getvalue(struct magic_set *ms, struct magic *, char **); private int hextoint(int); private char *getstr(struct magic_set *, char *, char *, int, int *); @@ -370,6 +373,8 @@ file_signextend(struct magic_set *ms, struct magic *m, uint32_t v) break; case FILE_STRING: case FILE_PSTRING: + case FILE_BESTRING16: + case FILE_LESTRING16: break; case FILE_REGEX: break; @@ -542,6 +547,8 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, #define NBELDATE 7 #define NLELDATE 7 #define NREGEX 5 +#define NBESTRING16 10 +#define NLESTRING16 10 if (*l == 'u') { ++l; @@ -599,7 +606,13 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, l += NLELDATE; } else if (strncmp(l, "regex", NREGEX)==0) { m->type = FILE_REGEX; - l += sizeof("regex"); + l += NREGEX; + } else if (strncmp(l, "bestring16", NBESTRING16)==0) { + m->type = FILE_BESTRING16; + l += NBESTRING16; + } else if (strncmp(l, "lestring16", NLESTRING16)==0) { + m->type = FILE_LESTRING16; + l += NLESTRING16; } else { if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "type `%s' invalid", l); @@ -608,14 +621,13 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ if (*l == '~') { - if (FILE_STRING != m->type && FILE_PSTRING != m->type) + if (!IS_STRING(m->type)) m->mask_op = FILE_OPINVERSE; ++l; } if ((t = strchr(fops, *l)) != NULL) { uint32_t op = (uint32_t)(t - fops); - if (op != FILE_OPDIVIDE || - (FILE_STRING != m->type && FILE_PSTRING != m->type)) { + if (op != FILE_OPDIVIDE || !IS_STRING(m->type)) { ++l; m->mask_op |= op; val = (uint32_t)strtoul(l, &l, 0); @@ -666,7 +678,7 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, } break; case '!': - if (m->type != FILE_STRING && m->type != FILE_PSTRING) { + if (!IS_STRING(m->type)) { m->reln = *l; ++l; break; @@ -784,6 +796,8 @@ getvalue(struct magic_set *ms, struct magic *m, char **p) int slen; switch (m->type) { + case FILE_BESTRING16: + case FILE_LESTRING16: case FILE_STRING: case FILE_PSTRING: case FILE_REGEX: @@ -1210,7 +1224,7 @@ bs1(struct magic *m) m->cont_level = swap2(m->cont_level); m->offset = swap4((uint32_t)m->offset); m->in_offset = swap4((uint32_t)m->in_offset); - if (m->type != FILE_STRING) + if (IS_STRING(m->type)) m->value.l = swap4(m->value.l); m->mask = swap4(m->mask); } diff --git a/src/file.h b/src/file.h index 2f109399..f29bba01 100644 --- a/src/file.h +++ b/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$Id: file.h,v 1.63 2004/11/13 08:11:39 christos Exp $ + * @(#)$Id: file.h,v 1.64 2004/11/20 23:50:12 christos Exp $ */ #ifndef __file_h__ @@ -108,6 +108,8 @@ struct magic { #define FILE_BELDATE 15 #define FILE_LELDATE 16 #define FILE_REGEX 17 +#define FILE_BESTRING16 18 +#define FILE_LESTRING16 19 #define FILE_FORMAT_NAME \ /* 0 */ "invalid 0", \ @@ -127,7 +129,9 @@ struct magic { /* 14 */ "ldate", \ /* 15 */ "beldate", \ /* 16 */ "leldate", \ -/* 17 */ "regex", +/* 17 */ "regex", \ +/* 18 */ "bestring16", \ +/* 19 */ "lestring16", #define FILE_FMT_NUM "cduxXi" #define FILE_FMT_STR "s" @@ -150,7 +154,9 @@ struct magic { /* 14 */ FILE_FMT_STR, \ /* 15 */ FILE_FMT_STR, \ /* 16 */ FILE_FMT_STR, \ -/* 17 */ FILE_FMT_STR, +/* 17 */ FILE_FMT_STR, \ +/* 18 */ FILE_FMT_STR, \ +/* 19 */ FILE_FMT_STR, /* Word 3 */ uint8_t in_op; /* operator for indirection */ diff --git a/src/softmagic.c b/src/softmagic.c index 4fc1af83..3cc0c2cf 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -39,7 +39,7 @@ #ifndef lint -FILE_RCSID("@(#)$Id: softmagic.c,v 1.69 2004/11/13 08:11:12 christos Exp $") +FILE_RCSID("@(#)$Id: softmagic.c,v 1.70 2004/11/20 23:50:13 christos Exp $") #endif /* lint */ private int match(struct magic_set *, struct magic *, uint32_t, @@ -49,6 +49,8 @@ private int mget(struct magic_set *, union VALUETYPE *, const unsigned char *, private int mcheck(struct magic_set *, union VALUETYPE *, struct magic *); private int32_t mprint(struct magic_set *, union VALUETYPE *, struct magic *); private void mdebug(uint32_t, const char *, size_t); +private int mcopy(struct magic_set *, union VALUETYPE *, int, int, + const unsigned char *, size_t, size_t); private int mconvert(struct magic_set *, union VALUETYPE *, struct magic *); private int check_mem(struct magic_set *, unsigned int); @@ -271,6 +273,8 @@ mprint(struct magic_set *ms, union VALUETYPE *p, struct magic *m) case FILE_STRING: case FILE_PSTRING: + case FILE_BESTRING16: + case FILE_LESTRING16: if (m->reln == '=') { if (file_printf(ms, m->desc, m->value.s) == -1) return -1; @@ -421,6 +425,8 @@ mconvert(struct magic_set *ms, union VALUETYPE *p, struct magic *m) p->l = ~p->l; return 1; case FILE_STRING: + case FILE_BESTRING16: + case FILE_LESTRING16: { size_t len; @@ -598,12 +604,10 @@ mdebug(uint32_t offset, const char *str, size_t len) } private int -mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, - struct magic *m, size_t nbytes) +mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, + const unsigned char *s, size_t offset, size_t nbytes) { - uint32_t offset = m->offset; - - if (m->type == FILE_REGEX) { + if (type == FILE_REGEX && indir == 0) { /* * offset is interpreted as last line to search, * (starting at 1), not as bytes-from start-of-file @@ -619,18 +623,53 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, last = b; if (last != NULL) *last = '\0'; - } else if (offset + sizeof(union VALUETYPE) <= nbytes) - memcpy(p, s + offset, sizeof(union VALUETYPE)); - else { - /* - * the usefulness of padding with zeroes eludes me, it - * might even cause problems - */ - memset(p, 0, sizeof(union VALUETYPE)); - if (offset < nbytes) - memcpy(p, s + offset, nbytes - offset); + return 0; + } + + if (indir == 0 && (type == FILE_BESTRING16 || type == FILE_LESTRING16)) + { + const char *src = s + offset; + const char *esrc = s + nbytes; + char *dst = p->s, *edst = &p->s[sizeof(p->s) - 1]; + + if (type == FILE_BESTRING16) + src++; + + for (;src < esrc; src++, dst++) { + if (dst < edst) + *dst = *src++; + else + break; + if (*dst == '\0') + *dst = ' '; + } + *edst = '\0'; + return 0; } + if (offset + sizeof(*p) <= nbytes) + nbytes = sizeof(*p); + + (void)memcpy(p, s + offset, nbytes); + + /* + * the usefulness of padding with zeroes eludes me, it + * might even cause problems + */ + if (nbytes < sizeof(*p)) + (void)memset(p + nbytes, 0, sizeof(*p) - nbytes); + return 0; +} + +private int +mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, + struct magic *m, size_t nbytes) +{ + uint32_t offset = m->offset; + + if (mcopy(ms, p, m->type, m->flag & INDIR, s, offset, nbytes) == -1) + return -1; + /* Verify we have enough data to match magic type */ switch (m->type) { case FILE_BYTE: @@ -1017,11 +1056,8 @@ mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s, break; } - if (nbytes < sizeof(union VALUETYPE) || - nbytes - sizeof(union VALUETYPE) < offset) - return 0; - - memcpy(p, s + offset, sizeof(union VALUETYPE)); + if (mcopy(ms, p, m->type, 0, s, offset, nbytes) == -1) + return -1; if ((ms->flags & MAGIC_DEBUG) != 0) { mdebug(offset, (char *)(void *)p, @@ -1070,6 +1106,8 @@ mcheck(struct magic_set *ms, union VALUETYPE *p, struct magic *m) break; case FILE_STRING: + case FILE_BESTRING16: + case FILE_LESTRING16: case FILE_PSTRING: { /*