From 83abc6c5379cde8ae4975a9daa54f55dec3bd024 Mon Sep 17 00:00:00 2001 From: Scott MacVicar Date: Fri, 29 Aug 2008 09:50:57 +0000 Subject: [PATCH] MFB: Replace ereg code with pcre, fix duplicate macro names and segfault. Patch by Mikko --- ext/fileinfo/libmagic/apprentice.c | 6 +- ext/fileinfo/libmagic/file.h | 8 +- ext/fileinfo/libmagic/print.c | 2 +- ext/fileinfo/libmagic/softmagic.c | 274 ++++++++++++++++++++++------- 4 files changed, 223 insertions(+), 67 deletions(-) diff --git a/ext/fileinfo/libmagic/apprentice.c b/ext/fileinfo/libmagic/apprentice.c index 2b72592f5c..9d3d3fab3f 100644 --- a/ext/fileinfo/libmagic/apprentice.c +++ b/ext/fileinfo/libmagic/apprentice.c @@ -1118,7 +1118,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, m->mask_op = 0; if (*l == '~') { - if (!IS_STRING(m->type)) + if (!IS_LIBMAGIC_STRING(m->type)) m->mask_op |= FILE_OPINVERSE; else if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "'~' invalid for string types"); @@ -1128,7 +1128,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, m->str_flags = 0; m->num_mask = 0; if ((op = get_op(*l)) != -1) { - if (!IS_STRING(m->type)) { + if (!IS_LIBMAGIC_STRING(m->type)) { uint64_t val; ++l; m->mask_op |= op; @@ -2066,7 +2066,7 @@ bs1(struct magic *m) m->offset = swap4((uint32_t)m->offset); m->in_offset = swap4((uint32_t)m->in_offset); m->lineno = swap4((uint32_t)m->lineno); - if (IS_STRING(m->type)) { + if (IS_LIBMAGIC_STRING(m->type)) { m->str_range = swap4(m->str_range); m->str_flags = swap4(m->str_flags); } diff --git a/ext/fileinfo/libmagic/file.h b/ext/fileinfo/libmagic/file.h index 74279539ed..b28631c97c 100644 --- a/ext/fileinfo/libmagic/file.h +++ b/ext/fileinfo/libmagic/file.h @@ -46,7 +46,11 @@ #ifdef HAVE_INTTYPES_H #include #endif -#include + +#include "php.h" +#include "ext/standard/php_string.h" +#include "ext/pcre/php_pcre.h" + #include /* Do this here and now, because struct stat gets re-defined on solaris */ #include @@ -165,7 +169,7 @@ struct magic { #define FILE_LEDOUBLE 38 #define FILE_NAMES_SIZE 39/* size of array to contain all names */ -#define IS_STRING(t) \ +#define IS_LIBMAGIC_STRING(t) \ ((t) == FILE_STRING || \ (t) == FILE_PSTRING || \ (t) == FILE_BESTRING16 || \ diff --git a/ext/fileinfo/libmagic/print.c b/ext/fileinfo/libmagic/print.c index c41e71eac5..a6d186135d 100644 --- a/ext/fileinfo/libmagic/print.c +++ b/ext/fileinfo/libmagic/print.c @@ -74,7 +74,7 @@ file_mdump(struct magic *m) if (m->mask_op & FILE_OPINVERSE) (void) fputc('~', stderr); - if (IS_STRING(m->type)) { + if (IS_LIBMAGIC_STRING(m->type)) { if (m->str_flags) { (void) fputc('/', stderr); if (m->str_flags & STRING_COMPACT_BLANK) diff --git a/ext/fileinfo/libmagic/softmagic.c b/ext/fileinfo/libmagic/softmagic.c index cfa9a36fbd..b91b803f4f 100644 --- a/ext/fileinfo/libmagic/softmagic.c +++ b/ext/fileinfo/libmagic/softmagic.c @@ -36,6 +36,10 @@ #include #include +#ifndef PREG_OFFSET_CAPTURE +# define PREG_OFFSET_CAPTURE (1<<8) +#endif + #ifndef lint FILE_RCSID("@(#)$File: softmagic.c,v 1.117 2008/03/01 22:21:49 rrt Exp $") @@ -281,25 +285,18 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, private int check_fmt(struct magic_set *ms, struct magic *m) { - regex_t rx = {0}; - int rc; - + pcre *pce; + int re_options; + pcre_extra *re_extra; + if (strchr(MAGIC_DESC, '%') == NULL) { return 0; } - - rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); - if (rc) { - char errmsg[512]; - (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); - file_magerror(ms, "regex error %d, (%s)", rc, errmsg); + + if ((pce = pcre_get_compiled_regex("~%[-0-9.]*s~", &re_extra, &re_options TSRMLS_CC)) == NULL) { return -1; } else { - regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1); - rc = regexec(&rx, MAGIC_DESC, rx.re_nsub + 1, pmatch, 0); - efree(pmatch); - regfree(&rx); - return !rc; + return !pcre_exec(pce, re_extra, MAGIC_DESC, strlen(MAGIC_DESC), 0, re_options, NULL, 0); } } @@ -1488,6 +1485,66 @@ file_strncmp16(const char *a, const char *b, size_t len, uint32_t flags) return file_strncmp(a, b, len, flags); } +private void +convert_libmagic_pattern(zval *pattern, int options) +{ + int i, j=0; + char *t; + + t = (char *) safe_emalloc(Z_STRLEN_P(pattern), 2, 5); + memset(t, '\0', sizeof(t)); + + t[j++] = '~'; + + for (i=0; isearch.s == NULL) - return 0; + zval *pattern; + int options = 0; + pcre_cache_entry *pce; + + MAKE_STD_ZVAL(pattern); + Z_STRVAL_P(pattern) = (char *)m->value.s; + Z_STRLEN_P(pattern) = m->vallen; + Z_TYPE_P(pattern) = IS_STRING; + + options |= PCRE_MULTILINE; + + if (m->str_flags & STRING_IGNORE_CASE) { + options |= PCRE_CASELESS; + } + + convert_libmagic_pattern(pattern, options); - l = 0; - rc = regcomp(&rx, m->value.s, REG_EXTENDED|REG_NEWLINE|((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); - if (rc) { - (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); - file_magerror(ms, "regex error %d, (%s)", rc, errmsg); - v = (uint64_t)-1; + if ((pce = pcre_get_compiled_regex_cache(Z_STRVAL_P(pattern), Z_STRLEN_P(pattern) TSRMLS_CC)) == NULL) { + return -1; } else { - regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1); -#ifndef REG_STARTEND -#define REG_STARTEND 0 - size_t l = ms->search.s_len - 1; - char c = ms->search.s[l]; - ((char *)(intptr_t)ms->search.s)[l] = '\0'; -#else - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = ms->search.s_len; -#endif - rc = regexec(&rx, (const char *)ms->search.s, 1, pmatch, REG_STARTEND); -#if REG_STARTEND == 0 - ((char *)(intptr_t)ms->search.s)[l] = c; -#endif - switch (rc) { - case 0: - ms->search.s += (int)pmatch[0].rm_so; - ms->search.offset += (size_t)pmatch[0].rm_so; - ms->search.rm_len = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so); - v = 0; - break; + /* pce now contains the compiled regex */ + zval *retval; + zval *subpats; + char *haystack; + + MAKE_STD_ZVAL(retval); + ALLOC_INIT_ZVAL(subpats); + + /* Cut the search len from haystack, equals to REG_STARTEND */ + haystack = estrndup(ms->search.s, ms->search.s_len); - case REG_NOMATCH: - v = 1; - break; + /* match v = 0, no match v = 1 */ + php_pcre_match_impl(pce, haystack, ms->search.s_len, retval, subpats, 1, 1, PREG_OFFSET_CAPTURE, 0 TSRMLS_CC); + + /* Free haystack */ + efree(haystack); + + if (Z_LVAL_P(retval) < 0) { + zval_ptr_dtor(&subpats); + FREE_ZVAL(retval); + efree(Z_STRVAL_P(pattern)); + efree(pattern); + return -1; + } else if ((Z_LVAL_P(retval) > 0) && (Z_TYPE_P(subpats) == IS_ARRAY)) { + + /* Need to fetch global match which equals pmatch[0] */ + HashTable *ht = Z_ARRVAL_P(subpats); + HashPosition outer_pos; + zval *pattern_match = NULL, *pattern_offset = NULL; + + zend_hash_internal_pointer_reset_ex(ht, &outer_pos); + + if (zend_hash_has_more_elements_ex(ht, &outer_pos) == SUCCESS && + zend_hash_move_forward_ex(ht, &outer_pos)) { + + zval **ppzval; + + /* The first element (should be) is the global match + Need to move to the inner array to get the global match */ + + if (zend_hash_get_current_data_ex(ht, (void**)&ppzval, &outer_pos) != FAILURE) { + + HashTable *inner_ht; + HashPosition inner_pos; + zval **match, **offset; + zval tmpcopy = **ppzval, matchcopy, offsetcopy; + + zval_copy_ctor(&tmpcopy); + INIT_PZVAL(&tmpcopy); + + inner_ht = Z_ARRVAL(tmpcopy); + + /* If everything goes according to the master plan + tmpcopy now contains two elements: + 0 = the match + 1 = starting position of the match */ + zend_hash_internal_pointer_reset_ex(inner_ht, &inner_pos); + + if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS && + zend_hash_move_forward_ex(inner_ht, &inner_pos)) { + + if (zend_hash_get_current_data_ex(inner_ht, (void**)&match, &inner_pos) != FAILURE) { + + matchcopy = **match; + zval_copy_ctor(&matchcopy); + INIT_PZVAL(&matchcopy); + convert_to_string(&matchcopy); + + MAKE_STD_ZVAL(pattern_match); + Z_STRVAL_P(pattern_match) = (char *)Z_STRVAL(matchcopy); + Z_STRLEN_P(pattern_match) = Z_STRLEN(matchcopy); + Z_TYPE_P(pattern_match) = IS_STRING; + + zval_dtor(&matchcopy); + } + } + + if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS && + zend_hash_move_forward_ex(inner_ht, &inner_pos)) { + + if (zend_hash_get_current_data_ex(inner_ht, (void**)&offset, &inner_pos) != FAILURE) { + + offsetcopy = **offset; + zval_copy_ctor(&offsetcopy); + INIT_PZVAL(&offsetcopy); + convert_to_long(&offsetcopy); + + MAKE_STD_ZVAL(pattern_offset); + Z_LVAL_P(pattern_offset) = Z_LVAL(offsetcopy); + Z_TYPE_P(pattern_offset) = IS_LONG; + + zval_dtor(&offsetcopy); + } + } + zval_dtor(&tmpcopy); + } + + if ((pattern_match != NULL) && (pattern_offset != NULL)) { + ms->search.s += (int)Z_LVAL_P(pattern_offset); /* this is where the match starts */ + ms->search.offset += (size_t)Z_LVAL_P(pattern_offset); /* this is where the match starts as size_t */ + ms->search.rm_len = Z_STRLEN_P(pattern_match) /* This is the length of the matched pattern */; + v = 0; + + efree(pattern_match); + efree(pattern_offset); + + } else { + zval_ptr_dtor(&subpats); + FREE_ZVAL(retval); + efree(Z_STRVAL_P(pattern)); + efree(pattern); + return -1; + } + } - default: - (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); - file_magerror(ms, "regexec error %d, (%s)", rc, errmsg); - v = (uint64_t)-1; - break; + + } else { + v = 1; } - regfree(&rx); - efree(pmatch); + zval_ptr_dtor(&subpats); + FREE_ZVAL(retval); } - if (v == (uint64_t)-1) { - return -1; - } - break; + efree(Z_STRVAL_P(pattern)); + efree(pattern); + break; } + + default: file_magerror(ms, "invalid type %d in magiccheck()", m->type); return -1; -- 2.40.0