From: Sascha Schumann Date: Wed, 12 May 1999 15:40:15 +0000 (+0000) Subject: - regex cache (needs some work) X-Git-Tag: BEFORE_PHP4_APACHE_MODULE_CHANGE~52 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a4d20864a689eb7ee7a29c2b0b119fb53a361558;p=php - regex cache (needs some work) - str_replace changes from php3 --- diff --git a/ext/ereg/ereg.c b/ext/ereg/ereg.c index 4c8b1e240e..b492865e6b 100644 --- a/ext/ereg/ereg.c +++ b/ext/ereg/ereg.c @@ -49,10 +49,95 @@ function_entry reg_functions[] = { {NULL, NULL, NULL} }; -php3_module_entry regexp_module_entry = { - "Regular Expressions", reg_functions, NULL, NULL, NULL, NULL, NULL, STANDARD_MODULE_PROPERTIES + +static int php_minit_regex(INIT_FUNC_ARGS); +static int php_mshutdown_regex(SHUTDOWN_FUNC_ARGS); +static void php_info_regex(ZEND_MODULE_INFO_FUNC_ARGS); + +zend_module_entry regexp_module_entry = { + "Regular Expressions", + reg_functions, + php_minit_regex, php_mshutdown_regex, + NULL, NULL, php_info_regex, + STANDARD_MODULE_PROPERTIES }; +#ifdef ZTS +int reg_globals_id; +#else +static php_reg_globals reg_globals; +#endif + +typedef struct { + regex_t preg; + int cflags; +} reg_cache; + +static int _php_regcomp(regex_t *preg, const char *pattern, int cflags) +{ + REGSLS_FETCH + int r = 0; + int patlen = strlen(pattern); + reg_cache *rc = NULL; + + if(_php3_hash_find(&REG(ht_rc), (char *) pattern, patlen, (void **) &rc) == FAILURE || + rc->cflags != cflags) { + r = regcomp(preg, pattern, cflags); + if(!r) { + reg_cache rcp; + + rcp.cflags = cflags; + memcpy(&rcp.preg, preg, sizeof(*preg)); + _php3_hash_update(&REG(ht_rc), (char *) pattern, patlen, + (void *) &rcp, sizeof(*rc), NULL); + } + } else { + memcpy(preg, &rc->preg, sizeof(*preg)); + } + + return r; +} + +#define regfree(a); +#define regcomp _php_regcomp + +static void _free_reg_cache(reg_cache *rc) +{ + regfree(&rc->preg); +} + +static void php_reg_init_globals(php_reg_globals *reg_globals) +{ + _php3_hash_init(&reg_globals->ht_rc, 0, NULL, 
(void (*)(void *)) _free_reg_cache, 1); +} + +static int php_minit_regex(INIT_FUNC_ARGS) +{ +#ifdef ZTS + reg_globals_id = tsrm_allocate_id(sizeof(php_reg_globals), php_reg_init_globals, NULL); +#else + php_reg_init_globals(&reg_globals); +#endif + + return SUCCESS; +} + +static int php_mshutdown_regex(SHUTDOWN_FUNC_ARGS) +{ + _php3_hash_destroy(&REG(ht_rc)); + return SUCCESS; +} + +static void php_info_regex(ZEND_MODULE_INFO_FUNC_ARGS) +{ +#if HSREGEX + PUTS("Bundled regex library enabled\n"); +#else + PUTS("System regex library enabled\n"); +#endif +} + + /* This is the maximum number of (..) constructs we'll generate from a call to ereg() or eregi() with the optional third argument. */ #define NS 10 @@ -563,6 +648,8 @@ PHPAPI void php3_sql_regcase(INTERNAL_FUNCTION_PARAMETERS) } /* }}} */ + + /* * Local variables: * tab-width: 4 diff --git a/ext/ereg/php_ereg.h b/ext/ereg/php_ereg.h index bb33aa768f..011484bc4f 100644 --- a/ext/ereg/php_ereg.h +++ b/ext/ereg/php_ereg.h @@ -36,13 +36,36 @@ extern php3_module_entry regexp_module_entry; #define regexp_module_ptr &regexp_module_entry -extern char *_php3_regreplace(const char *pattern, const char *replace, const char *string, int icase, int extended); - -extern void php3_ereg(INTERNAL_FUNCTION_PARAMETERS); -extern void php3_eregi(INTERNAL_FUNCTION_PARAMETERS); -extern void php3_eregireplace(INTERNAL_FUNCTION_PARAMETERS); -extern void php3_eregreplace(INTERNAL_FUNCTION_PARAMETERS); -extern void php3_split(INTERNAL_FUNCTION_PARAMETERS); -extern PHPAPI void php3_sql_regcase(INTERNAL_FUNCTION_PARAMETERS); +char *_php3_regreplace(const char *pattern, const char *replace, const char *string, int icase, int extended); + +PHP_FUNCTION(ereg); +PHP_FUNCTION(eregi); +PHP_FUNCTION(eregireplace); +PHP_FUNCTION(eregreplace); +PHP_FUNCTION(split); +PHP_FUNCTION(sql_regcase); + +typedef struct { + HashTable ht_rc; +} php_reg_globals; + + +#ifdef ZTS +#define REGSLS_D php_reg_globals *reg_globals +#define REGSLS_DC , REGSLS_D +#define 
REGSLS_C reg_globals +#define REGSLS_CC , REGSLS_C +#define REG(v) (reg_globals->v) +#define REGSLS_FETCH php_reg_globals *reg_globals = ts_resource(reg_globals_id); +#else +#define REGSLS_D +#define REGSLS_DC +#define REGSLS_C +#define REGSLS_CC +#define REG(v) (reg_globals.v) +#define REGSLS_FETCH +#endif + +#define phpext_regex_ptr regexp_module_ptr #endif /* _REG_H */ diff --git a/ext/standard/reg.c b/ext/standard/reg.c index 4c8b1e240e..b492865e6b 100644 --- a/ext/standard/reg.c +++ b/ext/standard/reg.c @@ -49,10 +49,95 @@ function_entry reg_functions[] = { {NULL, NULL, NULL} }; -php3_module_entry regexp_module_entry = { - "Regular Expressions", reg_functions, NULL, NULL, NULL, NULL, NULL, STANDARD_MODULE_PROPERTIES + +static int php_minit_regex(INIT_FUNC_ARGS); +static int php_mshutdown_regex(SHUTDOWN_FUNC_ARGS); +static void php_info_regex(ZEND_MODULE_INFO_FUNC_ARGS); + +zend_module_entry regexp_module_entry = { + "Regular Expressions", + reg_functions, + php_minit_regex, php_mshutdown_regex, + NULL, NULL, php_info_regex, + STANDARD_MODULE_PROPERTIES }; +#ifdef ZTS +int reg_globals_id; +#else +static php_reg_globals reg_globals; +#endif + +typedef struct { + regex_t preg; + int cflags; +} reg_cache; + +static int _php_regcomp(regex_t *preg, const char *pattern, int cflags) +{ + REGSLS_FETCH + int r = 0; + int patlen = strlen(pattern); + reg_cache *rc = NULL; + + if(_php3_hash_find(&REG(ht_rc), (char *) pattern, patlen, (void **) &rc) == FAILURE || + rc->cflags != cflags) { + r = regcomp(preg, pattern, cflags); + if(!r) { + reg_cache rcp; + + rcp.cflags = cflags; + memcpy(&rcp.preg, preg, sizeof(*preg)); + _php3_hash_update(&REG(ht_rc), (char *) pattern, patlen, + (void *) &rcp, sizeof(*rc), NULL); + } + } else { + memcpy(preg, &rc->preg, sizeof(*preg)); + } + + return r; +} + +#define regfree(a); +#define regcomp _php_regcomp + +static void _free_reg_cache(reg_cache *rc) +{ + regfree(&rc->preg); +} + +static void php_reg_init_globals(php_reg_globals *reg_globals) 
+{ + _php3_hash_init(&reg_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_reg_cache, 1); +} + +static int php_minit_regex(INIT_FUNC_ARGS) +{ +#ifdef ZTS + reg_globals_id = tsrm_allocate_id(sizeof(php_reg_globals), php_reg_init_globals, NULL); +#else + php_reg_init_globals(&reg_globals); +#endif + + return SUCCESS; +} + +static int php_mshutdown_regex(SHUTDOWN_FUNC_ARGS) +{ + _php3_hash_destroy(&REG(ht_rc)); + return SUCCESS; +} + +static void php_info_regex(ZEND_MODULE_INFO_FUNC_ARGS) +{ +#if HSREGEX + PUTS("Bundled regex library enabled\n"); +#else + PUTS("System regex library enabled\n"); +#endif +} + + /* This is the maximum number of (..) constructs we'll generate from a call to ereg() or eregi() with the optional third argument. */ #define NS 10 @@ -563,6 +648,8 @@ PHPAPI void php3_sql_regcase(INTERNAL_FUNCTION_PARAMETERS) } /* }}} */ + + /* * Local variables: * tab-width: 4 diff --git a/ext/standard/reg.h b/ext/standard/reg.h index bb33aa768f..011484bc4f 100644 --- a/ext/standard/reg.h +++ b/ext/standard/reg.h @@ -36,13 +36,36 @@ extern php3_module_entry regexp_module_entry; #define regexp_module_ptr &regexp_module_entry -extern char *_php3_regreplace(const char *pattern, const char *replace, const char *string, int icase, int extended); - -extern void php3_ereg(INTERNAL_FUNCTION_PARAMETERS); -extern void php3_eregi(INTERNAL_FUNCTION_PARAMETERS); -extern void php3_eregireplace(INTERNAL_FUNCTION_PARAMETERS); -extern void php3_eregreplace(INTERNAL_FUNCTION_PARAMETERS); -extern void php3_split(INTERNAL_FUNCTION_PARAMETERS); -extern PHPAPI void php3_sql_regcase(INTERNAL_FUNCTION_PARAMETERS); +char *_php3_regreplace(const char *pattern, const char *replace, const char *string, int icase, int extended); + +PHP_FUNCTION(ereg); +PHP_FUNCTION(eregi); +PHP_FUNCTION(eregireplace); +PHP_FUNCTION(eregreplace); +PHP_FUNCTION(split); +PHP_FUNCTION(sql_regcase); + +typedef struct { + HashTable ht_rc; +} php_reg_globals; + + +#ifdef ZTS +#define REGSLS_D php_reg_globals *reg_globals 
+#define REGSLS_DC , REGSLS_D +#define REGSLS_C reg_globals +#define REGSLS_CC , REGSLS_C +#define REG(v) (reg_globals->v) +#define REGSLS_FETCH php_reg_globals *reg_globals = ts_resource(reg_globals_id); +#else +#define REGSLS_D +#define REGSLS_DC +#define REGSLS_C +#define REGSLS_CC +#define REG(v) (reg_globals.v) +#define REGSLS_FETCH +#endif + +#define phpext_regex_ptr regexp_module_ptr #endif /* _REG_H */ diff --git a/ext/standard/string.c b/ext/standard/string.c index e1880c93b6..133f817f17 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -1213,7 +1213,6 @@ static void _php3_char_to_str(char *str,uint len,char from,char *to,int to_len,p *target = 0; } -#if 0 /* * this is a binary safe equivalent to strnstr * note that we don't check for the end in str_to_str but here @@ -1225,20 +1224,14 @@ _php3_memnstr(char *haystack, char *needle, int needle_len, char *end) char *p = haystack; char *s = NULL; - for(; p < end - needle_len + 1&& - (s = memchr(p, *needle, end - haystack)); p = s + 1) { + for(; p <= end - needle_len && + (s = memchr(p, *needle, end - p - needle_len + 1)); p = s + 1) { if(memcmp(s, needle, needle_len) == 0) return s; } return NULL; } -/* - * because of efficiency we use malloc/realloc/free here - * erealloc _will_ move your data around - it took me some time - * to find out ... Sascha Schumann 981220 - */ - static char *_php3_str_to_str(char *haystack, int length, char *needle, int needle_len, char *str, int str_len, int *_new_length) { @@ -1246,78 +1239,48 @@ static char *_php3_str_to_str(char *haystack, int length, char *r, *s; char *end = haystack + length; char *new; + char *off; - new = malloc(length); + new = emalloc(length); /* we jump through haystack searching for the needle. hurray! */ for(p = haystack, q = new; (r = _php3_memnstr(p, needle, needle_len, end));) { /* this ain't optimal. 
you could call it `efficient memory usage' */ - realloc(new, (q - new) + (r - p) + (str_len) + 1); + off = erealloc(new, (q - new) + (r - p) + (str_len) + 1); + if(off != new) { + if(!off) { + goto finish; + } + q += off - new; + new = off; + } memcpy(q, p, r - p); q += r - p; memcpy(q, str, str_len); q += str_len; p = r + needle_len; } + /* if there is a rest, copy it */ - if((end - p)) { + if((end - p) > 0) { s = (q) + (end - p); - new = realloc(new, s - new + 1); + off = erealloc(new, s - new + 1); + if(off != new) { + if(!off) { + goto finish; + } + q += off - new; + new = off; + s = q + (end - p); + } memcpy(q, p, end - p); q = s; } +finish: *q = '\0'; if(_new_length) *_new_length = q - new; return new; } -#endif - -static char *_php3_memstr(char *s, char *c, size_t n, size_t m) -{ - char *p; - - for(p = s; ((size_t) (p - s)) < n; p++) - if(memcmp(p, c, m) == 0) - return p; - return NULL; -} - -#define ATTCHSTR(st, sz) \ - nl += sz; \ - n = realloc(n, nl + 1); \ - memcpy(n + no, st, sz); \ - no += sz - - -static char *_php3_str_to_str(char *a, int al, char *b, int bl, char *c, int cl, - int *newlen) -{ - char *n = NULL, *p, *q; - int nl = 0; - int no = 0; - - /* run through all occurences of b in a */ - for(p = q = a; (p = _php3_memstr(p, b, al - (p - a), bl)); q = p) { - /* attach everything between the previous occ. and this one */ - ATTCHSTR(q, p - q); - /* attach the replacement string c */ - ATTCHSTR(c, cl); - /* jump over string b in a */ - p += bl; - } - - /* anything left over ? 
*/ - if((al - (q - a)) > 0) { - ATTCHSTR(q, al - (q - a)); - } - - if(newlen) *newlen = nl; - n[nl] = '\0'; - - return n; -} - -#undef ATTCHSTR /* {{{ proto string str_replace(string needle, string str, string haystack) Replace all occurrences of needle in haystack with str */ @@ -1325,29 +1288,29 @@ void php3_str_replace(INTERNAL_FUNCTION_PARAMETERS) { pval *haystack, *needle, *str; char *new; + int len = 0; - if(ARG_COUNT(ht) != 3 || + if(ARG_COUNT(ht) != 3 || getParameters(ht, 3, &needle, &str, &haystack) == FAILURE) { WRONG_PARAM_COUNT; } convert_to_string(haystack); convert_to_string(needle); - convert_to_string(str); - - if(needle->value.str.len == 1) { - _php3_char_to_str(haystack->value.str.val,haystack->value.str.len,needle->value.str.val[0],str->value.str.val, str->value.str.len ,return_value); + convert_to_string(str); if(needle->value.str.len == 1) { _php3_char_to_str(haystack->value.str.val,haystack->value.str.len,needle->value.str.val[0],str->value.str.val, str->value.str.len ,return_value); return; } - new = _php3_str_to_str(haystack->value.str.val, haystack->value.str.len, - needle->value.str.val, needle->value.str.len, - str->value.str.val, str->value.str.len, - &return_value->value.str.len); - return_value->value.str.val = emalloc(return_value->value.str.len + 1); - memcpy(return_value->value.str.val, new, return_value->value.str.len + 1); - free(new); - return_value->type = IS_STRING; + if(needle->value.str.len == 0) { + php3_error(E_WARNING, "The length of the needle must not be 0"); + RETURN_FALSE; + } + + new = _php3_str_to_str(haystack->value.str.val, haystack->value.str.len, + needle->value.str.val, needle->value.str.len, + str->value.str.val, str->value.str.len, + &len); + RETURN_STRINGL(new, len, 0); } /* }}} */ diff --git a/main/internal_functions.c.in b/main/internal_functions.c.in index 510dd37d61..4b144063db 100644 --- a/main/internal_functions.c.in +++ b/main/internal_functions.c.in @@ -40,6 +40,7 @@ #include #include +#include 
"ext/standard/reg.h" @EXT_INCLUDE_CODE@ /* SNMP has to be moved to ext */ @@ -51,6 +52,7 @@ unsigned char second_arg_force_ref[] = { 2, BYREF_NONE, BYREF_FORCE }; unsigned char second_arg_allow_ref[] = { 2, BYREF_NONE, BYREF_ALLOW }; zend_module_entry *php3_builtin_modules[] = { + phpext_regex_ptr, @EXT_MODULE_PTRS@ };