--- /dev/null
+/*
+ +----------------------------------------------------------------------+
+ | PHP HTML Embedded Scripting Language Version 3.0 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 1997-1999 PHP Development Team (See Credits file) |
+ +----------------------------------------------------------------------+
+ | This program is free software; you can redistribute it and/or modify |
+ | it under the terms of one of the following licenses: |
+ | |
+ | A) the GNU General Public License as published by the Free Software |
+ | Foundation; either version 2 of the License, or (at your option) |
+ | any later version. |
+ | |
+ | B) the PHP License as published by the PHP Development Team and |
+ | included in the distribution in the file: LICENSE |
+ | |
+ | This program is distributed in the hope that it will be useful, |
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of |
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+ | GNU General Public License for more details. |
+ | |
+ | You should have received a copy of both licenses referred to here. |
+ | If you did not, or have any questions about PHP licensing, please |
+ | contact core@php.net. |
+ +----------------------------------------------------------------------+
+ | Authors: Andrey Zmievski <andrey@ispi.net> |
+ +----------------------------------------------------------------------+
+ */
+
+/* $Id$ */
+
+/* Get PCRE library from ftp://ftp.cus.cam.ac.uk/pub/software/programs/pcre/ */
+
+#include "php.h"
+
+#if HAVE_LIBPCRE
+
+#include "php_pcre.h"
+
+/* {{{ module definition structures */
+
+unsigned char third_arg_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE };
+
+function_entry pcre_functions[] = {
+ PHP_FE(pcre_match, third_arg_force_ref)
+ PHP_FE(pcre_replace, NULL)
+ {NULL, NULL, NULL}
+};
+
+php3_module_entry pcre_module_entry = {
+ "PCRE", pcre_functions, NULL, NULL, php_rinit_pcre, NULL,
+ php_info_pcre, STANDARD_MODULE_PROPERTIES
+};
+
+/* }}} */
+
+
+static void *php_pcre_malloc(size_t size)
+{
+ return emalloc(size);
+}
+
+static void php_pcre_free(void *ptr)
+{
+ efree(ptr);
+}
+
+
+/* {{{ void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS) */
+void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS)
+{
+ php3_printf("Perl Compatible Regular Expressions");
+ php3_printf("<table cellpadding=5>"
+ "<tr><td>PCRE library version:</td>"
+ "<td>%s</td></tr>"
+ "</table>", pcre_version());
+}
+/* }}} */
+
+
+/* {{{ int php_rinit_pcre(INIT_FUNC_ARGS) */
+int php_rinit_pcre(INIT_FUNC_ARGS)
+{
+ pcre_malloc = php_pcre_malloc;
+ pcre_free = php_pcre_free;
+
+ return SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra) */
+static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra) {
+ pcre *re = NULL;
+ int coptions = 0;
+ int soptions = 0;
+ const char *error;
+ int erroffset;
+ char delimiter;
+ unsigned char *p, *pp;
+ char *pattern;
+ int do_study = 0;
+
+ p = regex;
+
+ /* Parse through the leading whitespace, and display a warning if we
+ get to the end without encountering a delimiter. */
+ while (isspace(*p)) p++;
+ if (*p == 0) {
+ zend_error(E_WARNING, "Empty regular expression");
+ return NULL;
+ }
+
+ /* Get the delimiter and display a warning if it is alphanumeric
+ or a backslash. */
+ delimiter = *p++;
+ if (isalnum(delimiter) || delimiter == '\\') {
+ zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash");
+ return NULL;
+ }
+
+ /* We need to iterate through the pattern, searching for the ending delimiter,
+ but skipping the backslashed delimiters. If the ending delimiter is not
+ found, display a warning. */
+ pp = p;
+ while (*pp != 0) {
+ if (*pp == delimiter && pp[-1] != '\\')
+ break;
+ pp++;
+ }
+ if (*pp == 0) {
+ zend_error(E_WARNING, "No ending delimiter found");
+ return NULL;
+ }
+
+ /* Make a copy of the actual pattern. */
+ pattern = estrndup(p, pp-p);
+
+ /* Move on to the options */
+ pp++;
+
+ /* Parse through the options, setting appropriate flags. Display
+ a warning if we encounter an unknown option. */
+ while (*pp != 0) {
+ switch (*pp++) {
+ case 'i': coptions |= PCRE_CASELESS; break;
+ case 'm': coptions |= PCRE_MULTILINE; break;
+ case 's': coptions |= PCRE_DOTALL; break;
+ case 'x': coptions |= PCRE_EXTENDED; break;
+
+ case 'A': coptions |= PCRE_ANCHORED; break;
+ case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
+ case 'S': do_study = 1; break;
+ case 'U': coptions |= PCRE_UNGREEDY; break;
+ case 'X': coptions |= PCRE_EXTRA; break;
+
+ case ' ':
+ case '\n':
+ break;
+
+ default:
+ zend_error(E_WARNING, "Unknown option '%c'", pp[-1]);
+ efree(pattern);
+ return NULL;
+ }
+ }
+
+ /* Compile pattern and display a warning if compilation failed. */
+ re = pcre_compile(pattern,
+ coptions,
+ &error,
+ &erroffset,
+ NULL);
+
+ if (re == NULL) {
+ zend_error(E_WARNING, "Compilation failed: %s at offset %d\n", error, erroffset);
+ efree(pattern);
+ return NULL;
+ }
+
+ /* If study option was specified, study the pattern and
+ store the result in extra for passing to pcre_exec. */
+ if (do_study) {
+ extra = pcre_study(re, soptions, &error);
+ if (error != NULL) {
+ zend_error(E_WARNING, "Error while studying pattern");
+ }
+ }
+
+ efree(pattern);
+
+ return re;
+}
+/* }}} */
+
+
+/* {{{ proto pcre_match(string pattern, string subject [, array subpatterns ])
+ Perform a Perl-style regular expression match */
+PHP_FUNCTION(pcre_match)
+{
+ zval *regex,
+ *subject,
+ *subpats = NULL;
+ pcre *re = NULL;
+ pcre_extra *extra = NULL;
+ int exoptions = 0;
+ const char *error;
+ int count;
+ int *offsets;
+ int size_offsets;
+ int matched;
+ int i;
+ zval *entry;
+ const char **stringlist;
+
+ /* Get function parameters and do error-checking. */
+ switch(ARG_COUNT(ht)) {
+ case 2:
+ if (getParameters(ht, 2, ®ex, &subject) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
+ break;
+
+ case 3:
+ if (getParameters(ht, 3, ®ex, &subject, &subpats) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
+ if (!ParameterPassedByReference(ht, 3)) {
+ zend_error(E_WARNING, "Array to be filled with matches must be passed by reference.");
+ RETURN_FALSE;
+ }
+ break;
+
+ default:
+ WRONG_PARAM_COUNT;
+ }
+
+ /* Make sure we're dealing with strings. */
+ convert_to_string(regex);
+ convert_to_string(subject);
+
+ /* Compile regex or get it from cache. */
+ if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra)) == NULL)
+ return;
+
+ /* Calculate the size of the offsets array, and allocate memory for it. */
+ size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
+ offsets = (int *)emalloc(size_offsets * sizeof(int));
+
+ /* Execute the regular expression. */
+ count = pcre_exec(re, extra, subject->value.str.val, subject->value.str.len,
+ exoptions, offsets, size_offsets);
+
+ /* Check for too many substrings condition. */
+ if (count == 0) {
+ zend_error(E_NOTICE, "Matched, but too many substrings\n");
+ count = size_offsets/3;
+ }
+
+ /* If something has matched */
+ if (count >= 0) {
+ matched = 1;
+
+ for(i=0; i<count; i++)
+ {
+ php3_printf("so = %d, eo = %d\n", offsets[i<<1], offsets[(i<<1)+1]);
+ }
+ /* If subpatters array has been passed, fill it in with values. */
+ if (subpats != NULL) {
+ /* Try to get the list of substrings and display a warning if failed. */
+ if (pcre_get_substring_list(subject->value.str.val, offsets, count, &stringlist) < 0) {
+ efree(offsets);
+ efree(re);
+ zend_error(E_WARNING, "Get subpatterns list failed");
+ return;
+ }
+
+ /* Make sure to clean up the passed array and initialize it. */
+ zval_dtor(subpats);
+ array_init(subpats);
+
+ /* For each subpattern, allocate an array entry, initialize it,
+ and fill in the matched pattern. Then insert it into the
+ subpatterns array. */
+ for (i=0; i<count; i++) {
+ entry = (zval *)emalloc(sizeof(zval));
+ entry->type = IS_STRING;
+ entry->value.str.val = estrdup(stringlist[i]);
+ entry->value.str.len = offsets[(i<<1)+1] - offsets[i<<1];
+ entry->is_ref = 0;
+ entry->refcount = 1;
+
+ zend_hash_index_update(subpats->value.ht, i, &entry, sizeof(zval *), NULL);
+ }
+
+ efree(stringlist);
+ }
+ }
+ /* If nothing matched */
+ else {
+ matched = 0;
+
+ /* Make sure to clean up the passed array and initialize it
+ to empty since we don't want to leave previous values in it. */
+ if (subpats != NULL) {
+ zval_dtor(subpats);
+ array_init(subpats);
+ }
+ }
+
+ efree(offsets);
+ efree(re);
+
+ RETVAL_LONG(matched);
+}
+/* }}} */
+
+/* {{{ int _pcre_get_backref(const char *walk, int *backref) */
+static int _pcre_get_backref(const char *walk, int *backref)
+{
+ if (*walk < '0' && *walk > '9')
+ return 0;
+ else
+ *backref = *walk - '0';
+
+ if (walk[1] >= '0' && walk[1] <= '9')
+ *backref = *backref * 10 + walk[1] - '0';
+
+ php3_printf("backref = %d\n", *backref);
+ return 1;
+}
+/* }}} */
+
+
+/* {{{ proto pcre_replace()
+ */
+PHP_FUNCTION(pcre_replace)
+{
+ zval *regex,
+ *replace,
+ *subject;
+ pcre *re = NULL;
+ pcre_extra *extra = NULL;
+ int exoptions = 0;
+ const char *error;
+ int count = 0;
+ int *offsets;
+ int size_offsets;
+ int matched;
+ const char ***stringlist;
+ int new_len;
+ int alloc_len;
+ int subject_len;
+ int subject_offset;
+ int result_len;
+ int backref;
+ char *result,
+ *new_buf,
+ *walkbuf,
+ *walk;
+
+ if (ARG_COUNT(ht) != 3 || getParameters(ht, 3, ®ex, &replace, &subject) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
+
+ /* Make sure we're dealing with strings. */
+ convert_to_string(regex);
+ convert_to_string(replace);
+ convert_to_string(subject);
+
+ /* Compile regex or get it from cache. */
+ if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra)) == NULL)
+ return;
+
+ /* Calculate the size of the offsets array, and allocate memory for it. */
+ size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
+ offsets = (int *)emalloc(size_offsets * sizeof(int));
+
+ subject_len = subject->value.str.len;
+
+ alloc_len = 2 * subject_len + 1;
+ result = emalloc(alloc_len * sizeof(char));
+ if (!result) {
+ zend_error(E_WARNING, "Unable to allocate memory in pcre_replace");
+ efree(re);
+ efree(offsets);
+ return;
+ }
+
+ subject_offset = 0;
+ result[0] = '\0';
+
+ while (count >= 0) {
+ /* Execute the regular expression. */
+ count = pcre_exec(re, extra, &subject->value.str.val[subject_offset],
+ subject->value.str.len-subject_offset,
+ (subject_offset ? PCRE_NOTBOL : 0), offsets, size_offsets);
+
+ /* Check for too many substrings condition. */
+ if (count == 0) {
+ zend_error(E_NOTICE, "Matched, but too many substrings\n");
+ count = size_offsets/3;
+ }
+
+ if (count > 0) {
+ new_len = strlen(result) + offsets[0]; /* part before the match */
+ walk = replace->value.str.val;
+ while (*walk)
+ if ('\\' == *walk &&
+ _pcre_get_backref(walk+1, &backref) &&
+ backref < count) {
+ new_len += offsets[2*backref+1] - offsets[2*backref];
+ walk += (backref > 9) ? 3 : 2;
+ } else {
+ new_len++;
+ walk++;
+ }
+
+ if (new_len + 1 > alloc_len) {
+ alloc_len = 1 + alloc_len + 2 * new_len;
+ new_buf = emalloc(alloc_len);
+ strcpy(new_buf, result);
+ efree(result);
+ result = new_buf;
+ }
+ result_len = strlen(result);
+ /* copy the part of the string before the match */
+ strncat(result, &subject->value.str.val[subject_offset], offsets[0]);
+
+ /* copy replacement and backrefs */
+ walkbuf = &result[result_len + offsets[0]];
+ walk = replace->value.str.val;
+ while (*walk)
+ if ('\\' == *walk &&
+ _pcre_get_backref(walk+1, &backref) &&
+ backref < count) {
+ result_len = offsets[2*backref+1] - offsets[2*backref];
+ memcpy (walkbuf,
+ &subject->value.str.val[subject_offset + offsets[2*backref]],
+ result_len);
+ walkbuf += result_len;
+ walk += (backref > 9) ? 3 : 2;
+ } else
+ *walkbuf++ = *walk++;
+ *walkbuf = '\0';
+
+ /* and get ready to keep looking for replacements */
+ if (offsets[0] == offsets[1]) {
+ if (offsets[0] + subject_offset >= subject_len)
+ break;
+ new_len = strlen (result) + 1;
+ if (new_len + 1 > alloc_len) {
+ alloc_len = 1 + alloc_len + 2 * new_len;
+ new_buf = emalloc(alloc_len * sizeof(char));
+ strcpy(new_buf, result);
+ efree(result);
+ result = new_buf;
+ }
+ subject_offset += offsets[1] + 1;
+ result [new_len-1] = subject->value.str.val [subject_offset-1];
+ result [new_len] = '\0';
+ } else {
+ subject_offset += offsets[1];
+ }
+ } else { /* REG_NOMATCH */
+ new_len = strlen(result) + strlen(&subject->value.str.val[subject_offset]);
+ if (new_len + 1 > alloc_len) {
+ alloc_len = new_len + 1; /* now we know exactly how long it is */
+ new_buf = emalloc(alloc_len * sizeof(char));
+ strcpy(new_buf, result);
+ efree(result);
+ result = new_buf;
+ }
+ /* stick that last bit of string on our output */
+ strcat(result, &subject->value.str.val[subject_offset]);
+ }
+ }
+
+ efree(offsets);
+ efree(re);
+
+ RETVAL_STRING(result, 1);
+ efree(result);
+}
+/* }}} */
+
+
+#endif /* HAVE_PCRE */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ */