]> granicus.if.org Git - apache/commitdiff
Replace PCRE with PCRE2 where it is available.
author: William A. Rowe Jr <wrowe@apache.org>
Fri, 9 Dec 2016 19:06:06 +0000 (19:06 +0000)
committer: William A. Rowe Jr <wrowe@apache.org>
Fri, 9 Dec 2016 19:06:06 +0000 (19:06 +0000)
This patch removes the needless assignment of re_erroffset in the conf pool
by the worker threads; such mistakes break the shared copy-on-write pages of
memory that should have remained common between all httpd worker processes.

Two de-optimizations are inherent in this patch; in particular, the former
ovector-on-stack optimization is lost unless it is reimplemented using a new
general context. It would be safer either to create a new general context
using pool allocation, or to recycle a per-pool or per-thread match_data
buffer of some arbitrary size (10 elements or so).

Submitted by: wrowe, Petr Pisar <ppisar@redhat.com>

git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1773454 13f79535-47bb-0310-9956-ffa450edef68

configure.in
server/util_pcre.c

index 19b6e1dcfedfd364924992da385339d3a34c8aa5..5e70d7aee0050d83e81c18dea7a591388b8582c8 100644 (file)
@@ -223,18 +223,18 @@ fi
 AC_ARG_WITH(pcre,
 APACHE_HELP_STRING(--with-pcre=PATH,Use external PCRE library))
 
-AC_PATH_PROG(PCRE_CONFIG, pcre-config, false)
-if test -d "$with_pcre" && test -x "$with_pcre/bin/pcre-config"; then
-   PCRE_CONFIG=$with_pcre/bin/pcre-config
-elif test -x "$with_pcre"; then
-   PCRE_CONFIG=$with_pcre
-fi
+AC_CHECK_TARGET_TOOLS(PCRE_CONFIG, [pcre2-config pcre-config],
+                      [`which $with_pcre 2>/dev/null`],
+                      [$with_pcre/bin:$with_pcre])
 
-if test "$PCRE_CONFIG" != "false"; then
+if test "x$PCRE_CONFIG" != "x"; then
   if $PCRE_CONFIG --version >/dev/null 2>&1; then :; else
-    AC_MSG_ERROR([Did not find pcre-config script at $PCRE_CONFIG])
+    AC_MSG_ERROR([Did not find working script at $PCRE_CONFIG])
   fi
   case `$PCRE_CONFIG --version` in
+  [1[0-9].*])
+    AC_DEFINE(HAVE_PCRE2, 1, [Detected PCRE2]) 
+    ;;
   [[1-5].*])
     AC_MSG_ERROR([Need at least pcre version 6.7])
     ;;
@@ -244,10 +244,10 @@ if test "$PCRE_CONFIG" != "false"; then
   esac
   AC_MSG_NOTICE([Using external PCRE library from $PCRE_CONFIG])
   APR_ADDTO(PCRE_INCLUDES, [`$PCRE_CONFIG --cflags`])
-  APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs`])
+  APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs8 2>/dev/null || $PCRE_CONFIG --libs`])
   APR_ADDTO(HTTPD_LIBS, [\$(PCRE_LIBS)])
 else
-  AC_MSG_ERROR([pcre-config for libpcre not found. PCRE is required and available from http://pcre.org/])
+  AC_MSG_ERROR([pcre(2)-config for libpcre not found. PCRE is required and available from http://pcre.org/])
 fi
 APACHE_SUBST(PCRE_LIBS)
 
index 3e4fbb9444306b911219e9950a18f901e27af1a5..9c83e203a75c955696f2062d3780551f7be451c5 100644 (file)
@@ -46,10 +46,18 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "httpd.h"
 #include "apr_strings.h"
 #include "apr_tables.h"
+
+#ifdef HAVE_PCRE2
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include "pcre2.h"
+#define PCREn(x) PCRE2_ ## x
+#else
 #include "pcre.h"
+#define PCREn(x) PCRE_ ## x
+#endif
 
 /* PCRE_DUPNAMES is only present since version 6.7 of PCRE */
-#ifndef PCRE_DUPNAMES
+#if !defined(PCRE_DUPNAMES) && !defined(HAVE_PCRE2)
 #error PCRE Version 6.7 or later required!
 #else
 
@@ -74,11 +82,19 @@ static const char *const pstring[] = {
 
 AP_DECLARE(const char *) ap_pcre_version_string(int which)
 {
+#ifdef HAVE_PCRE2
+    static char buf[80];
+#endif
     switch (which) {
     case AP_REG_PCRE_COMPILED:
-        return APR_STRINGIFY(PCRE_MAJOR) "." APR_STRINGIFY(PCRE_MINOR) " " APR_STRINGIFY(PCRE_DATE);
+        return APR_STRINGIFY(PCREn(MAJOR)) "." APR_STRINGIFY(PCREn(MINOR)) " " APR_STRINGIFY(PCREn(DATE));
     case AP_REG_PCRE_LOADED:
+#ifdef HAVE_PCRE2
+        pcre2_config(PCRE2_CONFIG_VERSION, buf);
+        return buf;
+#else
         return pcre_version();
+#endif
     default:
         return "Unknown";
     }
@@ -118,7 +134,11 @@ AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg,
 
 AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
 {
+#ifdef HAVE_PCRE2
+    pcre2_code_free(preg->re_pcre);
+#else
     (pcre_free)(preg->re_pcre);
+#endif
 }
 
 
@@ -139,34 +159,48 @@ AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
 */
 AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
 {
+#ifdef HAVE_PCRE2
+    uint32_t capcount;
+    size_t erroffset;
+#else
     const char *errorptr;
     int erroffset;
+#endif
     int errcode = 0;
-    int options = PCRE_DUPNAMES;
+    int options = PCREn(DUPNAMES);
 
     if ((cflags & AP_REG_ICASE) != 0)
-        options |= PCRE_CASELESS;
+        options |= PCREn(CASELESS);
     if ((cflags & AP_REG_NEWLINE) != 0)
-        options |= PCRE_MULTILINE;
+        options |= PCREn(MULTILINE);
     if ((cflags & AP_REG_DOTALL) != 0)
-        options |= PCRE_DOTALL;
+        options |= PCREn(DOTALL);
 
-    preg->re_pcre =
-        pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL);
-    preg->re_erroffset = erroffset;
+#ifdef HAVE_PCRE2
+    preg->re_pcre = pcre2_compile((const unsigned char *)pattern,
+                                  PCRE2_ZERO_TERMINATED, options, &errcode,
+                                  &erroffset, NULL);
+#else
+    preg->re_pcre = pcre_compile2(pattern, options, &errcode,
+                                  &errorptr, &erroffset, NULL);
+#endif
 
+    preg->re_erroffset = erroffset;
     if (preg->re_pcre == NULL) {
-        /*
-         * There doesn't seem to be constants defined for compile time error
-         * codes. 21 is "failed to get memory" according to pcreapi(3).
-         */
+        /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */
         if (errcode == 21)
             return AP_REG_ESPACE;
         return AP_REG_INVARG;
     }
 
+#ifdef HAVE_PCRE2
+    pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+                       PCRE2_INFO_CAPTURECOUNT, &capcount);
+    preg->re_nsub = capcount;
+#else
     pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
-                   PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub));
+                  PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub));
+#endif
     return 0;
 }
 
@@ -198,17 +232,33 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
 {
     int rc;
     int options = 0;
-    int *ovector = NULL;
+#ifdef HAVE_PCRE2
+    pcre2_match_data *matchdata;
+    size_t *ovector;
+#else
     int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
     int allocated_ovector = 0;
+    int *ovector = NULL;
+#endif
 
     if ((eflags & AP_REG_NOTBOL) != 0)
-        options |= PCRE_NOTBOL;
+        options |= PCREn(NOTBOL);
     if ((eflags & AP_REG_NOTEOL) != 0)
-        options |= PCRE_NOTEOL;
-
-    ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1);    /* Only has meaning after compile */
-
+        options |= PCREn(NOTEOL);
+
+#ifdef HAVE_PCRE2
+    /* TODO: create a generic TLS matchdata buffer of some nmatch limit,
+     * e.g. 10 matches, to avoid a malloc-per-call. If it must be alloced,
+     * implement a general context using palloc and no free implementation.
+     */
+    matchdata = pcre2_match_data_create(nmatch, NULL);
+    if (matchdata == NULL)
+        return AP_REG_ESPACE;
+    ovector = pcre2_get_ovector_pointer(matchdata);
+    rc = pcre2_match((const pcre2_code *)preg->re_pcre,
+                     (const unsigned char *)buff, len,
+                     0, options, matchdata, NULL);
+#else
     if (nmatch > 0) {
         if (nmatch <= POSIX_MALLOC_THRESHOLD) {
             ovector = &(small_ovector[0]);
@@ -220,52 +270,64 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
             allocated_ovector = 1;
         }
     }
-
     rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
                    0, options, ovector, nmatch * 3);
+#endif
 
     if (rc == 0)
         rc = nmatch;            /* All captured slots were filled in */
 
     if (rc >= 0) {
         apr_size_t i;
-        for (i = 0; i < (apr_size_t)rc; i++) {
+        apr_size_t nlim = (apr_size_t)rc < nmatch ? (apr_size_t)rc : nmatch;
+        for (i = 0; i < nlim; i++) {
             pmatch[i].rm_so = ovector[i * 2];
             pmatch[i].rm_eo = ovector[i * 2 + 1];
         }
-        if (allocated_ovector)
-            free(ovector);
         for (; i < nmatch; i++)
             pmatch[i].rm_so = pmatch[i].rm_eo = -1;
-        return 0;
     }
 
+#ifdef HAVE_PCRE2
+    pcre2_match_data_free(matchdata);
+#else
+    if (allocated_ovector)
+        free(ovector);
+#endif
+
+    if (rc >= 0) {
+        return 0;
+    }
     else {
-        if (allocated_ovector)
-            free(ovector);
+#ifdef HAVE_PCRE2
+        if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
+            return AP_REG_INVARG;
+#endif
         switch (rc) {
-        case PCRE_ERROR_NOMATCH:
+        case PCREn(ERROR_NOMATCH):
             return AP_REG_NOMATCH;
-        case PCRE_ERROR_NULL:
+        case PCREn(ERROR_NULL):
             return AP_REG_INVARG;
-        case PCRE_ERROR_BADOPTION:
+        case PCREn(ERROR_BADOPTION):
             return AP_REG_INVARG;
-        case PCRE_ERROR_BADMAGIC:
+        case PCREn(ERROR_BADMAGIC):
             return AP_REG_INVARG;
-        case PCRE_ERROR_UNKNOWN_NODE:
-            return AP_REG_ASSERT;
-        case PCRE_ERROR_NOMEMORY:
+        case PCREn(ERROR_NOMEMORY):
             return AP_REG_ESPACE;
-#ifdef PCRE_ERROR_MATCHLIMIT
-        case PCRE_ERROR_MATCHLIMIT:
+#if defined(HAVE_PCRE2) || defined(PCRE_ERROR_MATCHLIMIT)
+        case PCREn(ERROR_MATCHLIMIT):
             return AP_REG_ESPACE;
 #endif
-#ifdef PCRE_ERROR_BADUTF8
-        case PCRE_ERROR_BADUTF8:
+#if defined(PCRE_ERROR_UNKNOWN_NODE)
+        case PCRE_ERROR_UNKNOWN_NODE:
+            return AP_REG_ASSERT;
+#endif
+#if defined(PCRE_ERROR_BADUTF8)
+        case PCREn(ERROR_BADUTF8):
             return AP_REG_INVARG;
 #endif
-#ifdef PCRE_ERROR_BADUTF8_OFFSET
-        case PCRE_ERROR_BADUTF8_OFFSET:
+#if defined(PCRE_ERROR_BADUTF8_OFFSET)
+        case PCREn(ERROR_BADUTF8_OFFSET):
             return AP_REG_INVARG;
 #endif
         default:
@@ -278,17 +340,29 @@ AP_DECLARE(int) ap_regname(const ap_regex_t *preg,
                            apr_array_header_t *names, const char *prefix,
                            int upper)
 {
+    char *nametable;
+
+#ifdef HAVE_PCRE2
+    uint32_t namecount;
+    uint32_t nameentrysize;
+    uint32_t i;
+    pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+                       PCRE2_INFO_NAMECOUNT, &namecount);
+    pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+                       PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize);
+    pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+                       PCRE2_INFO_NAMETABLE, &nametable);
+#else
     int namecount;
     int nameentrysize;
     int i;
-    char *nametable;
-
     pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
-                       PCRE_INFO_NAMECOUNT, &namecount);
+                  PCRE_INFO_NAMECOUNT, &namecount);
     pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
-                       PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
+                  PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
     pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
-                       PCRE_INFO_NAMETABLE, &nametable);
+                  PCRE_INFO_NAMETABLE, &nametable);
+#endif
 
     for (i = 0; i < namecount; i++) {
         const char *offset = nametable + i * nameentrysize;