1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* This code is based on pcreposix.c from the PCRE Library distribution,
18 * as originally written by Philip Hazel <ph10@cam.ac.uk>, and forked by
19 * the Apache HTTP Server project to provide POSIX-style regex function
20 * wrappers around underlying PCRE library functions for httpd.
22 * The original source file pcreposix.c is copyright and licensed as follows;
24 Copyright (c) 1997-2004 University of Cambridge
26 -----------------------------------------------------------------------------
27 Redistribution and use in source and binary forms, with or without
28 modification, are permitted provided that the following conditions are met:
30 * Redistributions of source code must retain the above copyright notice,
31 this list of conditions and the following disclaimer.
33 * Redistributions in binary form must reproduce the above copyright
34 notice, this list of conditions and the following disclaimer in the
35 documentation and/or other materials provided with the distribution.
37 * Neither the name of the University of Cambridge nor the names of its
38 contributors may be used to endorse or promote products derived from
39 this software without specific prior written permission.
41 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
42 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
45 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
46 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
47 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
48 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
49 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
51 POSSIBILITY OF SUCH DAMAGE.
52 -----------------------------------------------------------------------------
56 #include "apr_strings.h"
57 #include "apr_tables.h"
60 #define PCRE2_CODE_UNIT_WIDTH 8
62 #define PCREn(x) PCRE2_ ## x
65 #define PCREn(x) PCRE_ ## x
68 /* PCRE_DUPNAMES is only present since version 6.7 of PCRE */
69 #if !defined(PCRE_DUPNAMES) && !defined(HAVE_PCRE2)
70 #error PCRE Version 6.7 or later required!
73 #define APR_WANT_STRFUNC
76 #ifndef POSIX_MALLOC_THRESHOLD
77 #define POSIX_MALLOC_THRESHOLD (10)
/* Table of error strings corresponding to POSIX error codes; must be
 * kept in synch with include/ap_regex.h's AP_REG_E* definitions.
 * ap_regerror() indexes this table directly with the error code.
 */
static const char *const pstring[] = {
    "",                         /* Dummy for value 0 */
    "internal error",           /* AP_REG_ASSERT */
    "failed to get memory",     /* AP_REG_ESPACE */
    "bad argument",             /* AP_REG_INVARG */
    "match failed"              /* AP_REG_NOMATCH */
};
92 AP_DECLARE(const char *) ap_pcre_version_string(int which)
98 case AP_REG_PCRE_COMPILED:
99 return APR_STRINGIFY(PCREn(MAJOR)) "." APR_STRINGIFY(PCREn(MINOR)) " " APR_STRINGIFY(PCREn(DATE));
100 case AP_REG_PCRE_LOADED:
102 pcre2_config(PCRE2_CONFIG_VERSION, buf);
105 return pcre_version();
112 AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg,
113 char *errbuf, apr_size_t errbuf_size)
115 const char *message, *addmessage;
116 apr_size_t length, addlength;
118 message = (errcode >= (int)(sizeof(pstring) / sizeof(char *))) ?
119 "unknown error code" : pstring[errcode];
120 length = strlen(message) + 1;
122 addmessage = " at offset ";
123 addlength = (preg != NULL && (int)preg->re_erroffset != -1) ?
124 strlen(addmessage) + 6 : 0;
126 if (errbuf_size > 0) {
127 if (addlength > 0 && errbuf_size >= length + addlength)
128 apr_snprintf(errbuf, errbuf_size, "%s%s%-6d", message, addmessage,
129 (int)preg->re_erroffset);
131 apr_cpystrn(errbuf, message, errbuf_size);
134 return length + addlength;
140 /*************************************************
141 * Free store held by a regex *
142 *************************************************/
144 AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
147 pcre2_code_free(preg->re_pcre);
149 (pcre_free)(preg->re_pcre);
156 /*************************************************
157 * Compile a regular expression *
158 *************************************************/
160 static int default_cflags = AP_REG_DOTALL |
161 AP_REG_DOLLAR_ENDONLY;
163 AP_DECLARE(int) ap_regcomp_get_default_cflags(void)
165 return default_cflags;
168 AP_DECLARE(void) ap_regcomp_set_default_cflags(int cflags)
170 default_cflags = cflags;
173 AP_DECLARE(int) ap_regcomp_default_cflag_by_name(const char *name)
177 if (ap_cstr_casecmp(name, "ICASE") == 0) {
178 cflag = AP_REG_ICASE;
180 else if (ap_cstr_casecmp(name, "DOTALL") == 0) {
181 cflag = AP_REG_DOTALL;
183 else if (ap_cstr_casecmp(name, "DOLLAR_ENDONLY") == 0) {
184 cflag = AP_REG_DOLLAR_ENDONLY;
186 else if (ap_cstr_casecmp(name, "EXTENDED") == 0) {
187 cflag = AP_REG_EXTENDED;
195 * preg points to a structure for recording the compiled expression
196 * pattern the pattern to compile
197 * cflags compilation flags
199 * Returns: 0 on success
200 * various non-zero codes on failure
202 AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
208 const char *errorptr;
212 int options = PCREn(DUPNAMES);
214 cflags |= default_cflags;
215 if ((cflags & AP_REG_ICASE) != 0)
216 options |= PCREn(CASELESS);
217 if ((cflags & AP_REG_NEWLINE) != 0)
218 options |= PCREn(MULTILINE);
219 if ((cflags & AP_REG_DOTALL) != 0)
220 options |= PCREn(DOTALL);
221 if ((cflags & AP_REG_DOLLAR_ENDONLY) != 0)
222 options |= PCREn(DOLLAR_ENDONLY);
225 preg->re_pcre = pcre2_compile((const unsigned char *)pattern,
226 PCRE2_ZERO_TERMINATED, options, &errcode,
229 preg->re_pcre = pcre_compile2(pattern, options, &errcode,
230 &errorptr, &erroffset, NULL);
233 preg->re_erroffset = erroffset;
234 if (preg->re_pcre == NULL) {
235 /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */
237 return AP_REG_ESPACE;
238 return AP_REG_INVARG;
242 pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
243 PCRE2_INFO_CAPTURECOUNT, &capcount);
244 preg->re_nsub = capcount;
246 pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
247 PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub));
255 /*************************************************
256 * Match a regular expression *
257 *************************************************/
259 /* Unfortunately, PCRE requires 3 ints of working space for each captured
260 * substring, so we have to get and release working store instead of just using
261 * the POSIX structures as was done in earlier releases when PCRE needed only 2
262 * ints. However, if the number of possible capturing brackets is small, use a
263 * block of store on the stack, to reduce the use of malloc/free. The threshold
264 * is in a macro that can be changed at configure time.
266 AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
267 apr_size_t nmatch, ap_regmatch_t *pmatch,
270 return ap_regexec_len(preg, string, strlen(string), nmatch, pmatch,
274 AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
275 apr_size_t len, apr_size_t nmatch,
276 ap_regmatch_t *pmatch, int eflags)
282 pcre2_match_data *matchdata;
285 int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
286 int allocated_ovector = 0;
290 if ((eflags & AP_REG_NOTBOL) != 0)
291 options |= PCREn(NOTBOL);
292 if ((eflags & AP_REG_NOTEOL) != 0)
293 options |= PCREn(NOTEOL);
294 if ((eflags & AP_REG_NOTEMPTY) != 0)
295 options |= PCREn(NOTEMPTY);
296 if ((eflags & AP_REG_ANCHORED) != 0)
297 options |= PCREn(ANCHORED);
300 /* TODO: create a generic TLS matchdata buffer of some nmatch limit,
301 * e.g. 10 matches, to avoid a malloc-per-call. If it must be alloced,
302 * implement a general context using palloc and no free implementation.
304 nlim = ((apr_size_t)preg->re_nsub + 1) > nmatch
305 ? ((apr_size_t)preg->re_nsub + 1) : nmatch;
306 matchdata = pcre2_match_data_create(nlim, NULL);
307 if (matchdata == NULL)
308 return AP_REG_ESPACE;
309 ovector = pcre2_get_ovector_pointer(matchdata);
310 rc = pcre2_match((const pcre2_code *)preg->re_pcre,
311 (const unsigned char *)buff, len,
312 0, options, matchdata, NULL);
314 rc = nlim; /* All captured slots were filled in */
317 if (nmatch <= POSIX_MALLOC_THRESHOLD) {
318 ovector = &(small_ovector[0]);
321 ovector = (int *)malloc(sizeof(int) * nmatch * 3);
323 return AP_REG_ESPACE;
324 allocated_ovector = 1;
327 rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
328 0, options, ovector, nmatch * 3);
330 rc = nmatch; /* All captured slots were filled in */
335 nlim = (apr_size_t)rc < nmatch ? (apr_size_t)rc : nmatch;
336 for (i = 0; i < nlim; i++) {
337 pmatch[i].rm_so = ovector[i * 2];
338 pmatch[i].rm_eo = ovector[i * 2 + 1];
340 for (; i < nmatch; i++)
341 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
345 pcre2_match_data_free(matchdata);
347 if (allocated_ovector)
356 if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
357 return AP_REG_INVARG;
360 case PCREn(ERROR_NOMATCH):
361 return AP_REG_NOMATCH;
362 case PCREn(ERROR_NULL):
363 return AP_REG_INVARG;
364 case PCREn(ERROR_BADOPTION):
365 return AP_REG_INVARG;
366 case PCREn(ERROR_BADMAGIC):
367 return AP_REG_INVARG;
368 case PCREn(ERROR_NOMEMORY):
369 return AP_REG_ESPACE;
370 #if defined(HAVE_PCRE2) || defined(PCRE_ERROR_MATCHLIMIT)
371 case PCREn(ERROR_MATCHLIMIT):
372 return AP_REG_ESPACE;
374 #if defined(PCRE_ERROR_UNKNOWN_NODE)
375 case PCRE_ERROR_UNKNOWN_NODE:
376 return AP_REG_ASSERT;
378 #if defined(PCRE_ERROR_BADUTF8)
379 case PCREn(ERROR_BADUTF8):
380 return AP_REG_INVARG;
382 #if defined(PCRE_ERROR_BADUTF8_OFFSET)
383 case PCREn(ERROR_BADUTF8_OFFSET):
384 return AP_REG_INVARG;
387 return AP_REG_ASSERT;
392 AP_DECLARE(int) ap_regname(const ap_regex_t *preg,
393 apr_array_header_t *names, const char *prefix,
400 uint32_t nameentrysize;
402 pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
403 PCRE2_INFO_NAMECOUNT, &namecount);
404 pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
405 PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize);
406 pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
407 PCRE2_INFO_NAMETABLE, &nametable);
412 pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
413 PCRE_INFO_NAMECOUNT, &namecount);
414 pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
415 PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
416 pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
417 PCRE_INFO_NAMETABLE, &nametable);
420 for (i = 0; i < namecount; i++) {
421 const char *offset = nametable + i * nameentrysize;
422 int capture = ((offset[0] << 8) + offset[1]);
423 while (names->nelts <= capture) {
424 apr_array_push(names);
426 if (upper || prefix) {
427 char *name = ((char **) names->elts)[capture] =
428 prefix ? apr_pstrcat(names->pool, prefix, offset + 2,
430 apr_pstrdup(names->pool, offset + 2);
432 ap_str_toupper(name);
436 ((const char **)names->elts)[capture] = offset + 2;
443 #endif /* PCRE_DUPNAMES defined */
445 /* End of pcreposix.c */