1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
10 This module is a wrapper that provides a POSIX API to the underlying PCRE functions.
13 Written by: Philip Hazel <ph10@cam.ac.uk>
15 Copyright (c) 1997-2004 University of Cambridge
17 -----------------------------------------------------------------------------
18 Redistribution and use in source and binary forms, with or without
19 modification, are permitted provided that the following conditions are met:
21 * Redistributions of source code must retain the above copyright notice,
22 this list of conditions and the following disclaimer.
24 * Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
28 * Neither the name of the University of Cambridge nor the names of its
29 contributors may be used to endorse or promote products derived from
30 this software without specific prior written permission.
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42 POSSIBILITY OF SUCH DAMAGE.
43 -----------------------------------------------------------------------------
47 #include "apr_strings.h"
48 #include "apr_tables.h"
51 #define APR_WANT_STRFUNC
54 #ifndef POSIX_MALLOC_THRESHOLD
55 #define POSIX_MALLOC_THRESHOLD (10)
/*
 * Messages for the POSIX-style error codes, indexed by error code.
 * The order of the entries must stay in step with the AP_REG_* error
 * definitions in include/ap_regex.h. Entry 0 is only a placeholder.
 */
static const char *const pstring[] = {
    /* value 0 (dummy) */ "",
    /* AP_REG_ASSERT   */ "internal error",
    /* AP_REG_ESPACE   */ "failed to get memory",
    /* AP_REG_INVARG   */ "bad argument",
    /* AP_REG_NOMATCH  */ "match failed"
};
70 AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg,
71 char *errbuf, apr_size_t errbuf_size)
73 const char *message, *addmessage;
74 apr_size_t length, addlength;
76 message = (errcode >= (int)(sizeof(pstring) / sizeof(char *))) ?
77 "unknown error code" : pstring[errcode];
78 length = strlen(message) + 1;
80 addmessage = " at offset ";
81 addlength = (preg != NULL && (int)preg->re_erroffset != -1) ?
82 strlen(addmessage) + 6 : 0;
84 if (errbuf_size > 0) {
85 if (addlength > 0 && errbuf_size >= length + addlength)
86 apr_snprintf(errbuf, errbuf_size, "%s%s%-6d", message, addmessage,
87 (int)preg->re_erroffset);
89 apr_cpystrn(errbuf, message, errbuf_size);
92 return length + addlength;
98 /*************************************************
99 * Free store held by a regex *
100 *************************************************/
102 AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
104 (pcre_free)(preg->re_pcre);
110 /*************************************************
111 * Compile a regular expression *
112 *************************************************/
114 static int default_cflags = AP_REG_DOLLAR_ENDONLY;
116 AP_DECLARE(int) ap_regcomp_get_default_cflags(void)
118 return default_cflags;
121 AP_DECLARE(void) ap_regcomp_set_default_cflags(int cflags)
123 default_cflags = cflags;
126 AP_DECLARE(int) ap_regcomp_default_cflag_by_name(const char *name)
130 if (ap_cstr_casecmp(name, "ICASE") == 0) {
131 cflag = AP_REG_ICASE;
133 else if (ap_cstr_casecmp(name, "DOTALL") == 0) {
134 cflag = AP_REG_DOTALL;
136 else if (ap_cstr_casecmp(name, "DOLLAR_ENDONLY") == 0) {
137 cflag = AP_REG_DOLLAR_ENDONLY;
139 else if (ap_cstr_casecmp(name, "EXTENDED") == 0) {
140 cflag = AP_REG_EXTENDED;
148 * preg points to a structure for recording the compiled expression
149 * pattern the pattern to compile
150 * cflags compilation flags
152 * Returns: 0 on success
153 * various non-zero codes on failure
155 AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
157 const char *errorptr;
160 int options = PCRE_DUPNAMES;
162 cflags |= default_cflags;
163 if ((cflags & AP_REG_ICASE) != 0)
164 options |= PCRE_CASELESS;
165 if ((cflags & AP_REG_NEWLINE) != 0)
166 options |= PCRE_MULTILINE;
167 if ((cflags & AP_REG_DOTALL) != 0)
168 options |= PCRE_DOTALL;
169 if ((cflags & AP_REG_DOLLAR_ENDONLY) != 0)
170 options |= PCRE_DOLLAR_ENDONLY;
173 pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL);
174 preg->re_erroffset = erroffset;
176 if (preg->re_pcre == NULL) {
178 * There doesn't seem to be constants defined for compile time error
179 * codes. 21 is "failed to get memory" according to pcreapi(3).
182 return AP_REG_ESPACE;
183 return AP_REG_INVARG;
186 pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
187 PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub));
194 /*************************************************
195 * Match a regular expression *
196 *************************************************/
198 /* Unfortunately, PCRE requires 3 ints of working space for each captured
199 * substring, so we have to get and release working store instead of just using
200 * the POSIX structures as was done in earlier releases when PCRE needed only 2
201 * ints. However, if the number of possible capturing brackets is small, use a
202 * block of store on the stack, to reduce the use of malloc/free. The threshold
203 * is in a macro that can be changed at configure time.
205 AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
206 apr_size_t nmatch, ap_regmatch_t *pmatch,
209 return ap_regexec_len(preg, string, strlen(string), nmatch, pmatch,
213 AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
214 apr_size_t len, apr_size_t nmatch,
215 ap_regmatch_t *pmatch, int eflags)
220 int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
221 int allocated_ovector = 0;
223 if ((eflags & AP_REG_NOTBOL) != 0)
224 options |= PCRE_NOTBOL;
225 if ((eflags & AP_REG_NOTEOL) != 0)
226 options |= PCRE_NOTEOL;
228 ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */
231 if (nmatch <= POSIX_MALLOC_THRESHOLD) {
232 ovector = &(small_ovector[0]);
235 ovector = (int *)malloc(sizeof(int) * nmatch * 3);
237 return AP_REG_ESPACE;
238 allocated_ovector = 1;
242 rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
243 0, options, ovector, nmatch * 3);
246 rc = nmatch; /* All captured slots were filled in */
250 for (i = 0; i < (apr_size_t)rc; i++) {
251 pmatch[i].rm_so = ovector[i * 2];
252 pmatch[i].rm_eo = ovector[i * 2 + 1];
254 if (allocated_ovector)
256 for (; i < nmatch; i++)
257 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
262 if (allocated_ovector)
265 case PCRE_ERROR_NOMATCH:
266 return AP_REG_NOMATCH;
267 case PCRE_ERROR_NULL:
268 return AP_REG_INVARG;
269 case PCRE_ERROR_BADOPTION:
270 return AP_REG_INVARG;
271 case PCRE_ERROR_BADMAGIC:
272 return AP_REG_INVARG;
273 case PCRE_ERROR_UNKNOWN_NODE:
274 return AP_REG_ASSERT;
275 case PCRE_ERROR_NOMEMORY:
276 return AP_REG_ESPACE;
277 #ifdef PCRE_ERROR_MATCHLIMIT
278 case PCRE_ERROR_MATCHLIMIT:
279 return AP_REG_ESPACE;
281 #ifdef PCRE_ERROR_BADUTF8
282 case PCRE_ERROR_BADUTF8:
283 return AP_REG_INVARG;
285 #ifdef PCRE_ERROR_BADUTF8_OFFSET
286 case PCRE_ERROR_BADUTF8_OFFSET:
287 return AP_REG_INVARG;
290 return AP_REG_ASSERT;
295 AP_DECLARE(int) ap_regname(const ap_regex_t *preg,
296 apr_array_header_t *names, const char *prefix,
304 pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
305 PCRE_INFO_NAMECOUNT, &namecount);
306 pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
307 PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
308 pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
309 PCRE_INFO_NAMETABLE, &nametable);
311 for (i = 0; i < namecount; i++) {
312 const char *offset = nametable + i * nameentrysize;
313 int capture = ((offset[0] << 8) + offset[1]);
314 while (names->nelts <= capture) {
315 apr_array_push(names);
317 if (upper || prefix) {
318 char *name = ((char **) names->elts)[capture] =
319 prefix ? apr_pstrcat(names->pool, prefix, offset + 2,
321 apr_pstrdup(names->pool, offset + 2);
323 ap_str_toupper(name);
327 ((const char **)names->elts)[capture] = offset + 2;
334 /* End of pcreposix.c */