1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
12 /* Use the internal info for displaying the results of pcre_study(). */
16 /* It is possible to compile this test program without including support for
17 testing the POSIX interface, though this is not available via the standard
21 #include "pcreposix.h"
24 #ifndef CLOCKS_PER_SEC
26 #define CLOCKS_PER_SEC CLK_TCK
28 #define CLOCKS_PER_SEC 100
32 #define LOOPREPEAT 20000
36 static int log_store = 0;
37 static size_t gotten_store;
41 /* Debugging function to print the internal form of the regex. This is the same
42 code as contained in pcre.c under the DEBUG macro. */
44 static const char *OP_names[] = {
45 "End", "\\A", "\\B", "\\b", "\\D", "\\d",
46 "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
47 "Opt", "^", "$", "Any", "chars", "not",
48 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
49 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
50 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
51 "*", "*?", "+", "+?", "?", "??", "{", "{",
52 "class", "Ref", "Recurse",
53 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
54 "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
55 "Brazero", "Braminzero", "Bra"
59 static void print_internals(pcre *re)
61 unsigned char *code = ((real_pcre *)re)->code;
63 fprintf(outfile, "------------------------------------------------------------------\n");
70 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
74 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
81 fprintf(outfile, " %s\n", OP_names[*code]);
82 fprintf(outfile, "------------------------------------------------------------------\n");
86 fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
91 fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
96 fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
101 charlength = *(++code);
102 fprintf(outfile, "%3d ", charlength);
103 while (charlength-- > 0)
104 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
105 else fprintf(outfile, "\\x%02x", c);
115 case OP_ASSERTBACK_NOT:
117 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
122 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
137 case OP_TYPEMINQUERY:
138 if (*code >= OP_TYPESTAR)
139 fprintf(outfile, " %s", OP_names[code[1]]);
140 else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
141 else fprintf(outfile, " \\x%02x", c);
142 fprintf(outfile, "%s", OP_names[*code++]);
148 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
149 else fprintf(outfile, " \\x%02x{", c);
150 if (*code != OP_EXACT) fprintf(outfile, ",");
151 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
152 if (*code == OP_MINUPTO) fprintf(outfile, "?");
159 fprintf(outfile, " %s{", OP_names[code[3]]);
160 if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
161 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
162 if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
167 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
168 else fprintf(outfile, " [^\\x%02x]", c);
177 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
178 else fprintf(outfile, " [^\\x%02x]", c);
179 fprintf(outfile, "%s", OP_names[*code++]);
185 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
186 else fprintf(outfile, " [^\\x%02x]{", c);
187 if (*code != OP_NOTEXACT) fprintf(outfile, ",");
188 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
189 if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
194 fprintf(outfile, " \\%d", *(++code));
196 goto CLASS_REF_REPEAT;
202 fprintf(outfile, " [");
204 for (i = 0; i < 256; i++)
206 if ((code[i/8] & (1 << (i&7))) != 0)
209 for (j = i+1; j < 256; j++)
210 if ((code[j/8] & (1 << (j&7))) == 0) break;
211 if (i == '-' || i == ']') fprintf(outfile, "\\");
212 if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
215 fprintf(outfile, "-");
216 if (j == '-' || j == ']') fprintf(outfile, "\\");
217 if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
222 fprintf(outfile, "]");
235 fprintf(outfile, "%s", OP_names[*code]);
240 min = (code[1] << 8) + code[2];
241 max = (code[3] << 8) + code[4];
242 if (max == 0) fprintf(outfile, "{%d,}", min);
243 else fprintf(outfile, "{%d,%d}", min, max);
244 if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
254 /* Anything else is just a one-node item */
257 fprintf(outfile, " %s", OP_names[*code]);
262 fprintf(outfile, "\n");
268 /* Character string printing function. */
270 static void pchars(unsigned char *p, int length)
274 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
275 else fprintf(outfile, "\\x%02x", c);
280 /* Alternative malloc function, to test functionality and show the size of the
283 static void *new_malloc(size_t size)
287 fprintf(outfile, "Memory allocation (code space): %d\n",
288 (int)((int)size - offsetof(real_pcre, code[0])));
295 /* Get one piece of information from the pcre_fullinfo() function */
297 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
300 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
301 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
307 /* Read lines from named file or stdin and write to named file or stdout; lines
308 consist of a regular expression, in delimiters and optionally followed by
309 options, followed by a set of test data, terminated by an empty line. */
311 int main(int argc, char **argv)
313 FILE *infile = stdin;
315 int study_options = 0;
323 unsigned char buffer[30000];
324 unsigned char dbuffer[1024];
326 /* Static so that new_malloc can use it. */
332 while (argc > 1 && argv[op][0] == '-')
334 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
336 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
337 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
338 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
339 else if (strcmp(argv[op], "-p") == 0) posix = 1;
342 printf("*** Unknown option %s\n", argv[op]);
343 printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
344 printf(" -d debug: show compiled code; implies -i\n"
345 " -i show information about compiled pattern\n"
346 " -p use POSIX interface\n"
347 " -s output store information\n"
348 " -t time compilation and execution\n");
355 /* Sort out the input and output files */
359 infile = fopen(argv[op], "r");
362 printf("** Failed to open %s\n", argv[op]);
369 outfile = fopen(argv[op+1], "w");
372 printf("** Failed to open %s\n", argv[op+1]);
377 /* Set alternative malloc function */
379 pcre_malloc = new_malloc;
381 /* Heading line, then prompt for first regex if stdin */
383 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
390 pcre_extra *extra = NULL;
392 #if !defined NOPOSIX /* There are still compilers that require no indent */
398 unsigned char *p, *pp, *ppp;
399 unsigned const char *tables = NULL;
401 int do_debug = debug;
404 int do_showinfo = showinfo;
406 int erroroffset, len, delimiter;
408 if (infile == stdin) printf(" re> ");
409 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
410 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
413 while (isspace(*p)) p++;
414 if (*p == 0) continue;
416 /* Get the delimiter and seek the end of the pattern; if it isn't
417 complete, read more. */
421 if (isalnum(delimiter) || delimiter == '\\')
423 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
433 if (*pp == '\\' && pp[1] != 0) pp++;
434 else if (*pp == delimiter) break;
439 len = sizeof(buffer) - (pp - buffer);
442 fprintf(outfile, "** Expression too long - missing delimiter?\n");
446 if (infile == stdin) printf(" > ");
447 if (fgets((char *)pp, len, infile) == NULL)
449 fprintf(outfile, "** Unexpected EOF\n");
453 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
456 /* If the first character after the delimiter is backslash, make
457 the pattern end with backslash. This is purely to provide a way
458 of testing for the error message when a pattern ends with backslash. */
460 if (pp[1] == '\\') *pp++ = '\\';
462 /* Terminate the pattern at the delimiter */
466 /* Look for options after final delimiter */
470 log_store = showstore; /* default from command line */
476 case 'g': do_g = 1; break;
477 case 'i': options |= PCRE_CASELESS; break;
478 case 'm': options |= PCRE_MULTILINE; break;
479 case 's': options |= PCRE_DOTALL; break;
480 case 'x': options |= PCRE_EXTENDED; break;
482 case '+': do_showrest = 1; break;
483 case 'A': options |= PCRE_ANCHORED; break;
484 case 'D': do_debug = do_showinfo = 1; break;
485 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
486 case 'G': do_G = 1; break;
487 case 'I': do_showinfo = 1; break;
488 case 'M': log_store = 1; break;
491 case 'P': do_posix = 1; break;
494 case 'S': do_study = 1; break;
495 case 'U': options |= PCRE_UNGREEDY; break;
496 case 'X': options |= PCRE_EXTRA; break;
500 while (*ppp != '\n' && *ppp != ' ') ppp++;
502 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
504 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
507 tables = pcre_maketables();
511 case '\n': case ' ': break;
513 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
518 /* Handle compiling via the POSIX interface, which doesn't support the
519 timing, showing, or debugging options, nor the ability to pass over
520 local character tables. */
523 if (posix || do_posix)
527 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
528 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
529 rc = regcomp(&preg, (char *)p, cflags);
531 /* Compilation failed; go back for another re, skipping to blank line
532 if non-interactive. */
536 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
537 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
542 /* Handle compiling via the native interface */
545 #endif /* !defined NOPOSIX */
552 clock_t start_time = clock();
553 for (i = 0; i < LOOPREPEAT; i++)
555 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
556 if (re != NULL) free(re);
558 time_taken = clock() - start_time;
559 fprintf(outfile, "Compile time %.3f milliseconds\n",
560 ((double)time_taken * 1000.0) /
561 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
564 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
566 /* Compilation failed; go back for another re, skipping to blank line
567 if non-interactive. */
571 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
577 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
582 len = (int)strlen((char *)buffer);
583 while (len > 0 && isspace(buffer[len-1])) len--;
586 fprintf(outfile, "\n");
591 /* Compilation succeeded; print data if required. There are now two
592 info-returning functions. The old one has a limited interface and
593 returns only limited data. Check that it agrees with the newer one. */
597 int old_first_char, old_options, old_count;
598 int count, backrefmax, first_char, need_char;
601 if (do_debug) print_internals(re);
603 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
604 new_info(re, NULL, PCRE_INFO_SIZE, &size);
605 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
606 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
607 new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
608 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
610 old_count = pcre_info(re, &old_options, &old_first_char);
611 if (count < 0) fprintf(outfile,
612 "Error %d from pcre_info()\n", count);
615 if (old_count != count) fprintf(outfile,
616 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
619 if (old_first_char != first_char) fprintf(outfile,
620 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
621 first_char, old_first_char);
623 if (old_options != options) fprintf(outfile,
624 "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
628 if (size != gotten_store) fprintf(outfile,
629 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
632 fprintf(outfile, "Capturing subpattern count = %d\n", count);
634 fprintf(outfile, "Max back reference = %d\n", backrefmax);
635 if (options == 0) fprintf(outfile, "No options\n");
636 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
637 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
638 ((options & PCRE_CASELESS) != 0)? " caseless" : "",
639 ((options & PCRE_EXTENDED) != 0)? " extended" : "",
640 ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
641 ((options & PCRE_DOTALL) != 0)? " dotall" : "",
642 ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
643 ((options & PCRE_EXTRA) != 0)? " extra" : "",
644 ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
646 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
647 fprintf(outfile, "Case state changes\n");
649 if (first_char == -1)
651 fprintf(outfile, "First char at start or follows \\n\n");
653 else if (first_char < 0)
655 fprintf(outfile, "No first char\n");
659 if (isprint(first_char))
660 fprintf(outfile, "First char = \'%c\'\n", first_char);
662 fprintf(outfile, "First char = %d\n", first_char);
667 fprintf(outfile, "No need char\n");
671 if (isprint(need_char))
672 fprintf(outfile, "Need char = \'%c\'\n", need_char);
674 fprintf(outfile, "Need char = %d\n", need_char);
678 /* If /S was present, study the regexp to generate additional info to
679 help with the matching. */
687 clock_t start_time = clock();
688 for (i = 0; i < LOOPREPEAT; i++)
689 extra = pcre_study(re, study_options, &error);
690 time_taken = clock() - start_time;
691 if (extra != NULL) free(extra);
692 fprintf(outfile, " Study time %.3f milliseconds\n",
693 ((double)time_taken * 1000.0)/
694 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
697 extra = pcre_study(re, study_options, &error);
699 fprintf(outfile, "Failed to study: %s\n", error);
700 else if (extra == NULL)
701 fprintf(outfile, "Study returned NULL\n");
703 else if (do_showinfo)
705 uschar *start_bits = NULL;
706 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
707 if (start_bits == NULL)
708 fprintf(outfile, "No starting character set\n");
713 fprintf(outfile, "Starting character set: ");
714 for (i = 0; i < 256; i++)
716 if ((start_bits[i/8] & (1<<(i%8))) != 0)
720 fprintf(outfile, "\n ");
723 if (isprint(i) && i != ' ')
725 fprintf(outfile, "%c ", i);
730 fprintf(outfile, "\\x%02x ", i);
735 fprintf(outfile, "\n");
741 /* Read data lines and test them */
746 unsigned char *bptr = dbuffer;
752 int start_offset = 0;
755 int size_offsets = sizeof(offsets)/sizeof(int);
759 if (infile == stdin) printf("data> ");
760 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
765 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
767 len = (int)strlen((char *)buffer);
768 while (len > 0 && isspace(buffer[len-1])) len--;
773 while (isspace(*p)) p++;
776 while ((c = *p++) != 0)
780 if (c == '\\') switch ((c = *p++))
782 case 'a': c = 7; break;
783 case 'b': c = '\b'; break;
784 case 'e': c = 27; break;
785 case 'f': c = '\f'; break;
786 case 'n': c = '\n'; break;
787 case 'r': c = '\r'; break;
788 case 't': c = '\t'; break;
789 case 'v': c = '\v'; break;
791 case '0': case '1': case '2': case '3':
792 case '4': case '5': case '6': case '7':
794 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
795 c = c * 8 + *p++ - '0';
800 while (i++ < 2 && isxdigit(*p))
802 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
807 case 0: /* Allows for an empty line */
811 case 'A': /* Option setting */
812 options |= PCRE_ANCHORED;
816 options |= PCRE_NOTBOL;
820 while(isdigit(*p)) n = n * 10 + *p++ - '0';
821 copystrings |= 1 << n;
825 while(isdigit(*p)) n = n * 10 + *p++ - '0';
826 getstrings |= 1 << n;
834 options |= PCRE_NOTEMPTY;
838 while(isdigit(*p)) n = n * 10 + *p++ - '0';
839 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
843 options |= PCRE_NOTEOL;
851 /* Handle matching via the POSIX interface, which does not
855 if (posix || do_posix)
859 regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
860 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
861 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
863 rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
867 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
868 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
873 for (i = 0; i < size_offsets; i++)
875 if (pmatch[i].rm_so >= 0)
877 fprintf(outfile, "%2d: ", (int)i);
878 pchars(dbuffer + pmatch[i].rm_so,
879 pmatch[i].rm_eo - pmatch[i].rm_so);
880 fprintf(outfile, "\n");
881 if (i == 0 && do_showrest)
883 fprintf(outfile, " 0+ ");
884 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
885 fprintf(outfile, "\n");
892 /* Handle matching via the native interface - repeats for /g and /G */
895 #endif /* !defined NOPOSIX */
897 for (;; gmatched++) /* Loop for /g or /G */
903 clock_t start_time = clock();
904 for (i = 0; i < LOOPREPEAT; i++)
905 count = pcre_exec(re, extra, (char *)bptr, len,
906 start_offset, options | g_notempty, offsets, size_offsets);
907 time_taken = clock() - start_time;
908 fprintf(outfile, "Execute time %.3f milliseconds\n",
909 ((double)time_taken * 1000.0)/
910 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
913 count = pcre_exec(re, extra, (char *)bptr, len,
914 start_offset, options | g_notempty, offsets, size_offsets);
918 fprintf(outfile, "Matched, but too many substrings\n");
919 count = size_offsets/3;
927 for (i = 0; i < count * 2; i += 2)
930 fprintf(outfile, "%2d: <unset>\n", i/2);
933 fprintf(outfile, "%2d: ", i/2);
934 pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
935 fprintf(outfile, "\n");
940 fprintf(outfile, " 0+ ");
941 pchars(bptr + offsets[i+1], len - offsets[i+1]);
942 fprintf(outfile, "\n");
948 for (i = 0; i < 32; i++)
950 if ((copystrings & (1 << i)) != 0)
953 int rc = pcre_copy_substring((char *)bptr, offsets, count,
954 i, copybuffer, sizeof(copybuffer));
956 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
958 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
962 for (i = 0; i < 32; i++)
964 if ((getstrings & (1 << i)) != 0)
966 const char *substring;
967 int rc = pcre_get_substring((char *)bptr, offsets, count,
970 fprintf(outfile, "get substring %d failed %d\n", i, rc);
973 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
974 free((void *)substring);
981 const char **stringlist;
982 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
985 fprintf(outfile, "get substring list failed %d\n", rc);
988 for (i = 0; i < count; i++)
989 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
990 if (stringlist[i] != NULL)
991 fprintf(outfile, "string list not terminated by NULL\n");
992 free((void *)stringlist);
997 /* Failed to match. If this is a /g or /G loop and we previously set
998 g_notempty after a null match, this is not necessarily the end.
999 We want to advance the start offset, and continue. Fudge the offset
1000 values to achieve this. We won't be at the end of the string - that
1001 was checked before setting g_notempty. */
1005 if (g_notempty != 0)
1007 offsets[0] = start_offset;
1008 offsets[1] = start_offset + 1;
1012 if (gmatched == 0) /* Error if no previous matches */
1014 if (count == -1) fprintf(outfile, "No match\n");
1015 else fprintf(outfile, "Error %d\n", count);
1017 break; /* Out of the /g loop */
1021 /* If not /g or /G we are done */
1023 if (!do_g && !do_G) break;
1025 /* If we have matched an empty string, first check to see if we are at
1026 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1027 what Perl's /g option does. This turns out to be rather cunning. First
1028 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1029 same point. If this fails (picked up above) we advance to the next
1033 if (offsets[0] == offsets[1])
1035 if (offsets[0] == len) break;
1036 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1039 /* For /g, update the start offset, leaving the rest alone */
1041 if (do_g) start_offset = offsets[1];
1043 /* For /G, update the pointer and length */
1050 } /* End of loop for /g and /G */
1051 } /* End of loop for data lines */
1055 #if !defined NOPOSIX
1056 if (posix || do_posix) regfree(&preg);
1059 if (re != NULL) free(re);
1060 if (extra != NULL) free(extra);
1063 free((void *)tables);
1064 setlocale(LC_CTYPE, "C");
1068 fprintf(outfile, "\n");