--- /dev/null
+/* #define DEBUG */\r
+/* LGPLd GNU regex for Native WIN32 */\r
+\r
+/* Part of the ht://Dig package <http://www.htdig.org/> */\r
+/* Copyright (c) 2003 The ht://Dig Group */\r
+/* For copyright details, see the file COPYING in your distribution */\r
+/* or the GNU Library General Public License (LGPL) version 2 or later */\r
+/* <http://www.gnu.org/copyleft/lgpl.html> */\r
+\r
+/* Added June 2003 Neal Richter, RightNow Technologies */\r
+\r
+/* note that this version is significantly different from the original */\r
+/* version 0.12 GNU source code. It compiles and works on Native WIN32. */\r
+\r
+/* Extended regular expression matching and search library,\r
+ version 0.12.\r
+ (Implements POSIX draft P1003.2/D11.2, except for some of the\r
+ internationalization features.)\r
+\r
+ Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.\r
+\r
+ This file is part of the GNU C Library. Its master source is NOT part of\r
+ the C library, however. The master source lives in /gd/gnu/lib.\r
+\r
+ The GNU C Library is free software; you can redistribute it and/or\r
+ modify it under the terms of the GNU Library General Public License as\r
+ published by the Free Software Foundation; either version 2 of the\r
+ License, or (at your option) any later version.\r
+\r
+ The GNU C Library is distributed in the hope that it will be useful,\r
+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+ Library General Public License for more details.\r
+\r
+ You should have received a copy of the GNU Library General Public\r
+ License along with the GNU C Library; see the file COPYING.LIB. If not,\r
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,\r
+ Boston, MA 02111-1307, USA. */\r
+\r
+#if defined(_WIN32)\r
+#pragma warning(disable: 4018 4101)\r
+#endif\r
+\r
+/* AIX requires this to be the first thing in the file. */\r
+#if defined (_AIX) && !defined (REGEX_MALLOC)\r
+#pragma alloca\r
+#endif\r
+\r
+#undef _GNU_SOURCE\r
+#define _GNU_SOURCE\r
+\r
+#if defined(LINUX)\r
+#define STDC_HEADERS\r
+#endif\r
+\r
+#if defined(STDC_HEADERS) && !defined(emacs)\r
+#include <stddef.h>\r
+#else\r
+/* We need this for `regex.h', and perhaps for the Emacs include files. */\r
+#include <sys/types.h>\r
+#endif\r
+\r
+/* For platforms which support the ISO C amendment 1 functionality we\r
+ support user defined character classes. */\r
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)\r
+# include <wctype.h>\r
+# include <wchar.h>\r
+#endif\r
+\r
+/* This is for other GNU distributions with internationalized messages. */\r
+#undef HAVE_LIBINTL_H\r
+#if HAVE_LIBINTL_H || defined (_LIBC)\r
+# include <libintl.h>\r
+#else\r
+# define gettext(msgid) (msgid)\r
+#endif\r
+\r
+#ifndef gettext_noop\r
+/* This define is so xgettext can find the internationalizable\r
+ strings. */\r
+#define gettext_noop(String) String\r
+#endif\r
+\r
+/* The `emacs' switch turns on certain matching commands\r
+ that make sense only in Emacs. */\r
+#ifdef emacs\r
+\r
+#include "lisp.h"\r
+#include "buffer.h"\r
+#include "syntax.h"\r
+\r
+#else /* not emacs */\r
+\r
+/* If we are not linking with Emacs proper,\r
+ we can't use the relocating allocator\r
+ even if config.h says that we can. */\r
+#undef REL_ALLOC\r
+\r
+#if defined (STDC_HEADERS) || defined (_LIBC) || defined(_WIN32)\r
+#include <stdlib.h>\r
+#else\r
+char *malloc ();\r
+char *realloc ();\r
+void free();\r
+#endif\r
+\r
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.\r
+ If nothing else has been done, use the method below. */\r
+#ifdef INHIBIT_STRING_HEADER\r
+#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))\r
+#if !defined (bzero) && !defined (bcopy)\r
+#undef INHIBIT_STRING_HEADER\r
+#endif\r
+#endif\r
+#endif\r
+\r
+#include <string.h>\r
+\r
+/* This is the normal way of making sure we have a bcopy and a bzero.\r
+ This is used in most programs--a few other programs avoid this\r
+ by defining INHIBIT_STRING_HEADER. */\r
+#ifndef INHIBIT_STRING_HEADER\r
+#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) || defined (_WIN32)\r
+#ifndef bcmp\r
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))\r
+#endif\r
+#ifndef bcopy\r
+#define bcopy(s, d, n) memcpy ((d), (s), (n))\r
+#endif\r
+#ifndef bzero\r
+#define bzero(s, n) memset ((s), 0, (n))\r
+#endif\r
+#else\r
+#include <strings.h>\r
+#endif\r
+#endif\r
+\r
+/* Define the syntax stuff for \<, \>, etc. */\r
+\r
+/* This must be nonzero for the wordchar and notwordchar pattern\r
+ commands in re_match_2. */\r
+#ifndef Sword\r
+#define Sword 1\r
+#endif\r
+\r
+#ifdef SWITCH_ENUM_BUG\r
+#define SWITCH_ENUM_CAST(x) ((int)(x))\r
+#else\r
+#define SWITCH_ENUM_CAST(x) (x)\r
+#endif\r
+\r
+#ifdef SYNTAX_TABLE\r
+\r
+extern char *re_syntax_table;\r
+\r
+#else /* not SYNTAX_TABLE */\r
+\r
+/* How many characters in the character set. */\r
+#define CHAR_SET_SIZE 256\r
+\r
+static char re_syntax_table[CHAR_SET_SIZE];\r
+\r
+/* Fill re_syntax_table the first time this is called: the letters a-z and\r
+   A-Z, the digits 0-9 and the underscore are marked Sword, every other\r
+   entry is left zero.  Any later call returns without touching the table.  */\r
+static void\r
+init_syntax_once ()\r
+{\r
+  static int initialized = 0;\r
+  register int ch;\r
+\r
+  if (!initialized)\r
+    {\r
+      bzero (re_syntax_table, sizeof re_syntax_table);\r
+\r
+      /* One pass over the table, marking word constituents.  */\r
+      for (ch = 0; ch < CHAR_SET_SIZE; ch++)\r
+        {\r
+          if ((ch >= 'a' && ch <= 'z')\r
+              || (ch >= 'A' && ch <= 'Z')\r
+              || (ch >= '0' && ch <= '9')\r
+              || ch == '_')\r
+            re_syntax_table[ch] = Sword;\r
+        }\r
+\r
+      initialized = 1;\r
+    }\r
+}\r
+\r
+#endif /* not SYNTAX_TABLE */\r
+\r
+#define SYNTAX(c) re_syntax_table[c]\r
+\r
+#endif /* not emacs */\r
+\f\r
+/* Get the interface, including the syntax bits. */\r
+/* #include "regex.h" */\r
+#include "regex_win32.h"\r
+\r
+/* isalpha etc. are used for the character classes. */\r
+#include <ctype.h>\r
+\r
+/* Jim Meyering writes:\r
+\r
+ "... Some ctype macros are valid only for character codes that\r
+ isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when\r
+ using /bin/cc or gcc but without giving an ansi option). So, all\r
+ ctype uses should be through macros like ISPRINT... If\r
+ STDC_HEADERS is defined, then autoconf has verified that the ctype\r
+ macros don't need to be guarded with references to isascii. ...\r
+ Defining isascii to 1 should let any compiler worth its salt\r
+ eliminate the && through constant folding." */\r
+\r
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))\r
+#define ISASCII(c) 1\r
+#else\r
+#define ISASCII(c) isascii(c)\r
+#endif\r
+\r
+#ifdef isblank\r
+#define ISBLANK(c) (ISASCII (c) && isblank (c))\r
+#else\r
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')\r
+#endif\r
+#ifdef isgraph\r
+#define ISGRAPH(c) (ISASCII (c) && isgraph (c))\r
+#else\r
+#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))\r
+#endif\r
+\r
+#define ISPRINT(c) (ISASCII (c) && isprint (c))\r
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))\r
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))\r
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))\r
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))\r
+#define ISLOWER(c) (ISASCII (c) && islower (c))\r
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))\r
+#define ISSPACE(c) (ISASCII (c) && isspace (c))\r
+#define ISUPPER(c) (ISASCII (c) && isupper (c))\r
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))\r
+\r
+#ifndef NULL\r
+#define NULL (void *)0\r
+#endif\r
+\r
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',\r
+ since ours (we hope) works properly with all combinations of\r
+ machines, compilers, `char' and `unsigned char' argument types.\r
+ (Per Bothner suggested the basic approach.) */\r
+#undef SIGN_EXTEND_CHAR\r
+#if __STDC__\r
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))\r
+#else /* not __STDC__ */\r
+/* As in Harbison and Steele. */\r
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)\r
+#endif\r
+\f\r
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we\r
+ use `alloca' instead of `malloc'. This is because using malloc in\r
+ re_search* or re_match* could cause memory leaks when C-g is used in\r
+ Emacs; also, malloc is slower and causes storage fragmentation. On\r
+ the other hand, malloc is more portable, and easier to debug.\r
+\r
+ Because we sometimes use alloca, some routines have to be macros,\r
+ not functions -- `alloca'-allocated space disappears at the end of the\r
+ function it is called in. */\r
+\r
+#if defined(REGEX_MALLOC) || defined(_WIN32)\r
+\r
+/* Heap-based allocation for the regex routines.  REGEX_REALLOCATE ignores\r
+   OSIZE because realloc already knows the old size.  */\r
+#define REGEX_ALLOCATE malloc\r
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)\r
+#define REGEX_FREE free\r
+/* This branch is also taken for _WIN32 alone, so make sure REGEX_MALLOC is\r
+   defined for the later `#ifdef REGEX_MALLOC' tests.  Do not redefine it\r
+   when the builder already supplied it (e.g. -DREGEX_MALLOC=1): an empty\r
+   redefinition would be an incompatible macro redefinition.  */\r
+#ifndef REGEX_MALLOC\r
+#define REGEX_MALLOC\r
+#endif\r
+\r
+#else /* not REGEX_MALLOC */\r
+\r
+/* Emacs already defines alloca, sometimes. */\r
+#ifndef alloca\r
+\r
+/* Make alloca work the best possible way. */\r
+#ifdef __GNUC__\r
+#define alloca __builtin_alloca\r
+#else /* not __GNUC__ */\r
+#if HAVE_ALLOCA_H\r
+#include <alloca.h>\r
+#else /* not __GNUC__ or HAVE_ALLOCA_H */\r
+#if 0 /* It is a bad idea to declare alloca. We always cast the result. */\r
+#ifndef _AIX /* Already did AIX, up at the top. */\r
+char *alloca ();\r
+#endif /* not _AIX */\r
+#endif\r
+#endif /* not HAVE_ALLOCA_H */\r
+#endif /* not __GNUC__ */\r
+\r
+#endif /* not alloca */\r
+\r
+#define REGEX_ALLOCATE alloca\r
+\r
+/* Assumes a `char *destination' variable. */\r
+#define REGEX_REALLOCATE(source, osize, nsize) \\r
+ (destination = (char *) alloca (nsize), \\r
+ bcopy (source, destination, osize), \\r
+ destination)\r
+\r
+/* No need to do anything to free, after alloca. */\r
+#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */\r
+\r
+#endif /* not REGEX_MALLOC */\r
+\r
+/* Define how to allocate the failure stack. */\r
+\r
+#if defined (REL_ALLOC) && defined (REGEX_MALLOC)\r
+\r
+#define REGEX_ALLOCATE_STACK(size) \\r
+ r_alloc (&failure_stack_ptr, (size))\r
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) \\r
+ r_re_alloc (&failure_stack_ptr, (nsize))\r
+#define REGEX_FREE_STACK(ptr) \\r
+ r_alloc_free (&failure_stack_ptr)\r
+\r
+#else /* not using relocating allocator */\r
+\r
+#ifdef REGEX_MALLOC\r
+\r
+#define REGEX_ALLOCATE_STACK malloc\r
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)\r
+#define REGEX_FREE_STACK free\r
+\r
+#else /* not REGEX_MALLOC */\r
+\r
+#define REGEX_ALLOCATE_STACK alloca\r
+\r
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) \\r
+ REGEX_REALLOCATE (source, osize, nsize)\r
+/* No need to explicitly free anything. */\r
+#define REGEX_FREE_STACK(arg)\r
+\r
+#endif /* not REGEX_MALLOC */\r
+#endif /* not using relocating allocator */\r
+\r
+\r
+/* True if `size1' is nonzero and PTR is pointing anywhere inside\r
+ `string1' or just past its end. This works if PTR is NULL, which is\r
+ a good thing. */\r
+#define FIRST_STRING_P(ptr) \\r
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)\r
+\r
+/* (Re)Allocate N items of type T using malloc, or fail. */\r
+/* TALLOC yields a `T *' to room for N items, or whatever malloc returns\r
+ on failure. */\r
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))\r
+/* RETALLOC resizes ADDR in place: ADDR is assigned realloc's result, so\r
+ the previous pointer value is overwritten even when realloc fails. ADDR\r
+ is evaluated twice and must be an lvalue. */\r
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))\r
+/* RETALLOC_IF resizes ADDR when it is non-null and allocates it afresh\r
+ otherwise. It expands to a bare if/else statement, not an expression,\r
+ so use it only as a complete statement of its own. */\r
+#define RETALLOC_IF(addr, n, t) \\r
+ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)\r
+/* REGEX_TALLOC is like TALLOC but goes through REGEX_ALLOCATE. */\r
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))\r
+\r
+#define BYTEWIDTH 8 /* In bits. */\r
+\r
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))\r
+\r
+#undef MAX\r
+#undef MIN\r
+#define MAX(a, b) ((a) > (b) ? (a) : (b))\r
+#define MIN(a, b) ((a) < (b) ? (a) : (b))\r
+\r
+/* typedef char boolean; */\r
+#define false 0\r
+#define true 1\r
+\r
+/* Forward declaration of the file-local matcher. Its definition is not\r
+ in this part of the file; the parameter names suggest two string pieces\r
+ (STRING1/SIZE1, STRING2/SIZE2), a start position POS, a register set\r
+ REGS and a limit STOP -- TODO confirm against the definition. */\r
+static int\r
+re_match_2_internal(struct re_pattern_buffer *bufp,\r
+ const char *string1,\r
+ int size1,\r
+ const char *string2,\r
+ int size2,\r
+ int pos,\r
+ struct re_registers *regs,\r
+ int stop);\r
+\f\r
+/* These are the command codes that appear in compiled regular\r
+ expressions. Some opcodes are followed by argument bytes. A\r
+ command code can specify any interpretation whatsoever for its\r
+ arguments. Zero bytes may appear in the compiled regular expression.\r
+ The codes are read back from the pattern one byte at a time, so the\r
+ order of the enumerators below must not be changed casually. */\r
+\r
+typedef enum\r
+{\r
+ no_op = 0,\r
+\r
+ /* Succeed right away--no more backtracking. */\r
+ succeed,\r
+\r
+ /* Followed by one byte giving n, then by n literal bytes. */\r
+ exactn,\r
+\r
+ /* Matches any (more or less) character. */\r
+ anychar,\r
+\r
+ /* Matches any one char belonging to specified set. First\r
+ following byte is number of bitmap bytes. Then come bytes\r
+ for a bitmap saying which chars are in. Bits in each byte\r
+ are ordered low-bit-first. A character is in the set if its\r
+ bit is 1. A character too large to have a bit in the map is\r
+ automatically not in the set. */\r
+ charset,\r
+\r
+ /* Same parameters as charset, but match any character that is\r
+ not one of those specified. */\r
+ charset_not,\r
+\r
+ /* Start remembering the text that is matched, for storing in a\r
+ register. Followed by one byte with the register number, in\r
+ the range 0 to one less than the pattern buffer's re_nsub\r
+ field. Then followed by one byte with the number of groups\r
+ inner to this one. (This last has to be part of the\r
+ start_memory only because we need it in the on_failure_jump\r
+ of re_match_2.) */\r
+ start_memory,\r
+\r
+ /* Stop remembering the text that is matched and store it in a\r
+ memory register. Followed by one byte with the register\r
+ number, in the range 0 to one less than `re_nsub' in the\r
+ pattern buffer, and one byte with the number of inner groups,\r
+ just like `start_memory'. (We need the number of inner\r
+ groups here because we don't have any easy way of finding the\r
+ corresponding start_memory when we're at a stop_memory.) */\r
+ stop_memory,\r
+\r
+ /* Match a duplicate of something remembered. Followed by one\r
+ byte containing the register number. */\r
+ duplicate,\r
+\r
+ /* Fail unless at beginning of line. */\r
+ begline,\r
+\r
+ /* Fail unless at end of line. */\r
+ endline,\r
+\r
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning\r
+ of string to be matched (if not). */\r
+ begbuf,\r
+\r
+ /* Analogously, for end of buffer/string. */\r
+ endbuf,\r
+\r
+ /* Followed by two-byte relative address to which to jump. */\r
+ jump,\r
+\r
+ /* Same as jump, but marks the end of an alternative. */\r
+ jump_past_alt,\r
+\r
+ /* Followed by two-byte relative address of place to resume at\r
+ in case of failure. */\r
+ on_failure_jump,\r
+\r
+ /* Like on_failure_jump, but pushes a placeholder instead of the\r
+ current string position when executed. */\r
+ on_failure_keep_string_jump,\r
+\r
+ /* Throw away latest failure point and then jump to following\r
+ two-byte relative address. */\r
+ pop_failure_jump,\r
+\r
+ /* Change to pop_failure_jump if we know we won't have to backtrack\r
+ to match; otherwise change to jump. This is used to jump\r
+ back to the beginning of a repeat. If what follows this jump\r
+ clearly won't match what the repeat does, such that we can be\r
+ sure that there is no use backtracking out of repetitions\r
+ already matched, then we change it to a pop_failure_jump.\r
+ Followed by two-byte address. */\r
+ maybe_pop_jump,\r
+\r
+ /* Jump to following two-byte address, and push a dummy failure\r
+ point. This failure point will be thrown away if an attempt\r
+ is made to use it for a failure. A `+' construct makes this\r
+ before the first repeat. Also used as an intermediary kind\r
+ of jump when compiling an alternative. */\r
+ dummy_failure_jump,\r
+\r
+ /* Push a dummy failure point and continue. Used at the end of\r
+ alternatives. */\r
+ push_dummy_failure,\r
+\r
+ /* Followed by two-byte relative address and two-byte number n.\r
+ After matching N times, jump to the address upon failure. */\r
+ succeed_n,\r
+\r
+ /* Followed by two-byte relative address, and two-byte number n.\r
+ Jump to the address N times, then fail. */\r
+ jump_n,\r
+\r
+ /* Set the following two-byte relative address to the\r
+ subsequent two-byte number. The address *includes* the two\r
+ bytes of number. */\r
+ set_number_at,\r
+\r
+ wordchar, /* Matches any word-constituent character. */\r
+ notwordchar, /* Matches any char that is not a word-constituent. */\r
+\r
+ wordbeg, /* Succeeds if at word beginning. */\r
+ wordend, /* Succeeds if at word end. */\r
+\r
+ wordbound, /* Succeeds if at a word boundary. */\r
+ notwordbound /* Succeeds if not at a word boundary. */\r
+\r
+#ifdef emacs\r
+ ,before_dot, /* Succeeds if before point. */\r
+ at_dot, /* Succeeds if at point. */\r
+ after_dot, /* Succeeds if after point. */\r
+\r
+ /* Matches any character whose syntax is specified. Followed by\r
+ a byte which contains a syntax code, e.g., Sword. */\r
+ syntaxspec,\r
+\r
+ /* Matches any character whose syntax is not that specified. */\r
+ notsyntaxspec\r
+#endif /* emacs */\r
+} re_opcode_t;\r
+\f\r
+/* Common operations on the compiled pattern. */\r
+\r
+/* Store NUMBER in two contiguous bytes starting at DESTINATION.\r
+ The low-order eight bits go into the first byte and the bits above\r
+ them into the second. NUMBER is evaluated twice, so it should have\r
+ no side effects. */\r
+\r
+#define STORE_NUMBER(destination, number) \\r
+ do { \\r
+ (destination)[0] = (number) & 0377; \\r
+ (destination)[1] = (number) >> 8; \\r
+ } while (0)\r
+\r
+/* Same as STORE_NUMBER, except increment DESTINATION to\r
+ the byte after where the number is stored. Therefore, DESTINATION\r
+ must be an lvalue. */\r
+\r
+#define STORE_NUMBER_AND_INCR(destination, number) \\r
+ do { \\r
+ STORE_NUMBER (destination, number); \\r
+ (destination) += 2; \\r
+ } while (0)\r
+\r
+/* Put into DESTINATION a number stored in two contiguous bytes starting\r
+ at SOURCE. */\r
+\r
+#define EXTRACT_NUMBER(destination, source) \\r
+ do { \\r
+ (destination) = *(source) & 0377; \\r
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \\r
+ } while (0)\r
+\r
+#ifdef DEBUG\r
+/* Function form of EXTRACT_NUMBER: store in *DEST the number held in the\r
+   two bytes at SOURCE, low-order byte first, with the second byte\r
+   sign-extended.  */\r
+static void extract_number _RE_ARGS ((int *dest, unsigned char *source));\r
+static void\r
+extract_number (dest, source)\r
+    int *dest;\r
+    unsigned char *source;\r
+{\r
+  /* The second byte carries the sign.  Scale it by multiplying instead of\r
+     using `<< 8', because left-shifting a negative value is undefined in\r
+     ISO C.  */\r
+  int high = SIGN_EXTEND_CHAR (*(source + 1));\r
+\r
+  *dest = *source & 0377;\r
+  *dest += high * (1 << 8);\r
+}\r
+\r
+#ifndef EXTRACT_MACROS /* To debug the macros. */\r
+#undef EXTRACT_NUMBER\r
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)\r
+#endif /* not EXTRACT_MACROS */\r
+\r
+#endif /* DEBUG */\r
+\r
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.\r
+ SOURCE must be an lvalue; it ends up two bytes further on. */\r
+\r
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \\r
+ do { \\r
+ EXTRACT_NUMBER (destination, source); \\r
+ (source) += 2; \\r
+ } while (0)\r
+\r
+#ifdef DEBUG\r
+/* Function form of EXTRACT_NUMBER_AND_INCR: read the number found at\r
+   *SOURCE into *DESTINATION, then move *SOURCE past its two bytes.  */\r
+static void extract_number_and_incr _RE_ARGS ((int *destination,\r
+                                               unsigned char **source));\r
+static void\r
+extract_number_and_incr (destination, source)\r
+    int *destination;\r
+    unsigned char **source;\r
+{\r
+  unsigned char *number_at = *source;\r
+\r
+  extract_number (destination, number_at);\r
+  *source = number_at + 2;\r
+}\r
+\r
+#ifndef EXTRACT_MACROS\r
+#undef EXTRACT_NUMBER_AND_INCR\r
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \\r
+ extract_number_and_incr (&dest, &src)\r
+#endif /* not EXTRACT_MACROS */\r
+\r
+#endif /* DEBUG */\r
+\f\r
+/* If DEBUG is defined, Regex prints many voluminous messages about what\r
+ it is doing (if the variable `debug' is nonzero). If linked with the\r
+ main program in `iregex.c', you can enter patterns and strings\r
+ interactively. And if linked with the main program in `main.c' and\r
+ the other test files, you can run the already-written tests. */\r
+\r
+#ifdef DEBUG\r
+\r
+/* We use standard I/O for debugging. */\r
+#include <stdio.h>\r
+\r
+/* It is useful to test things that ``must'' be true when debugging. */\r
+#include <assert.h>\r
+\r
+static int debug = 0;\r
+\r
+#define DEBUG_STATEMENT(e) e\r
+#define DEBUG_PRINT1(x) if (debug) printf (x)\r
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)\r
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)\r
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)\r
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \\r
+ if (debug) print_partial_compiled_pattern (s, e)\r
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \\r
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)\r
+\r
+\r
+/* Print the fastmap in human-readable form.  Each run of consecutive\r
+   nonzero entries is written as its first character; when the run holds\r
+   more than one entry, a `-' and the last character follow.  A newline\r
+   ends the output.  */\r
+\r
+void\r
+print_fastmap (fastmap)\r
+    char *fastmap;\r
+{\r
+  const unsigned limit = 1 << BYTEWIDTH;\r
+  unsigned pos = 0;\r
+\r
+  while (pos < limit)\r
+    {\r
+      unsigned first;\r
+\r
+      if (!fastmap[pos])\r
+        {\r
+          pos++;\r
+          continue;\r
+        }\r
+\r
+      /* Start of a run: show it, then swallow the rest of the run.  */\r
+      first = pos++;\r
+      putchar (first);\r
+      while (pos < limit && fastmap[pos])\r
+        pos++;\r
+\r
+      if (pos - 1 != first)\r
+        {\r
+          printf ("-");\r
+          putchar (pos - 1);\r
+        }\r
+    }\r
+  putchar ('\n');\r
+}\r
+\r
+\r
+/* Print a compiled pattern string in human-readable form, starting at\r
+   the START pointer into it and ending just before the pointer END.\r
+   Every command goes on its own line, prefixed by its byte offset from\r
+   START; jump targets are shown as offsets from START as well.  A null\r
+   START just prints "(null)".  */\r
+\r
+void\r
+print_partial_compiled_pattern (start, end)\r
+    unsigned char *start;\r
+    unsigned char *end;\r
+{\r
+  int mcnt, mcnt2;\r
+  unsigned char *p1;\r
+  unsigned char *p = start;\r
+  unsigned char *pend = end;\r
+\r
+  if (start == NULL)\r
+    {\r
+      printf ("(null)\n");\r
+      return;\r
+    }\r
+\r
+  /* Loop over pattern commands.  */\r
+  while (p < pend)\r
+    {\r
+      /* Pointer differences have type ptrdiff_t; convert them explicitly\r
+         so the argument really is the int that `%d' expects.  */\r
+      printf ("%d:\t", (int) (p - start));\r
+\r
+      switch ((re_opcode_t) *p++)\r
+        {\r
+        case no_op:\r
+          printf ("/no_op");\r
+          break;\r
+\r
+        case exactn:\r
+          /* One count byte, then that many literal bytes.  */\r
+          mcnt = *p++;\r
+          printf ("/exactn/%d", mcnt);\r
+          do\r
+            {\r
+              putchar ('/');\r
+              putchar (*p++);\r
+            }\r
+          while (--mcnt);\r
+          break;\r
+\r
+        case start_memory:\r
+          mcnt = *p++;\r
+          printf ("/start_memory/%d/%d", mcnt, *p++);\r
+          break;\r
+\r
+        case stop_memory:\r
+          mcnt = *p++;\r
+          printf ("/stop_memory/%d/%d", mcnt, *p++);\r
+          break;\r
+\r
+        case duplicate:\r
+          printf ("/duplicate/%d", *p++);\r
+          break;\r
+\r
+        case anychar:\r
+          printf ("/anychar");\r
+          break;\r
+\r
+        case charset:\r
+        case charset_not:\r
+          {\r
+            register int c, last = -100;\r
+            register int in_range = 0;\r
+\r
+            printf ("/charset [%s",\r
+                    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");\r
+\r
+            assert (p + *p < pend);\r
+\r
+            /* *p is the number of bitmap bytes; the bitmap follows it.  */\r
+            for (c = 0; c < 256; c++)\r
+              if (c / 8 < *p\r
+                  && (p[1 + (c/8)] & (1 << (c % 8))))\r
+                {\r
+                  /* Are we starting a range?  */\r
+                  if (last + 1 == c && ! in_range)\r
+                    {\r
+                      putchar ('-');\r
+                      in_range = 1;\r
+                    }\r
+                  /* Have we broken a range?  */\r
+                  else if (last + 1 != c && in_range)\r
+                    {\r
+                      putchar (last);\r
+                      in_range = 0;\r
+                    }\r
+\r
+                  if (! in_range)\r
+                    putchar (c);\r
+\r
+                  last = c;\r
+                }\r
+\r
+            if (in_range)\r
+              putchar (last);\r
+\r
+            putchar (']');\r
+\r
+            p += 1 + *p;\r
+          }\r
+          break;\r
+\r
+        case begline:\r
+          printf ("/begline");\r
+          break;\r
+\r
+        case endline:\r
+          printf ("/endline");\r
+          break;\r
+\r
+        case on_failure_jump:\r
+          extract_number_and_incr (&mcnt, &p);\r
+          printf ("/on_failure_jump to %d", (int) (p + mcnt - start));\r
+          break;\r
+\r
+        case on_failure_keep_string_jump:\r
+          extract_number_and_incr (&mcnt, &p);\r
+          printf ("/on_failure_keep_string_jump to %d",\r
+                  (int) (p + mcnt - start));\r
+          break;\r
+\r
+        case dummy_failure_jump:\r
+          extract_number_and_incr (&mcnt, &p);\r
+          printf ("/dummy_failure_jump to %d", (int) (p + mcnt - start));\r
+          break;\r
+\r
+        case push_dummy_failure:\r
+          printf ("/push_dummy_failure");\r
+          break;\r
+\r
+        case maybe_pop_jump:\r
+          extract_number_and_incr (&mcnt, &p);\r
+          printf ("/maybe_pop_jump to %d", (int) (p + mcnt - start));\r
+          break;\r
+\r
+        case pop_failure_jump:\r
+          extract_number_and_incr (&mcnt, &p);\r
+          printf ("/pop_failure_jump to %d", (int) (p + mcnt - start));\r
+          break;\r
+\r
+        case jump_past_alt:\r
+          extract_number_and_incr (&mcnt, &p);\r
+          printf ("/jump_past_alt to %d", (int) (p + mcnt - start));\r
+          break;\r
+\r
+        case jump:\r
+          extract_number_and_incr (&mcnt, &p);\r
+          printf ("/jump to %d", (int) (p + mcnt - start));\r
+          break;\r
+\r
+        case succeed_n:\r
+          /* The target is relative to the byte after the address, so it\r
+             is computed before the count is read.  */\r
+          extract_number_and_incr (&mcnt, &p);\r
+          p1 = p + mcnt;\r
+          extract_number_and_incr (&mcnt2, &p);\r
+          printf ("/succeed_n to %d, %d times", (int) (p1 - start), mcnt2);\r
+          break;\r
+\r
+        case jump_n:\r
+          extract_number_and_incr (&mcnt, &p);\r
+          p1 = p + mcnt;\r
+          extract_number_and_incr (&mcnt2, &p);\r
+          printf ("/jump_n to %d, %d times", (int) (p1 - start), mcnt2);\r
+          break;\r
+\r
+        case set_number_at:\r
+          extract_number_and_incr (&mcnt, &p);\r
+          p1 = p + mcnt;\r
+          extract_number_and_incr (&mcnt2, &p);\r
+          printf ("/set_number_at location %d to %d",\r
+                  (int) (p1 - start), mcnt2);\r
+          break;\r
+\r
+        case wordbound:\r
+          printf ("/wordbound");\r
+          break;\r
+\r
+        case notwordbound:\r
+          printf ("/notwordbound");\r
+          break;\r
+\r
+        case wordbeg:\r
+          printf ("/wordbeg");\r
+          break;\r
+\r
+        case wordend:\r
+          printf ("/wordend");\r
+          /* This break was missing, so control fell into the next case\r
+             and a second opcode name was printed on the same line.  */\r
+          break;\r
+\r
+#ifdef emacs\r
+        case before_dot:\r
+          printf ("/before_dot");\r
+          break;\r
+\r
+        case at_dot:\r
+          printf ("/at_dot");\r
+          break;\r
+\r
+        case after_dot:\r
+          printf ("/after_dot");\r
+          break;\r
+\r
+        case syntaxspec:\r
+          printf ("/syntaxspec");\r
+          mcnt = *p++;\r
+          printf ("/%d", mcnt);\r
+          break;\r
+\r
+        case notsyntaxspec:\r
+          printf ("/notsyntaxspec");\r
+          mcnt = *p++;\r
+          printf ("/%d", mcnt);\r
+          break;\r
+#endif /* emacs */\r
+\r
+        case wordchar:\r
+          printf ("/wordchar");\r
+          break;\r
+\r
+        case notwordchar:\r
+          printf ("/notwordchar");\r
+          break;\r
+\r
+        case begbuf:\r
+          printf ("/begbuf");\r
+          break;\r
+\r
+        case endbuf:\r
+          printf ("/endbuf");\r
+          break;\r
+\r
+        default:\r
+          /* Unknown opcode: show its numeric value.  */\r
+          printf ("?%d", *(p-1));\r
+          break;\r
+        }\r
+\r
+      putchar ('\n');\r
+    }\r
+\r
+  printf ("%d:\tend of pattern.\n", (int) (p - start));\r
+}\r
+\r
+\r
+/* Dump the compiled pattern held in BUFP: the command bytes, the used and\r
+   allocated byte counts, the fastmap when one is present and marked\r
+   accurate, and then the other fields of the pattern buffer.  */\r
+void\r
+print_compiled_pattern (bufp)\r
+    struct re_pattern_buffer *bufp;\r
+{\r
+  unsigned char *code_start = bufp->buffer;\r
+  unsigned char *code_end = code_start + bufp->used;\r
+\r
+  print_partial_compiled_pattern (code_start, code_end);\r
+  printf ("%ld bytes used/%ld bytes allocated.\n",\r
+          bufp->used, bufp->allocated);\r
+\r
+  if (bufp->fastmap_accurate && bufp->fastmap)\r
+    {\r
+      printf ("fastmap: ");\r
+      print_fastmap (bufp->fastmap);\r
+    }\r
+\r
+  /* First line of flags.  */\r
+  printf ("re_nsub: %d\t", bufp->re_nsub);\r
+  printf ("regs_alloc: %d\t", bufp->regs_allocated);\r
+  printf ("can_be_null: %d\t", bufp->can_be_null);\r
+  printf ("newline_anchor: %d\n", bufp->newline_anchor);\r
+\r
+  /* Second line of flags.  */\r
+  printf ("no_sub: %d\t", bufp->no_sub);\r
+  printf ("not_bol: %d\t", bufp->not_bol);\r
+  printf ("not_eol: %d\t", bufp->not_eol);\r
+  printf ("syntax: %lx\n", bufp->syntax);\r
+  /* Perhaps we should print the translate table?  */\r
+}\r
+\r
+\r
+/* Print the text from WHERE to the end of the two-part string made of\r
+   STRING1 (SIZE1 bytes) followed by STRING2 (SIZE2 bytes).  When WHERE\r
+   lies in the first part, the rest of that part is printed and then all\r
+   of the second part.  A null WHERE prints "(null)".  */\r
+void\r
+print_double_string (where, string1, size1, string2, size2)\r
+    const char *where;\r
+    const char *string1;\r
+    const char *string2;\r
+    int size1;\r
+    int size2;\r
+{\r
+  const char *cursor;\r
+\r
+  if (where == NULL)\r
+    {\r
+      printf ("(null)");\r
+      return;\r
+    }\r
+\r
+  if (FIRST_STRING_P (where))\r
+    {\r
+      for (cursor = where; cursor < string1 + size1; cursor++)\r
+        putchar (*cursor);\r
+\r
+      /* Carry on from the start of the second part.  */\r
+      where = string2;\r
+    }\r
+\r
+  for (cursor = where; cursor < string2 + size2; cursor++)\r
+    putchar (*cursor);\r
+}\r
+\r
+/* Write the single character C to stderr.  */\r
+void\r
+printchar (c)\r
+    int c;\r
+{\r
+  fputc (c, stderr);\r
+}\r
+\r
+#else /* not DEBUG */\r
+\r
+#undef assert\r
+#define assert(e)\r
+\r
+#define DEBUG_STATEMENT(e)\r
+#define DEBUG_PRINT1(x)\r
+#define DEBUG_PRINT2(x1, x2)\r
+#define DEBUG_PRINT3(x1, x2, x3)\r
+#define DEBUG_PRINT4(x1, x2, x3, x4)\r
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)\r
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)\r
+\r
+#endif /* not DEBUG */\r
+\f\r
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can\r
+ also be assigned to arbitrarily: each pattern buffer stores its own\r
+ syntax, so it can be changed between regex compilations. */\r
+/* This has no initializer because initialized variables in Emacs\r
+ become read-only after dumping. */\r
+reg_syntax_t re_syntax_options;\r
+\r
+\r
+/* Specify the precise syntax of regexps for compilation. This provides\r
+ for compatibility for various utilities which historically have\r
+ different, incompatible syntaxes.\r
+\r
+ The argument SYNTAX is a bit mask comprised of the various bits\r
+ defined in regex.h. We return the old syntax. */\r
+\r
+/* Make SYNTAX the current value of re_syntax_options and return the value\r
+   it replaces.  In a DEBUG build the RE_DEBUG bit of SYNTAX also turns the\r
+   `debug' flag on or off.  */\r
+reg_syntax_t\r
+re_set_syntax(reg_syntax_t syntax)\r
+{\r
+  const reg_syntax_t previous = re_syntax_options;\r
+\r
+#ifdef DEBUG\r
+  debug = (syntax & RE_DEBUG) ? 1 : 0;\r
+#endif /* DEBUG */\r
+  re_syntax_options = syntax;\r
+\r
+  return previous;\r
+}\r
+\f\r
+/* This table gives an error message for each of the error codes listed\r
+ in regex.h. The order here has to be the same as there, because each\r
+ entry is tagged below with the code it belongs to.\r
+ POSIX doesn't require that we do anything for REG_NOERROR,\r
+ but why not be nice? The strings are wrapped in gettext_noop so that\r
+ xgettext can find them. */\r
+\r
+static const char *re_error_msgid[] =\r
+{\r
+ gettext_noop ("Success"), /* REG_NOERROR */\r
+ gettext_noop ("No match"), /* REG_NOMATCH */\r
+ gettext_noop ("Invalid regular expression"), /* REG_BADPAT */\r
+ gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */\r
+ gettext_noop ("Invalid character class name"), /* REG_ECTYPE */\r
+ gettext_noop ("Trailing backslash"), /* REG_EESCAPE */\r
+ gettext_noop ("Invalid back reference"), /* REG_ESUBREG */\r
+ gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */\r
+ gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */\r
+ gettext_noop ("Unmatched \\{"), /* REG_EBRACE */\r
+ gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */\r
+ gettext_noop ("Invalid range end"), /* REG_ERANGE */\r
+ gettext_noop ("Memory exhausted"), /* REG_ESPACE */\r
+ gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */\r
+ gettext_noop ("Premature end of regular expression"), /* REG_EEND */\r
+ gettext_noop ("Regular expression too big"), /* REG_ESIZE */\r
+ gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */\r
+};\r
+\f\r
+/* Avoiding alloca during matching, to placate r_alloc. */\r
+\r
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the\r
+ searching and matching functions should not call alloca. On some\r
+ systems, alloca is implemented in terms of malloc, and if we're\r
+ using the relocating allocator routines, then malloc could cause a\r
+ relocation, which might (if the strings being searched are in the\r
+ ralloc heap) shift the data out from underneath the regexp\r
+ routines.\r
+\r
+ Here's another reason to avoid allocation: Emacs\r
+ processes input from X in a signal handler; processing X input may\r
+ call malloc; if input arrives while a matching routine is calling\r
+ malloc, then we're scrod. But Emacs can't just block input while\r
+ calling matching routines; then we don't notice interrupts when\r
+ they come in. So, Emacs blocks input around all regexp calls\r
+ except the matching calls, which it leaves unprotected, in the\r
+ faith that they will not malloc. */\r
+\r
+/* Normally, this is fine. */\r
+#define MATCH_MAY_ALLOCATE\r
+\r
+/* When using GNU C, we are not REALLY using the C alloca, no matter\r
+ what config.h may say. So don't take precautions for it. */\r
+#ifdef __GNUC__\r
+#undef C_ALLOCA\r
+#endif\r
+\r
+/* The match routines may not allocate if (1) they would do it with malloc\r
+ and (2) it's not safe for them to use malloc.\r
+ Note that if REL_ALLOC is defined, matching would not use malloc for the\r
+ failure stack, but we would still use it for the register vectors;\r
+ so REL_ALLOC should not affect this. */\r
+#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)\r
+#undef MATCH_MAY_ALLOCATE\r
+#endif\r
+\r
+\f\r
+/* Failure stack declarations and macros; both re_compile_fastmap and\r
+ re_match_2 use a failure stack. These have to be macros because of\r
+ REGEX_ALLOCATE_STACK. */\r
+\r
+\r
+/* Number of failure points for which to initially allocate space\r
+ when matching. If this number is exceeded, we allocate more\r
+ space, so it is not a hard limit. */\r
+#ifndef INIT_FAILURE_ALLOC\r
+#define INIT_FAILURE_ALLOC 5\r
+#endif\r
+\r
+/* Roughly the maximum number of failure points on the stack. Would be\r
+ exactly that if always used MAX_FAILURE_ITEMS items each time we failed.\r
+ This is a variable only so users of regex can assign to it; we never\r
+ change it ourselves. */\r
+\r
+/* The failure-stack limit and types come in two variants that differ only\r
+ in integer width: `long' based when INT_IS_16BIT is defined, `int'\r
+ based otherwise. */\r
+#ifdef INT_IS_16BIT\r
+\r
+#if defined (MATCH_MAY_ALLOCATE)\r
+/* 4400 was enough to cause a crash on Alpha OSF/1,\r
+ whose default stack limit is 2mb. */\r
+static long int re_max_failures = 4000;\r
+#else\r
+static long int re_max_failures = 2000;\r
+#endif\r
+\r
+/* One failure-stack slot: holds either a pointer or an integer. */\r
+union fail_stack_elt\r
+{\r
+ unsigned char *pointer;\r
+ long int integer;\r
+};\r
+\r
+typedef union fail_stack_elt fail_stack_elt_t;\r
+\r
+/* The failure stack itself: an array of slots plus its bookkeeping. */\r
+typedef struct\r
+{\r
+ fail_stack_elt_t *stack; /* The slots. */\r
+ unsigned long int size; /* Number of slots; full when avail == size. */\r
+ unsigned long int avail; /* Offset of next open position. */\r
+} fail_stack_type;\r
+\r
+#else /* not INT_IS_16BIT */\r
+\r
+#if defined (MATCH_MAY_ALLOCATE)\r
+/* 4400 was enough to cause a crash on Alpha OSF/1,\r
+ whose default stack limit is 2mb. */\r
+/* NOTE(review): the value below is well above the 4400 mentioned in the\r
+ comment above -- confirm that 20000 is intended for this port. */\r
+static int re_max_failures = 20000;\r
+#else\r
+static int re_max_failures = 2000;\r
+#endif\r
+\r
+/* One failure-stack slot: holds either a pointer or an integer. */\r
+union fail_stack_elt\r
+{\r
+ unsigned char *pointer;\r
+ int integer;\r
+};\r
+\r
+typedef union fail_stack_elt fail_stack_elt_t;\r
+\r
+/* The failure stack itself: an array of slots plus its bookkeeping. */\r
+typedef struct\r
+{\r
+ fail_stack_elt_t *stack; /* The slots. */\r
+ unsigned size; /* Number of slots; full when avail == size. */\r
+ unsigned avail; /* Offset of next open position. */\r
+} fail_stack_type;\r
+\r
+#endif /* INT_IS_16BIT */\r
+\r
+/* Predicates on the failure stack. FAIL_STACK_EMPTY and FAIL_STACK_FULL\r
+ look at a variable named `fail_stack'; FAIL_STACK_PTR_EMPTY looks\r
+ through a pointer named `fail_stack_ptr'. Empty means no element is\r
+ in use; full means every allocated element is in use. */\r
+#define FAIL_STACK_EMPTY() (0 == fail_stack.avail)\r
+#define FAIL_STACK_PTR_EMPTY() (0 == fail_stack_ptr->avail)\r
+#define FAIL_STACK_FULL() (fail_stack.size == fail_stack.avail)\r
+\r
+\r
+/* Macros to initialize and release the failure stack; both operate on a\r
+ variable named `fail_stack' in the expanding scope.\r
+\r
+ When MATCH_MAY_ALLOCATE is defined, INIT_FAIL_STACK allocates room for\r
+ INIT_FAILURE_ALLOC elements with REGEX_ALLOCATE_STACK and makes the\r
+ *enclosing function* execute `return -2' if that allocation fails;\r
+ RESET_FAIL_STACK hands the storage to REGEX_FREE_STACK.\r
+\r
+ Otherwise INIT_FAIL_STACK only resets `avail' to zero (no storage is\r
+ allocated here) and RESET_FAIL_STACK expands to nothing. */\r
+\r
+#ifdef MATCH_MAY_ALLOCATE\r
+#define INIT_FAIL_STACK() \\r
+ do { \\r
+ fail_stack.stack = (fail_stack_elt_t *) \\r
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \\r
+ \\r
+ if (fail_stack.stack == NULL) \\r
+ return -2; \\r
+ \\r
+ fail_stack.size = INIT_FAILURE_ALLOC; \\r
+ fail_stack.avail = 0; \\r
+ } while (0)\r
+\r
+#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)\r
+#else /* not MATCH_MAY_ALLOCATE */\r
+#define INIT_FAIL_STACK() \\r
+ do { \\r
+ fail_stack.avail = 0; \\r
+ } while (0)\r
+\r
+#define RESET_FAIL_STACK()\r
+#endif /* not MATCH_MAY_ALLOCATE */\r
+\r
+\r
+/* Double the size of FAIL_STACK, unless it already holds more than\r
+ `re_max_failures' * MAX_FAILURE_ITEMS elements.\r
+\r
+ This is an expression. It yields 1 on success, and 0 if either the\r
+ stack was already too large (nothing is touched in that case) or the\r
+ reallocation returned NULL. In the latter case the NULL has already\r
+ been stored in FAIL_STACK.stack, while FAIL_STACK.size keeps its old\r
+ value.\r
+\r
+ FAIL_STACK is evaluated several times, so it must be free of side\r
+ effects. REGEX_REALLOCATE_STACK requires `destination' be declared.\r
+\r
+ NOTE(review): the size limit is computed in the type of\r
+ `re_max_failures' and then cast to plain `unsigned'; confirm that this\r
+ cannot truncate in the INT_IS_16BIT configuration. */\r
+\r
+#define DOUBLE_FAIL_STACK(fail_stack) \\r
+ ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \\r
+ ? 0 \\r
+ : ((fail_stack).stack = (fail_stack_elt_t *) \\r
+ REGEX_REALLOCATE_STACK ((fail_stack).stack, \\r
+ (fail_stack).size * sizeof (fail_stack_elt_t), \\r
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \\r
+ \\r
+ (fail_stack).stack == NULL \\r
+ ? 0 \\r
+ : ((fail_stack).size <<= 1, \\r
+ 1)))\r
+\r
+\r
+/* Push pointer POINTER on FAIL_STACK, doubling the stack first if it is\r
+ full.\r
+ This is an expression: it yields 1 if the pointer was pushed and 0 if\r
+ the stack was full and DOUBLE_FAIL_STACK could not enlarge it.\r
+ Note that the fullness test, FAIL_STACK_FULL, always inspects the\r
+ variable named `fail_stack' rather than the FAIL_STACK argument, so\r
+ the two are expected to be the same object. POINTER is stored without\r
+ a cast, so it must be assignable to `unsigned char *'. */\r
+#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \\r
+ ((FAIL_STACK_FULL () \\r
+ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \\r
+ ? 0 \\r
+ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \\r
+ 1))\r
+\r
+/* Push a pointer value onto the failure stack; ITEM is cast to\r
+ `unsigned char *'.\r
+ Assumes the variable `fail_stack'. No check is made that a free slot\r
+ exists, so room must have been ensured beforehand. Probably should\r
+ only be called from within `PUSH_FAILURE_POINT'. */\r
+#define PUSH_FAILURE_POINTER(item) \\r
+ fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)\r
+\r
+/* This pushes an integer-valued item onto the failure stack.\r
+ Assumes the variable `fail_stack'. No check is made that a free slot\r
+ exists. Probably should only be called from within\r
+ `PUSH_FAILURE_POINT'. */\r
+#define PUSH_FAILURE_INT(item) \\r
+ fail_stack.stack[fail_stack.avail++].integer = (item)\r
+\r
+/* Push a fail_stack_elt_t value onto the failure stack.\r
+ Assumes the variable `fail_stack'. No check is made that a free slot\r
+ exists. Probably should only be called from within\r
+ `PUSH_FAILURE_POINT'. */\r
+#define PUSH_FAILURE_ELT(item) \\r
+ fail_stack.stack[fail_stack.avail++] = (item)\r
+\r
+/* These three POP... operations complement the three PUSH... operations.\r
+ Each decrements `fail_stack.avail' and yields the element that was on\r
+ top, viewed as a pointer, an integer, or a whole fail_stack_elt_t.\r
+ All assume that `fail_stack' is nonempty; none of them checks. */\r
+#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer\r
+#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer\r
+#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]\r
+\r
+/* Used to omit pushing failure point id's when we're not debugging.\r
+ With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores\r
+ the popped integer into the `integer' member of *ITEM_ADDR; without\r
+ DEBUG both expand to nothing, so no stack slot is used for the id. */\r
+#ifdef DEBUG\r
+#define DEBUG_PUSH PUSH_FAILURE_INT\r
+#define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()\r
+#else /* not DEBUG */\r
+#define DEBUG_PUSH(item)\r
+#define DEBUG_POP(item_addr)\r
+#endif /* not DEBUG */\r
+\r
+\r
+/* Push the information about the state we will need\r
+ if we ever fail back to it.\r
+\r
+ Pushed, in order: for each register from lowest_active_reg through\r
+ highest_active_reg its start, end and info word; then\r
+ lowest_active_reg, highest_active_reg, PATTERN_PLACE and STRING_PLACE;\r
+ and finally (only when DEBUG is defined) the failure id.\r
+ POP_FAILURE_POINT pops these in the reverse order.\r
+\r
+ Requires variables fail_stack, regstart, regend, reg_info,\r
+ lowest_active_reg and highest_active_reg be declared. The macro\r
+ declares its own `destination', which REGEX_REALLOCATE_STACK (used by\r
+ DOUBLE_FAIL_STACK) is documented to require.\r
+\r
+ Before pushing, the stack is doubled until NUM_FAILURE_ITEMS slots are\r
+ free. Does `return FAILURE_CODE' from the enclosing function if\r
+ DOUBLE_FAIL_STACK fails, i.e. on memory exhaustion or when the stack\r
+ is already at its size limit. */\r
+\r
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \\r
+ do { \\r
+ char *destination; \\r
+ /* Must be a signed type, so when we don't save any registers, the \\r
+ arithmetic of 0 + -1 isn't done as unsigned. */ \\r
+ /* Can't be plain int, since there is not a shred of a guarantee that \\r
+ int is wide enough to hold a value of something to which pointer can \\r
+ be assigned; hence s_reg_t. */ \\r
+ s_reg_t this_reg; \\r
+ \\r
+ DEBUG_STATEMENT (failure_id++); \\r
+ DEBUG_STATEMENT (nfailure_points_pushed++); \\r
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \\r
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\\r
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\\r
+ \\r
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \\r
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \\r
+ \\r
+ /* Ensure we have enough space allocated for what we will push. */ \\r
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \\r
+ { \\r
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \\r
+ return failure_code; \\r
+ \\r
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \\r
+ (fail_stack).size); \\r
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\\r
+ } \\r
+ \\r
+ /* Push the info, starting with the registers. */ \\r
+ DEBUG_PRINT1 ("\n"); \\r
+ \\r
+ if (1) \\r
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \\r
+ this_reg++) \\r
+ { \\r
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \\r
+ DEBUG_STATEMENT (num_regs_pushed++); \\r
+ \\r
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \\r
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \\r
+ \\r
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \\r
+ PUSH_FAILURE_POINTER (regend[this_reg]); \\r
+ \\r
+ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \\r
+ DEBUG_PRINT2 (" match_null=%d", \\r
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \\r
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \\r
+ DEBUG_PRINT2 (" matched_something=%d", \\r
+ MATCHED_SOMETHING (reg_info[this_reg])); \\r
+ DEBUG_PRINT2 (" ever_matched=%d", \\r
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \\r
+ DEBUG_PRINT1 ("\n"); \\r
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \\r
+ } \\r
+ \\r
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\\r
+ PUSH_FAILURE_INT (lowest_active_reg); \\r
+ \\r
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\\r
+ PUSH_FAILURE_INT (highest_active_reg); \\r
+ \\r
+ DEBUG_PRINT2 (" Pushing pattern 0x%x:\n", pattern_place); \\r
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \\r
+ PUSH_FAILURE_POINTER (pattern_place); \\r
+ \\r
+ DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \\r
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \\r
+ size2); \\r
+ DEBUG_PRINT1 ("'\n"); \\r
+ PUSH_FAILURE_POINTER (string_place); \\r
+ \\r
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \\r
+ DEBUG_PUSH (failure_id); \\r
+ } while (0)\r
+\r
+/* This is the number of items that are pushed and popped on the stack\r
+ for each register: its start, its end and its info word. */\r
+#define NUM_REG_ITEMS 3\r
+\r
+/* Individual items aside from the registers: the lowest and highest\r
+ active register numbers, the pattern position and the string\r
+ position, plus the failure point id when DEBUG is defined. */\r
+#ifdef DEBUG\r
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */\r
+#else\r
+#define NUM_NONREG_ITEMS 4\r
+#endif\r
+\r
+/* Nominal per-failure-point item count, used only to scale\r
+ `re_max_failures' into a stack-size limit (see DOUBLE_FAIL_STACK). */\r
+/* We used to use (num_regs - 1), which is the number of registers\r
+ this regexp will save; but that was changed to 5\r
+ to avoid stack overflow for a regexp with lots of parens. */\r
+#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)\r
+\r
+/* We actually push this many items: NUM_REG_ITEMS for every register\r
+ from lowest_active_reg through highest_active_reg, plus the\r
+ non-register items. The `0 ? 0 :' selects its second branch\r
+ unconditionally. */\r
+#define NUM_FAILURE_ITEMS \\r
+ (((0 \\r
+ ? 0 : highest_active_reg - lowest_active_reg + 1) \\r
+ * NUM_REG_ITEMS) \\r
+ + NUM_NONREG_ITEMS)\r
+\r
+/* How many items can still be added to the stack without overflowing it.\r
+ Assumes the variable `fail_stack'. */\r
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)\r
+\r
+\r
+/* Pops what PUSH_FAILURE_POINT pushes.\r
+\r
+ We restore into the parameters, all of which should be lvalues:\r
+ STR -- the saved data position.\r
+ PAT -- the saved pattern position.\r
+ LOW_REG, HIGH_REG -- the lowest and highest active registers.\r
+ REGSTART, REGEND -- arrays of string positions.\r
+ REG_INFO -- array of information about each subexpression.\r
+\r
+ If the saved string position is NULL, STR is left unchanged.\r
+\r
+ Also assumes the variables `fail_stack', `highest_active_reg' and\r
+ `set_regs_matched_done' (the latter is reset to 0), and (if\r
+ debugging) `bufp', `pend', `string1', `size1', `string2', and `size2'.\r
+\r
+ The expansion is a bare brace block that declares locals, not a\r
+ `do ... while (0)' statement. Because the register-restoring loop is\r
+ guarded by `if (1)', the `else' branch that follows it is never\r
+ executed. */\r
+\r
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\\r
+{ \\r
+ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \\r
+ s_reg_t this_reg; \\r
+ const unsigned char *string_temp; \\r
+ \\r
+ assert (!FAIL_STACK_EMPTY ()); \\r
+ \\r
+ /* Remove failure points and point to how many regs pushed. */ \\r
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \\r
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \\r
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \\r
+ \\r
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \\r
+ \\r
+ DEBUG_POP (&failure_id); \\r
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \\r
+ \\r
+ /* If the saved string location is NULL, it came from an \\r
+ on_failure_keep_string_jump opcode, and we want to throw away the \\r
+ saved NULL, thus retaining our current position in the string. */ \\r
+ string_temp = POP_FAILURE_POINTER (); \\r
+ if (string_temp != NULL) \\r
+ str = (const char *) string_temp; \\r
+ \\r
+ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \\r
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \\r
+ DEBUG_PRINT1 ("'\n"); \\r
+ \\r
+ pat = (unsigned char *) POP_FAILURE_POINTER (); \\r
+ DEBUG_PRINT2 (" Popping pattern 0x%x:\n", pat); \\r
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \\r
+ \\r
+ /* Restore register info. */ \\r
+ high_reg = (active_reg_t) POP_FAILURE_INT (); \\r
+ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \\r
+ \\r
+ low_reg = (active_reg_t) POP_FAILURE_INT (); \\r
+ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \\r
+ \\r
+ if (1) \\r
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \\r
+ { \\r
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \\r
+ \\r
+ reg_info[this_reg].word = POP_FAILURE_ELT (); \\r
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \\r
+ \\r
+ regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \\r
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \\r
+ \\r
+ regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \\r
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \\r
+ } \\r
+ else \\r
+ { \\r
+ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \\r
+ { \\r
+ reg_info[this_reg].word.integer = 0; \\r
+ regend[this_reg] = 0; \\r
+ regstart[this_reg] = 0; \\r
+ } \\r
+ highest_active_reg = high_reg; \\r
+ } \\r
+ \\r
+ set_regs_matched_done = 0; \\r
+ DEBUG_STATEMENT (nfailure_points_popped++); \\r
+} /* POP_FAILURE_POINT */\r
+\r
+\r
+\f\r
+/* Structure for per-register (a.k.a. per-group) information.\r
+ Other register information, such as the\r
+ starting and ending positions (which are addresses), and the list of\r
+ inner groups (which is a bits list) are maintained in separate\r
+ variables.\r
+\r
+ We are making a (strictly speaking) nonportable assumption here: that\r
+ the compiler will pack our bit fields into something that fits into\r
+ the type of `word', i.e., is something that fits into one item on the\r
+ failure stack. PUSH_FAILURE_POINT and POP_FAILURE_POINT save and\r
+ restore the flags by copying `word' alone, so they rely on this. */\r
+\r
+\r
+/* Declarations and macros for re_match_2. */\r
+\r
+typedef union\r
+{\r
+ fail_stack_elt_t word; /* The whole record as one failure-stack item. */\r
+ struct\r
+ {\r
+ /* This field is one if this group can match the empty string,\r
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */\r
+#define MATCH_NULL_UNSET_VALUE 3\r
+ unsigned match_null_string_p : 2;\r
+ unsigned is_active : 1;\r
+ unsigned matched_something : 1; /* Set by SET_REGS_MATCHED. */\r
+ unsigned ever_matched_something : 1; /* Set by SET_REGS_MATCHED. */\r
+ } bits;\r
+} register_info_type;\r
+\r
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)\r
+#define IS_ACTIVE(R) ((R).bits.is_active)\r
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)\r
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)\r
+\r
+\r
+/* Record that a real character has just been matched: every register\r
+ from lowest_active_reg through highest_active_reg gets both its\r
+ `matched_something' and its `ever_matched_something' flag set.\r
+ The work is skipped while `set_regs_matched_done' is nonzero, and that\r
+ flag is raised here once the registers have been marked.\r
+ Assumes the variables `reg_info', `lowest_active_reg',\r
+ `highest_active_reg' and `set_regs_matched_done'. */\r
+#define SET_REGS_MATCHED() \\r
+ do \\r
+ { \\r
+ if (set_regs_matched_done == 0) \\r
+ { \\r
+ active_reg_t reg_index = lowest_active_reg; \\r
+ set_regs_matched_done = 1; \\r
+ while (reg_index <= highest_active_reg) \\r
+ { \\r
+ EVER_MATCHED_SOMETHING (reg_info[reg_index]) = 1; \\r
+ MATCHED_SOMETHING (reg_info[reg_index]) = 1; \\r
+ reg_index++; \\r
+ } \\r
+ } \\r
+ } \\r
+ while (0)\r
+\r
+/* Registers are set to a sentinel when they haven't yet matched.\r
+ The sentinel is the address of the private object `reg_unset_dummy',\r
+ so it compares unequal to any position inside a subject string and to\r
+ a null pointer. REG_UNSET (E) is nonzero exactly when E is that\r
+ sentinel. */\r
+static char reg_unset_dummy;\r
+#define REG_UNSET_VALUE (&reg_unset_dummy)\r
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)\r
+\f\r
+/* Subroutine declarations and macros for regex_compile.\r
+ The prototypes are wrapped in _RE_ARGS and all name file-local\r
+ (`static') functions: regex_compile itself, the store_op and insert_op\r
+ helpers that STORE_JUMP, STORE_JUMP2, INSERT_JUMP and INSERT_JUMP2\r
+ expand to, the two anchor-context predicates used for `^' and `$',\r
+ and compile_range. */\r
+\r
+static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,\r
+ reg_syntax_t syntax,\r
+ struct re_pattern_buffer *bufp));\r
+static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));\r
+static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,\r
+ int arg1, int arg2));\r
+static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,\r
+ int arg, unsigned char *end));\r
+static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,\r
+ int arg1, int arg2, unsigned char *end));\r
+static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,\r
+ reg_syntax_t syntax));\r
+static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,\r
+ reg_syntax_t syntax));\r
+static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,\r
+ const char *pend,\r
+ char *translate,\r
+ reg_syntax_t syntax,\r
+ unsigned char *b));\r
+\r
+/* Fetch the next character in the uncompiled pattern---translating it\r
+ if necessary. Also cast from a signed character in the constant\r
+ string passed to us by the user to an unsigned char that we can use\r
+ as an array index (in, e.g., `translate').\r
+ Assumes the variables `p', `pend' and `translate'. If the pattern is\r
+ already exhausted (p == pend) the *enclosing function* returns\r
+ REG_EEND. A definition of PATFETCH made before this point takes\r
+ precedence. */\r
+#ifndef PATFETCH\r
+#define PATFETCH(c) \\r
+ do {if (p == pend) return REG_EEND; \\r
+ c = (unsigned char) *p++; \\r
+ if (translate) c = (unsigned char) translate[c]; \\r
+ } while (0)\r
+#endif\r
+\r
+/* Fetch the next character in the uncompiled pattern, with no\r
+ translation. Like PATFETCH, makes the enclosing function return\r
+ REG_EEND when p == pend. */\r
+#define PATFETCH_RAW(c) \\r
+ do {if (p == pend) return REG_EEND; \\r
+ c = (unsigned char) *p++; \\r
+ } while (0)\r
+\r
+/* Go backwards one character in the pattern. This only decrements `p';\r
+ it does not check that `p' is past the start of the pattern. */\r
+#define PATUNFETCH p--\r
+\r
+\r
+/* If `translate' is non-null, return translate[D], else just D. We\r
+ cast the subscript to translate because some data is declared as\r
+ `char *', to avoid warnings when a string constant is passed. But\r
+ when we use a character as a subscript we must make it unsigned.\r
+ The translated value is cast to `char'; D itself is returned uncast.\r
+ Assumes the variable `translate'. A definition of TRANSLATE made\r
+ before this point takes precedence. */\r
+#ifndef TRANSLATE\r
+#define TRANSLATE(d) \\r
+ (translate ? (char) translate[(unsigned char) (d)] : (d))\r
+#endif\r
+\r
+\r
+/* Macros for outputting the compiled pattern into `buffer'.\r
+ They assume the variables `bufp' (the pattern buffer) and `b' (the\r
+ position at which the next byte is appended). */\r
+\r
+/* If the buffer isn't allocated when it comes in, use this. */\r
+#define INIT_BUF_SIZE 32\r
+\r
+/* Make sure we have at least N more bytes of space in buffer, calling\r
+ EXTEND_BUFFER as often as needed. The expansion is a bare `while'\r
+ statement, and EXTEND_BUFFER can make the enclosing function return. */\r
+#define GET_BUFFER_SPACE(n) \\r
+ while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \\r
+ EXTEND_BUFFER ()\r
+\r
+/* Make sure we have one more byte of buffer space and then add C to it.\r
+ C is converted to `unsigned char'. */\r
+#define BUF_PUSH(c) \\r
+ do { \\r
+ GET_BUFFER_SPACE (1); \\r
+ *b++ = (unsigned char) (c); \\r
+ } while (0)\r
+\r
+\r
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */\r
+#define BUF_PUSH_2(c1, c2) \\r
+ do { \\r
+ GET_BUFFER_SPACE (2); \\r
+ *b++ = (unsigned char) (c1); \\r
+ *b++ = (unsigned char) (c2); \\r
+ } while (0)\r
+\r
+\r
+/* As with BUF_PUSH_2, except for three bytes. */\r
+#define BUF_PUSH_3(c1, c2, c3) \\r
+ do { \\r
+ GET_BUFFER_SPACE (3); \\r
+ *b++ = (unsigned char) (c1); \\r
+ *b++ = (unsigned char) (c2); \\r
+ *b++ = (unsigned char) (c3); \\r
+ } while (0)\r
+\r
+\r
+/* Store a jump with opcode OP at LOC to location TO. We store a\r
+ relative address offset by the three bytes the jump itself occupies,\r
+ i.e. TO - LOC - 3 converted to int. LOC is evaluated twice. */\r
+#define STORE_JUMP(op, loc, to) \\r
+ store_op1 (op, loc, (int) ((to) - (loc) - 3))\r
+\r
+/* Likewise, for a two-argument jump; ARG is passed through unchanged as\r
+ the second operand. */\r
+#define STORE_JUMP2(op, loc, to, arg) \\r
+ store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)\r
+\r
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */\r
+#define INSERT_JUMP(op, loc, to) \\r
+ insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)\r
+\r
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */\r
+#define INSERT_JUMP2(op, loc, to, arg) \\r
+ insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)\r
+\r
+\r
+/* This is not an arbitrary limit: the arguments which represent offsets\r
+ into the pattern are two bytes long. So if 2^16 bytes turns out to\r
+ be too small, many things would have to change. */\r
+/* Any other compiler which, like MSC, has an allocation limit below 2^16\r
+ bytes will have to use an approach similar to what was done below for\r
+ MSC and lower MAX_BUF_SIZE a bit. Otherwise you may end up\r
+ reallocating to 0 bytes, which is not going to work.\r
+ You have been warned!! */\r
+#if defined(_MSC_VER) && !defined(_WIN32)\r
+/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.\r
+ The REALLOC define eliminates a flurry of conversion warnings,\r
+ but is not required. */\r
+#define MAX_BUF_SIZE 65500L\r
+#define REALLOC(p,s) realloc ((p), (size_t) (s))\r
+#else /* not 16-bit Microsoft C */\r
+#define MAX_BUF_SIZE (1L << 16)\r
+#define REALLOC(p,s) realloc ((p), (s))\r
+#endif /* not 16-bit Microsoft C */\r
+\r
+/* Extend the buffer by twice its current size via realloc and\r
+ reset the pointers that pointed into the old block to point to the\r
+ correct places in the new one. If extending the buffer results in it\r
+ being larger than MAX_BUF_SIZE, then flag memory exhausted. */\r
+#define EXTEND_BUFFER() \\r
+ do { \\r
+ unsigned char *old_buffer = bufp->buffer; \\r
+ if (bufp->allocated == MAX_BUF_SIZE) \\r
+ return REG_ESIZE; \\r
+ bufp->allocated <<= 1; \\r
+ if (bufp->allocated > MAX_BUF_SIZE) \\r
+ bufp->allocated = MAX_BUF_SIZE; \\r
+ bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\\r
+ if (bufp->buffer == NULL) \\r
+ return REG_ESPACE; \\r
+ /* If the buffer moved, move all the pointers into it. */ \\r
+ if (old_buffer != bufp->buffer) \\r
+ { \\r
+ b = (b - old_buffer) + bufp->buffer; \\r
+ begalt = (begalt - old_buffer) + bufp->buffer; \\r
+ if (fixup_alt_jump) \\r
+ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\\r
+ if (laststart) \\r
+ laststart = (laststart - old_buffer) + bufp->buffer; \\r
+ if (pending_exact) \\r
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \\r
+ } \\r
+ } while (0)\r
+\r
+\r
+/* Since we have one byte reserved for the register number argument to\r
+ {start,stop}_memory, the maximum number of groups we can report\r
+ things about is what fits in that byte. */\r
+#define MAX_REGNUM 255\r
+\r
+/* But patterns can have more than `MAX_REGNUM' registers. We just\r
+ ignore the excess. The counter type is therefore wider than the\r
+ single byte that is stored in the compiled pattern. */\r
+typedef unsigned regnum_t;\r
+\r
+\r
+/* Types and macros for the compile stack, which regex_compile uses to\r
+ keep track of unclosed groups. */\r
+\r
+/* Since offsets can go either forwards or backwards, this type needs to\r
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */\r
+/* int may be not enough when sizeof(int) == 2. */\r
+typedef long pattern_offset_t;\r
+\r
+typedef struct\r
+{\r
+ pattern_offset_t begalt_offset;\r
+ pattern_offset_t fixup_alt_jump;\r
+ pattern_offset_t inner_group_offset;\r
+ pattern_offset_t laststart_offset;\r
+ regnum_t regnum;\r
+} compile_stack_elt_t;\r
+\r
+\r
+typedef struct\r
+{\r
+ compile_stack_elt_t *stack;\r
+ unsigned size; /* Number of elements `stack' has room for. */\r
+ unsigned avail; /* Offset of next open position. */\r
+} compile_stack_type;\r
+\r
+\r
+#define INIT_COMPILE_STACK_SIZE 32\r
+\r
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)\r
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)\r
+\r
+/* The next available element, i.e. the slot at index `avail' -- one past\r
+ the most recently pushed element. Assumes the variable\r
+ `compile_stack', as do the two predicates above. */\r
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])\r
+\r
+\r
+/* Set the bit for character C in a list.\r
+ The list is the bitmap that starts at `b': C, converted to\r
+ `unsigned char', selects byte C / BYTEWIDTH and bit C % BYTEWIDTH\r
+ within it. C is fully parenthesized in both places, so an expression\r
+ argument is converted as a whole. C is evaluated twice, so it must\r
+ not have side effects. */\r
+#define SET_LIST_BIT(c) \\r
+ (b[((unsigned char) (c)) / BYTEWIDTH] \\r
+ |= 1 << (((unsigned char) (c)) % BYTEWIDTH))\r
+\r
+\r
+/* Get the next unsigned number in the uncompiled pattern.\r
+ Reads decimal digits through PATFETCH and accumulates them into NUM,\r
+ using (and overwriting) the variable `c'. A negative NUM on entry is\r
+ reset to zero once the first digit is seen; if no digit is found NUM\r
+ is left unchanged. Every PATFETCH here is preceded by a `p != pend'\r
+ check. When a non-digit ends the number, that character has been\r
+ consumed and is left in `c'.\r
+ The expansion is a bare brace block, not a `do ... while (0)'.\r
+ NOTE(review): NUM is not bounded while digits accumulate, so a long\r
+ enough digit string can overflow it before any caller gets to\r
+ range-check the result. */\r
+#define GET_UNSIGNED_NUMBER(num) \\r
+ { if (p != pend) \\r
+ { \\r
+ PATFETCH (c); \\r
+ while (ISDIGIT (c)) \\r
+ { \\r
+ if (num < 0) \\r
+ num = 0; \\r
+ num = num * 10 + c - '0'; \\r
+ if (p == pend) \\r
+ break; \\r
+ PATFETCH (c); \\r
+ } \\r
+ } \\r
+ }\r
+\r
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)\r
+/* The GNU C library provides support for user-defined character classes\r
+ and the functions from ISO C Amendment 1. In this configuration a\r
+ class name is valid if wctype accepts it. */\r
+# ifdef CHARCLASS_NAME_MAX\r
+# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX\r
+# else\r
+/* This shouldn't happen but some implementations might still have this\r
+ problem. Use a reasonable default value. */\r
+# define CHAR_CLASS_MAX_LENGTH 256\r
+# endif\r
+\r
+# define IS_CHAR_CLASS(string) wctype (string)\r
+#else /* no wctype support */\r
+# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */\r
+\r
+# define IS_CHAR_CLASS(string) \\r
+ (STREQ (string, "alpha") || STREQ (string, "upper") \\r
+ || STREQ (string, "lower") || STREQ (string, "digit") \\r
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \\r
+ || STREQ (string, "space") || STREQ (string, "print") \\r
+ || STREQ (string, "punct") || STREQ (string, "graph") \\r
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))\r
+#endif /* no wctype support */\r
+\f\r
+#ifndef MATCH_MAY_ALLOCATE\r
+\r
+/* If we cannot allocate large objects within re_match_2_internal,\r
+ we make the fail stack and register vectors global.\r
+ The fail stack, we grow to the maximum size when a regexp\r
+ is compiled.\r
+ The register vectors, we adjust in size each time we\r
+ compile a regexp, according to the number of registers it needs.\r
+ These definitions exist only when MATCH_MAY_ALLOCATE is not defined,\r
+ and all of them are file-local (`static'). */\r
+\r
+static fail_stack_type fail_stack;\r
+\r
+/* Size with which the following vectors are currently allocated.\r
+ That is so we can make them bigger as needed,\r
+ but never make them smaller. Maintained by regex_grow_registers. */\r
+static int regs_allocated_size;\r
+\r
+static const char ** regstart, ** regend;\r
+static const char ** old_regstart, ** old_regend;\r
+static const char **best_regstart, **best_regend;\r
+static register_info_type *reg_info;\r
+static const char **reg_dummy;\r
+static register_info_type *reg_info_dummy;\r
+\r
+/* Make the register vectors big enough for NUM_REGS registers,\r
+ but don't make them smaller.\r
+\r
+ NUM_REGS is the number of registers the vectors must be able to hold.\r
+ When it exceeds `regs_allocated_size', each of the nine file-level\r
+ vectors is resized through RETALLOC_IF and `regs_allocated_size' is\r
+ updated; otherwise nothing happens. No value is returned, and the\r
+ results of the individual reallocations are not checked here. */\r
+\r
+static void\r
+regex_grow_registers (int num_regs)\r
+{\r
+ if (num_regs > regs_allocated_size)\r
+ {\r
+ RETALLOC_IF (regstart, num_regs, const char *);\r
+ RETALLOC_IF (regend, num_regs, const char *);\r
+ RETALLOC_IF (old_regstart, num_regs, const char *);\r
+ RETALLOC_IF (old_regend, num_regs, const char *);\r
+ RETALLOC_IF (best_regstart, num_regs, const char *);\r
+ RETALLOC_IF (best_regend, num_regs, const char *);\r
+ RETALLOC_IF (reg_info, num_regs, register_info_type);\r
+ RETALLOC_IF (reg_dummy, num_regs, const char *);\r
+ RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);\r
+\r
+ regs_allocated_size = num_regs;\r
+ }\r
+}\r
+\r
+#endif /* not MATCH_MAY_ALLOCATE */\r
+\f\r
+static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type\r
+ compile_stack,\r
+ regnum_t regnum));\r
+\r
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.\r
+ Returns one of error codes defined in `regex.h', or zero for success.\r
+\r
+ Assumes the `allocated' (and perhaps `buffer') and `translate'\r
+ fields are set in BUFP on entry. If `allocated' is zero, a buffer of\r
+ INIT_BUF_SIZE bytes is allocated (or `buffer' is reallocated to that\r
+ size if it is non-null).\r
+\r
+ If it succeeds, results are put in BUFP (if it returns an error, the\r
+ contents of BUFP are undefined):\r
+ `buffer' is the compiled pattern;\r
+ `syntax' is set to SYNTAX;\r
+ `used' is set to the length of the compiled pattern;\r
+ `fastmap_accurate' is zero;\r
+ `re_nsub' is the number of subexpressions in PATTERN;\r
+ `not_bol' and `not_eol' are zero;\r
+\r
+ The `fastmap' and `newline_anchor' fields are neither\r
+ examined nor set. */\r
+\r
+/* Return VALUE from the enclosing function, first freeing the storage of\r
+ the compile stack. Expands to a single `return' statement and assumes\r
+ the variable `compile_stack'. */\r
+#define FREE_STACK_RETURN(value) \\r
+ return (free (compile_stack.stack), value)\r
+\r
+static reg_errcode_t\r
+regex_compile (const char *pattern,\r
+ size_t size,\r
+ reg_syntax_t syntax,\r
+ struct re_pattern_buffer *bufp)\r
+{\r
+ /* We fetch characters from PATTERN here. Even though PATTERN is\r
+ `char *' (i.e., signed), we declare these variables as unsigned, so\r
+ they can be reliably used as array indices. */\r
+ register unsigned char c, c1;\r
+\r
+ /* A random temporary spot in PATTERN. */\r
+ const char *p1;\r
+\r
+ /* Points to the end of the buffer, where we should append. */\r
+ register unsigned char *b;\r
+\r
+ /* Keeps track of unclosed groups. */\r
+ compile_stack_type compile_stack;\r
+\r
+ /* Points to the current (ending) position in the pattern. */\r
+ const char *p = pattern;\r
+ const char *pend = pattern + size;\r
+\r
+ /* How to translate the characters in the pattern. */\r
+ RE_TRANSLATE_TYPE translate = bufp->translate;\r
+\r
+ /* Address of the count-byte of the most recently inserted `exactn'\r
+ command. This makes it possible to tell if a new exact-match\r
+ character can be added to that command or if the character requires\r
+ a new `exactn' command. */\r
+ unsigned char *pending_exact = 0;\r
+\r
+ /* Address of start of the most recently finished expression.\r
+ This tells, e.g., postfix * where to find the start of its\r
+ operand. Reset at the beginning of groups and alternatives. */\r
+ unsigned char *laststart = 0;\r
+\r
+ /* Address of beginning of regexp, or inside of last group. */\r
+ unsigned char *begalt;\r
+\r
+ /* Place in the uncompiled pattern (i.e., the {) to\r
+ which to go back if the interval is invalid. */\r
+ const char *beg_interval;\r
+\r
+ /* Address of the place where a forward jump should go to the end of\r
+ the containing expression. Each alternative of an `or' -- except the\r
+ last -- ends with a forward jump of this sort. */\r
+ unsigned char *fixup_alt_jump = 0;\r
+\r
+ /* Counts open-groups as they are encountered. Remembered for the\r
+ matching close-group on the compile stack, so the same register\r
+ number is put in the stop_memory as the start_memory. */\r
+ regnum_t regnum = 0;\r
+\r
+#ifdef DEBUG\r
+ DEBUG_PRINT1 ("\nCompiling pattern: ");\r
+ if (debug)\r
+ {\r
+ unsigned debug_count;\r
+\r
+ for (debug_count = 0; debug_count < size; debug_count++)\r
+ putchar (pattern[debug_count]);\r
+ putchar ('\n');\r
+ }\r
+#endif /* DEBUG */\r
+\r
+ /* Initialize the compile stack. */\r
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);\r
+ if (compile_stack.stack == NULL)\r
+ return REG_ESPACE;\r
+\r
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;\r
+ compile_stack.avail = 0;\r
+\r
+ /* Initialize the pattern buffer. */\r
+ bufp->syntax = syntax;\r
+ bufp->fastmap_accurate = 0;\r
+ bufp->not_bol = bufp->not_eol = 0;\r
+\r
+ /* Set `used' to zero, so that if we return an error, the pattern\r
+ printer (for debugging) will think there's no pattern. We reset it\r
+ at the end. */\r
+ bufp->used = 0;\r
+\r
+ /* Always count groups, whether or not bufp->no_sub is set. */\r
+ bufp->re_nsub = 0;\r
+\r
+#if !defined (emacs) && !defined (SYNTAX_TABLE)\r
+ /* Initialize the syntax table. */\r
+ init_syntax_once ();\r
+#endif\r
+\r
+ if (bufp->allocated == 0)\r
+ {\r
+ if (bufp->buffer)\r
+ { /* If zero allocated, but buffer is non-null, try to realloc\r
+ enough space. This loses if buffer's address is bogus, but\r
+ that is the user's responsibility. */\r
+ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);\r
+ }\r
+ else\r
+ { /* Caller did not allocate a buffer. Do it for them. */\r
+ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);\r
+ }\r
+ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);\r
+\r
+ bufp->allocated = INIT_BUF_SIZE;\r
+ }\r
+\r
+ begalt = b = bufp->buffer;\r
+\r
+ /* Loop through the uncompiled pattern until we're at the end. */\r
+ while (p != pend)\r
+ {\r
+ PATFETCH (c);\r
+\r
+ switch (c)\r
+ {\r
+ case '^':\r
+ {\r
+ if ( /* If at start of pattern, it's an operator. */\r
+ p == pattern + 1\r
+ /* If context independent, it's an operator. */\r
+ || syntax & RE_CONTEXT_INDEP_ANCHORS\r
+ /* Otherwise, depends on what's come before. */\r
+ || at_begline_loc_p (pattern, p, syntax))\r
+ BUF_PUSH (begline);\r
+ else\r
+ goto normal_char;\r
+ }\r
+ break;\r
+\r
+\r
+ case '$':\r
+ {\r
+ if ( /* If at end of pattern, it's an operator. */\r
+ p == pend\r
+ /* If context independent, it's an operator. */\r
+ || syntax & RE_CONTEXT_INDEP_ANCHORS\r
+ /* Otherwise, depends on what's next. */\r
+ || at_endline_loc_p (p, pend, syntax))\r
+ BUF_PUSH (endline);\r
+ else\r
+ goto normal_char;\r
+ }\r
+ break;\r
+\r
+\r
+ case '+':\r
+ case '?':\r
+ if ((syntax & RE_BK_PLUS_QM)\r
+ || (syntax & RE_LIMITED_OPS))\r
+ goto normal_char;\r
+ handle_plus:\r
+ case '*':\r
+ /* If there is no previous pattern... */\r
+ if (!laststart)\r
+ {\r
+ if (syntax & RE_CONTEXT_INVALID_OPS)\r
+ FREE_STACK_RETURN (REG_BADRPT);\r
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))\r
+ goto normal_char;\r
+ }\r
+\r
+ {\r
+ /* Are we optimizing this jump? */\r
+ boolean keep_string_p = false;\r
+\r
+ /* 1 means zero (many) matches is allowed. */\r
+ char zero_times_ok = 0, many_times_ok = 0;\r
+\r
+ /* If there is a sequence of repetition chars, collapse it\r
+ down to just one (the right one). We can't combine\r
+ interval operators with these because of, e.g., `a{2}*',\r
+ which should only match an even number of `a's. */\r
+\r
+ for (;;)\r
+ {\r
+ zero_times_ok |= c != '+';\r
+ many_times_ok |= c != '?';\r
+\r
+ if (p == pend)\r
+ break;\r
+\r
+ PATFETCH (c);\r
+\r
+ if (c == '*'\r
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))\r
+ ;\r
+\r
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')\r
+ {\r
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);\r
+\r
+ PATFETCH (c1);\r
+ if (!(c1 == '+' || c1 == '?'))\r
+ {\r
+ PATUNFETCH;\r
+ PATUNFETCH;\r
+ break;\r
+ }\r
+\r
+ c = c1;\r
+ }\r
+ else\r
+ {\r
+ PATUNFETCH;\r
+ break;\r
+ }\r
+\r
+ /* If we get here, we found another repeat character. */\r
+ }\r
+\r
+ /* Star, etc. applied to an empty pattern is equivalent\r
+ to an empty pattern. */\r
+ if (!laststart)\r
+ break;\r
+\r
+ /* Now we know whether or not zero matches is allowed\r
+ and also whether or not two or more matches is allowed. */\r
+ if (many_times_ok)\r
+ { /* More than one repetition is allowed, so put in at the\r
+ end a backward relative jump from `b' to before the next\r
+ jump we're going to put in below (which jumps from\r
+ laststart to after this jump).\r
+\r
+ But if we are at the `*' in the exact sequence `.*\n',\r
+ insert an unconditional jump backwards to the .,\r
+ instead of the beginning of the loop. This way we only\r
+ push a failure point once, instead of every time\r
+ through the loop. */\r
+ assert (p - 1 > pattern);\r
+\r
+ /* Allocate the space for the jump. */\r
+ GET_BUFFER_SPACE (3);\r
+\r
+ /* We know we are not at the first character of the pattern,\r
+ because laststart was nonzero. And we've already\r
+ incremented `p', by the way, to be the character after\r
+ the `*'. Do we have to do something analogous here\r
+ for null bytes, because of RE_DOT_NOT_NULL? */\r
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')\r
+ && zero_times_ok\r
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')\r
+ && !(syntax & RE_DOT_NEWLINE))\r
+ { /* We have .*\n. */\r
+ STORE_JUMP (jump, b, laststart);\r
+ keep_string_p = true;\r
+ }\r
+ else\r
+ /* Anything else. */\r
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);\r
+\r
+ /* We've added more stuff to the buffer. */\r
+ b += 3;\r
+ }\r
+\r
+ /* On failure, jump from laststart to b + 3, which will be the\r
+ end of the buffer after this jump is inserted. */\r
+ GET_BUFFER_SPACE (3);\r
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump\r
+ : on_failure_jump,\r
+ laststart, b + 3);\r
+ pending_exact = 0;\r
+ b += 3;\r
+\r
+ if (!zero_times_ok)\r
+ {\r
+ /* At least one repetition is required, so insert a\r
+ `dummy_failure_jump' before the initial\r
+ `on_failure_jump' instruction of the loop. This\r
+ effects a skip over that instruction the first time\r
+ we hit that loop. */\r
+ GET_BUFFER_SPACE (3);\r
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);\r
+ b += 3;\r
+ }\r
+ }\r
+ break;\r
+\r
+\r
+ case '.':\r
+ laststart = b;\r
+ BUF_PUSH (anychar);\r
+ break;\r
+\r
+\r
+ case '[':\r
+ {\r
+ boolean had_char_class = false;\r
+\r
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);\r
+\r
+ /* Ensure that we have enough space to push a charset: the\r
+ opcode, the length count, and the bitset; 34 bytes in all. */\r
+ GET_BUFFER_SPACE (34);\r
+\r
+ laststart = b;\r
+\r
+ /* We test `*p == '^' twice, instead of using an if\r
+ statement, so we only need one BUF_PUSH. */\r
+ BUF_PUSH (*p == '^' ? charset_not : charset);\r
+ if (*p == '^')\r
+ p++;\r
+\r
+ /* Remember the first position in the bracket expression. */\r
+ p1 = p;\r
+\r
+ /* Push the number of bytes in the bitmap. */\r
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);\r
+\r
+ /* Clear the whole map. */\r
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);\r
+\r
+ /* charset_not matches newline according to a syntax bit. */\r
+ if ((re_opcode_t) b[-2] == charset_not\r
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))\r
+ SET_LIST_BIT ('\n');\r
+\r
+ /* Read in characters and ranges, setting map bits. */\r
+ for (;;)\r
+ {\r
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);\r
+\r
+ PATFETCH (c);\r
+\r
+ /* \ might escape characters inside [...] and [^...]. */\r
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')\r
+ {\r
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);\r
+\r
+ PATFETCH (c1);\r
+ SET_LIST_BIT (c1);\r
+ continue;\r
+ }\r
+\r
+ /* Could be the end of the bracket expression. If it's\r
+ not (i.e., when the bracket expression is `[]' so\r
+ far), the ']' character bit gets set way below. */\r
+ if (c == ']' && p != p1 + 1)\r
+ break;\r
+\r
+ /* Look ahead to see if it's a range when the last thing\r
+ was a character class. */\r
+ if (had_char_class && c == '-' && *p != ']')\r
+ FREE_STACK_RETURN (REG_ERANGE);\r
+\r
+ /* Look ahead to see if it's a range when the last thing\r
+ was a character: if this is a hyphen not at the\r
+ beginning or the end of a list, then it's the range\r
+ operator. */\r
+ if (c == '-'\r
+ && !(p - 2 >= pattern && p[-2] == '[')\r
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')\r
+ && *p != ']')\r
+ {\r
+ reg_errcode_t ret\r
+ = compile_range (&p, pend, translate, syntax, b);\r
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);\r
+ }\r
+\r
+ else if (p[0] == '-' && p[1] != ']')\r
+ { /* This handles ranges made up of characters only. */\r
+ reg_errcode_t ret;\r
+\r
+ /* Move past the `-'. */\r
+ PATFETCH (c1);\r
+\r
+ ret = compile_range (&p, pend, translate, syntax, b);\r
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);\r
+ }\r
+\r
+ /* See if we're at the beginning of a possible character\r
+ class. */\r
+\r
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')\r
+ { /* Leave room for the null. */\r
+ char str[CHAR_CLASS_MAX_LENGTH + 1];\r
+\r
+ PATFETCH (c);\r
+ c1 = 0;\r
+\r
+ /* If pattern is `[[:'. */\r
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);\r
+\r
+ for (;;)\r
+ {\r
+ PATFETCH (c);\r
+ if (c == ':' || c == ']' || p == pend\r
+ || c1 == CHAR_CLASS_MAX_LENGTH)\r
+ break;\r
+ str[c1++] = c;\r
+ }\r
+ str[c1] = '\0';\r
+\r
+ /* If it isn't a word bracketed by `[:' and `:]':\r
+ undo the ending character, the letters, and leave\r
+ the leading `:' and `[' (but set bits for them). */\r
+ if (c == ':' && *p == ']')\r
+ {\r
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)\r
+ boolean is_lower = STREQ (str, "lower");\r
+ boolean is_upper = STREQ (str, "upper");\r
+ wctype_t wt;\r
+ int ch;\r
+\r
+ wt = wctype (str);\r
+ if (wt == 0)\r
+ FREE_STACK_RETURN (REG_ECTYPE);\r
+\r
+ /* Throw away the ] at the end of the character\r
+ class. */\r
+ PATFETCH (c);\r
+\r
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);\r
+\r
+ for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)\r
+ {\r
+ if (iswctype (btowc (ch), wt))\r
+ SET_LIST_BIT (ch);\r
+\r
+ if (translate && (is_upper || is_lower)\r
+ && (ISUPPER (ch) || ISLOWER (ch)))\r
+ SET_LIST_BIT (ch);\r
+ }\r
+\r
+ had_char_class = true;\r
+#else\r
+ int ch;\r
+ boolean is_alnum = STREQ (str, "alnum");\r
+ boolean is_alpha = STREQ (str, "alpha");\r
+ boolean is_blank = STREQ (str, "blank");\r
+ boolean is_cntrl = STREQ (str, "cntrl");\r
+ boolean is_digit = STREQ (str, "digit");\r
+ boolean is_graph = STREQ (str, "graph");\r
+ boolean is_lower = STREQ (str, "lower");\r
+ boolean is_print = STREQ (str, "print");\r
+ boolean is_punct = STREQ (str, "punct");\r
+ boolean is_space = STREQ (str, "space");\r
+ boolean is_upper = STREQ (str, "upper");\r
+ boolean is_xdigit = STREQ (str, "xdigit");\r
+\r
+ if (!IS_CHAR_CLASS (str))\r
+ FREE_STACK_RETURN (REG_ECTYPE);\r
+\r
+ /* Throw away the ] at the end of the character\r
+ class. */\r
+ PATFETCH (c);\r
+\r
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);\r
+\r
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)\r
+ {\r
+ /* This was split into 3 if's to\r
+ avoid an arbitrary limit in some compiler. */\r
+ if ( (is_alnum && ISALNUM (ch))\r
+ || (is_alpha && ISALPHA (ch))\r
+ || (is_blank && ISBLANK (ch))\r
+ || (is_cntrl && ISCNTRL (ch)))\r
+ SET_LIST_BIT (ch);\r
+ if ( (is_digit && ISDIGIT (ch))\r
+ || (is_graph && ISGRAPH (ch))\r
+ || (is_lower && ISLOWER (ch))\r
+ || (is_print && ISPRINT (ch)))\r
+ SET_LIST_BIT (ch);\r
+ if ( (is_punct && ISPUNCT (ch))\r
+ || (is_space && ISSPACE (ch))\r
+ || (is_upper && ISUPPER (ch))\r
+ || (is_xdigit && ISXDIGIT (ch)))\r
+ SET_LIST_BIT (ch);\r
+ if ( translate && (is_upper || is_lower)\r
+ && (ISUPPER (ch) || ISLOWER (ch)))\r
+ SET_LIST_BIT (ch);\r
+ }\r
+ had_char_class = true;\r
+#endif /* libc || wctype.h */\r
+ }\r
+ else\r
+ {\r
+ c1++;\r
+ while (c1--)\r
+ PATUNFETCH;\r
+ SET_LIST_BIT ('[');\r
+ SET_LIST_BIT (':');\r
+ had_char_class = false;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ had_char_class = false;\r
+ SET_LIST_BIT (c);\r
+ }\r
+ }\r
+\r
+ /* Discard any (non)matching list bytes that are all 0 at the\r
+ end of the map. Decrease the map-length byte too. */\r
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)\r
+ b[-1]--;\r
+ b += b[-1];\r
+ }\r
+ break;\r
+\r
+\r
+ case '(':\r
+ if (syntax & RE_NO_BK_PARENS)\r
+ goto handle_open;\r
+ else\r
+ goto normal_char;\r
+\r
+\r
+ case ')':\r
+ if (syntax & RE_NO_BK_PARENS)\r
+ goto handle_close;\r
+ else\r
+ goto normal_char;\r
+\r
+\r
+ case '\n':\r
+ if (syntax & RE_NEWLINE_ALT)\r
+ goto handle_alt;\r
+ else\r
+ goto normal_char;\r
+\r
+\r
+ case '|':\r
+ if (syntax & RE_NO_BK_VBAR)\r
+ goto handle_alt;\r
+ else\r
+ goto normal_char;\r
+\r
+\r
+ case '{':\r
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)\r
+ goto handle_interval;\r
+ else\r
+ goto normal_char;\r
+\r
+\r
+ case '\\':\r
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);\r
+\r
+ /* Do not translate the character after the \, so that we can\r
+ distinguish, e.g., \B from \b, even if we normally would\r
+ translate, e.g., B to b. */\r
+ PATFETCH_RAW (c);\r
+\r
+ switch (c)\r
+ {\r
+ case '(':\r
+ if (syntax & RE_NO_BK_PARENS)\r
+ goto normal_backslash;\r
+\r
+ handle_open:\r
+ bufp->re_nsub++;\r
+ regnum++;\r
+\r
+ if (COMPILE_STACK_FULL)\r
+ {\r
+ RETALLOC (compile_stack.stack, compile_stack.size << 1,\r
+ compile_stack_elt_t);\r
+ if (compile_stack.stack == NULL) return REG_ESPACE;\r
+\r
+ compile_stack.size <<= 1;\r
+ }\r
+\r
+ /* These are the values to restore when we hit end of this\r
+ group. They are all relative offsets, so that if the\r
+ whole pattern moves because of realloc, they will still\r
+ be valid. */\r
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;\r
+ COMPILE_STACK_TOP.fixup_alt_jump\r
+ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;\r
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;\r
+ COMPILE_STACK_TOP.regnum = regnum;\r
+\r
+ /* We will eventually replace the 0 with the number of\r
+ groups inner to this one. But do not push a\r
+ start_memory for groups beyond the last one we can\r
+ represent in the compiled pattern. */\r
+ if (regnum <= MAX_REGNUM)\r
+ {\r
+ COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;\r
+ BUF_PUSH_3 (start_memory, regnum, 0);\r
+ }\r
+\r
+ compile_stack.avail++;\r
+\r
+ fixup_alt_jump = 0;\r
+ laststart = 0;\r
+ begalt = b;\r
+ /* If we've reached MAX_REGNUM groups, then this open\r
+ won't actually generate any code, so we'll have to\r
+ clear pending_exact explicitly. */\r
+ pending_exact = 0;\r
+ break;\r
+\r
+\r
+ case ')':\r
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;\r
+\r
+ if (COMPILE_STACK_EMPTY) {\r
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)\r
+ goto normal_backslash;\r
+ else\r
+ FREE_STACK_RETURN (REG_ERPAREN);\r
+ }\r
+ handle_close:\r
+ if (fixup_alt_jump)\r
+ { /* Push a dummy failure point at the end of the\r
+ alternative for a possible future\r
+ `pop_failure_jump' to pop. See comments at\r
+ `push_dummy_failure' in `re_match_2'. */\r
+ BUF_PUSH (push_dummy_failure);\r
+\r
+ /* We allocated space for this jump when we assigned\r
+ to `fixup_alt_jump', in the `handle_alt' case below. */\r
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);\r
+ }\r
+\r
+ /* See similar code for backslashed left paren above. */\r
+ if (COMPILE_STACK_EMPTY) {\r
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)\r
+ goto normal_char;\r
+ else\r
+ FREE_STACK_RETURN (REG_ERPAREN);\r
+ }\r
+ /* Since we just checked for an empty stack above, this\r
+ ``can't happen''. */\r
+ assert (compile_stack.avail != 0);\r
+ {\r
+ /* We don't just want to restore into `regnum', because\r
+ later groups should continue to be numbered higher,\r
+ as in `(ab)c(de)' -- the second group is #2. */\r
+ regnum_t this_group_regnum;\r
+\r
+ compile_stack.avail--;\r
+ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;\r
+ fixup_alt_jump\r
+ = COMPILE_STACK_TOP.fixup_alt_jump\r
+ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1\r
+ : 0;\r
+ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;\r
+ this_group_regnum = COMPILE_STACK_TOP.regnum;\r
+ /* If we've reached MAX_REGNUM groups, then this open\r
+ won't actually generate any code, so we'll have to\r
+ clear pending_exact explicitly. */\r
+ pending_exact = 0;\r
+\r
+ /* We're at the end of the group, so now we know how many\r
+ groups were inside this one. */\r
+ if (this_group_regnum <= MAX_REGNUM)\r
+ {\r
+ unsigned char *inner_group_loc\r
+ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;\r
+\r
+ *inner_group_loc = regnum - this_group_regnum;\r
+ BUF_PUSH_3 (stop_memory, this_group_regnum,\r
+ regnum - this_group_regnum);\r
+ }\r
+ }\r
+ break;\r
+\r
+\r
+ case '|': /* `\|'. */\r
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)\r
+ goto normal_backslash;\r
+ handle_alt:\r
+ if (syntax & RE_LIMITED_OPS)\r
+ goto normal_char;\r
+\r
+ /* Insert before the previous alternative a jump which\r
+ jumps to this alternative if the former fails. */\r
+ GET_BUFFER_SPACE (3);\r
+ INSERT_JUMP (on_failure_jump, begalt, b + 6);\r
+ pending_exact = 0;\r
+ b += 3;\r
+\r
+ /* The alternative before this one has a jump after it\r
+ which gets executed if it gets matched. Adjust that\r
+ jump so it will jump to this alternative's analogous\r
+ jump (put in below, which in turn will jump to the next\r
+ (if any) alternative's such jump, etc.). The last such\r
+ jump jumps to the correct final destination. A picture:\r
+ _____ _____\r
+ | | | |\r
+ | v | v\r
+ a | b | c\r
+\r
+ If we are at `b', then fixup_alt_jump right now points to a\r
+ three-byte space after `a'. We'll put in the jump, set\r
+ fixup_alt_jump to right after `b', and leave behind three\r
+ bytes which we'll fill in when we get to after `c'. */\r
+\r
+ if (fixup_alt_jump)\r
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);\r
+\r
+ /* Mark and leave space for a jump after this alternative,\r
+ to be filled in later either by next alternative or\r
+ when know we're at the end of a series of alternatives. */\r
+ fixup_alt_jump = b;\r
+ GET_BUFFER_SPACE (3);\r
+ b += 3;\r
+\r
+ laststart = 0;\r
+ begalt = b;\r
+ break;\r
+\r
+\r
+ case '{':\r
+ /* If \{ is a literal. */\r
+ if (!(syntax & RE_INTERVALS)\r
+ /* If we're at `\{' and it's not the open-interval\r
+ operator. */\r
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))\r
+ || (p - 2 == pattern && p == pend))\r
+ goto normal_backslash;\r
+\r
+ handle_interval:\r
+ {\r
+ /* If got here, then the syntax allows intervals. */\r
+\r
+ /* At least (most) this many matches must be made. */\r
+ int lower_bound = -1, upper_bound = -1;\r
+\r
+ beg_interval = p - 1;\r
+\r
+ if (p == pend)\r
+ {\r
+ if (syntax & RE_NO_BK_BRACES)\r
+ goto unfetch_interval;\r
+ else\r
+ FREE_STACK_RETURN (REG_EBRACE);\r
+ }\r
+\r
+ GET_UNSIGNED_NUMBER (lower_bound);\r
+\r
+ if (c == ',')\r
+ {\r
+ GET_UNSIGNED_NUMBER (upper_bound);\r
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;\r
+ }\r
+ else\r
+ /* Interval such as `{1}' => match exactly once. */\r
+ upper_bound = lower_bound;\r
+\r
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX\r
+ || lower_bound > upper_bound)\r
+ {\r
+ if (syntax & RE_NO_BK_BRACES)\r
+ goto unfetch_interval;\r
+ else\r
+ FREE_STACK_RETURN (REG_BADBR);\r
+ }\r
+\r
+ if (!(syntax & RE_NO_BK_BRACES))\r
+ {\r
+ if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);\r
+\r
+ PATFETCH (c);\r
+ }\r
+\r
+ if (c != '}')\r
+ {\r
+ if (syntax & RE_NO_BK_BRACES)\r
+ goto unfetch_interval;\r
+ else\r
+ FREE_STACK_RETURN (REG_BADBR);\r
+ }\r
+\r
+ /* We just parsed a valid interval. */\r
+\r
+ /* If it's invalid to have no preceding re. */\r
+ if (!laststart)\r
+ {\r
+ if (syntax & RE_CONTEXT_INVALID_OPS)\r
+ FREE_STACK_RETURN (REG_BADRPT);\r
+ else if (syntax & RE_CONTEXT_INDEP_OPS)\r
+ laststart = b;\r
+ else\r
+ goto unfetch_interval;\r
+ }\r
+\r
+ /* If the upper bound is zero, don't want to succeed at\r
+ all; jump from `laststart' to `b + 3', which will be\r
+ the end of the buffer after we insert the jump. */\r
+ if (upper_bound == 0)\r
+ {\r
+ GET_BUFFER_SPACE (3);\r
+ INSERT_JUMP (jump, laststart, b + 3);\r
+ b += 3;\r
+ }\r
+\r
+ /* Otherwise, we have a nontrivial interval. When\r
+ we're all done, the pattern will look like:\r
+ set_number_at <jump count> <upper bound>\r
+ set_number_at <succeed_n count> <lower bound>\r
+ succeed_n <after jump addr> <succeed_n count>\r
+ <body of loop>\r
+ jump_n <succeed_n addr> <jump count>\r
+ (The upper bound and `jump_n' are omitted if\r
+ `upper_bound' is 1, though.) */\r
+ else\r
+ { /* If the upper bound is > 1, we need to insert\r
+ more at the end of the loop. */\r
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;\r
+\r
+ GET_BUFFER_SPACE (nbytes);\r
+\r
+ /* Initialize lower bound of the `succeed_n', even\r
+ though it will be set during matching by its\r
+ attendant `set_number_at' (inserted next),\r
+ because `re_compile_fastmap' needs to know.\r
+ Jump to the `jump_n' we might insert below. */\r
+ INSERT_JUMP2 (succeed_n, laststart,\r
+ b + 5 + (upper_bound > 1) * 5,\r
+ lower_bound);\r
+ b += 5;\r
+\r
+ /* Code to initialize the lower bound. Insert\r
+ before the `succeed_n'. The `5' is the last two\r
+ bytes of this `set_number_at', plus 3 bytes of\r
+ the following `succeed_n'. */\r
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);\r
+ b += 5;\r
+\r
+ if (upper_bound > 1)\r
+ { /* More than one repetition is allowed, so\r
+ append a backward jump to the `succeed_n'\r
+ that starts this interval.\r
+\r
+ When we've reached this during matching,\r
+ we'll have matched the interval once, so\r
+ jump back only `upper_bound - 1' times. */\r
+ STORE_JUMP2 (jump_n, b, laststart + 5,\r
+ upper_bound - 1);\r
+ b += 5;\r
+\r
+ /* The location we want to set is the second\r
+ parameter of the `jump_n'; that is `b-2' as\r
+ an absolute address. `laststart' will be\r
+ the `set_number_at' we're about to insert;\r
+ `laststart+3' the number to set, the source\r
+ for the relative address. But we are\r
+ inserting into the middle of the pattern --\r
+ so everything is getting moved up by 5.\r
+ Conclusion: (b - 2) - (laststart + 3) + 5,\r
+ i.e., b - laststart.\r
+\r
+ We insert this at the beginning of the loop\r
+ so that if we fail during matching, we'll\r
+ reinitialize the bounds. */\r
+ insert_op2 (set_number_at, laststart, b - laststart,\r
+ upper_bound - 1, b);\r
+ b += 5;\r
+ }\r
+ }\r
+ pending_exact = 0;\r
+ beg_interval = NULL;\r
+ }\r
+ break;\r
+\r
+ unfetch_interval:\r
+ /* If an invalid interval, match the characters as literals. */\r
+ assert (beg_interval);\r
+ p = beg_interval;\r
+ beg_interval = NULL;\r
+\r
+ /* normal_char and normal_backslash need `c'. */\r
+ PATFETCH (c);\r
+\r
+ if (!(syntax & RE_NO_BK_BRACES))\r
+ {\r
+ if (p > pattern && p[-1] == '\\')\r
+ goto normal_backslash;\r
+ }\r
+ goto normal_char;\r
+\r
+#ifdef emacs\r
+ /* There is no way to specify the before_dot and after_dot\r
+ operators. rms says this is ok. --karl */\r
+ case '=':\r
+ BUF_PUSH (at_dot);\r
+ break;\r
+\r
+ case 's':\r
+ laststart = b;\r
+ PATFETCH (c);\r
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);\r
+ break;\r
+\r
+ case 'S':\r
+ laststart = b;\r
+ PATFETCH (c);\r
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);\r
+ break;\r
+#endif /* emacs */\r
+\r
+\r
+ case 'w':\r
+ if (re_syntax_options & RE_NO_GNU_OPS)\r
+ goto normal_char;\r
+ laststart = b;\r
+ BUF_PUSH (wordchar);\r
+ break;\r
+\r
+\r
+ case 'W':\r
+ if (re_syntax_options & RE_NO_GNU_OPS)\r
+ goto normal_char;\r
+ laststart = b;\r
+ BUF_PUSH (notwordchar);\r
+ break;\r
+\r
+\r
+ case '<':\r
+ if (re_syntax_options & RE_NO_GNU_OPS)\r
+ goto normal_char;\r
+ BUF_PUSH (wordbeg);\r
+ break;\r
+\r
+ case '>':\r
+ if (re_syntax_options & RE_NO_GNU_OPS)\r
+ goto normal_char;\r
+ BUF_PUSH (wordend);\r
+ break;\r
+\r
+ case 'b':\r
+ if (re_syntax_options & RE_NO_GNU_OPS)\r
+ goto normal_char;\r
+ BUF_PUSH (wordbound);\r
+ break;\r
+\r
+ case 'B':\r
+ if (re_syntax_options & RE_NO_GNU_OPS)\r
+ goto normal_char;\r
+ BUF_PUSH (notwordbound);\r
+ break;\r
+\r
+ case '`':\r
+ if (re_syntax_options & RE_NO_GNU_OPS)\r
+ goto normal_char;\r
+ BUF_PUSH (begbuf);\r
+ break;\r
+\r
+ case '\'':\r
+ if (re_syntax_options & RE_NO_GNU_OPS)\r
+ goto normal_char;\r
+ BUF_PUSH (endbuf);\r
+ break;\r
+\r
+ case '1': case '2': case '3': case '4': case '5':\r
+ case '6': case '7': case '8': case '9':\r
+ if (syntax & RE_NO_BK_REFS)\r
+ goto normal_char;\r
+\r
+ c1 = c - '0';\r
+\r
+ if (c1 > regnum)\r
+ FREE_STACK_RETURN (REG_ESUBREG);\r
+\r
+ /* Can't back reference to a subexpression if inside of it. */\r
+ if (group_in_compile_stack (compile_stack, (regnum_t) c1))\r
+ goto normal_char;\r
+\r
+ laststart = b;\r
+ BUF_PUSH_2 (duplicate, c1);\r
+ break;\r
+\r
+\r
+ case '+':\r
+ case '?':\r
+ if (syntax & RE_BK_PLUS_QM)\r
+ goto handle_plus;\r
+ else\r
+ goto normal_backslash;\r
+\r
+ default:\r
+ normal_backslash:\r
+ /* You might think it would be useful for \ to mean\r
+ not to translate; but if we don't translate it\r
+ it will never match anything. */\r
+ c = TRANSLATE (c);\r
+ goto normal_char;\r
+ }\r
+ break;\r
+\r
+\r
+ default:\r
+ /* Expects the character in `c'. */\r
+ normal_char:\r
+ /* If no exactn currently being built. */\r
+ if (!pending_exact\r
+\r
+ /* If last exactn not at current position. */\r
+ || pending_exact + *pending_exact + 1 != b\r
+\r
+ /* We have only one byte following the exactn for the count. */\r
+ || *pending_exact == (1 << BYTEWIDTH) - 1\r
+\r
+ /* If followed by a repetition operator. */\r
+ || *p == '*' || *p == '^'\r
+ || ((syntax & RE_BK_PLUS_QM)\r
+ ? *p == '\\' && (p[1] == '+' || p[1] == '?')\r
+ : (*p == '+' || *p == '?'))\r
+ || ((syntax & RE_INTERVALS)\r
+ && ((syntax & RE_NO_BK_BRACES)\r
+ ? *p == '{'\r
+ : (p[0] == '\\' && p[1] == '{'))))\r
+ {\r
+ /* Start building a new exactn. */\r
+\r
+ laststart = b;\r
+\r
+ BUF_PUSH_2 (exactn, 0);\r
+ pending_exact = b - 1;\r
+ }\r
+\r
+ BUF_PUSH (c);\r
+ (*pending_exact)++;\r
+ break;\r
+ } /* switch (c) */\r
+ } /* while p != pend */\r
+\r
+\r
+ /* Through the pattern now. */\r
+\r
+ if (fixup_alt_jump)\r
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);\r
+\r
+ if (!COMPILE_STACK_EMPTY)\r
+ FREE_STACK_RETURN (REG_EPAREN);\r
+\r
+ /* If we don't want backtracking, force success\r
+ the first time we reach the end of the compiled pattern. */\r
+ if (syntax & RE_NO_POSIX_BACKTRACKING)\r
+ BUF_PUSH (succeed);\r
+\r
+ free (compile_stack.stack);\r
+\r
+ /* We have succeeded; set the length of the buffer. */\r
+ bufp->used = b - bufp->buffer;\r
+\r
+#ifdef DEBUG\r
+ if (debug)\r
+ {\r
+ DEBUG_PRINT1 ("\nCompiled pattern: \n");\r
+ print_compiled_pattern (bufp);\r
+ }\r
+#endif /* DEBUG */\r
+\r
+#ifndef MATCH_MAY_ALLOCATE\r
+ /* Initialize the failure stack to the largest possible stack. This\r
+ isn't necessary unless we're trying to avoid calling alloca in\r
+ the search and match routines. */\r
+ {\r
+ int num_regs = bufp->re_nsub + 1;\r
+\r
+ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size\r
+ is strictly greater than re_max_failures, the largest possible stack\r
+ is 2 * re_max_failures failure points. */\r
+ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))\r
+ {\r
+ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);\r
+\r
+#ifdef emacs\r
+ if (! fail_stack.stack)\r
+ fail_stack.stack\r
+ = (fail_stack_elt_t *) xmalloc (fail_stack.size\r
+ * sizeof (fail_stack_elt_t));\r
+ else\r
+ fail_stack.stack\r
+ = (fail_stack_elt_t *) xrealloc (fail_stack.stack,\r
+ (fail_stack.size\r
+ * sizeof (fail_stack_elt_t)));\r
+#else /* not emacs */\r
+ if (! fail_stack.stack)\r
+ fail_stack.stack\r
+ = (fail_stack_elt_t *) malloc (fail_stack.size\r
+ * sizeof (fail_stack_elt_t));\r
+ else\r
+ fail_stack.stack\r
+ = (fail_stack_elt_t *) realloc (fail_stack.stack,\r
+ (fail_stack.size\r
+ * sizeof (fail_stack_elt_t)));\r
+#endif /* not emacs */\r
+ }\r
+\r
+ regex_grow_registers (num_regs);\r
+ }\r
+#endif /* not MATCH_MAY_ALLOCATE */\r
+\r
+ return REG_NOERROR;\r
+} /* regex_compile */\r
+\f\r
+/* Subroutines for `regex_compile'. */\r
+\r
+/* Write a three-byte instruction starting at LOC: the opcode OP in the\r
+   first byte, then ARG stored by STORE_NUMBER as a two-byte integer in\r
+   the bytes that follow.  Nothing is allocated or moved here; LOC must\r
+   already have room for all three bytes.  */\r
+\r
+static void\r
+store_op1 (re_opcode_t op, unsigned char *loc, int arg)\r
+{\r
+  unsigned char *operand = loc + 1;\r
+\r
+  loc[0] = (unsigned char) op;\r
+  STORE_NUMBER (operand, arg);\r
+}\r
+\r
+\r
+/* Two-operand variant of `store_op1': write the opcode OP at LOC,\r
+   then ARG1 and ARG2 as consecutive two-byte integers (five bytes in\r
+   total).  LOC must already have room for all five bytes.  */\r
+\r
+static void\r
+store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)\r
+{\r
+  unsigned char *first_operand = loc + 1;\r
+  unsigned char *second_operand = loc + 3;\r
+\r
+  loc[0] = (unsigned char) op;\r
+  STORE_NUMBER (first_operand, arg1);\r
+  STORE_NUMBER (second_operand, arg2);\r
+}\r
+\r
+\r
+/* Make a three-byte gap at LOC by shifting the bytes in [LOC, END) up\r
+   by three positions, then fill the gap with OP followed by the\r
+   two-byte integer parameter ARG.  The three bytes starting at END must\r
+   already be available to write into.  */\r
+\r
+static void\r
+insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)\r
+{\r
+  unsigned char *src = end;\r
+  unsigned char *dst = end + 3;\r
+\r
+  /* The source and destination overlap, so copy from the top down:\r
+     every byte is read before its slot can be overwritten.  */\r
+  for (; src != loc; )\r
+    {\r
+      --src;\r
+      --dst;\r
+      *dst = *src;\r
+    }\r
+\r
+  store_op1 (op, loc, arg);\r
+}\r
+\r
+\r
+/* Two-operand variant of `insert_op1': shift the bytes in [LOC, END)\r
+   up by five positions and store OP, ARG1 and ARG2 in the gap opened at\r
+   LOC.  The five bytes starting at END must already be available to\r
+   write into.  */\r
+\r
+static void\r
+insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,\r
+            unsigned char *end)\r
+{\r
+  unsigned char *src = end;\r
+  unsigned char *dst = end + 5;\r
+\r
+  /* The source and destination overlap, so copy from the top down:\r
+     every byte is read before its slot can be overwritten.  */\r
+  for (; src != loc; )\r
+    {\r
+      --src;\r
+      --dst;\r
+      *dst = *src;\r
+    }\r
+\r
+  store_op2 (op, loc, arg1, arg2);\r
+}\r
+\r
+\r
+/* P points just past a ^ in PATTERN.  Return true when the character\r
+   immediately before that ^ opens a subexpression or starts a new\r
+   alternative.  A `(' or `|' counts as such an operator if SYNTAX makes\r
+   the unbackslashed form an operator (RE_NO_BK_PARENS, RE_NO_BK_VBAR)\r
+   or if it is preceded by a backslash.  At least one character must\r
+   precede the ^.  */\r
+\r
+static boolean\r
+at_begline_loc_p (const char *pattern, const char *p, reg_syntax_t syntax)\r
+{\r
+  /* P[-1] is the ^ itself, so the character of interest is P[-2].  */\r
+  const char *before = p - 2;\r
+  boolean escaped = before > pattern && before[-1] == '\\';\r
+\r
+  switch (*before)\r
+    {\r
+    case '(':\r
+      /* After a subexpression?  */\r
+      return (syntax & RE_NO_BK_PARENS) || escaped;\r
+\r
+    case '|':\r
+      /* After an alternative?  */\r
+      return (syntax & RE_NO_BK_VBAR) || escaped;\r
+\r
+    default:\r
+      return false;\r
+    }\r
+}\r
+\r
+\r
+/* Counterpart of at_begline_loc_p for $.  P points at the character\r
+   following the $ and PEND at the end of the pattern; the caller must\r
+   guarantee P < PEND.  Return true when what follows the $ closes a\r
+   subexpression or starts a new alternative, taking into account\r
+   whether SYNTAX spells those operators with or without a backslash.  */\r
+\r
+static boolean\r
+at_endline_loc_p (const char *p, const char *pend, reg_syntax_t syntax)\r
+{\r
+  /* True when P starts a backslash pair whose second character is\r
+     still inside the pattern, so that P[1] may be examined.  */\r
+  boolean escaped = *p == '\\' && p + 1 < pend;\r
+  boolean before_close;\r
+\r
+  /* Before a subexpression?  */\r
+  if (syntax & RE_NO_BK_PARENS)\r
+    before_close = *p == ')';\r
+  else\r
+    before_close = escaped && p[1] == ')';\r
+\r
+  if (before_close)\r
+    return true;\r
+\r
+  /* Before an alternative?  */\r
+  if (syntax & RE_NO_BK_VBAR)\r
+    return *p == '|';\r
+\r
+  return escaped && p[1] == '|';\r
+}\r
+\r
+\r
+/* Return true if some entry of COMPILE_STACK records the group number\r
+   REGNUM, false otherwise.  The `avail' entries in use are examined\r
+   from the most recently pushed one downward.  COMPILE_STACK is passed\r
+   by value and is not modified.  */\r
+\r
+static boolean\r
+group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)\r
+{\r
+  int idx = compile_stack.avail - 1;\r
+\r
+  while (idx >= 0)\r
+    {\r
+      if (compile_stack.stack[idx].regnum == regnum)\r
+        return true;\r
+      idx--;\r
+    }\r
+\r
+  return false;\r
+}\r
+\r
+\r
+/* Compile the range `X-Y' of a bracket expression.  On entry *P_PTR\r
+   points at the ending character Y of the uncompiled pattern (which\r
+   ends at PEND); the starting character X is expected at (*P_PTR)[-2]\r
+   and the `-' at (*P_PTR)[-1].  For every character from X to Y\r
+   inclusive, the bit of its translation is set in the bitmap B of the\r
+   compiled pattern.  *P_PTR is advanced past Y.\r
+\r
+   Returns REG_ERANGE if the pattern ends before Y, or if X > Y and\r
+   SYNTAX has RE_NO_EMPTY_RANGES set; otherwise REG_NOERROR (an empty\r
+   range sets no bits).\r
+\r
+   The names `translate' and `b' are kept short so that the macros\r
+   shared with `regex_compile' can be used here unchanged.  */\r
+\r
+static reg_errcode_t\r
+compile_range (const char **p_ptr, const char *pend,\r
+               RE_TRANSLATE_TYPE translate, reg_syntax_t syntax,\r
+               unsigned char *b)\r
+{\r
+  const char *p = *p_ptr;\r
+  const unsigned char *raw;\r
+  unsigned int lo, hi, cur;\r
+\r
+  if (p == pend)\r
+    return REG_ERANGE;\r
+\r
+  /* Fetch the endpoints through an unsigned pointer: with a signed\r
+     `char *' a pattern character with its high bit set would give a\r
+     negative endpoint.  They are fetched untranslated; translation\r
+     happens per character in the loop below.  */\r
+  raw = (const unsigned char *) p;\r
+  lo = raw[-2];\r
+  hi = raw[0];\r
+\r
+  /* Step the caller's pattern pointer over the ending character.  */\r
+  (*p_ptr)++;\r
+\r
+  /* A start that lies after the end denotes an empty range.  */\r
+  if (lo > hi)\r
+    {\r
+      if (syntax & RE_NO_EMPTY_RANGES)\r
+        return REG_ERANGE;\r
+      return REG_NOERROR;\r
+    }\r
+\r
+  /* CUR must be wider than `unsigned char': the range is inclusive, so\r
+     with HI == 0xff (8-bit characters) a narrower counter could never\r
+     exceed HI and the loop would not terminate.  */\r
+  cur = lo;\r
+  while (cur <= hi)\r
+    {\r
+      SET_LIST_BIT (TRANSLATE (cur));\r
+      cur++;\r
+    }\r
+\r
+  return REG_NOERROR;\r
+}\r
+\f\r
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in\r
+ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible\r
+ characters can start a string that matches the pattern. This fastmap\r
+ is used by re_search to skip quickly over impossible starting points.\r
+\r
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data\r
+ area as BUFP->fastmap.\r
+\r
+ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in\r
+ the pattern buffer.\r
+\r
+ Returns 0 if we succeed, -2 if an internal error. */\r
+\r
+int\r
+re_compile_fastmap(struct re_pattern_buffer *bufp)\r
+{\r
+ int j, k;\r
+#ifdef MATCH_MAY_ALLOCATE\r
+ fail_stack_type fail_stack;\r
+#endif\r
+#ifndef REGEX_MALLOC\r
+ char *destination;\r
+#endif\r
+\r
+ register char *fastmap = bufp->fastmap;\r
+ unsigned char *pattern = bufp->buffer;\r
+ unsigned char *p = pattern;\r
+ register unsigned char *pend = pattern + bufp->used;\r
+\r
+#ifdef REL_ALLOC\r
+ /* This holds the pointer to the failure stack, when\r
+ it is allocated relocatably. */\r
+ fail_stack_elt_t *failure_stack_ptr;\r
+#endif\r
+\r
+ /* Assume that each path through the pattern can be null until\r
+ proven otherwise. We set this false at the bottom of switch\r
+ statement, to which we get only if a particular path doesn't\r
+ match the empty string. */\r
+ boolean path_can_be_null = true;\r
+\r
+ /* We aren't doing a `succeed_n' to begin with. */\r
+ boolean succeed_n_p = false;\r
+\r
+ assert (fastmap != NULL && p != NULL);\r
+\r
+ INIT_FAIL_STACK ();\r
+ bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */\r
+ bufp->fastmap_accurate = 1; /* It will be when we're done. */\r
+ bufp->can_be_null = 0;\r
+\r
+ while (1)\r
+ {\r
+ if (p == pend || *p == succeed)\r
+ {\r
+ /* We have reached the (effective) end of pattern. */\r
+ if (!FAIL_STACK_EMPTY ())\r
+ {\r
+ bufp->can_be_null |= path_can_be_null;\r
+\r
+ /* Reset for next path. */\r
+ path_can_be_null = true;\r
+\r
+ p = fail_stack.stack[--fail_stack.avail].pointer;\r
+\r
+ continue;\r
+ }\r
+ else\r
+ break;\r
+ }\r
+\r
+ /* We should never be about to go beyond the end of the pattern. */\r
+ assert (p < pend);\r
+\r
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))\r
+ {\r
+\r
+ /* I guess the idea here is to simply not bother with a fastmap\r
+ if a backreference is used, since it's too hard to figure out\r
+ the fastmap for the corresponding group. Setting\r
+ `can_be_null' stops `re_search_2' from using the fastmap, so\r
+ that is all we do. */\r
+ case duplicate:\r
+ bufp->can_be_null = 1;\r
+ goto done;\r
+\r
+\r
+ /* Following are the cases which match a character. These end\r
+ with `break'. */\r
+\r
+ case exactn:\r
+ fastmap[p[1]] = 1;\r
+ break;\r
+\r
+\r
+ case charset:\r
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)\r
+ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))\r
+ fastmap[j] = 1;\r
+ break;\r
+\r
+\r
+ case charset_not:\r
+ /* Chars beyond end of map must be allowed. */\r
+ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)\r
+ fastmap[j] = 1;\r
+\r
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)\r
+ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))\r
+ fastmap[j] = 1;\r
+ break;\r
+\r
+\r
+ case wordchar:\r
+ for (j = 0; j < (1 << BYTEWIDTH); j++)\r
+ if (SYNTAX (j) == Sword)\r
+ fastmap[j] = 1;\r
+ break;\r
+\r
+\r
+ case notwordchar:\r
+ for (j = 0; j < (1 << BYTEWIDTH); j++)\r
+ if (SYNTAX (j) != Sword)\r
+ fastmap[j] = 1;\r
+ break;\r
+\r
+\r
+ case anychar:\r
+ {\r
+ int fastmap_newline = fastmap['\n'];\r
+\r
+ /* `.' matches anything ... */\r
+ for (j = 0; j < (1 << BYTEWIDTH); j++)\r
+ fastmap[j] = 1;\r
+\r
+ /* ... except perhaps newline. */\r
+ if (!(bufp->syntax & RE_DOT_NEWLINE))\r
+ fastmap['\n'] = fastmap_newline;\r
+\r
+ /* Return if we have already set `can_be_null'; if we have,\r
+ then the fastmap is irrelevant. Something's wrong here. */\r
+ else if (bufp->can_be_null)\r
+ goto done;\r
+\r
+ /* Otherwise, have to check alternative paths. */\r
+ break;\r
+ }\r
+\r
+#ifdef emacs\r
+ case syntaxspec:\r
+ k = *p++;\r
+ for (j = 0; j < (1 << BYTEWIDTH); j++)\r
+ if (SYNTAX (j) == (enum syntaxcode) k)\r
+ fastmap[j] = 1;\r
+ break;\r
+\r
+\r
+ case notsyntaxspec:\r
+ k = *p++;\r
+ for (j = 0; j < (1 << BYTEWIDTH); j++)\r
+ if (SYNTAX (j) != (enum syntaxcode) k)\r
+ fastmap[j] = 1;\r
+ break;\r
+\r
+\r
+ /* All cases after this match the empty string. These end with\r
+ `continue'. */\r
+\r
+\r
+ case before_dot:\r
+ case at_dot:\r
+ case after_dot:\r
+ continue;\r
+#endif /* emacs */\r
+\r
+\r
+ case no_op:\r
+ case begline:\r
+ case endline:\r
+ case begbuf:\r
+ case endbuf:\r
+ case wordbound:\r
+ case notwordbound:\r
+ case wordbeg:\r
+ case wordend:\r
+ case push_dummy_failure:\r
+ continue;\r
+\r
+\r
+ case jump_n:\r
+ case pop_failure_jump:\r
+ case maybe_pop_jump:\r
+ case jump:\r
+ case jump_past_alt:\r
+ case dummy_failure_jump:\r
+ EXTRACT_NUMBER_AND_INCR (j, p);\r
+ p += j;\r
+ if (j > 0)\r
+ continue;\r
+\r
+ /* Jump backward implies we just went through the body of a\r
+ loop and matched nothing. Opcode jumped to should be\r
+ `on_failure_jump' or `succeed_n'. Just treat it like an\r
+ ordinary jump. For a * loop, it has pushed its failure\r
+ point already; if so, discard that as redundant. */\r
+ if ((re_opcode_t) *p != on_failure_jump\r
+ && (re_opcode_t) *p != succeed_n)\r
+ continue;\r
+\r
+ p++;\r
+ EXTRACT_NUMBER_AND_INCR (j, p);\r
+ p += j;\r
+\r
+ /* If what's on the stack is where we are now, pop it. */\r
+ if (!FAIL_STACK_EMPTY ()\r
+ && fail_stack.stack[fail_stack.avail - 1].pointer == p)\r
+ fail_stack.avail--;\r
+\r
+ continue;\r
+\r
+\r
+ case on_failure_jump:\r
+ case on_failure_keep_string_jump:\r
+ handle_on_failure_jump:\r
+ EXTRACT_NUMBER_AND_INCR (j, p);\r
+\r
+ /* For some patterns, e.g., `(a?)?', `p+j' here points to the\r
+ end of the pattern. We don't want to push such a point,\r
+ since when we restore it above, entering the switch will\r
+ increment `p' past the end of the pattern. We don't need\r
+ to push such a point since we obviously won't find any more\r
+ fastmap entries beyond `pend'. Such a pattern can match\r
+ the null string, though. */\r
+ if (p + j < pend)\r
+ {\r
+ if (!PUSH_PATTERN_OP (p + j, fail_stack))\r
+ {\r
+ RESET_FAIL_STACK ();\r
+ return -2;\r
+ }\r
+ }\r
+ else\r
+ bufp->can_be_null = 1;\r
+\r
+ if (succeed_n_p)\r
+ {\r
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */\r
+ succeed_n_p = false;\r
+ }\r
+\r
+ continue;\r
+\r
+\r
+ case succeed_n:\r
+ /* Get to the number of times to succeed. */\r
+ p += 2;\r
+\r
+ /* Increment p past the n for when k != 0. */\r
+ EXTRACT_NUMBER_AND_INCR (k, p);\r
+ if (k == 0)\r
+ {\r
+ p -= 4;\r
+ succeed_n_p = true; /* Spaghetti code alert. */\r
+ goto handle_on_failure_jump;\r
+ }\r
+ continue;\r
+\r
+\r
+ case set_number_at:\r
+ p += 4;\r
+ continue;\r
+\r
+\r
+ case start_memory:\r
+ case stop_memory:\r
+ p += 2;\r
+ continue;\r
+\r
+\r
+ default:\r
+ abort (); /* We have listed all the cases. */\r
+ } /* switch *p++ */\r
+\r
+ /* Getting here means we have found the possible starting\r
+ characters for one path of the pattern -- and that the empty\r
+ string does not match. We need not follow this path further.\r
+ Instead, look at the next alternative (remembered on the\r
+ stack), or quit if no more. The test at the top of the loop\r
+ does these things. */\r
+ path_can_be_null = false;\r
+ p = pend;\r
+ } /* while p */\r
+\r
+ /* Set `can_be_null' for the last path (also the first path, if the\r
+ pattern is empty). */\r
+ bufp->can_be_null |= path_can_be_null;\r
+\r
+ done:\r
+ RESET_FAIL_STACK ();\r
+ return 0;\r
+} /* re_compile_fastmap */\r
+\f\r
+/* Hand caller-owned register storage to the matcher.\r
+\r
+   After this call, searches and matches that use BUFP together with REGS\r
+   record register information in STARTS and ENDS, which REGS now\r
+   describes as holding NUM_REGS entries.  STARTS and ENDS must have been\r
+   allocated with the malloc library routine and must each be at least\r
+   NUM_REGS * sizeof (regoff_t) bytes long.\r
+\r
+   If NUM_REGS == 0, BUFP is marked as having no register storage and REGS\r
+   is cleared, so subsequent matches allocate their own register data.\r
+\r
+   If this function is never called, the first search or match using BUFP\r
+   allocates its own register data, without freeing the old data.  */\r
+\r
+void\r
+re_set_registers(struct re_pattern_buffer *bufp,\r
+                 struct re_registers *regs,\r
+                 unsigned num_regs,\r
+                 regoff_t *starts,\r
+                 regoff_t *ends)\r
+{\r
+  if (num_regs == 0)\r
+    {\r
+      /* Nothing was supplied: forget any previous arrays and let the\r
+         matcher allocate when it next needs registers.  */\r
+      bufp->regs_allocated = REGS_UNALLOCATED;\r
+      regs->num_regs = 0;\r
+      regs->end = (regoff_t *) 0;\r
+      regs->start = regs->end;\r
+      return;\r
+    }\r
+\r
+  /* Adopt the caller's arrays.  The REGS_REALLOCATE state lets the\r
+     matcher resize them later if the pattern needs more entries.  */\r
+  bufp->regs_allocated = REGS_REALLOCATE;\r
+  regs->num_regs = num_regs;\r
+  regs->start = starts;\r
+  regs->end = ends;\r
+}\r
+\f\r
+/* Searching routines. */\r
+\r
+/* Single-string front end to re_search_2.  STRING (of length SIZE) is\r
+   searched on its own, and there is no way to say where matching must\r
+   stop: the stop position handed on is always SIZE.  The result is\r
+   whatever re_search_2 returns.  */\r
+\r
+int\r
+re_search(struct re_pattern_buffer *bufp,\r
+          const char *string,\r
+          int size,\r
+          int startpos,\r
+          int range,\r
+          struct re_registers *regs)\r
+{\r
+  /* Matching may run to the very end of STRING.  */\r
+  const int stop = size;\r
+\r
+  /* The lone string travels as the second half of the pair, with an\r
+     empty (NULL, 0) first half.  */\r
+  return re_search_2 (bufp, NULL, 0, string, size,\r
+                      startpos, range, regs, stop);\r
+}\r
+\r
+\r
+/* Using the compiled pattern in BUFP->buffer, first tries to match the\r
+ virtual concatenation of STRING1 and STRING2, starting first at index\r
+ STARTPOS, then at STARTPOS + 1, and so on.\r
+\r
+ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.\r
+\r
+ RANGE is how far to scan while trying to match. RANGE = 0 means try\r
+ only at STARTPOS; in general, the last start tried is STARTPOS +\r
+ RANGE.\r
+\r
+ In REGS, return the indices of the virtual concatenation of STRING1\r
+ and STRING2 that matched the entire BUFP->buffer and its contained\r
+ subexpressions.\r
+\r
+ Do not consider matching one past the index STOP in the virtual\r
+ concatenation of STRING1 and STRING2.\r
+\r
+ We return either the position in the strings at which the match was\r
+ found, -1 if no match, or -2 if error (such as failure\r
+ stack overflow). */\r
+\r
+int\r
+re_search_2(struct re_pattern_buffer *bufp,\r
+ const char *string1,\r
+ int size1,\r
+ const char *string2,\r
+ int size2,\r
+ int startpos,\r
+ int range,\r
+ struct re_registers *regs,\r
+ int stop)\r
+{\r
+ int val;\r
+ register char *fastmap = bufp->fastmap;\r
+ register RE_TRANSLATE_TYPE translate = bufp->translate;\r
+ int total_size = size1 + size2;\r
+ int endpos = startpos + range;\r
+\r
+ /* Check for out-of-range STARTPOS. */\r
+ if (startpos < 0 || startpos > total_size)\r
+ return -1;\r
+\r
+ /* Fix up RANGE if it might eventually take us outside\r
+ the virtual concatenation of STRING1 and STRING2.\r
+ Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */\r
+ if (endpos < 0)\r
+ range = 0 - startpos;\r
+ else if (endpos > total_size)\r
+ range = total_size - startpos;\r
+\r
+ /* If the search isn't to be a backwards one, don't waste time in a\r
+ search for a pattern that must be anchored. */\r
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)\r
+ {\r
+ if (startpos > 0)\r
+ return -1;\r
+ else\r
+ range = 1;\r
+ }\r
+\r
+#ifdef emacs\r
+ /* In a forward search for something that starts with \=.\r
+ don't keep searching past point. */\r
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)\r
+ {\r
+ range = PT - startpos;\r
+ if (range <= 0)\r
+ return -1;\r
+ }\r
+#endif /* emacs */\r
+\r
+ /* Update the fastmap now if not correct already. */\r
+ if (fastmap && !bufp->fastmap_accurate)\r
+ if (re_compile_fastmap (bufp) == -2)\r
+ return -2;\r
+\r
+ /* Loop through the string, looking for a place to start matching. */\r
+ for (;;)\r
+ {\r
+ /* If a fastmap is supplied, skip quickly over characters that\r
+ cannot be the start of a match. If the pattern can match the\r
+ null string, however, we don't need to skip characters; we want\r
+ the first null string. */\r
+ if (fastmap && startpos < total_size && !bufp->can_be_null)\r
+ {\r
+ if (range > 0) /* Searching forwards. */\r
+ {\r
+ register const char *d;\r
+ register int lim = 0;\r
+ int irange = range;\r
+\r
+ if (startpos < size1 && startpos + range >= size1)\r
+ lim = range - (size1 - startpos);\r
+\r
+ d = (startpos >= size1 ? string2 - size1 : string1) + startpos;\r
+\r
+ /* Written out as an if-else to avoid testing `translate'\r
+ inside the loop. */\r
+ if (translate)\r
+ while (range > lim\r
+ && !fastmap[(unsigned char)\r
+ translate[(unsigned char) *d++]])\r
+ range--;\r
+ else\r
+ while (range > lim && !fastmap[(unsigned char) *d++])\r
+ range--;\r
+\r
+ startpos += irange - range;\r
+ }\r
+ else /* Searching backwards. */\r
+ {\r
+ register char c = (size1 == 0 || startpos >= size1\r
+ ? string2[startpos - size1]\r
+ : string1[startpos]);\r
+\r
+ if (!fastmap[(unsigned char) TRANSLATE (c)])\r
+ goto advance;\r
+ }\r
+ }\r
+\r
+ /* If can't match the null string, and that's all we have left, fail. */\r
+ if (range >= 0 && startpos == total_size && fastmap\r
+ && !bufp->can_be_null)\r
+ return -1;\r
+\r
+ val = re_match_2_internal (bufp, string1, size1, string2, size2,\r
+ startpos, regs, stop);\r
+#ifndef REGEX_MALLOC\r
+#ifdef C_ALLOCA\r
+ alloca (0);\r
+#endif\r
+#endif\r
+\r
+ if (val >= 0)\r
+ return startpos;\r
+\r
+ if (val == -2)\r
+ return -2;\r
+\r
+ advance:\r
+ if (!range)\r
+ break;\r
+ else if (range > 0)\r
+ {\r
+ range--;\r
+ startpos++;\r
+ }\r
+ else\r
+ {\r
+ range++;\r
+ startpos--;\r
+ }\r
+ }\r
+ return -1;\r
+} /* re_search_2 */\r
+\f\r
+/* This converts PTR, a pointer into one of the search strings `string1'\r
+ and `string2', into a regoff_t offset. A pointer into `string1' yields\r
+ its distance from `string1'; a pointer into `string2' yields its\r
+ distance from `string2' plus `size1', i.e. a position within the\r
+ virtual concatenation of the two strings. The expansion refers to the\r
+ caller's `string1', `string2' and `size1', and relies on\r
+ FIRST_STRING_P (defined elsewhere) to tell which string PTR is in. */\r
+#define POINTER_TO_OFFSET(ptr) \\r
+ (FIRST_STRING_P (ptr) \\r
+ ? ((regoff_t) ((ptr) - string1)) \\r
+ : ((regoff_t) ((ptr) - string2 + size1)))\r
+\r
+/* Macros for dealing with the split strings in re_match_2.\r
+ MATCHING_IN_FIRST_STRING is true while `dend', the end of the string\r
+ currently being scanned, is still `end_match_1'. */\r
+\r
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)\r
+\r
+/* Call before fetching a character with *d. This switches over to\r
+ string2 if necessary. It expands to a loop over the caller's `d',\r
+ `dend', `string2' and `end_match_2', and jumps to the caller's `fail'\r
+ label when `d' has reached `end_match_2', so it can only be used where\r
+ all of those are in scope. */\r
+#define PREFETCH() \\r
+ while (d == dend) \\r
+ { \\r
+ /* End of string2 => fail. */ \\r
+ if (dend == end_match_2) \\r
+ goto fail; \\r
+ /* End of string1 => advance to string2. */ \\r
+ d = string2; \\r
+ dend = end_match_2; \\r
+ }\r
+\r
+\r
+/* Test if at very beginning or at very end of the virtual concatenation\r
+ of `string1' and `string2'. If only one string, it's `string2'.\r
+ Note that, as written, AT_STRINGS_BEG is also true whenever `size2'\r
+ is zero, whatever D is. AT_STRINGS_END compares D with `end2' only. */\r
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)\r
+#define AT_STRINGS_END(d) ((d) == end2)\r
+\r
+\r
+/* Test if D points to a character which is word-constituent. We have\r
+ two special cases to check for: if past the end of string1, look at\r
+ the first character in string2; and if before the beginning of\r
+ string2, look at the last character in string1. In every other case\r
+ D itself is dereferenced. The character is classified with SYNTAX\r
+ and compared against Sword. */\r
+#define WORDCHAR_P(d) \\r
+ (SYNTAX ((d) == end1 ? *string2 \\r
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \\r
+ == Sword)\r
+\r
+/* Disabled due to a compiler bug -- see comment at case wordbound */\r
+#if 0\r
+/* Test if the character before D and the one at D differ with respect\r
+ to being word-constituent. The two ends of the strings also count\r
+ as boundaries. */\r
+#define AT_WORD_BOUNDARY(d) \\r
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \\r
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))\r
+#endif\r
+\r
+/* Free everything we malloc.\r
+\r
+   FREE_VAR (VAR) releases the storage VAR points to, if VAR is non-null,\r
+   and then sets VAR to NULL.  VAR must be a modifiable pointer variable.\r
+   The cast to `void *' is applied only to the value handed to REGEX_FREE,\r
+   never to the assignment target: a cast expression is not an lvalue in\r
+   ISO C, so assigning through one compiles only as a compiler extension.\r
+   The do/while wrapper makes every use a single statement.\r
+\r
+   FREE_VARIABLES () releases the failure stack and each of the per-match\r
+   register arrays; it refers to those by name, so it can only be used\r
+   where `fail_stack' and the register variables are in scope.  When\r
+   MATCH_MAY_ALLOCATE is not defined it expands to a no-op expression.  */\r
+#ifdef MATCH_MAY_ALLOCATE\r
+#define FREE_VAR(var) \\r
+  do { \\r
+    if (var) \\r
+      REGEX_FREE ((void *) (var)); \\r
+    (var) = NULL; \\r
+  } while (0)\r
+#define FREE_VARIABLES() \\r
+  do { \\r
+    REGEX_FREE_STACK (fail_stack.stack); \\r
+    FREE_VAR (regstart); \\r
+    FREE_VAR (regend); \\r
+    FREE_VAR (old_regstart); \\r
+    FREE_VAR (old_regend); \\r
+    FREE_VAR (best_regstart); \\r
+    FREE_VAR (best_regend); \\r
+    FREE_VAR (reg_info); \\r
+    FREE_VAR (reg_dummy); \\r
+    FREE_VAR (reg_info_dummy); \\r
+  } while (0)\r
+#else\r
+#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */\r
+#endif /* not MATCH_MAY_ALLOCATE */\r
+\r
+/* These values must meet several constraints. They must not be valid\r
+ register values; since we have a limit of 255 registers (because\r
+ we use only one byte in the pattern for the register number), we can\r
+ use numbers larger than 255. They must differ by 1, because of\r
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must\r
+ be larger than the value for the highest register, so we do not try\r
+ to actually save any registers when none are active.\r
+\r
+ As defined here, NO_HIGHEST_ACTIVE_REG is 1 << BYTEWIDTH and\r
+ NO_LOWEST_ACTIVE_REG is exactly one more than that; BYTEWIDTH is\r
+ defined elsewhere in this file. */\r
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)\r
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)\r
+\f\r
+/* Matching routines. */\r
+\r
+#ifndef emacs /* Emacs never uses this. */\r
+/* re_match is the single-string form of re_match_2: the pattern in BUFP\r
+   is matched against STRING (of length SIZE) starting at POS, with the\r
+   stop position fixed at SIZE.  The value returned is whatever\r
+   re_match_2_internal returns.  */\r
+\r
+int\r
+re_match(struct re_pattern_buffer *bufp,\r
+         const char *string,\r
+         int size,\r
+         int pos,\r
+         struct re_registers *regs)\r
+{\r
+  int match_result;\r
+\r
+  /* The lone string is passed as the second half of the pair, with an\r
+     empty (NULL, 0) first half.  */\r
+  match_result = re_match_2_internal (bufp, NULL, 0, string, size,\r
+                                      pos, regs, size);\r
+\r
+#if !defined (REGEX_MALLOC) && defined (C_ALLOCA)\r
+  /* Force an alloca cleanup now that the matcher has returned.  */\r
+  alloca (0);\r
+#endif\r
+\r
+  return match_result;\r
+}\r
+#endif /* not emacs */\r
+\r
+static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,\r
+ unsigned char *end,\r
+ register_info_type *reg_info));\r
+static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,\r
+ unsigned char *end,\r
+ register_info_type *reg_info));\r
+static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,\r
+ unsigned char *end,\r
+ register_info_type *reg_info));\r
+static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,\r
+ int len, char *translate));\r
+\r
+/* re_match_2 matches the compiled pattern in BUFP against the\r
+ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1\r
+ and SIZE2, respectively). We start matching at POS, and stop\r
+ matching at STOP.\r
+\r
+ If REGS is non-null and the `no_sub' field of BUFP is zero, we\r
+ store offsets for the substring each group matched in REGS. See the\r
+ documentation for exactly how many groups we fill.\r
+\r
+ We return -1 if no match, -2 if an internal error (such as the\r
+ failure stack overflowing). Otherwise, we return the length of the\r
+ matched substring. */\r
+\r
+int\r
+re_match_2(struct re_pattern_buffer *bufp,\r
+           const char *string1,\r
+           int size1,\r
+           const char *string2,\r
+           int size2,\r
+           int pos,\r
+           struct re_registers *regs,\r
+           int stop)\r
+{\r
+  int match_result;\r
+\r
+  /* All the real work happens in re_match_2_internal; every argument is\r
+     handed on unchanged.  */\r
+  match_result = re_match_2_internal (bufp, string1, size1, string2, size2,\r
+                                      pos, regs, stop);\r
+\r
+#if !defined (REGEX_MALLOC) && defined (C_ALLOCA)\r
+  /* Force an alloca cleanup now that the matcher has returned.  */\r
+  alloca (0);\r
+#endif\r
+\r
+  return match_result;\r
+}\r
+\r
+/* This is a separate function so that we can force an alloca cleanup\r
+ afterwards. */\r
+static int\r
+re_match_2_internal(struct re_pattern_buffer *bufp,\r
+ const char *string1,\r
+ int size1,\r
+ const char *string2,\r
+ int size2,\r
+ int pos,\r
+ struct re_registers *regs,\r
+ int stop)\r
+{\r
+ /* General temporaries. */\r
+ int mcnt;\r
+ unsigned char *p1;\r
+\r
+ /* Just past the end of the corresponding string. */\r
+ const char *end1, *end2;\r
+\r
+ /* Pointers into string1 and string2, just past the last characters in\r
+ each to consider matching. */\r
+ const char *end_match_1, *end_match_2;\r
+\r
+ /* Where we are in the data, and the end of the current string. */\r
+ const char *d, *dend;\r
+\r
+ /* Where we are in the pattern, and the end of the pattern. */\r
+ unsigned char *p = bufp->buffer;\r
+ register unsigned char *pend = p + bufp->used;\r
+\r
+ /* Mark the opcode just after a start_memory, so we can test for an\r
+ empty subpattern when we get to the stop_memory. */\r
+ unsigned char *just_past_start_mem = 0;\r
+\r
+ /* We use this to map every character in the string. */\r
+ RE_TRANSLATE_TYPE translate = bufp->translate;\r
+\r
+ /* Failure point stack. Each place that can handle a failure further\r
+ down the line pushes a failure point on this stack. It consists of\r
+ restart, regend, and reg_info for all registers corresponding to\r
+ the subexpressions we're currently inside, plus the number of such\r
+ registers, and, finally, two char *'s. The first char * is where\r
+ to resume scanning the pattern; the second one is where to resume\r
+ scanning the strings. If the latter is zero, the failure point is\r
+ a ``dummy''; if a failure happens and the failure point is a dummy,\r
+ it gets discarded and the next next one is tried. */\r
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */\r
+ fail_stack_type fail_stack;\r
+#endif\r
+#ifdef DEBUG\r
+ static unsigned failure_id = 0;\r
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;\r
+#endif\r
+\r
+#ifdef REL_ALLOC\r
+ /* This holds the pointer to the failure stack, when\r
+ it is allocated relocatably. */\r
+ fail_stack_elt_t *failure_stack_ptr;\r
+#endif\r
+\r
+ /* We fill all the registers internally, independent of what we\r
+ return, for use in backreferences. The number here includes\r
+ an element for register zero. */\r
+ size_t num_regs = bufp->re_nsub + 1;\r
+\r
+ /* The currently active registers. */\r
+ active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;\r
+ active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;\r
+\r
+ /* Information on the contents of registers. These are pointers into\r
+ the input strings; they record just what was matched (on this\r
+ attempt) by a subexpression part of the pattern, that is, the\r
+ regnum-th regstart pointer points to where in the pattern we began\r
+ matching and the regnum-th regend points to right after where we\r
+ stopped matching the regnum-th subexpression. (The zeroth register\r
+ keeps track of what the whole pattern matches.) */\r
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */\r
+ const char **regstart, **regend;\r
+#endif\r
+\r
+ /* If a group that's operated upon by a repetition operator fails to\r
+ match anything, then the register for its start will need to be\r
+ restored because it will have been set to wherever in the string we\r
+ are when we last see its open-group operator. Similarly for a\r
+ register's end. */\r
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */\r
+ const char **old_regstart, **old_regend;\r
+#endif\r
+\r
+ /* The is_active field of reg_info helps us keep track of which (possibly\r
+ nested) subexpressions we are currently in. The matched_something\r
+ field of reg_info[reg_num] helps us tell whether or not we have\r
+ matched any of the pattern so far this time through the reg_num-th\r
+ subexpression. These two fields get reset each time through any\r
+ loop their register is in. */\r
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */\r
+ register_info_type *reg_info;\r
+#endif\r
+\r
+ /* The following record the register info as found in the above\r
+ variables when we find a match better than any we've seen before.\r
+ This happens as we backtrack through the failure points, which in\r
+ turn happens only if we have not yet matched the entire string. */\r
+ unsigned best_regs_set = false;\r
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */\r
+ const char **best_regstart, **best_regend;\r
+#endif\r
+\r
+ /* Logically, this is `best_regend[0]'. But we don't want to have to\r
+ allocate space for that if we're not allocating space for anything\r
+ else (see below). Also, we never need info about register 0 for\r
+ any of the other register vectors, and it seems rather a kludge to\r
+ treat `best_regend' differently than the rest. So we keep track of\r
+ the end of the best match so far in a separate variable. We\r
+ initialize this to NULL so that when we backtrack the first time\r
+ and need to test it, it's not garbage. */\r
+ const char *match_end = NULL;\r
+\r
+ /* This helps SET_REGS_MATCHED avoid doing redundant work. */\r
+ int set_regs_matched_done = 0;\r
+\r
+ /* Used when we pop values we don't care about. */\r
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */\r
+ const char **reg_dummy;\r
+ register_info_type *reg_info_dummy;\r
+#endif\r
+\r
+#ifdef DEBUG\r
+ /* Counts the total number of registers pushed. */\r
+ unsigned num_regs_pushed = 0;\r
+#endif\r
+\r
+ DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");\r
+\r
+ INIT_FAIL_STACK ();\r
+\r
+#ifdef MATCH_MAY_ALLOCATE\r
+ /* Do not bother to initialize all the register variables if there are\r
+ no groups in the pattern, as it takes a fair amount of time. If\r
+ there are groups, we include space for register 0 (the whole\r
+ pattern), even though we never use it, since it simplifies the\r
+ array indexing. We should fix this. */\r
+ if (bufp->re_nsub)\r
+ {\r
+ regstart = REGEX_TALLOC (num_regs, const char *);\r
+ regend = REGEX_TALLOC (num_regs, const char *);\r
+ old_regstart = REGEX_TALLOC (num_regs, const char *);\r
+ old_regend = REGEX_TALLOC (num_regs, const char *);\r
+ best_regstart = REGEX_TALLOC (num_regs, const char *);\r
+ best_regend = REGEX_TALLOC (num_regs, const char *);\r
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);\r
+ reg_dummy = REGEX_TALLOC (num_regs, const char *);\r
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);\r
+\r
+ if (!(regstart && regend && old_regstart && old_regend && reg_info\r
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))\r
+ {\r
+ FREE_VARIABLES ();\r
+ return -2;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ /* We must initialize all our variables to NULL, so that\r
+ `FREE_VARIABLES' doesn't try to free them. */\r
+ regstart = regend = old_regstart = old_regend = best_regstart\r
+ = best_regend = reg_dummy = NULL;\r
+ reg_info = reg_info_dummy = (register_info_type *) NULL;\r
+ }\r
+#endif /* MATCH_MAY_ALLOCATE */\r
+\r
+ /* The starting position is bogus. */\r
+ if (pos < 0 || pos > size1 + size2)\r
+ {\r
+ FREE_VARIABLES ();\r
+ return -1;\r
+ }\r
+\r
+ /* Initialize subexpression text positions to -1 to mark ones that no\r
+ start_memory/stop_memory has been seen for. Also initialize the\r
+ register information struct. */\r
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)\r
+ {\r
+ regstart[mcnt] = regend[mcnt]\r
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;\r
+\r
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;\r
+ IS_ACTIVE (reg_info[mcnt]) = 0;\r
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;\r
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;\r
+ }\r
+\r
+ /* We move `string1' into `string2' if the latter's empty -- but not if\r
+ `string1' is null. */\r
+ if (size2 == 0 && string1 != NULL)\r
+ {\r
+ string2 = string1;\r
+ size2 = size1;\r
+ string1 = 0;\r
+ size1 = 0;\r
+ }\r
+ end1 = string1 + size1;\r
+ end2 = string2 + size2;\r
+\r
+ /* Compute where to stop matching, within the two strings. */\r
+ if (stop <= size1)\r
+ {\r
+ end_match_1 = string1 + stop;\r
+ end_match_2 = string2;\r
+ }\r
+ else\r
+ {\r
+ end_match_1 = end1;\r
+ end_match_2 = string2 + stop - size1;\r
+ }\r
+\r
+ /* `p' scans through the pattern as `d' scans through the data.\r
+ `dend' is the end of the input string that `d' points within. `d'\r
+ is advanced into the following input string whenever necessary, but\r
+ this happens before fetching; therefore, at the beginning of the\r
+ loop, `d' can be pointing at the end of a string, but it cannot\r
+ equal `string2'. */\r
+ if (size1 > 0 && pos <= size1)\r
+ {\r
+ d = string1 + pos;\r
+ dend = end_match_1;\r
+ }\r
+ else\r
+ {\r
+ d = string2 + pos - size1;\r
+ dend = end_match_2;\r
+ }\r
+\r
+ DEBUG_PRINT1 ("The compiled pattern is:\n");\r
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);\r
+ DEBUG_PRINT1 ("The string to match is: `");\r
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);\r
+ DEBUG_PRINT1 ("'\n");\r
+\r
+ /* This loops over pattern commands. It exits by returning from the\r
+ function if the match is complete, or it drops through if the match\r
+ fails at this starting point in the input data. */\r
+ for (;;)\r
+ {\r
+#ifdef _LIBC\r
+ DEBUG_PRINT2 ("\n%p: ", p);\r
+#else\r
+ DEBUG_PRINT2 ("\n0x%x: ", p);\r
+#endif\r
+\r
+ if (p == pend)\r
+ { /* End of pattern means we might have succeeded. */\r
+ DEBUG_PRINT1 ("end of pattern ... ");\r
+\r
+ /* If we haven't matched the entire string, and we want the\r
+ longest match, try backtracking. */\r
+ if (d != end_match_2)\r
+ {\r
+ /* 1 if this match ends in the same string (string1 or string2)\r
+ as the best previous match. */\r
+ boolean same_str_p = (FIRST_STRING_P (match_end)\r
+ == MATCHING_IN_FIRST_STRING);\r
+ /* 1 if this match is the best seen so far. */\r
+ boolean best_match_p;\r
+\r
+ /* AIX compiler got confused when this was combined\r
+ with the previous declaration. */\r
+ if (same_str_p)\r
+ best_match_p = d > match_end;\r
+ else\r
+ best_match_p = !MATCHING_IN_FIRST_STRING;\r
+\r
+ DEBUG_PRINT1 ("backtracking.\n");\r
+\r
+ if (!FAIL_STACK_EMPTY ())\r
+ { /* More failure points to try. */\r
+\r
+ /* If exceeds best match so far, save it. */\r
+ if (!best_regs_set || best_match_p)\r
+ {\r
+ best_regs_set = true;\r
+ match_end = d;\r
+\r
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");\r
+\r
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)\r
+ {\r
+ best_regstart[mcnt] = regstart[mcnt];\r
+ best_regend[mcnt] = regend[mcnt];\r
+ }\r
+ }\r
+ goto fail;\r
+ }\r
+\r
+ /* If no failure points, don't restore garbage. And if\r
+ last match is real best match, don't restore second\r
+ best one. */\r
+ else if (best_regs_set && !best_match_p)\r
+ {\r
+ restore_best_regs:\r
+ /* Restore best match. It may happen that `dend ==\r
+ end_match_1' while the restored d is in string2.\r
+ For example, the pattern `x.*y.*z' against the\r
+ strings `x-' and `y-z-', if the two strings are\r
+ not consecutive in memory. */\r
+ DEBUG_PRINT1 ("Restoring best registers.\n");\r
+\r
+ d = match_end;\r
+ dend = ((d >= string1 && d <= end1)\r
+ ? end_match_1 : end_match_2);\r
+\r
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)\r
+ {\r
+ regstart[mcnt] = best_regstart[mcnt];\r
+ regend[mcnt] = best_regend[mcnt];\r
+ }\r
+ }\r
+ } /* d != end_match_2 */\r
+\r
+ succeed_label:\r
+ DEBUG_PRINT1 ("Accepting match.\n");\r
+\r
+ /* If caller wants register contents data back, do it. */\r
+ if (regs && !bufp->no_sub)\r
+ {\r
+ /* Have the register data arrays been allocated? */\r
+ if (bufp->regs_allocated == REGS_UNALLOCATED)\r
+ { /* No. So allocate them with malloc. We need one\r
+ extra element beyond `num_regs' for the `-1' marker\r
+ GNU code uses. */\r
+ regs->num_regs = MAX (RE_NREGS, num_regs + 1);\r
+ regs->start = TALLOC (regs->num_regs, regoff_t);\r
+ regs->end = TALLOC (regs->num_regs, regoff_t);\r
+ if (regs->start == NULL || regs->end == NULL)\r
+ {\r
+ FREE_VARIABLES ();\r
+ return -2;\r
+ }\r
+ bufp->regs_allocated = REGS_REALLOCATE;\r
+ }\r
+ else if (bufp->regs_allocated == REGS_REALLOCATE)\r
+ { /* Yes. If we need more elements than were already\r
+ allocated, reallocate them. If we need fewer, just\r
+ leave it alone. */\r
+ if (regs->num_regs < num_regs + 1)\r
+ {\r
+ regs->num_regs = num_regs + 1;\r
+ RETALLOC (regs->start, regs->num_regs, regoff_t);\r
+ RETALLOC (regs->end, regs->num_regs, regoff_t);\r
+ if (regs->start == NULL || regs->end == NULL)\r
+ {\r
+ FREE_VARIABLES ();\r
+ return -2;\r
+ }\r
+ }\r
+ }\r
+ else\r
+ {\r
+ /* These braces fend off a "empty body in an else-statement"\r
+ warning under GCC when assert expands to nothing. */\r
+ assert (bufp->regs_allocated == REGS_FIXED);\r
+ }\r
+\r
+ /* Convert the pointer data in `regstart' and `regend' to\r
+ indices. Register zero has to be set differently,\r
+ since we haven't kept track of any info for it. */\r
+ if (regs->num_regs > 0)\r
+ {\r
+ regs->start[0] = pos;\r
+ regs->end[0] = (MATCHING_IN_FIRST_STRING\r
+ ? ((regoff_t) (d - string1))\r
+ : ((regoff_t) (d - string2 + size1)));\r
+ }\r
+\r
+ /* Go through the first `min (num_regs, regs->num_regs)'\r
+ registers, since that is all we initialized. */\r
+ for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);\r
+ mcnt++)\r
+ {\r
+ if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))\r
+ regs->start[mcnt] = regs->end[mcnt] = -1;\r
+ else\r
+ {\r
+ regs->start[mcnt]\r
+ = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);\r
+ regs->end[mcnt]\r
+ = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);\r
+ }\r
+ }\r
+\r
+ /* If the regs structure we return has more elements than\r
+ were in the pattern, set the extra elements to -1. If\r
+ we (re)allocated the registers, this is the case,\r
+ because we always allocate enough to have at least one\r
+ -1 at the end. */\r
+ for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)\r
+ regs->start[mcnt] = regs->end[mcnt] = -1;\r
+ } /* regs && !bufp->no_sub */\r
+\r
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",\r
+ nfailure_points_pushed, nfailure_points_popped,\r
+ nfailure_points_pushed - nfailure_points_popped);\r
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);\r
+\r
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING\r
+ ? string1\r
+ : string2 - size1);\r
+\r
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);\r
+\r
+ FREE_VARIABLES ();\r
+ return mcnt;\r
+ }\r
+\r
+ /* Otherwise match next pattern command. */\r
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))\r
+ {\r
+ /* Ignore these. Used to ignore the n of succeed_n's which\r
+ currently have n == 0. */\r
+ case no_op:\r
+ DEBUG_PRINT1 ("EXECUTING no_op.\n");\r
+ break;\r
+\r
+ case succeed:\r
+ DEBUG_PRINT1 ("EXECUTING succeed.\n");\r
+ goto succeed_label;\r
+\r
+ /* Match the next n pattern characters exactly. The following\r
+ byte in the pattern defines n, and the n bytes after that\r
+ are the characters to match. */\r
+ case exactn:\r
+ mcnt = *p++;\r
+ DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);\r
+\r
+ /* This is written out as an if-else so we don't waste time\r
+ testing `translate' inside the loop. */\r
+ if (translate)\r
+ {\r
+ do\r
+ {\r
+ PREFETCH ();\r
+ if ((unsigned char) translate[(unsigned char) *d++]\r
+ != (unsigned char) *p++)\r
+ goto fail;\r
+ }\r
+ while (--mcnt);\r
+ }\r
+ else\r
+ {\r
+ do\r
+ {\r
+ PREFETCH ();\r
+ if (*d++ != (char) *p++) goto fail;\r
+ }\r
+ while (--mcnt);\r
+ }\r
+ SET_REGS_MATCHED ();\r
+ break;\r
+\r
+\r
+ /* Match any character except possibly a newline or a null. */\r
+ case anychar:\r
+ DEBUG_PRINT1 ("EXECUTING anychar.\n");\r
+\r
+ PREFETCH ();\r
+\r
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')\r
+ || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))\r
+ goto fail;\r
+\r
+ SET_REGS_MATCHED ();\r
+ DEBUG_PRINT2 (" Matched `%d'.\n", *d);\r
+ d++;\r
+ break;\r
+\r
+\r
+ case charset:\r
+ case charset_not:\r
+ {\r
+ register unsigned char c;\r
+ boolean not = (re_opcode_t) *(p - 1) == charset_not;\r
+\r
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");\r
+\r
+ PREFETCH ();\r
+ c = TRANSLATE (*d); /* The character to match. */\r
+\r
+ /* Cast to `unsigned' instead of `unsigned char' in case the\r
+ bit list is a full 32 bytes long. */\r
+ if (c < (unsigned) (*p * BYTEWIDTH)\r
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))\r
+ not = !not;\r
+\r
+ p += 1 + *p;\r
+\r
+ if (!not) goto fail;\r
+\r
+ SET_REGS_MATCHED ();\r
+ d++;\r
+ break;\r
+ }\r
+\r
+\r
+ /* The beginning of a group is represented by start_memory.\r
+ The arguments are the register number in the next byte, and the\r
+ number of groups inner to this one in the next. The text\r
+ matched within the group is recorded (in the internal\r
+ registers data structure) under the register number. */\r
+ case start_memory:\r
+ DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);\r
+\r
+ /* Find out if this group can match the empty string. */\r
+ p1 = p; /* To send to group_match_null_string_p. */\r
+\r
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)\r
+ REG_MATCH_NULL_STRING_P (reg_info[*p])\r
+ = group_match_null_string_p (&p1, pend, reg_info);\r
+\r
+ /* Save the position in the string where we were the last time\r
+ we were at this open-group operator in case the group is\r
+ operated upon by a repetition operator, e.g., with `(a*)*b'\r
+ against `ab'; then we want to ignore where we are now in\r
+ the string in case this attempt to match fails. */\r
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])\r
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]\r
+ : regstart[*p];\r
+ DEBUG_PRINT2 (" old_regstart: %d\n",\r
+ POINTER_TO_OFFSET (old_regstart[*p]));\r
+\r
+ regstart[*p] = d;\r
+ DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));\r
+\r
+ IS_ACTIVE (reg_info[*p]) = 1;\r
+ MATCHED_SOMETHING (reg_info[*p]) = 0;\r
+\r
+ /* Clear this whenever we change the register activity status. */\r
+ set_regs_matched_done = 0;\r
+\r
+ /* This is the new highest active register. */\r
+ highest_active_reg = *p;\r
+\r
+ /* If nothing was active before, this is the new lowest active\r
+ register. */\r
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)\r
+ lowest_active_reg = *p;\r
+\r
+ /* Move past the register number and inner group count. */\r
+ p += 2;\r
+ just_past_start_mem = p;\r
+\r
+ break;\r
+\r
+\r
+ /* The stop_memory opcode represents the end of a group. Its\r
+ arguments are the same as start_memory's: the register\r
+ number, and the number of inner groups. */\r
+ case stop_memory:\r
+ DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);\r
+\r
+ /* We need to save the string position the last time we were at\r
+ this close-group operator in case the group is operated\r
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'\r
+ against `aba'; then we want to ignore where we are now in\r
+ the string in case this attempt to match fails. */\r
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])\r
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]\r
+ : regend[*p];\r
+ DEBUG_PRINT2 (" old_regend: %d\n",\r
+ POINTER_TO_OFFSET (old_regend[*p]));\r
+\r
+ regend[*p] = d;\r
+ DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));\r
+\r
+ /* This register isn't active anymore. */\r
+ IS_ACTIVE (reg_info[*p]) = 0;\r
+\r
+ /* Clear this whenever we change the register activity status. */\r
+ set_regs_matched_done = 0;\r
+\r
+ /* If this was the only register active, nothing is active\r
+ anymore. */\r
+ if (lowest_active_reg == highest_active_reg)\r
+ {\r
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;\r
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;\r
+ }\r
+ else\r
+ { /* We must scan for the new highest active register, since\r
+ it isn't necessarily one less than now: consider\r
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the\r
+ new highest active register is 1. */\r
+ unsigned char r = *p - 1;\r
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))\r
+ r--;\r
+\r
+ /* If we end up at register zero, that means that we saved\r
+ the registers as the result of an `on_failure_jump', not\r
+ a `start_memory', and we jumped to past the innermost\r
+ `stop_memory'. For example, in ((.)*) we save\r
+ registers 1 and 2 as a result of the *, but when we pop\r
+ back to the second ), we are at the stop_memory 1.\r
+ Thus, nothing is active. */\r
+ if (r == 0)\r
+ {\r
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;\r
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;\r
+ }\r
+ else\r
+ highest_active_reg = r;\r
+ }\r
+\r
+ /* If just failed to match something this time around with a\r
+ group that's operated on by a repetition operator, try to\r
+ force exit from the ``loop'', and restore the register\r
+ information for this group that we had before trying this\r
+ last match. */\r
+ if ((!MATCHED_SOMETHING (reg_info[*p])\r
+ || just_past_start_mem == p - 1)\r
+ && (p + 2) < pend)\r
+ {\r
+ boolean is_a_jump_n = false;\r
+\r
+ p1 = p + 2;\r
+ mcnt = 0;\r
+ switch ((re_opcode_t) *p1++)\r
+ {\r
+ case jump_n:\r
+ is_a_jump_n = true;\r
+ case pop_failure_jump:\r
+ case maybe_pop_jump:\r
+ case jump:\r
+ case dummy_failure_jump:\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);\r
+ if (is_a_jump_n)\r
+ p1 += 2;\r
+ break;\r
+\r
+ default:\r
+ /* do nothing */ ;\r
+ }\r
+ p1 += mcnt;\r
+\r
+ /* If the next operation is a jump backwards in the pattern\r
+ to an on_failure_jump right before the start_memory\r
+ corresponding to this stop_memory, exit from the loop\r
+ by forcing a failure after pushing on the stack the\r
+ on_failure_jump's jump in the pattern, and d. */\r
+ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump\r
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)\r
+ {\r
+ /* If this group ever matched anything, then restore\r
+ what its registers were before trying this last\r
+ failed match, e.g., with `(a*)*b' against `ab' for\r
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'\r
+ against `aba' for regend[3].\r
+\r
+ Also restore the registers for inner groups for,\r
+ e.g., `((a*)(b*))*' against `aba' (register 3 would\r
+ otherwise get trashed). */\r
+\r
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))\r
+ {\r
+ unsigned r;\r
+\r
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;\r
+\r
+ /* Restore this and inner groups' (if any) registers. */\r
+ for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);\r
+ r++)\r
+ {\r
+ regstart[r] = old_regstart[r];\r
+\r
+ /* xx why this test? */\r
+ if (old_regend[r] >= regstart[r])\r
+ regend[r] = old_regend[r];\r
+ }\r
+ }\r
+ p1++;\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);\r
+ PUSH_FAILURE_POINT (p1 + mcnt, d, -2);\r
+\r
+ goto fail;\r
+ }\r
+ }\r
+\r
+ /* Move past the register number and the inner group count. */\r
+ p += 2;\r
+ break;\r
+\r
+\r
+ /* \<digit> has been turned into a `duplicate' command which is\r
+ followed by the numeric value of <digit> as the register number. */\r
+ case duplicate:\r
+ {\r
+ register const char *d2, *dend2;\r
+ int regno = *p++; /* Get which register to match against. */\r
+ DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);\r
+\r
+ /* Can't back reference a group which we've never matched. */\r
+ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))\r
+ goto fail;\r
+\r
+ /* Where in input to try to start matching. */\r
+ d2 = regstart[regno];\r
+\r
+ /* Where to stop matching; if both the place to start and\r
+ the place to stop matching are in the same string, then\r
+ set to the place to stop, otherwise, for now have to use\r
+ the end of the first string. */\r
+\r
+ dend2 = ((FIRST_STRING_P (regstart[regno])\r
+ == FIRST_STRING_P (regend[regno]))\r
+ ? regend[regno] : end_match_1);\r
+ for (;;)\r
+ {\r
+ /* If necessary, advance to next segment in register\r
+ contents. */\r
+ while (d2 == dend2)\r
+ {\r
+ if (dend2 == end_match_2) break;\r
+ if (dend2 == regend[regno]) break;\r
+\r
+ /* End of string1 => advance to string2. */\r
+ d2 = string2;\r
+ dend2 = regend[regno];\r
+ }\r
+ /* At end of register contents => success */\r
+ if (d2 == dend2) break;\r
+\r
+ /* If necessary, advance to next segment in data. */\r
+ PREFETCH ();\r
+\r
+ /* How many characters left in this segment to match. */\r
+ mcnt = dend - d;\r
+\r
+ /* Want how many consecutive characters we can match in\r
+ one shot, so, if necessary, adjust the count. */\r
+ if (mcnt > dend2 - d2)\r
+ mcnt = dend2 - d2;\r
+\r
+ /* Compare that many; failure if mismatch, else move\r
+ past them. */\r
+ if (translate\r
+ ? bcmp_translate (d, d2, mcnt, translate)\r
+ : bcmp (d, d2, mcnt))\r
+ goto fail;\r
+ d += mcnt, d2 += mcnt;\r
+\r
+ /* Do this because we've matched some characters. */\r
+ SET_REGS_MATCHED ();\r
+ }\r
+ }\r
+ break;\r
+\r
+\r
+ /* begline matches the empty string at the beginning of the string\r
+ (unless `not_bol' is set in `bufp'), and, if\r
+ `newline_anchor' is set, after newlines. */\r
+ case begline:\r
+ DEBUG_PRINT1 ("EXECUTING begline.\n");\r
+\r
+ if (AT_STRINGS_BEG (d))\r
+ {\r
+ if (!bufp->not_bol) break;\r
+ }\r
+ else if (d[-1] == '\n' && bufp->newline_anchor)\r
+ {\r
+ break;\r
+ }\r
+ /* In all other cases, we fail. */\r
+ goto fail;\r
+\r
+\r
+ /* endline is the dual of begline. */\r
+ case endline:\r
+ DEBUG_PRINT1 ("EXECUTING endline.\n");\r
+\r
+ if (AT_STRINGS_END (d))\r
+ {\r
+ if (!bufp->not_eol) break;\r
+ }\r
+\r
+ /* We have to ``prefetch'' the next character. */\r
+ else if ((d == end1 ? *string2 : *d) == '\n'\r
+ && bufp->newline_anchor)\r
+ {\r
+ break;\r
+ }\r
+ goto fail;\r
+\r
+\r
+ /* Match at the very beginning of the data. */\r
+ case begbuf:\r
+ DEBUG_PRINT1 ("EXECUTING begbuf.\n");\r
+ if (AT_STRINGS_BEG (d))\r
+ break;\r
+ goto fail;\r
+\r
+\r
+ /* Match at the very end of the data. */\r
+ case endbuf:\r
+ DEBUG_PRINT1 ("EXECUTING endbuf.\n");\r
+ if (AT_STRINGS_END (d))\r
+ break;\r
+ goto fail;\r
+\r
+\r
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It\r
+ pushes NULL as the value for the string on the stack. Then\r
+ `pop_failure_point' will keep the current value for the\r
+ string, instead of restoring it. To see why, consider\r
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;\r
+ then the . fails against the \n. But the next thing we want\r
+ to do is match the \n against the \n; if we restored the\r
+ string value, we would be back at the foo.\r
+\r
+ Because this is used only in specific cases, we don't need to\r
+ check all the things that `on_failure_jump' does, to make\r
+ sure the right things get saved on the stack. Hence we don't\r
+ share its code. The only reason to push anything on the\r
+ stack at all is that otherwise we would have to change\r
+ `anychar's code to do something besides goto fail in this\r
+ case; that seems worse than this. */\r
+ case on_failure_keep_string_jump:\r
+ DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");\r
+\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);\r
+#ifdef _LIBC\r
+ DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);\r
+#else\r
+ DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);\r
+#endif\r
+\r
+ PUSH_FAILURE_POINT (p + mcnt, NULL, -2);\r
+ break;\r
+\r
+\r
+ /* Uses of on_failure_jump:\r
+\r
+ Each alternative starts with an on_failure_jump that points\r
+ to the beginning of the next alternative. Each alternative\r
+ except the last ends with a jump that in effect jumps past\r
+ the rest of the alternatives. (They really jump to the\r
+ ending jump of the following alternative, because tensioning\r
+ these jumps is a hassle.)\r
+\r
+ Repeats start with an on_failure_jump that points past both\r
+ the repetition text and either the following jump or\r
+ pop_failure_jump back to this on_failure_jump. */\r
+ case on_failure_jump:\r
+ on_failure:\r
+ DEBUG_PRINT1 ("EXECUTING on_failure_jump");\r
+\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);\r
+#ifdef _LIBC\r
+ DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);\r
+#else\r
+ DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);\r
+#endif\r
+\r
+ /* If this on_failure_jump comes right before a group (i.e.,\r
+ the original * applied to a group), save the information\r
+ for that group and all inner ones, so that if we fail back\r
+ to this point, the group's information will be correct.\r
+ For example, in \(a*\)*\1, we need the preceding group,\r
+ and in \(zz\(a*\)b*\)\2, we need the inner group. */\r
+\r
+ /* We can't use `p' to check ahead because we push\r
+ a failure point to `p + mcnt' after we do this. */\r
+ p1 = p;\r
+\r
+ /* We need to skip no_op's before we look for the\r
+ start_memory in case this on_failure_jump is happening as\r
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1\r
+ against aba. */\r
+ while (p1 < pend && (re_opcode_t) *p1 == no_op)\r
+ p1++;\r
+\r
+ if (p1 < pend && (re_opcode_t) *p1 == start_memory)\r
+ {\r
+ /* We have a new highest active register now. This will\r
+ get reset at the start_memory we are about to get to,\r
+ but we will have saved all the registers relevant to\r
+ this repetition op, as described above. */\r
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);\r
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)\r
+ lowest_active_reg = *(p1 + 1);\r
+ }\r
+\r
+ DEBUG_PRINT1 (":\n");\r
+ PUSH_FAILURE_POINT (p + mcnt, d, -2);\r
+ break;\r
+\r
+\r
+ /* A smart repeat ends with `maybe_pop_jump'.\r
+ We change it to either `pop_failure_jump' or `jump'. */\r
+ case maybe_pop_jump:\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);\r
+ DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);\r
+ {\r
+ register unsigned char *p2 = p;\r
+\r
+ /* Compare the beginning of the repeat with what in the\r
+ pattern follows its end. If we can establish that there\r
+ is nothing that they would both match, i.e., that we\r
+ would have to backtrack because of (as in, e.g., `a*a')\r
+ then we can change to pop_failure_jump, because we'll\r
+ never have to backtrack.\r
+\r
+ This is not true in the case of alternatives: in\r
+ `(a|ab)*' we do need to backtrack to the `ab' alternative\r
+ (e.g., if the string was `ab'). But instead of trying to\r
+ detect that here, the alternative has put on a dummy\r
+ failure point which is what we will end up popping. */\r
+\r
+ /* Skip over open/close-group commands.\r
+ If what follows this loop is a ...+ construct,\r
+ look at what begins its body, since we will have to\r
+ match at least one of that. */\r
+ while (1)\r
+ {\r
+ if (p2 + 2 < pend\r
+ && ((re_opcode_t) *p2 == stop_memory\r
+ || (re_opcode_t) *p2 == start_memory))\r
+ p2 += 3;\r
+ else if (p2 + 6 < pend\r
+ && (re_opcode_t) *p2 == dummy_failure_jump)\r
+ p2 += 6;\r
+ else\r
+ break;\r
+ }\r
+\r
+ p1 = p + mcnt;\r
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding\r
+ to the `maybe_pop_jump' of this case. Examine what\r
+ follows. */\r
+\r
+ /* If we're at the end of the pattern, we can change. */\r
+ if (p2 == pend)\r
+ {\r
+ /* Consider what happens when matching ":\(.*\)"\r
+ against ":/". I don't really understand this code\r
+ yet. */\r
+ p[-3] = (unsigned char) pop_failure_jump;\r
+ DEBUG_PRINT1\r
+ (" End of pattern: change to `pop_failure_jump'.\n");\r
+ }\r
+\r
+ else if ((re_opcode_t) *p2 == exactn\r
+ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))\r
+ {\r
+ register unsigned char c\r
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];\r
+\r
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c)\r
+ {\r
+ p[-3] = (unsigned char) pop_failure_jump;\r
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",\r
+ c, p1[5]);\r
+ }\r
+\r
+ else if ((re_opcode_t) p1[3] == charset\r
+ || (re_opcode_t) p1[3] == charset_not)\r
+ {\r
+ int not = (re_opcode_t) p1[3] == charset_not;\r
+\r
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)\r
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))\r
+ not = !not;\r
+\r
+ /* `not' is equal to 1 if c would match, which means\r
+ that we can't change to pop_failure_jump. */\r
+ if (!not)\r
+ {\r
+ p[-3] = (unsigned char) pop_failure_jump;\r
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");\r
+ }\r
+ }\r
+ }\r
+ else if ((re_opcode_t) *p2 == charset)\r
+ {\r
+#ifdef DEBUG\r
+ register unsigned char c\r
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];\r
+#endif\r
+\r
+#if 0\r
+ if ((re_opcode_t) p1[3] == exactn\r
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]\r
+ && (p2[2 + p1[5] / BYTEWIDTH]\r
+ & (1 << (p1[5] % BYTEWIDTH)))))\r
+#else\r
+ if ((re_opcode_t) p1[3] == exactn\r
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]\r
+ && (p2[2 + p1[4] / BYTEWIDTH]\r
+ & (1 << (p1[4] % BYTEWIDTH)))))\r
+#endif\r
+ {\r
+ p[-3] = (unsigned char) pop_failure_jump;\r
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",\r
+ c, p1[5]);\r
+ }\r
+\r
+ else if ((re_opcode_t) p1[3] == charset_not)\r
+ {\r
+ int idx;\r
+ /* We win if the charset_not inside the loop\r
+ lists every character listed in the charset after. */\r
+ for (idx = 0; idx < (int) p2[1]; idx++)\r
+ if (! (p2[2 + idx] == 0\r
+ || (idx < (int) p1[4]\r
+ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))\r
+ break;\r
+\r
+ if (idx == p2[1])\r
+ {\r
+ p[-3] = (unsigned char) pop_failure_jump;\r
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");\r
+ }\r
+ }\r
+ else if ((re_opcode_t) p1[3] == charset)\r
+ {\r
+ int idx;\r
+ /* We win if the charset inside the loop\r
+ has no overlap with the one after the loop. */\r
+ for (idx = 0;\r
+ idx < (int) p2[1] && idx < (int) p1[4];\r
+ idx++)\r
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)\r
+ break;\r
+\r
+ if (idx == p2[1] || idx == p1[4])\r
+ {\r
+ p[-3] = (unsigned char) pop_failure_jump;\r
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");\r
+ }\r
+ }\r
+ }\r
+ }\r
+ p -= 2; /* Point at relative address again. */\r
+ if ((re_opcode_t) p[-1] != pop_failure_jump)\r
+ {\r
+ p[-1] = (unsigned char) jump;\r
+ DEBUG_PRINT1 (" Match => jump.\n");\r
+ goto unconditional_jump;\r
+ }\r
+ /* Note fall through. */\r
+\r
+\r
+ /* The end of a simple repeat has a pop_failure_jump back to\r
+ its matching on_failure_jump, where the latter will push a\r
+ failure point. The pop_failure_jump takes off failure\r
+ points put on by this pop_failure_jump's matching\r
+ on_failure_jump; we got through the pattern to here from the\r
+ matching on_failure_jump, so didn't fail. */\r
+ case pop_failure_jump:\r
+ {\r
+ /* We need to pass separate storage for the lowest and\r
+ highest registers, even though we don't care about the\r
+ actual values. Otherwise, we will restore only one\r
+ register from the stack, since lowest will == highest in\r
+ `pop_failure_point'. */\r
+ active_reg_t dummy_low_reg, dummy_high_reg;\r
+ unsigned char *pdummy;\r
+ const char *sdummy;\r
+\r
+ DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");\r
+ POP_FAILURE_POINT (sdummy, pdummy,\r
+ dummy_low_reg, dummy_high_reg,\r
+ reg_dummy, reg_dummy, reg_info_dummy);\r
+ }\r
+ /* Note fall through. */\r
+\r
+ unconditional_jump:\r
+#ifdef _LIBC\r
+ DEBUG_PRINT2 ("\n%p: ", p);\r
+#else\r
+ DEBUG_PRINT2 ("\n0x%x: ", p);\r
+#endif\r
+ /* Note fall through. */\r
+\r
+ /* Unconditionally jump (without popping any failure points). */\r
+ case jump:\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */\r
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);\r
+ p += mcnt; /* Do the jump. */\r
+#ifdef _LIBC\r
+ DEBUG_PRINT2 ("(to %p).\n", p);\r
+#else\r
+ DEBUG_PRINT2 ("(to 0x%x).\n", p);\r
+#endif\r
+ break;\r
+\r
+\r
+ /* We need this opcode so we can detect where alternatives end\r
+ in `group_match_null_string_p' et al. */\r
+ case jump_past_alt:\r
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");\r
+ goto unconditional_jump;\r
+\r
+\r
+ /* Normally, the on_failure_jump pushes a failure point, which\r
+ then gets popped at pop_failure_jump. We will end up at\r
+ pop_failure_jump, also, and with a pattern of, say, `a+', we\r
+ are skipping over the on_failure_jump, so we have to push\r
+ something meaningless for pop_failure_jump to pop. */\r
+ case dummy_failure_jump:\r
+ DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");\r
+ /* It doesn't matter what we push for the string here. What\r
+ the code at `fail' tests is the value for the pattern. */\r
+ PUSH_FAILURE_POINT (0, 0, -2);\r
+ goto unconditional_jump;\r
+\r
+\r
+ /* At the end of an alternative, we need to push a dummy failure\r
+ point in case we are followed by a `pop_failure_jump', because\r
+ we don't want the failure point for the alternative to be\r
+ popped. For example, matching `(a|ab)*' against `aab'\r
+ requires that we match the `ab' alternative. */\r
+ case push_dummy_failure:\r
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");\r
+ /* See comments just above at `dummy_failure_jump' about the\r
+ two zeroes. */\r
+ PUSH_FAILURE_POINT (0, 0, -2);\r
+ break;\r
+\r
+ /* Have to succeed matching what follows at least n times.\r
+ After that, handle like `on_failure_jump'. */\r
+ case succeed_n:\r
+ EXTRACT_NUMBER (mcnt, p + 2);\r
+ DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);\r
+\r
+ assert (mcnt >= 0);\r
+ /* Originally, this is how many times we HAVE to succeed. */\r
+ if (mcnt > 0)\r
+ {\r
+ mcnt--;\r
+ p += 2;\r
+ STORE_NUMBER_AND_INCR (p, mcnt);\r
+#ifdef _LIBC\r
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt);\r
+#else\r
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt);\r
+#endif\r
+ }\r
+ else if (mcnt == 0)\r
+ {\r
+#ifdef _LIBC\r
+ DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2);\r
+#else\r
+ DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);\r
+#endif\r
+ p[2] = (unsigned char) no_op;\r
+ p[3] = (unsigned char) no_op;\r
+ goto on_failure;\r
+ }\r
+ break;\r
+\r
+ case jump_n:\r
+ EXTRACT_NUMBER (mcnt, p + 2);\r
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);\r
+\r
+ /* Originally, this is how many times we CAN jump. */\r
+ if (mcnt)\r
+ {\r
+ mcnt--;\r
+ STORE_NUMBER (p + 2, mcnt);\r
+#ifdef _LIBC\r
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt);\r
+#else\r
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt);\r
+#endif\r
+ goto unconditional_jump;\r
+ }\r
+ /* If don't have to jump any more, skip over the rest of command. */\r
+ else\r
+ p += 4;\r
+ break;\r
+\r
+ case set_number_at:\r
+ {\r
+ DEBUG_PRINT1 ("EXECUTING set_number_at.\n");\r
+\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);\r
+ p1 = p + mcnt;\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);\r
+#ifdef _LIBC\r
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);\r
+#else\r
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);\r
+#endif\r
+ STORE_NUMBER (p1, mcnt);\r
+ break;\r
+ }\r
+\r
+#if 0\r
+ /* The DEC Alpha C compiler 3.x generates incorrect code for the\r
+ test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of\r
+ AT_WORD_BOUNDARY, so this code is disabled. Expanding the\r
+ macro and introducing temporary variables works around the bug. */\r
+\r
+ case wordbound:\r
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");\r
+ if (AT_WORD_BOUNDARY (d))\r
+ break;\r
+ goto fail;\r
+\r
+ case notwordbound:\r
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");\r
+ if (AT_WORD_BOUNDARY (d))\r
+ goto fail;\r
+ break;\r
+#else\r
+ case wordbound:\r
+ {\r
+ boolean prevchar, thischar;\r
+\r
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");\r
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))\r
+ break;\r
+\r
+ prevchar = WORDCHAR_P (d - 1);\r
+ thischar = WORDCHAR_P (d);\r
+ if (prevchar != thischar)\r
+ break;\r
+ goto fail;\r
+ }\r
+\r
+ case notwordbound:\r
+ {\r
+ boolean prevchar, thischar;\r
+\r
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");\r
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))\r
+ goto fail;\r
+\r
+ prevchar = WORDCHAR_P (d - 1);\r
+ thischar = WORDCHAR_P (d);\r
+ if (prevchar != thischar)\r
+ goto fail;\r
+ break;\r
+ }\r
+#endif\r
+\r
+ case wordbeg:\r
+ DEBUG_PRINT1 ("EXECUTING wordbeg.\n");\r
+ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))\r
+ break;\r
+ goto fail;\r
+\r
+ case wordend:\r
+ DEBUG_PRINT1 ("EXECUTING wordend.\n");\r
+ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)\r
+ && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))\r
+ break;\r
+ goto fail;\r
+\r
+#ifdef emacs\r
+ case before_dot:\r
+ DEBUG_PRINT1 ("EXECUTING before_dot.\n");\r
+ if (PTR_CHAR_POS ((unsigned char *) d) >= point)\r
+ goto fail;\r
+ break;\r
+\r
+ case at_dot:\r
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");\r
+ if (PTR_CHAR_POS ((unsigned char *) d) != point)\r
+ goto fail;\r
+ break;\r
+\r
+ case after_dot:\r
+ DEBUG_PRINT1 ("EXECUTING after_dot.\n");\r
+ if (PTR_CHAR_POS ((unsigned char *) d) <= point)\r
+ goto fail;\r
+ break;\r
+\r
+ case syntaxspec:\r
+ DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);\r
+ mcnt = *p++;\r
+ goto matchsyntax;\r
+\r
+ case wordchar:\r
+ DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");\r
+ mcnt = (int) Sword;\r
+ matchsyntax:\r
+ PREFETCH ();\r
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */\r
+ d++;\r
+ if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)\r
+ goto fail;\r
+ SET_REGS_MATCHED ();\r
+ break;\r
+\r
+ case notsyntaxspec:\r
+ DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);\r
+ mcnt = *p++;\r
+ goto matchnotsyntax;\r
+\r
+ case notwordchar:\r
+ DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");\r
+ mcnt = (int) Sword;\r
+ matchnotsyntax:\r
+ PREFETCH ();\r
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */\r
+ d++;\r
+ if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)\r
+ goto fail;\r
+ SET_REGS_MATCHED ();\r
+ break;\r
+\r
+#else /* not emacs */\r
+ case wordchar:\r
+ DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");\r
+ PREFETCH ();\r
+ if (!WORDCHAR_P (d))\r
+ goto fail;\r
+ SET_REGS_MATCHED ();\r
+ d++;\r
+ break;\r
+\r
+ case notwordchar:\r
+ DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");\r
+ PREFETCH ();\r
+ if (WORDCHAR_P (d))\r
+ goto fail;\r
+ SET_REGS_MATCHED ();\r
+ d++;\r
+ break;\r
+#endif /* not emacs */\r
+\r
+ default:\r
+ abort ();\r
+ }\r
+ continue; /* Successfully executed one pattern command; keep going. */\r
+\r
+\r
+ /* We goto here if a matching operation fails. */\r
+ fail:\r
+ if (!FAIL_STACK_EMPTY ())\r
+ { /* A restart point is known. Restore to that state. */\r
+ DEBUG_PRINT1 ("\nFAIL:\n");\r
+ POP_FAILURE_POINT (d, p,\r
+ lowest_active_reg, highest_active_reg,\r
+ regstart, regend, reg_info);\r
+\r
+ /* If this failure point is a dummy, try the next one. */\r
+ if (!p)\r
+ goto fail;\r
+\r
+ /* If we failed to the end of the pattern, don't examine *p. */\r
+ assert (p <= pend);\r
+ if (p < pend)\r
+ {\r
+ boolean is_a_jump_n = false;\r
+\r
+ /* If failed to a backwards jump that's part of a repetition\r
+ loop, need to pop this failure point and use the next one. */\r
+ switch ((re_opcode_t) *p)\r
+ {\r
+ case jump_n:\r
+ is_a_jump_n = true;\r
+ case maybe_pop_jump:\r
+ case pop_failure_jump:\r
+ case jump:\r
+ p1 = p + 1;\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);\r
+ p1 += mcnt;\r
+\r
+ if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)\r
+ || (!is_a_jump_n\r
+ && (re_opcode_t) *p1 == on_failure_jump))\r
+ goto fail;\r
+ break;\r
+ default:\r
+ /* do nothing */ ;\r
+ }\r
+ }\r
+\r
+ if (d >= string1 && d <= end1)\r
+ dend = end_match_1;\r
+ }\r
+ else\r
+ break; /* Matching at this starting point really fails. */\r
+ } /* for (;;) */\r
+\r
+ if (best_regs_set)\r
+ goto restore_best_regs;\r
+\r
+ FREE_VARIABLES ();\r
+\r
+ return -1; /* Failure to match. */\r
+} /* re_match_2 */\r
+\f\r
+/* Subroutine definitions for re_match_2. */\r
+\r
+\r
+/* Decide whether the group whose start_memory operands begin at *P can\r
+ match the empty string.\r
+\r
+ On entry *P points at the register number that follows a start_memory\r
+ opcode. END bounds the outer scan loop. REG_INFO is handed on to\r
+ alt_match_null_string_p and common_op_match_null_string_p.\r
+\r
+ Returns true only when the scan reaches a stop_memory before any\r
+ alternative or opcode has been judged unable to match the empty\r
+ string; in that case *P is set to the byte after that stop_memory's\r
+ register number. Returns false if an alternative or opcode inside the\r
+ group cannot match the empty string, or if END is reached without\r
+ finding a stop_memory; on every false return *P is left unmodified.\r
+\r
+ We don't handle duplicates properly (yet). */\r
+\r
+static boolean\r
+group_match_null_string_p(unsigned char **p,\r
+ unsigned char *end,\r
+ register_info_type *reg_info)\r
+{\r
+ int mcnt;\r
+ /* Point to after the args to the start_memory: *P is at the register\r
+ number, so adding two skips it and the inner-group count. */\r
+ unsigned char *p1 = *p + 2;\r
+\r
+ while (p1 < end)\r
+ {\r
+ /* Skip over opcodes that can match nothing, and return true or\r
+ false, as appropriate, when we get to one that can't, or to the\r
+ matching stop_memory. */\r
+\r
+ switch ((re_opcode_t) *p1)\r
+ {\r
+ /* Could be either a loop or a series of alternatives. Step over\r
+ the opcode and read its jump offset into mcnt; p1 is left just\r
+ past the offset. */\r
+ case on_failure_jump:\r
+ p1++;\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);\r
+\r
+ /* If the next operation is not a jump backwards in the\r
+ pattern. (A negative offset is not examined any further: the\r
+ scan just carries on with the opcode after this one.) */\r
+\r
+ if (mcnt >= 0)\r
+ {\r
+ /* Go through the on_failure_jumps of the alternatives,\r
+ seeing if any of the alternatives cannot match nothing.\r
+ The last alternative starts with only a jump,\r
+ whereas the rest start with on_failure_jump and end\r
+ with a jump, e.g., here is the pattern for `a|b|c':\r
+\r
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6\r
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3\r
+ /exactn/1/c\r
+\r
+ So, we have to first go through the first (n-1)\r
+ alternatives and then deal with the last one separately. */\r
+\r
+\r
+ /* Deal with the first (n-1) alternatives, which start\r
+ with an on_failure_jump (see above) that jumps to right\r
+ past a jump_past_alt. p1[mcnt-3] is the byte three before\r
+ the jump target, which is where that jump_past_alt opcode\r
+ would sit. */\r
+\r
+ while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)\r
+ {\r
+ /* `mcnt' holds how many bytes long the alternative\r
+ is, including the ending `jump_past_alt' and\r
+ its number. The range handed to\r
+ alt_match_null_string_p below leaves out those\r
+ last three bytes. */\r
+\r
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,\r
+ reg_info))\r
+ return false;\r
+\r
+ /* Move to right after this alternative, including the\r
+ jump_past_alt. */\r
+ p1 += mcnt;\r
+\r
+ /* Break if it's the beginning of an n-th alternative\r
+ that doesn't begin with an on_failure_jump. */\r
+ if ((re_opcode_t) *p1 != on_failure_jump)\r
+ break;\r
+\r
+ /* Still have to check that it's not an n-th\r
+ alternative that starts with an on_failure_jump. */\r
+ p1++;\r
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);\r
+ if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)\r
+ {\r
+ /* Get to the beginning of the n-th alternative, i.e.\r
+ back onto the on_failure_jump opcode just read. */\r
+ p1 -= 3;\r
+ break;\r
+ }\r
+ }\r
+\r
+ /* Deal with the last alternative: go back and get number\r
+ of the `jump_past_alt' just before it. `mcnt' contains\r
+ the length of the alternative.\r
+ NOTE(review): when the loop above never ran, the two bytes\r
+ before p1 are the offset of the on_failure_jump decoded at\r
+ the top of this case, so mcnt is simply read again --\r
+ confirm that this is the intended treatment of a plain\r
+ loop. */\r
+ EXTRACT_NUMBER (mcnt, p1 - 2);\r
+\r
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))\r
+ return false;\r
+\r
+ p1 += mcnt; /* Get past the n-th alternative. */\r
+ } /* if mcnt >= 0 */\r
+ break;\r
+\r
+\r
+ case stop_memory: /* p1 is still on the opcode itself here. */\r
+ assert (p1[1] == **p); /* Its register number must be the one *P\r
+ pointed at on entry. */\r
+ *p = p1 + 2; /* Just past the register number.\r
+ NOTE(review): re_match_2 treats stop_memory as\r
+ having a second operand (the inner-group count),\r
+ which is the byte *P now points at -- confirm\r
+ that callers which keep scanning from *P expect\r
+ this. */\r
+ return true;\r
+\r
+\r
+ default: /* The helper advances p1 itself when it returns true. */\r
+ if (!common_op_match_null_string_p (&p1, end, reg_info))\r
+ return false;\r
+ }\r
+ } /* while p1 < end */\r
+\r
+ return false; /* Reached END without finding a stop_memory. */\r
+} /* group_match_null_string_p */\r
+\r
+\r
+/* Report whether one alternative can match the empty string.\r
+\r
+   P is the first byte of the alternative and END is one byte past its\r
+   last.  Unlike group_match_null_string_p, no attempt is made to walk a\r
+   chain of alternatives.  The alternative may contain groups: they, like\r
+   every opcode other than on_failure_jump, are handed to\r
+   common_op_match_null_string_p together with REG_INFO.\r
+\r
+   Returns false as soon as that helper rejects an opcode, and true once\r
+   the scan position is no longer below END.  */\r
+\r
+static boolean\r
+alt_match_null_string_p(unsigned char *p,\r
+                        unsigned char *end,\r
+                        register_info_type *reg_info)\r
+{\r
+  unsigned char *cursor;\r
+  int jump_len;\r
+\r
+  for (cursor = p; cursor < end; )\r
+    {\r
+      if ((re_opcode_t) *cursor == on_failure_jump)\r
+        {\r
+          /* It's a loop: step over the opcode and its offset, then take\r
+             the jump so that the loop body is never examined.  */\r
+          cursor++;\r
+          EXTRACT_NUMBER_AND_INCR (jump_len, cursor);\r
+          cursor += jump_len;\r
+          continue;\r
+        }\r
+\r
+      /* Everything else is judged, and stepped over, by the shared\r
+         helper.  */\r
+      if (!common_op_match_null_string_p (&cursor, end, reg_info))\r
+        return false;\r
+    }\r
+\r
+  return true;\r
+} /* alt_match_null_string_p */\r
+\r
+\r
+/* Handle one opcode on behalf of group_match_null_string_p and\r
+   alt_match_null_string_p.\r
+\r
+   *P points at the opcode to examine.  END is forwarded to\r
+   group_match_null_string_p when a nested group is met.  REG_INFO holds\r
+   the per-register match-null flags that are read (duplicate) or\r
+   recorded (start_memory) here.\r
+\r
+   Returns true if the opcode can match the empty string, after storing\r
+   the updated scan position in *P.  Returns false otherwise, leaving *P\r
+   untouched.  */\r
+\r
+static boolean\r
+common_op_match_null_string_p(unsigned char **p,\r
+                              unsigned char *end,\r
+                              register_info_type *reg_info)\r
+{\r
+  unsigned char *scan = *p;\r
+  int number;\r
+  int group_reg;\r
+  boolean group_ok;\r
+\r
+  /* SCAN is moved past the opcode byte before any case body runs.  */\r
+  switch ((re_opcode_t) *scan++)\r
+    {\r
+    /* Opcodes accepted as they stand; nothing further is skipped.  */\r
+    case no_op:\r
+    case begline:\r
+    case endline:\r
+    case begbuf:\r
+    case endbuf:\r
+    case wordbeg:\r
+    case wordend:\r
+    case wordbound:\r
+    case notwordbound:\r
+#ifdef emacs\r
+    case before_dot:\r
+    case at_dot:\r
+    case after_dot:\r
+#endif\r
+      break;\r
+\r
+    case start_memory:\r
+      /* SCAN is on the register number, which is exactly what\r
+         group_match_null_string_p expects to be given.  */\r
+      group_reg = *scan;\r
+      assert (group_reg > 0 && group_reg <= MAX_REGNUM);\r
+      group_ok = group_match_null_string_p (&scan, end, reg_info);\r
+\r
+      /* Record the verdict right away, in case we're checking a group\r
+         which contains a group and a back reference to it.  A value\r
+         that is already set is never overwritten.  */\r
+      if (REG_MATCH_NULL_STRING_P (reg_info[group_reg]) == MATCH_NULL_UNSET_VALUE)\r
+        REG_MATCH_NULL_STRING_P (reg_info[group_reg]) = group_ok;\r
+\r
+      if (!group_ok)\r
+        return false;\r
+      break;\r
+\r
+    /* If this is an optimized succeed_n for zero times, make the jump.\r
+       A backward jump is rejected.  */\r
+    case jump:\r
+      EXTRACT_NUMBER_AND_INCR (number, scan);\r
+      if (number < 0)\r
+        return false;\r
+      scan += number;\r
+      break;\r
+\r
+    case succeed_n:\r
+      /* Skip the jump offset to reach the number of times to succeed.  */\r
+      scan += 2;\r
+      EXTRACT_NUMBER_AND_INCR (number, scan);\r
+      if (number != 0)\r
+        return false;\r
+\r
+      /* Zero required matches: go back to the jump offset and follow it.  */\r
+      scan -= 4;\r
+      EXTRACT_NUMBER_AND_INCR (number, scan);\r
+      scan += number;\r
+      break;\r
+\r
+    case duplicate:\r
+      /* A back reference can match nothing only if its group can.\r
+         NOTE(review): SCAN is still on the register-number operand\r
+         here, so *P ends up on that operand rather than past it --\r
+         confirm whether the callers' scan loops rely on this.  */\r
+      if (!REG_MATCH_NULL_STRING_P (reg_info[*scan]))\r
+        return false;\r
+      break;\r
+\r
+    case set_number_at:\r
+      /* NOTE(review): this increment has no visible effect, because the\r
+         path below returns false without storing SCAN.  */\r
+      scan += 4;\r
+      /* Fall through.  */\r
+\r
+    default:\r
+      /* All other opcodes mean we cannot match the empty string.  */\r
+      return false;\r
+    }\r
+\r
+  *p = scan;\r
+  return true;\r
+} /* common_op_match_null_string_p */\r
+\r
+\r
+/* Compare the first LEN bytes of S1 and S2 after mapping every byte\r
+   through TRANSLATE.  Returns 0 when all translated pairs are equal\r
+   and 1 as soon as a pair differs.  */\r
+\r
+static int\r
+bcmp_translate(const char *s1,\r
+               const char *s2,\r
+               register int len,\r
+               RE_TRANSLATE_TYPE translate)\r
+{\r
+  /* Index the table with unsigned bytes so that high characters do\r
+     not turn into negative subscripts.  */\r
+  const unsigned char *lhs = (const unsigned char *) s1;\r
+  const unsigned char *rhs = (const unsigned char *) s2;\r
+\r
+  for (; len; len--, lhs++, rhs++)\r
+    {\r
+      if (translate[*lhs] != translate[*rhs])\r
+        return 1;\r
+    }\r
+\r
+  return 0;\r
+}\r
+\f\r
+/* Entry points for GNU code. */\r
+\r
+/* re_compile_pattern is the GNU regular expression compiler: it\r
+ compiles PATTERN (of length SIZE) and puts the result in BUFP.\r
+ Returns 0 if the pattern was valid, otherwise an error string.\r
+\r
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields\r
+ are set in BUFP on entry.\r
+\r
+ We call regex_compile to do the actual compilation. */\r
+\r
+const char *\r
+re_compile_pattern(const char *pattern,\r
+ size_t length,\r
+ struct re_pattern_buffer *bufp)\r
+{\r
+ reg_errcode_t ret;\r
+\r
+ /* GNU code is written to assume at least RE_NREGS registers will be set\r
+ (and at least one extra will be -1). */\r
+ bufp->regs_allocated = REGS_UNALLOCATED;\r
+\r
+ /* And GNU code determines whether or not to get register information\r
+ by passing null for the REGS argument to re_match, etc., not by\r
+ setting no_sub. */\r
+ bufp->no_sub = 0;\r
+\r
+ /* Match anchors at newline. */\r
+ bufp->newline_anchor = 1;\r
+\r
+ ret = regex_compile (pattern, length, re_syntax_options, bufp);\r
+\r
+ if (!ret)\r
+ return NULL;\r
+ return gettext (re_error_msgid[(int) ret]);\r
+}\r
+\f\r
+/* Entry points compatible with 4.2 BSD regex library. We don't define\r
+ them unless specifically requested. */\r
+\r
+#if defined (_REGEX_RE_COMP) || defined (_LIBC)\r
+\r
+/* BSD has one and only one pattern buffer. */\r
+static struct re_pattern_buffer re_comp_buf;\r
+\r
+char *\r
+#ifdef _LIBC\r
+/* Make these definitions weak in libc, so POSIX programs can redefine\r
+ these names if they don't use our functions, and still use\r
+ regcomp/regexec below without link errors. */\r
+weak_function\r
+#endif\r
+re_comp (s)\r
+ const char *s;\r
+{\r
+ reg_errcode_t ret;\r
+\r
+ if (!s)\r
+ {\r
+ if (!re_comp_buf.buffer)\r
+ return gettext ("No previous regular expression");\r
+ return 0;\r
+ }\r
+\r
+ if (!re_comp_buf.buffer)\r
+ {\r
+ re_comp_buf.buffer = (unsigned char *) malloc (200);\r
+ if (re_comp_buf.buffer == NULL)\r
+ return gettext (re_error_msgid[(int) REG_ESPACE]);\r
+ re_comp_buf.allocated = 200;\r
+\r
+ re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);\r
+ if (re_comp_buf.fastmap == NULL)\r
+ return gettext (re_error_msgid[(int) REG_ESPACE]);\r
+ }\r
+\r
+ /* Since `re_exec' always passes NULL for the `regs' argument, we\r
+ don't need to initialize the pattern buffer fields which affect it. */\r
+\r
+ /* Match anchors at newlines. */\r
+ re_comp_buf.newline_anchor = 1;\r
+\r
+ ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);\r
+\r
+ if (!ret)\r
+ return NULL;\r
+\r
+ /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */\r
+ return (char *) gettext (re_error_msgid[(int) ret]);\r
+}\r
+\r
+\r
+/* BSD-compatible match: search S with the pattern held in\r
+   re_comp_buf.  Returns 1 when re_search yields a non-negative\r
+   result and 0 otherwise.  */\r
+int\r
+#ifdef _LIBC\r
+weak_function\r
+#endif\r
+re_exec (const char *s)\r
+{\r
+  /* The length is handed to re_search as an int; make the narrowing\r
+     from size_t explicit.  */\r
+  const int len = (int) strlen (s);\r
+\r
+  return 0 <= re_search (&re_comp_buf, s, len, 0, len,\r
+                         (struct re_registers *) 0);\r
+}\r
+\r
+#endif /* _REGEX_RE_COMP */\r
+\f\r
+/* POSIX.2 functions. Don't define these for Emacs. */\r
+\r
+#ifndef emacs\r
+\r
+/* regcomp takes a regular expression as a string and compiles it.\r
+\r
+ PREG is a regex_t *. We do not expect any fields to be initialized,\r
+ since POSIX says we shouldn't. Thus, we set\r
+\r
+ `buffer' to the compiled pattern;\r
+ `used' to the length of the compiled pattern;\r
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the\r
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to\r
+ RE_SYNTAX_POSIX_BASIC;\r
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;\r
+ `fastmap' and `fastmap_accurate' to zero;\r
+ `re_nsub' to the number of subexpressions in PATTERN.\r
+\r
+ PATTERN is the address of the pattern string.\r
+\r
+ CFLAGS is a series of bits which affect compilation.\r
+\r
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we\r
+ use POSIX basic syntax.\r
+\r
+ If REG_NEWLINE is set, then . and [^...] don't match newline.\r
+ Also, regexec will try a match beginning after every newline.\r
+\r
+ If REG_ICASE is set, then we consider upper- and lowercase\r
+ versions of letters to be equivalent when matching.\r
+\r
+ If REG_NOSUB is set, then when PREG is passed to regexec, that\r
+ routine will report only success or failure, and nothing about the\r
+ registers.\r
+\r
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for\r
+ the return codes and their meanings.) */\r
+\r
+int\r
+regcomp(regex_t *preg,\r
+        const char *pattern,\r
+        int cflags)\r
+{\r
+  reg_errcode_t ret;\r
+  reg_syntax_t syntax\r
+    = (cflags & REG_EXTENDED) ?\r
+      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;\r
+\r
+#ifdef DEBUG\r
+  debug=0;\r
+  DEBUG_PRINT1("EXECUTING regcomp");\r
+  debug=0;\r
+#endif\r
+  /* regex_compile will allocate the space for the compiled pattern.  */\r
+  preg->buffer = 0;\r
+  preg->allocated = 0;\r
+  preg->used = 0;\r
+\r
+  /* Don't bother to use a fastmap when searching.  This simplifies the\r
+     REG_NEWLINE case: if we used a fastmap, we'd have to put all the\r
+     characters after newlines into the fastmap.  This way, we just try\r
+     every character.  */\r
+  preg->fastmap = 0;\r
+\r
+  if (cflags & REG_ICASE)\r
+    {\r
+      unsigned i;\r
+\r
+      preg->translate\r
+        = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE\r
+                                      * sizeof (*(RE_TRANSLATE_TYPE)0));\r
+      if (preg->translate == NULL)\r
+        return (int) REG_ESPACE;\r
+\r
+      /* Map uppercase characters to corresponding lowercase ones.  */\r
+      for (i = 0; i < CHAR_SET_SIZE; i++)\r
+        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;\r
+    }\r
+  else\r
+    preg->translate = NULL;\r
+\r
+  /* If REG_NEWLINE is set, newlines are treated differently.  */\r
+  if (cflags & REG_NEWLINE)\r
+    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */\r
+      syntax &= ~RE_DOT_NEWLINE;\r
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;\r
+      /* It also changes the matching behavior.  */\r
+      preg->newline_anchor = 1;\r
+    }\r
+  else\r
+    preg->newline_anchor = 0;\r
+\r
+  preg->no_sub = !!(cflags & REG_NOSUB);\r
+\r
+  /* POSIX says a null character in the pattern terminates it, so we\r
+     can use strlen here in compiling the pattern.  */\r
+  ret = regex_compile (pattern, strlen (pattern), syntax, preg);\r
+\r
+  /* POSIX doesn't distinguish between an unmatched open-group and an\r
+     unmatched close-group: both are REG_EPAREN.  */\r
+  if (ret == REG_ERPAREN)\r
+    ret = REG_EPAREN;\r
+\r
+  if (ret != REG_NOERROR)\r
+    {\r
+      /* Compilation failed.  Callers are not expected to call regfree\r
+         after a failed regcomp, so release what was allocated here\r
+         (the case-folding table and any partially built pattern)\r
+         instead of leaking it.  The pointers are reset so that a\r
+         regfree call made anyway remains harmless.  */\r
+      free (preg->translate);\r
+      preg->translate = NULL;\r
+\r
+      free (preg->buffer);\r
+      preg->buffer = NULL;\r
+      preg->allocated = 0;\r
+      preg->used = 0;\r
+    }\r
+\r
+  return (int) ret;\r
+}\r
+\r
+\r
+/* regexec searches STRING for the pattern compiled into PREG.\r
+\r
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to\r
+   `regcomp', PMATCH is ignored.  Otherwise PMATCH is assumed to have\r
+   at least NMATCH elements; on a successful match they are set to the\r
+   offsets of the corresponding matched substrings.\r
+\r
+   EFLAGS specifies `execution flags' which affect matching: if\r
+   REG_NOTBOL is set, then ^ does not match at the beginning of the\r
+   string; if REG_NOTEOL is set, then $ does not match at the end.\r
+\r
+   Returns REG_NOERROR (zero) if a match is found and REG_NOMATCH if\r
+   not.  REG_NOMATCH is also returned when the temporary register\r
+   arrays cannot be allocated.  */\r
+\r
+int\r
+regexec(const regex_t *preg,\r
+        const char *string,\r
+        size_t nmatch,\r
+        regmatch_t pmatch[],\r
+        int eflags)\r
+{\r
+  int ret;\r
+  struct re_registers regs;\r
+  regex_t private_preg;\r
+  int len = (int) strlen (string);\r
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;\r
+\r
+  /* PREG is const, so the per-call flags are set on a private copy.  */\r
+  private_preg = *preg;\r
+\r
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);\r
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);\r
+\r
+  /* The user has told us exactly how many registers to return\r
+     information about, via `nmatch'.  We have to pass that on to the\r
+     matching routines.  */\r
+  private_preg.regs_allocated = REGS_FIXED;\r
+\r
+  if (want_reg_info)\r
+    {\r
+      regs.num_regs = nmatch;\r
+      regs.start = TALLOC (nmatch, regoff_t);\r
+      regs.end = TALLOC (nmatch, regoff_t);\r
+      if (regs.start == NULL || regs.end == NULL)\r
+        {\r
+          /* Only one of the two allocations may have failed; release\r
+             the other one instead of leaking it.  */\r
+          free (regs.start);\r
+          free (regs.end);\r
+          return (int) REG_NOMATCH;\r
+        }\r
+    }\r
+\r
+  /* Perform the searching operation.  */\r
+  ret = re_search (&private_preg, string, len,\r
+                   /* start: */ 0, /* range: */ len,\r
+                   want_reg_info ? &regs : (struct re_registers *) 0);\r
+\r
+  /* Copy the register information to the POSIX structure.  */\r
+  if (want_reg_info)\r
+    {\r
+      if (ret >= 0)\r
+        {\r
+          unsigned r;\r
+\r
+          for (r = 0; r < nmatch; r++)\r
+            {\r
+              pmatch[r].rm_so = regs.start[r];\r
+              pmatch[r].rm_eo = regs.end[r];\r
+            }\r
+        }\r
+\r
+      /* If we needed the temporary register info, free the space now.  */\r
+      free (regs.start);\r
+      free (regs.end);\r
+    }\r
+\r
+  /* We want zero return to mean success, unlike `re_search'.  */\r
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;\r
+}\r
+\r
+\r
+/* Returns a message corresponding to an error code, ERRCODE, returned\r
+ from either regcomp or regexec. We don't use PREG here. */\r
+\r
+size_t\r
+regerror(int errcode,\r
+ const regex_t *preg,\r
+ char *errbuf,\r
+ size_t errbuf_size)\r
+{\r
+ const char *msg;\r
+ size_t msg_size;\r
+\r
+ if (errcode < 0\r
+ || errcode >= (int) (sizeof (re_error_msgid)\r
+ / sizeof (re_error_msgid[0])))\r
+ /* Only error codes returned by the rest of the code should be passed\r
+ to this routine. If we are given anything else, or if other regex\r
+ code generates an invalid error code, then the program has a bug.\r
+ Dump core so we can fix it. */\r
+ abort ();\r
+\r
+ msg = gettext (re_error_msgid[errcode]);\r
+\r
+ msg_size = strlen (msg) + 1; /* Includes the null. */\r
+\r
+ if (errbuf_size != 0)\r
+ {\r
+ if (msg_size > errbuf_size)\r
+ {\r
+ strncpy (errbuf, msg, errbuf_size - 1);\r
+ errbuf[errbuf_size - 1] = 0;\r
+ }\r
+ else\r
+ strcpy (errbuf, msg);\r
+ }\r
+\r
+ return msg_size;\r
+}\r
+\r
+\r
+/* Release the compiled pattern, fastmap and translate table owned by\r
+   PREG, and reset the matching fields so that PREG no longer refers\r
+   to freed memory.  */\r
+\r
+void\r
+regfree(regex_t *preg)\r
+{\r
+  /* free accepts a null pointer, so no guards are needed.  */\r
+  free (preg->buffer);\r
+  free (preg->fastmap);\r
+  free (preg->translate);\r
+\r
+  preg->buffer = NULL;\r
+  preg->allocated = 0;\r
+  preg->used = 0;\r
+\r
+  preg->fastmap = NULL;\r
+  preg->fastmap_accurate = 0;\r
+\r
+  preg->translate = NULL;\r
+}\r
+\r
+#endif /* not emacs */\r