regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
*/
/* for debug */
+/* NOTE: keep every switch below commented out in committed code;
+   uncomment one locally only while debugging. */
/* #define ONIG_DEBUG_PARSE_TREE */
/* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */
/* #define ONIG_DONT_OPTIMIZE */
/* config */
/* spec. config */
#define USE_NAMED_GROUP
-#define USE_SUBEXP_CALL
+#define USE_CALL
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
-#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
+#define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
/* internal config */
#define USE_OP_PUSH_OR_JUMP_EXACT
-#define USE_QTFR_PEEK_NEXT
+#define USE_QUANT_PEEK_NEXT
#define USE_ST_LIBRARY
#define INIT_MATCH_STACK_SIZE 160
#endif
#endif
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
#ifdef __BORLANDC__
#include <malloc.h>
#endif
# include <stdio.h>
#endif
+/* Fallback intptr_t for Windows builds with MSVC older than _MSC_VER 1300. */
+#if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1300)
+typedef int intptr_t;
+#endif
+
#include "regenc.h"
#ifdef MIN
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+/* Extension record reached through the `chain` member of a regex object
+   (see REG_EXTP / REG_EXTPL below). */
+typedef struct {
+  int  num_keeper;  /* presumably the element count of keepers -- TODO confirm */
+  int* keepers;
+} RegExt;
+
+/* (reg)->chain viewed as RegExt*.  The whole expansion is parenthesized so
+   that REG_EXTP(reg)->member applies `->` to the cast result rather than
+   to chain itself. */
+#define REG_EXTP(reg)   ((RegExt* )((reg)->chain))
+/* The raw chain slot itself (usable as an assignment target). */
+#define REG_EXTPL(reg)  ((reg)->chain)
+
/* stack pop level */
#define STACK_POP_LEVEL_FREE 0
#define STACK_POP_LEVEL_MEM_START 1
#define ONIG_OPTIMIZE_MAP 5 /* char map */
/* bit status */
-typedef unsigned int BitStatusType;
-
-#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8)
-#define BIT_STATUS_CLEAR(stats) (stats) = 0
-#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0)
-#define BIT_STATUS_AT(stats,n) \
- ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1))
-
-#define BIT_STATUS_ON_AT(stats,n) do {\
- if ((n) < (int )BIT_STATUS_BITS_NUM) \
- (stats) |= (1 << (n));\
+/* Bit set indexed by n.  Indices >= MEM_STATUS_BITS_NUM do not get a bit
+   of their own: the test macros below fold them onto bit 0. */
+typedef unsigned int MemStatusType;
+
+#define MEM_STATUS_BITS_NUM        (sizeof(MemStatusType) * 8)
+#define MEM_STATUS_CLEAR(stats)    ((stats) = 0)
+#define MEM_STATUS_ON_ALL(stats)   ((stats) = ~((MemStatusType )0))
+/* Non-zero when bit n of stats is set; for n >= MEM_STATUS_BITS_NUM the
+   result is bit 0 instead.  n is evaluated more than once, and is
+   parenthesized in the shift so expressions like `k & 3` keep their value. */
+#define MEM_STATUS_AT(stats,n) \
+  ((n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << (n))) : ((stats) & 1))
+/* Same as MEM_STATUS_AT, except that n <= 0 also reads bit 0. */
+#define MEM_STATUS_AT0(stats,n) \
+  ((n) > 0 && (n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << (n))) : ((stats) & 1))
+
+#define MEM_STATUS_ON(stats,n) do {\
+ if ((n) < (int )MEM_STATUS_BITS_NUM) {\
+ if ((n) != 0)\
+ (stats) |= ((MemStatusType )1 << (n));\
+ }\
else\
(stats) |= 1;\
} while (0)
-#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
- if ((n) < (int )BIT_STATUS_BITS_NUM)\
- (stats) |= (1 << (n));\
+#define MEM_STATUS_ON_SIMPLE(stats,n) do {\
+ if ((n) < (int )MEM_STATUS_BITS_NUM)\
+ (stats) |= ((MemStatusType )1 << (n));\
} while (0)
#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
-#define ANCHOR_BEGIN_BUF (1<<0)
-#define ANCHOR_BEGIN_LINE (1<<1)
-#define ANCHOR_BEGIN_POSITION (1<<2)
-#define ANCHOR_END_BUF (1<<3)
-#define ANCHOR_SEMI_END_BUF (1<<4)
-#define ANCHOR_END_LINE (1<<5)
-
-#define ANCHOR_WORD_BOUND (1<<6)
-#define ANCHOR_NOT_WORD_BOUND (1<<7)
-#define ANCHOR_WORD_BEGIN (1<<8)
-#define ANCHOR_WORD_END (1<<9)
-#define ANCHOR_PREC_READ (1<<10)
-#define ANCHOR_PREC_READ_NOT (1<<11)
-#define ANCHOR_LOOK_BEHIND (1<<12)
-#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
-
+/* has body */
+#define ANCHOR_PREC_READ (1<<0)
+#define ANCHOR_PREC_READ_NOT (1<<1)
+#define ANCHOR_LOOK_BEHIND (1<<2)
+#define ANCHOR_LOOK_BEHIND_NOT (1<<3)
+/* no body */
+#define ANCHOR_BEGIN_BUF (1<<4)
+#define ANCHOR_BEGIN_LINE (1<<5)
+#define ANCHOR_BEGIN_POSITION (1<<6)
+#define ANCHOR_END_BUF (1<<7)
+#define ANCHOR_SEMI_END_BUF (1<<8)
+#define ANCHOR_END_LINE (1<<9)
+#define ANCHOR_WORD_BOUND (1<<10)
+#define ANCHOR_NOT_WORD_BOUND (1<<11)
+#define ANCHOR_WORD_BEGIN (1<<12)
+#define ANCHOR_WORD_END (1<<13)
#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
+#define ANCHOR_HAS_BODY(a) ((a)->type < ANCHOR_BEGIN_BUF)
+
+
/* operation code */
enum OpCode {
OP_FINISH = 0, /* matching process terminator (no more alternative) */
OP_BACKREF1,
OP_BACKREF2,
- OP_BACKREFN,
- OP_BACKREFN_IC,
+ OP_BACKREF_N,
+ OP_BACKREF_N_IC,
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
- OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
+ OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
+ OP_BACKREF_CHECK, /* (?(n)), (?('name')) */
+ OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n-level)), (?('name-level')) */
OP_MEMORY_START,
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
OP_REPEAT_INC_NG, /* non greedy */
OP_REPEAT_INC_SG, /* search and get in stack */
OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
- OP_NULL_CHECK_START, /* null loop checker start */
- OP_NULL_CHECK_END, /* null loop checker end */
- OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
- OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
+ OP_EMPTY_CHECK_START, /* null loop checker start */
+ OP_EMPTY_CHECK_END, /* null loop checker end */
+ OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */
+ OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
OP_PUSH_POS, /* (?=...) start */
OP_POP_POS, /* (?=...) end */
- OP_PUSH_POS_NOT, /* (?!...) start */
- OP_FAIL_POS, /* (?!...) end */
+ OP_PUSH_PREC_READ_NOT, /* (?!...) start */
+ OP_FAIL_PREC_READ_NOT, /* (?!...) end */
OP_PUSH_STOP_BT, /* (?>...) start */
OP_POP_STOP_BT, /* (?>...) end */
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
OP_CALL, /* \g<name> */
OP_RETURN,
+ OP_PUSH_SAVE_VAL,
+ OP_UPDATE_VAR,
OP_STATE_CHECK_PUSH, /* combination explosion check and push */
OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
OP_SET_OPTION /* set option */
};
+/* Save-type codes; the numeric values are spelled out explicitly. */
+enum SaveType {
+  SAVE_KEEP        = 0,
+  SAVE_RIGHT_RANGE = 1
+};
+
+/* Update-var codes; the numeric values are spelled out explicitly. */
+enum UpdateVarType {
+  UPDATE_VAR_KEEP_FROM_STACK_LAST        = 0,
+  UPDATE_VAR_RIGHT_RANGE_FROM_STACK_LAST = 1,
+  UPDATE_VAR_RIGHT_RANGE_SPREV           = 2
+};
+
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
typedef int MemNumType;
typedef short int StateCheckNumType;
typedef void* PointerType;
+typedef int SaveType;
+typedef int UpdateVarType;
#define SIZE_OPCODE 1
#define SIZE_RELADDR sizeof(RelAddrType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
#define SIZE_POINTER sizeof(PointerType)
-
+#define SIZE_SAVE_TYPE sizeof(SaveType)
+#define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType)
#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
+#define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType)
+#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType)
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_POS SIZE_OPCODE
-#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_PUSH_PREC_READ_NOT (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP_POS SIZE_OPCODE
-#define SIZE_OP_FAIL_POS SIZE_OPCODE
+#define SIZE_OP_FAIL_PREC_READ_NOT SIZE_OPCODE
#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_FAIL SIZE_OPCODE
#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
-#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_EMPTY_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_EMPTY_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN SIZE_OPCODE
+#define SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM)
+#define SIZE_OP_UPDATE_VAR (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
#define FLAG_NCCLASS_SHARE (1<<1)
#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
-#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
-#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
-
-typedef struct {
- int type;
- /* struct _Node* next; */
- /* unsigned int flags; */
-} NodeBase;
-
-typedef struct {
- NodeBase base;
- unsigned int flags;
- BitSet bs;
- BBuf* mbuf; /* multi-byte info or NULL */
-} CClassNode;
-
-typedef long OnigStackIndex;
-
-typedef struct _OnigStackType {
- unsigned int type;
- union {
- struct {
- UChar *pcode; /* byte code position */
- UChar *pstr; /* string position */
- UChar *pstr_prev; /* previous char position of pstr */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- unsigned int state_check;
-#endif
- } state;
- struct {
- int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
- UChar *pcode; /* byte code position (head of repeated target) */
- int num; /* repeat id */
- } repeat;
- struct {
- OnigStackIndex si; /* index of stack */
- } repeat_inc;
- struct {
- int num; /* memory num */
- UChar *pstr; /* start/end position */
- /* Following information is set, if this stack type is MEM-START */
- OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
- OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
- } mem;
- struct {
- int num; /* null check id */
- UChar *pstr; /* start position */
- } null_check;
-#ifdef USE_SUBEXP_CALL
- struct {
- UChar *ret_addr; /* byte code position */
- int num; /* null check id */
- UChar *pstr; /* string position */
- } call_frame;
-#endif
- } u;
-} OnigStackType;
typedef struct {
void* stack_p;
extern OnigOpInfoType OnigOpInfo[];
-extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
+extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc));
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_transfer P_((regex_t* to, regex_t* from));
-extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
-extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
+extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc));
/* strend hash */
typedef void hash_table_type;