From 7205f9126b563eb2827cf94dcd383fd930c1fd65 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Fri, 22 Aug 2014 23:15:11 +0300 Subject: [PATCH] Alternation of 'RegExp's should preserve 'ins_access' attribute. When one builds 'AltOp' from two 'RegExp's, one sometimes has to break these 'RegExp's in pieces in order to merge their common prefix. In such cases, if one of the original 'RegExp's has 'ins_access' set to 'PRIVATE', it is lost (defaults to 'SHARED') after alternation. This commit fixes Gentoo bug https://bugs.gentoo.org/show_bug.cgi?id=518904. --- re2c/actions.cc | 11 +- re2c/test/zend_language_scanner.igcFd.c | 7163 ++++++++++++++++++++++ re2c/test/zend_language_scanner.igcFd.re | 2442 ++++++++ 3 files changed, 9615 insertions(+), 1 deletion(-) create mode 100644 re2c/test/zend_language_scanner.igcFd.c create mode 100644 re2c/test/zend_language_scanner.igcFd.re diff --git a/re2c/actions.cc b/re2c/actions.cc index 02f47499..24356cf0 100644 --- a/re2c/actions.cc +++ b/re2c/actions.cc @@ -126,7 +126,12 @@ MatchOp *merge(MatchOp *m1, MatchOp *m2) if (!m2) return m1; - return new MatchOp(doUnion(m1->match, m2->match)); + MatchOp* m = new MatchOp(doUnion(m1->match, m2->match)); + if (m1->ins_access == RegExp::PRIVATE + || m2->ins_access == RegExp::PRIVATE) + m->ins_access = RegExp::PRIVATE; + + return m; } void MatchOp::display(std::ostream &o) const @@ -273,6 +278,8 @@ RegExp *mkAlt(RegExp *e1, RegExp *e2) m1 = dynamic_cast<MatchOp*>(a->exp1); if (m1 != NULL) { + m1->ins_access = e1->ins_access; + a->exp2->ins_access = e1->ins_access; e1 = a->exp2; } } @@ -289,6 +296,8 @@ RegExp *mkAlt(RegExp *e1, RegExp *e2) m2 = dynamic_cast<MatchOp*>(a->exp1); if (m2 != NULL) { + m2->ins_access = e2->ins_access; + a->exp2->ins_access = e2->ins_access; e2 = a->exp2; } } diff --git a/re2c/test/zend_language_scanner.igcFd.c b/re2c/test/zend_language_scanner.igcFd.c new file mode 100644 index 00000000..b21de193 --- /dev/null +++ b/re2c/test/zend_language_scanner.igcFd.c @@ -0,0 +1,7163 @@ +/* 
Generated by re2c */ +/* + +----------------------------------------------------------------------+ + | Zend Engine | + +----------------------------------------------------------------------+ + | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.00 of the Zend license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.zend.com/license/2_00.txt. | + | If you did not receive a copy of the Zend license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@zend.com so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Marcus Boerger | + | Nuno Lopes | + | Scott MacVicar | + | Flex version authors: | + | Andi Gutmans | + | Zeev Suraski | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + +#if 0 +# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c) +#else +# define YYDEBUG(s, c) +#endif + +#include "zend_language_scanner_defs.h" + +#include +#include "zend.h" +#ifdef PHP_WIN32 +# include +#endif +#include "zend_alloc.h" +#include +#include "zend_compile.h" +#include "zend_language_scanner.h" +#include "zend_highlight.h" +#include "zend_constants.h" +#include "zend_variables.h" +#include "zend_operators.h" +#include "zend_API.h" +#include "zend_strtod.h" +#include "zend_exceptions.h" +#include "tsrm_virtual_cwd.h" +#include "tsrm_config_common.h" + +#define YYCTYPE unsigned char +#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } } +#define YYCURSOR SCNG(yy_cursor) +#define YYLIMIT SCNG(yy_limit) +#define YYMARKER SCNG(yy_marker) + +#define YYGETCONDITION() SCNG(yy_state) +#define YYSETCONDITION(s) SCNG(yy_state) = s + +#define 
STATE(name) yyc##name + +/* emulate flex constructs */ +#define BEGIN(state) YYSETCONDITION(STATE(state)) +#define YYSTATE YYGETCONDITION() +#define yytext ((char*)SCNG(yy_text)) +#define yyleng SCNG(yy_leng) +#define yyless(x) do { YYCURSOR = (unsigned char*)yytext + x; \ + yyleng = (unsigned int)x; } while(0) +#define yymore() goto yymore_restart + +/* perform sanity check. If this message is triggered you should + increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */ +#define YYMAXFILL 16 +#if ZEND_MMAP_AHEAD < YYMAXFILL +# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL +#endif + +#ifdef HAVE_STDARG_H +# include +#endif + +#ifdef HAVE_UNISTD_H +# include +#endif + +/* Globals Macros */ +#define SCNG LANG_SCNG +#ifdef ZTS +ZEND_API ts_rsrc_id language_scanner_globals_id; +#else +ZEND_API zend_php_scanner_globals language_scanner_globals; +#endif + +#define HANDLE_NEWLINES(s, l) \ +do { \ + char *p = (s), *boundary = p+(l); \ + \ + while (p= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F) + +#define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7') +#define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F')) + +BEGIN_EXTERN_C() + +static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); + return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC); +} + +static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) 
TSRMLS_CC); +} + +static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + return zend_multibyte_encoding_converter(to, to_length, from, from_length, +LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC); +} + +static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); + return zend_multibyte_encoding_converter(to, to_length, from, from_length, +internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC); +} + + +static void _yy_push_state(int new_state TSRMLS_DC) +{ + zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int)); + YYSETCONDITION(new_state); +} + +#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm) + +static void yy_pop_state(TSRMLS_D) +{ + int *stack_state; + zend_stack_top(&SCNG(state_stack), (void **) &stack_state); + YYSETCONDITION(*stack_state); + zend_stack_del_top(&SCNG(state_stack)); +} + +static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC) +{ + YYCURSOR = (YYCTYPE*)str; + YYLIMIT = YYCURSOR + len; + if (!SCNG(yy_start)) { + SCNG(yy_start) = YYCURSOR; + } +} + +void startup_scanner(TSRMLS_D) +{ + CG(parse_error) = 0; + CG(doc_comment) = NULL; + CG(doc_comment_len) = 0; + zend_stack_init(&SCNG(state_stack)); + zend_ptr_stack_init(&SCNG(heredoc_label_stack)); +} + +static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) { + efree(heredoc_label->label); +} + +void shutdown_scanner(TSRMLS_D) +{ + CG(parse_error) = 0; + RESET_DOC_COMMENT(); + zend_stack_destroy(&SCNG(state_stack)); + zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); + 
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); +} + +ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC) +{ + lex_state->yy_leng = SCNG(yy_leng); + lex_state->yy_start = SCNG(yy_start); + lex_state->yy_text = SCNG(yy_text); + lex_state->yy_cursor = SCNG(yy_cursor); + lex_state->yy_marker = SCNG(yy_marker); + lex_state->yy_limit = SCNG(yy_limit); + + lex_state->state_stack = SCNG(state_stack); + zend_stack_init(&SCNG(state_stack)); + + lex_state->heredoc_label_stack = SCNG(heredoc_label_stack); + zend_ptr_stack_init(&SCNG(heredoc_label_stack)); + + lex_state->in = SCNG(yy_in); + lex_state->yy_state = YYSTATE; + lex_state->filename = zend_get_compiled_filename(TSRMLS_C); + lex_state->lineno = CG(zend_lineno); + + lex_state->script_org = SCNG(script_org); + lex_state->script_org_size = SCNG(script_org_size); + lex_state->script_filtered = SCNG(script_filtered); + lex_state->script_filtered_size = SCNG(script_filtered_size); + lex_state->input_filter = SCNG(input_filter); + lex_state->output_filter = SCNG(output_filter); + lex_state->script_encoding = SCNG(script_encoding); +} + +ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) +{ + SCNG(yy_leng) = lex_state->yy_leng; + SCNG(yy_start) = lex_state->yy_start; + SCNG(yy_text) = lex_state->yy_text; + SCNG(yy_cursor) = lex_state->yy_cursor; + SCNG(yy_marker) = lex_state->yy_marker; + SCNG(yy_limit) = lex_state->yy_limit; + + zend_stack_destroy(&SCNG(state_stack)); + SCNG(state_stack) = lex_state->state_stack; + + zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); + zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); + SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack; + + SCNG(yy_in) = lex_state->in; + YYSETCONDITION(lex_state->yy_state); + CG(zend_lineno) = lex_state->lineno; + zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); + + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + 
SCNG(script_filtered) = NULL; + } + SCNG(script_org) = lex_state->script_org; + SCNG(script_org_size) = lex_state->script_org_size; + SCNG(script_filtered) = lex_state->script_filtered; + SCNG(script_filtered_size) = lex_state->script_filtered_size; + SCNG(input_filter) = lex_state->input_filter; + SCNG(output_filter) = lex_state->output_filter; + SCNG(script_encoding) = lex_state->script_encoding; + + RESET_DOC_COMMENT(); +} + +ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) +{ + zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles); + /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */ + file_handle->opened_path = NULL; + if (file_handle->free_filename) { + file_handle->filename = NULL; + } +} + +#define BOM_UTF32_BE "\x00\x00\xfe\xff" +#define BOM_UTF32_LE "\xff\xfe\x00\x00" +#define BOM_UTF16_BE "\xfe\xff" +#define BOM_UTF16_LE "\xff\xfe" +#define BOM_UTF8 "\xef\xbb\xbf" + +static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC) +{ + const unsigned char *p; + int wchar_size = 2; + int le = 0; + + /* utf-16 or utf-32? */ + p = script; + while ((p-script) < script_size) { + p = memchr(p, 0, script_size-(p-script)-2); + if (!p) { + break; + } + if (*(p+1) == '\0' && *(p+2) == '\0') { + wchar_size = 4; + break; + } + + /* searching for UTF-32 specific byte orders, so this will do */ + p += 4; + } + + /* BE or LE? */ + p = script; + while ((p-script) < script_size) { + if (*p == '\0' && *(p+wchar_size-1) != '\0') { + /* BE */ + le = 0; + break; + } else if (*p != '\0' && *(p+wchar_size-1) == '\0') { + /* LE* */ + le = 1; + break; + } + p += wchar_size; + } + + if (wchar_size == 2) { + return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be; + } else { + return le ? 
zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be; + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) +{ + const zend_encoding *script_encoding = NULL; + int bom_size; + unsigned char *pos1, *pos2; + + if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { + return NULL; + } + + /* check out BOM */ + if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf32be; + bom_size = sizeof(BOM_UTF32_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf32le; + bom_size = sizeof(BOM_UTF32_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf16be; + bom_size = sizeof(BOM_UTF16_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf16le; + bom_size = sizeof(BOM_UTF16_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) { + script_encoding = zend_multibyte_encoding_utf8; + bom_size = sizeof(BOM_UTF8)-1; + } + + if (script_encoding) { + /* remove BOM */ + LANG_SCNG(script_org) += bom_size; + LANG_SCNG(script_org_size) -= bom_size; + + return script_encoding; + } + + /* script contains NULL bytes -> auto-detection */ + if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { + /* check if the NULL byte is after the __HALT_COMPILER(); */ + pos2 = LANG_SCNG(script_org); + + while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { + pos2 = memchr(pos2, '_', pos1 - pos2); + if (!pos2) break; + pos2++; + if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { + pos2 += sizeof("_HALT_COMPILER")-1; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == '(') { + pos2++; + while 
(*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ')') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ';') { + return NULL; + } + } + } + } + } + /* make best effort if BOM is missing */ + return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D) +{ + const zend_encoding *script_encoding; + + if (CG(detect_unicode)) { + /* check out bom(byte order mark) and see if containing wchars */ + script_encoding = zend_multibyte_detect_unicode(TSRMLS_C); + if (script_encoding != NULL) { + /* bom or wchar detection is prior to 'script_encoding' option */ + return script_encoding; + } + } + + /* if no script_encoding specified, just leave alone */ + if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) { + return NULL; + } + + /* if multiple encodings specified, detect automagically */ + if (CG(script_encoding_list_size) > 1) { + return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC); + } + + return CG(script_encoding_list)[0]; +} + +ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + const zend_encoding *script_encoding = onetime_encoding ? 
onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C); + + if (!script_encoding) { + return FAILURE; + } + + /* judge input/output filter */ + LANG_SCNG(script_encoding) = script_encoding; + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + + if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) { + if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { + /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */ + LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; + LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script; + } else { + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + } + return SUCCESS; + } + + if (zend_multibyte_check_lexer_compatibility(internal_encoding)) { + LANG_SCNG(input_filter) = encoding_filter_script_to_internal; + LANG_SCNG(output_filter) = NULL; + } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = encoding_filter_script_to_internal; + } else { + /* both script and internal encodings are incompatible w/ flex */ + LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; + LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal; + } + + return 0; +} + +ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) +{ + const char *file_path = NULL; + char *buf; + size_t size, offset = 0; + + /* The shebang line was read, get the current position to obtain the buffer start */ + if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) { + if ((offset = ftell(file_handle->handle.fp)) == -1) { + offset = 0; + } + } + + if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) { + return FAILURE; + } + + zend_llist_add_element(&CG(open_files), file_handle); + if (file_handle->handle.stream.handle >= (void*)file_handle && 
file_handle->handle.stream.handle <= (void*)(file_handle+1)) { + zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files)); + size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle; + fh->handle.stream.handle = (void*)(((char*)fh) + diff); + file_handle->handle.stream.handle = fh->handle.stream.handle; + } + + /* Reset the scanner for scanning the new file */ + SCNG(yy_in) = file_handle; + SCNG(yy_start) = NULL; + + if (size != -1) { + if (CG(multibyte)) { + SCNG(script_org) = (unsigned char*)buf; + SCNG(script_org_size) = size; + SCNG(script_filtered) = NULL; + + zend_multibyte_set_filter(NULL TSRMLS_CC); + + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } + buf = (char*)SCNG(script_filtered); + size = SCNG(script_filtered_size); + } + } + SCNG(yy_start) = (unsigned char *)buf - offset; + yy_scan_buffer(buf, size TSRMLS_CC); + } else { + zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); + } + + BEGIN(INITIAL); + + if (file_handle->opened_path) { + file_path = file_handle->opened_path; + } else { + file_path = file_handle->filename; + } + + zend_set_compiled_filename(file_path TSRMLS_CC); + + if (CG(start_lineno)) { + CG(zend_lineno) = CG(start_lineno); + CG(start_lineno) = 0; + } else { + CG(zend_lineno) = 1; + } + + RESET_DOC_COMMENT(); + CG(increment_lineno) = 0; + return SUCCESS; +} +END_EXTERN_C() + + +ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC) +{ + zend_lex_state original_lex_state; + zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array)); + zend_op_array *original_active_op_array = CG(active_op_array); + 
zend_op_array *retval=NULL; + int compiler_result; + zend_bool compilation_successful=0; + znode retval_znode; + zend_bool original_in_compilation = CG(in_compilation); + + retval_znode.op_type = IS_CONST; + retval_znode.u.constant.type = IS_LONG; + retval_znode.u.constant.value.lval = 1; + Z_UNSET_ISREF(retval_znode.u.constant); + Z_SET_REFCOUNT(retval_znode.u.constant, 1); + + zend_save_lexical_state(&original_lex_state TSRMLS_CC); + + retval = op_array; /* success oriented */ + + if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) { + if (type==ZEND_REQUIRE) { + zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC); + zend_bailout(); + } else { + zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC); + } + compilation_successful=0; + } else { + init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); + CG(in_compilation) = 1; + CG(active_op_array) = op_array; + zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); + zend_init_compiler_context(TSRMLS_C); + compiler_result = zendparse(TSRMLS_C); + zend_do_return(&retval_znode, 0 TSRMLS_CC); + CG(in_compilation) = original_in_compilation; + if (compiler_result != 0) { /* parser error */ + zend_bailout(); + } + compilation_successful=1; + } + + if (retval) { + CG(active_op_array) = original_active_op_array; + if (compilation_successful) { + pass_two(op_array TSRMLS_CC); + zend_release_labels(0 TSRMLS_CC); + } else { + efree(op_array); + retval = NULL; + } + } + zend_restore_lexical_state(&original_lex_state TSRMLS_CC); + return retval; +} + + +zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC) +{ + zend_file_handle file_handle; + zval tmp; + zend_op_array *retval; + char *opened_path = NULL; + + if (filename->type != IS_STRING) { + tmp = *filename; + zval_copy_ctor(&tmp); + convert_to_string(&tmp); + filename = &tmp; + } + file_handle.filename = filename->value.str.val; + 
file_handle.free_filename = 0; + file_handle.type = ZEND_HANDLE_FILENAME; + file_handle.opened_path = NULL; + file_handle.handle.fp = NULL; + + retval = zend_compile_file(&file_handle, type TSRMLS_CC); + if (retval && file_handle.handle.stream.handle) { + int dummy = 1; + + if (!file_handle.opened_path) { + file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len); + } + + zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL); + + if (opened_path) { + efree(opened_path); + } + } + zend_destroy_file_handle(&file_handle TSRMLS_CC); + + if (filename==&tmp) { + zval_dtor(&tmp); + } + return retval; +} + +ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC) +{ + char *buf; + size_t size; + + /* enforce two trailing NULLs for flex... */ + if (IS_INTERNED(str->value.str.val)) { + char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD); + memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD); + str->value.str.val = tmp; + } else { + str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD); + } + + memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD); + + SCNG(yy_in) = NULL; + SCNG(yy_start) = NULL; + + buf = str->value.str.val; + size = str->value.str.len; + + if (CG(multibyte)) { + SCNG(script_org) = (unsigned char*)buf; + SCNG(script_org_size) = size; + SCNG(script_filtered) = NULL; + + zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); + + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } + buf = 
(char*)SCNG(script_filtered); + size = SCNG(script_filtered_size); + } + } + + yy_scan_buffer(buf, size TSRMLS_CC); + + zend_set_compiled_filename(filename TSRMLS_CC); + CG(zend_lineno) = 1; + CG(increment_lineno) = 0; + RESET_DOC_COMMENT(); + return SUCCESS; +} + + +ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) +{ + size_t offset = SCNG(yy_cursor) - SCNG(yy_start); + if (SCNG(input_filter)) { + size_t original_offset = offset, length = 0; + do { + unsigned char *p = NULL; + if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) { + return (size_t)-1; + } + efree(p); + if (length > original_offset) { + offset--; + } else if (length < original_offset) { + offset++; + } + } while (original_offset != length); + } + return offset; +} + + +zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC) +{ + zend_lex_state original_lex_state; + zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array)); + zend_op_array *original_active_op_array = CG(active_op_array); + zend_op_array *retval; + zval tmp; + int compiler_result; + zend_bool original_in_compilation = CG(in_compilation); + + if (source_string->value.str.len==0) { + efree(op_array); + return NULL; + } + + CG(in_compilation) = 1; + + tmp = *source_string; + zval_copy_ctor(&tmp); + convert_to_string(&tmp); + source_string = &tmp; + + zend_save_lexical_state(&original_lex_state TSRMLS_CC); + if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) { + efree(op_array); + retval = NULL; + } else { + zend_bool orig_interactive = CG(interactive); + + CG(interactive) = 0; + init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); + CG(interactive) = orig_interactive; + CG(active_op_array) = op_array; + zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); + zend_init_compiler_context(TSRMLS_C); + BEGIN(ST_IN_SCRIPTING); + compiler_result = zendparse(TSRMLS_C); + + if 
(SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + + if (compiler_result != 0) { + CG(active_op_array) = original_active_op_array; + CG(unclean_shutdown)=1; + destroy_op_array(op_array TSRMLS_CC); + efree(op_array); + retval = NULL; + } else { + zend_do_return(NULL, 0 TSRMLS_CC); + CG(active_op_array) = original_active_op_array; + pass_two(op_array TSRMLS_CC); + zend_release_labels(0 TSRMLS_CC); + retval = op_array; + } + } + zend_restore_lexical_state(&original_lex_state TSRMLS_CC); + zval_dtor(&tmp); + CG(in_compilation) = original_in_compilation; + return retval; +} + + +BEGIN_EXTERN_C() +int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC) +{ + zend_lex_state original_lex_state; + zend_file_handle file_handle; + + file_handle.type = ZEND_HANDLE_FILENAME; + file_handle.filename = filename; + file_handle.free_filename = 0; + file_handle.opened_path = NULL; + zend_save_lexical_state(&original_lex_state TSRMLS_CC); + if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) { + zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC); + zend_restore_lexical_state(&original_lex_state TSRMLS_CC); + return FAILURE; + } + zend_highlight(syntax_highlighter_ini TSRMLS_CC); + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + zend_destroy_file_handle(&file_handle TSRMLS_CC); + zend_restore_lexical_state(&original_lex_state TSRMLS_CC); + return SUCCESS; +} + +int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC) +{ + zend_lex_state original_lex_state; + zval tmp = *str; + + str = &tmp; + zval_copy_ctor(str); + zend_save_lexical_state(&original_lex_state TSRMLS_CC); + if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) { + zend_restore_lexical_state(&original_lex_state TSRMLS_CC); + return FAILURE; + } + BEGIN(INITIAL); + 
zend_highlight(syntax_highlighter_ini TSRMLS_CC); + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + zend_restore_lexical_state(&original_lex_state TSRMLS_CC); + zval_dtor(str); + return SUCCESS; +} + +ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC) +{ + size_t length; + unsigned char *new_yy_start; + + /* convert and set */ + if (!SCNG(input_filter)) { + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + SCNG(script_filtered_size) = 0; + length = SCNG(script_org_size); + new_yy_start = SCNG(script_org); + } else { + if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } + SCNG(script_filtered) = new_yy_start; + SCNG(script_filtered_size) = length; + } + + SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); + SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); + SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); + SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start)); + + SCNG(yy_start) = new_yy_start; +} + + +# define zend_copy_value(zendlval, yytext, yyleng) \ + if (SCNG(output_filter)) { \ + size_t sz = 0; \ + SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC); \ + zendlval->value.str.len = sz; \ + } else { \ + zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \ + zendlval->value.str.len = yyleng; \ + } + +static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC) +{ + register char *s, *t; + char *end; + + ZVAL_STRINGL(zendlval, str, len, 1); + + /* 
convert escape sequences */ + s = t = zendlval->value.str.val; + end = s+zendlval->value.str.len; + while (s= end) { + *t++ = '\\'; + break; + } + + switch(*s) { + case 'n': + *t++ = '\n'; + zendlval->value.str.len--; + break; + case 'r': + *t++ = '\r'; + zendlval->value.str.len--; + break; + case 't': + *t++ = '\t'; + zendlval->value.str.len--; + break; + case 'f': + *t++ = '\f'; + zendlval->value.str.len--; + break; + case 'v': + *t++ = '\v'; + zendlval->value.str.len--; + break; + case 'e': +#ifdef PHP_WIN32 + *t++ = VK_ESCAPE; +#else + *t++ = '\e'; +#endif + zendlval->value.str.len--; + break; + case '"': + case '`': + if (*s != quote_type) { + *t++ = '\\'; + *t++ = *s; + break; + } + case '\\': + case '$': + *t++ = *s; + zendlval->value.str.len--; + break; + case 'x': + case 'X': + if (ZEND_IS_HEX(*(s+1))) { + char hex_buf[3] = { 0, 0, 0 }; + + zendlval->value.str.len--; /* for the 'x' */ + + hex_buf[0] = *(++s); + zendlval->value.str.len--; + if (ZEND_IS_HEX(*(s+1))) { + hex_buf[1] = *(++s); + zendlval->value.str.len--; + } + *t++ = (char) strtol(hex_buf, NULL, 16); + } else { + *t++ = '\\'; + *t++ = *s; + } + break; + default: + /* check for an octal */ + if (ZEND_IS_OCT(*s)) { + char octal_buf[4] = { 0, 0, 0, 0 }; + + octal_buf[0] = *s; + zendlval->value.str.len--; + if (ZEND_IS_OCT(*(s+1))) { + octal_buf[1] = *(++s); + zendlval->value.str.len--; + if (ZEND_IS_OCT(*(s+1))) { + octal_buf[2] = *(++s); + zendlval->value.str.len--; + } + } + *t++ = (char) strtol(octal_buf, NULL, 8); + } else { + *t++ = '\\'; + *t++ = *s; + } + break; + } + } else { + *t++ = *s; + } + + if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { + CG(zend_lineno)++; + } + s++; + } + *t = 0; + if (SCNG(output_filter)) { + size_t sz = 0; + s = zendlval->value.str.val; + SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC); + zendlval->value.str.len = sz; + efree(s); + } +} + + +int lex_scan(zval *zendlval 
TSRMLS_DC) +{ +restart: + SCNG(yy_text) = YYCURSOR; + +yymore_restart: + + +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + static void *yyctable[10] = { + &&yyc_ST_IN_SCRIPTING, + &&yyc_ST_LOOKING_FOR_PROPERTY, + &&yyc_ST_BACKQUOTE, + &&yyc_ST_DOUBLE_QUOTES, + &&yyc_ST_HEREDOC, + &&yyc_ST_LOOKING_FOR_VARNAME, + &&yyc_ST_VAR_OFFSET, + &&yyc_INITIAL, + &&yyc_ST_END_HEREDOC, + &&yyc_ST_NOWDOC, + }; + goto *yyctable[YYGETCONDITION()]; +/* *********************************** */ +yyc_INITIAL: + { + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 128, 0, 0, 128, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + YYDEBUG(1, *YYCURSOR); + YYFILL(8); + yych = *YYCURSOR; + if (yych != '<') goto yy5; + YYDEBUG(3, *YYCURSOR); + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= '>') { + if (yych == '%') goto yy8; + } else { + if (yych <= '?') goto yy6; + if (yych == 's') goto yy10; + } +yy4: + YYDEBUG(4, *YYCURSOR); + yyleng = YYCURSOR - SCNG(yy_text); + { + if (YYCURSOR > YYLIMIT) { + return 0; + } + +inline_char_handler: + + while (1) { + YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR); + + YYCURSOR = ptr ? 
ptr + 1 : YYLIMIT; + + if (YYCURSOR < YYLIMIT) { + switch (*YYCURSOR) { + case '?': + if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */ + break; + } + continue; + case '%': + if (CG(asp_tags)) { + break; + } + continue; + case 's': + case 'S': + /* Probably NOT an opening PHP