From: Ulya Trofimovich Date: Sat, 5 Nov 2016 15:24:02 +0000 (+0000) Subject: run_tests.sh: patch line endings in the generated file. X-Git-Tag: 1.0~53 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=334c650bd33c45a21f8fb467d02b2b6cfda674e1;p=re2c run_tests.sh: patch line endings in the generated file. Line endings in the generated code depend on the target platform: e.g., "\r\n" on Windows vs. "\n" on Linux. However, reference test results are (currently) generated on Linux and therefore contain "\n" line endings. So we have to patch line endings in the generated code in order to pass the tests on Windows. The testing script did patch line endings in stdout and stderr, but forgot to patch them in the generated file (this has been broken since we started to use the '-o' option for testing). This commit fixes the testing script. It also deletes a couple of tests in which the source code contains "\r\n" instead of "\n". These tests are duplicates of other tests (they were added by commit bd2875441cae4ab3934bfafcd34728021295b842, supposedly to test that re2c preserves line endings in source code). They are broken by the current commit, and fixing them is probably not worth the effort. --- diff --git a/re2c/run_tests.sh.in b/re2c/run_tests.sh.in index 6101fe4b..d8e22cd6 100644 --- a/re2c/run_tests.sh.in +++ b/re2c/run_tests.sh.in @@ -170,8 +170,8 @@ run_pack() { && cp "../../$x" "$outx" # run re2c $valgrind $wine ../../$re2c $switches "$outx" 2>"$outc.stderr" 1>&2 - # on windows stdout and stderr contain CR LF, cut CR to match test results - sed -i 's/\r//g' "$outc.stderr" + # on windows output contains CR LF, cut CR to match test results + sed -i 's/\r//g' "$outc" "$outc.stderr" # paste all files dropped by re2c into output file rm "$outx" && find . 
-type f \ | lc_run sort \ diff --git a/re2c/test/php20140822_zend_language_scanner_crlf.igcd--flex-syntax.c b/re2c/test/php20140822_zend_language_scanner_crlf.igcd--flex-syntax.c deleted file mode 100644 index b757eb63..00000000 --- a/re2c/test/php20140822_zend_language_scanner_crlf.igcd--flex-syntax.c +++ /dev/null @@ -1,7553 +0,0 @@ -/* Generated by re2c */ -/* - +----------------------------------------------------------------------+ - | Zend Engine | - +----------------------------------------------------------------------+ - | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) | - +----------------------------------------------------------------------+ - | This source file is subject to version 2.00 of the Zend license, | - | that is bundled with this package in the file LICENSE, and is | - | available through the world-wide-web at the following url: | - | http://www.zend.com/license/2_00.txt. | - | If you did not receive a copy of the Zend license and are unable to | - | obtain it through the world-wide-web, please send a note to | - | license@zend.com so we can mail you a copy immediately. 
| - +----------------------------------------------------------------------+ - | Authors: Marcus Boerger | - | Nuno Lopes | - | Scott MacVicar | - | Flex version authors: | - | Andi Gutmans | - | Zeev Suraski | - +----------------------------------------------------------------------+ -*/ - -/* $Id$ */ - -#if 0 -# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c) -#else -# define YYDEBUG(s, c) -#endif - -#include "zend_language_scanner_defs.h" - -#include -#include "zend.h" -#ifdef PHP_WIN32 -# include -#endif -#include "zend_alloc.h" -#include -#include "zend_compile.h" -#include "zend_language_scanner.h" -#include "zend_highlight.h" -#include "zend_constants.h" -#include "zend_variables.h" -#include "zend_operators.h" -#include "zend_API.h" -#include "zend_strtod.h" -#include "zend_exceptions.h" -#include "tsrm_virtual_cwd.h" -#include "tsrm_config_common.h" - -#define YYCTYPE unsigned char -#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } } -#define YYCURSOR SCNG(yy_cursor) -#define YYLIMIT SCNG(yy_limit) -#define YYMARKER SCNG(yy_marker) - -#define YYGETCONDITION() SCNG(yy_state) -#define YYSETCONDITION(s) SCNG(yy_state) = s - -#define STATE(name) yyc##name - -/* emulate flex constructs */ -#define BEGIN(state) YYSETCONDITION(STATE(state)) -#define YYSTATE YYGETCONDITION() -#define yytext ((char*)SCNG(yy_text)) -#define yyleng SCNG(yy_leng) -#define yyless(x) do { YYCURSOR = (unsigned char*)yytext + x; \ - yyleng = (unsigned int)x; } while(0) -#define yymore() goto yymore_restart - -/* perform sanity check. 
If this message is triggered you should - increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */ -#define YYMAXFILL 16 -#if ZEND_MMAP_AHEAD < YYMAXFILL -# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL -#endif - -#ifdef HAVE_STDARG_H -# include -#endif - -#ifdef HAVE_UNISTD_H -# include -#endif - -/* Globals Macros */ -#define SCNG LANG_SCNG -#ifdef ZTS -ZEND_API ts_rsrc_id language_scanner_globals_id; -#else -ZEND_API zend_php_scanner_globals language_scanner_globals; -#endif - -#define HANDLE_NEWLINES(s, l) \ -do { \ - char *p = (s), *boundary = p+(l); \ - \ - while (p= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F) - -#define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7') -#define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F')) - -BEGIN_EXTERN_C() - -static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) -{ - const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); - assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); - return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC); -} - -static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) -{ - return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC); -} - -static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) -{ - return zend_multibyte_encoding_converter(to, to_length, from, from_length, -LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC); -} - -static size_t 
encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) -{ - const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); - assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); - return zend_multibyte_encoding_converter(to, to_length, from, from_length, -internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC); -} - - -static void _yy_push_state(int new_state TSRMLS_DC) -{ - zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int)); - YYSETCONDITION(new_state); -} - -#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm) - -static void yy_pop_state(TSRMLS_D) -{ - int *stack_state; - zend_stack_top(&SCNG(state_stack), (void **) &stack_state); - YYSETCONDITION(*stack_state); - zend_stack_del_top(&SCNG(state_stack)); -} - -static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC) -{ - YYCURSOR = (YYCTYPE*)str; - YYLIMIT = YYCURSOR + len; - if (!SCNG(yy_start)) { - SCNG(yy_start) = YYCURSOR; - } -} - -void startup_scanner(TSRMLS_D) -{ - CG(parse_error) = 0; - CG(doc_comment) = NULL; - CG(doc_comment_len) = 0; - zend_stack_init(&SCNG(state_stack)); - zend_ptr_stack_init(&SCNG(heredoc_label_stack)); -} - -static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) { - efree(heredoc_label->label); -} - -void shutdown_scanner(TSRMLS_D) -{ - CG(parse_error) = 0; - RESET_DOC_COMMENT(); - zend_stack_destroy(&SCNG(state_stack)); - zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); - zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); -} - -ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC) -{ - lex_state->yy_leng = SCNG(yy_leng); - lex_state->yy_start = SCNG(yy_start); - lex_state->yy_text = SCNG(yy_text); - lex_state->yy_cursor = SCNG(yy_cursor); - lex_state->yy_marker = SCNG(yy_marker); - lex_state->yy_limit = 
SCNG(yy_limit); - - lex_state->state_stack = SCNG(state_stack); - zend_stack_init(&SCNG(state_stack)); - - lex_state->heredoc_label_stack = SCNG(heredoc_label_stack); - zend_ptr_stack_init(&SCNG(heredoc_label_stack)); - - lex_state->in = SCNG(yy_in); - lex_state->yy_state = YYSTATE; - lex_state->filename = zend_get_compiled_filename(TSRMLS_C); - lex_state->lineno = CG(zend_lineno); - - lex_state->script_org = SCNG(script_org); - lex_state->script_org_size = SCNG(script_org_size); - lex_state->script_filtered = SCNG(script_filtered); - lex_state->script_filtered_size = SCNG(script_filtered_size); - lex_state->input_filter = SCNG(input_filter); - lex_state->output_filter = SCNG(output_filter); - lex_state->script_encoding = SCNG(script_encoding); -} - -ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) -{ - SCNG(yy_leng) = lex_state->yy_leng; - SCNG(yy_start) = lex_state->yy_start; - SCNG(yy_text) = lex_state->yy_text; - SCNG(yy_cursor) = lex_state->yy_cursor; - SCNG(yy_marker) = lex_state->yy_marker; - SCNG(yy_limit) = lex_state->yy_limit; - - zend_stack_destroy(&SCNG(state_stack)); - SCNG(state_stack) = lex_state->state_stack; - - zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); - zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); - SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack; - - SCNG(yy_in) = lex_state->in; - YYSETCONDITION(lex_state->yy_state); - CG(zend_lineno) = lex_state->lineno; - zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); - - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - SCNG(script_filtered) = NULL; - } - SCNG(script_org) = lex_state->script_org; - SCNG(script_org_size) = lex_state->script_org_size; - SCNG(script_filtered) = lex_state->script_filtered; - SCNG(script_filtered_size) = lex_state->script_filtered_size; - SCNG(input_filter) = lex_state->input_filter; - SCNG(output_filter) = lex_state->output_filter; - SCNG(script_encoding) 
= lex_state->script_encoding; - - RESET_DOC_COMMENT(); -} - -ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) -{ - zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles); - /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */ - file_handle->opened_path = NULL; - if (file_handle->free_filename) { - file_handle->filename = NULL; - } -} - -#define BOM_UTF32_BE "\x00\x00\xfe\xff" -#define BOM_UTF32_LE "\xff\xfe\x00\x00" -#define BOM_UTF16_BE "\xfe\xff" -#define BOM_UTF16_LE "\xff\xfe" -#define BOM_UTF8 "\xef\xbb\xbf" - -static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC) -{ - const unsigned char *p; - int wchar_size = 2; - int le = 0; - - /* utf-16 or utf-32? */ - p = script; - while ((p-script) < script_size) { - p = memchr(p, 0, script_size-(p-script)-2); - if (!p) { - break; - } - if (*(p+1) == '\0' && *(p+2) == '\0') { - wchar_size = 4; - break; - } - - /* searching for UTF-32 specific byte orders, so this will do */ - p += 4; - } - - /* BE or LE? */ - p = script; - while ((p-script) < script_size) { - if (*p == '\0' && *(p+wchar_size-1) != '\0') { - /* BE */ - le = 0; - break; - } else if (*p != '\0' && *(p+wchar_size-1) == '\0') { - /* LE* */ - le = 1; - break; - } - p += wchar_size; - } - - if (wchar_size == 2) { - return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be; - } else { - return le ? 
zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be; - } - - return NULL; -} - -static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) -{ - const zend_encoding *script_encoding = NULL; - int bom_size; - unsigned char *pos1, *pos2; - - if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { - return NULL; - } - - /* check out BOM */ - if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) { - script_encoding = zend_multibyte_encoding_utf32be; - bom_size = sizeof(BOM_UTF32_BE)-1; - } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) { - script_encoding = zend_multibyte_encoding_utf32le; - bom_size = sizeof(BOM_UTF32_LE)-1; - } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) { - script_encoding = zend_multibyte_encoding_utf16be; - bom_size = sizeof(BOM_UTF16_BE)-1; - } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) { - script_encoding = zend_multibyte_encoding_utf16le; - bom_size = sizeof(BOM_UTF16_LE)-1; - } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) { - script_encoding = zend_multibyte_encoding_utf8; - bom_size = sizeof(BOM_UTF8)-1; - } - - if (script_encoding) { - /* remove BOM */ - LANG_SCNG(script_org) += bom_size; - LANG_SCNG(script_org_size) -= bom_size; - - return script_encoding; - } - - /* script contains NULL bytes -> auto-detection */ - if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { - /* check if the NULL byte is after the __HALT_COMPILER(); */ - pos2 = LANG_SCNG(script_org); - - while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { - pos2 = memchr(pos2, '_', pos1 - pos2); - if (!pos2) break; - pos2++; - if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { - pos2 += sizeof("_HALT_COMPILER")-1; - while (*pos2 == ' ' || - *pos2 == '\t' || - *pos2 == '\r' || - *pos2 == '\n') { - pos2++; - } - if (*pos2 == '(') { - pos2++; - while 
(*pos2 == ' ' || - *pos2 == '\t' || - *pos2 == '\r' || - *pos2 == '\n') { - pos2++; - } - if (*pos2 == ')') { - pos2++; - while (*pos2 == ' ' || - *pos2 == '\t' || - *pos2 == '\r' || - *pos2 == '\n') { - pos2++; - } - if (*pos2 == ';') { - return NULL; - } - } - } - } - } - /* make best effort if BOM is missing */ - return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); - } - - return NULL; -} - -static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D) -{ - const zend_encoding *script_encoding; - - if (CG(detect_unicode)) { - /* check out bom(byte order mark) and see if containing wchars */ - script_encoding = zend_multibyte_detect_unicode(TSRMLS_C); - if (script_encoding != NULL) { - /* bom or wchar detection is prior to 'script_encoding' option */ - return script_encoding; - } - } - - /* if no script_encoding specified, just leave alone */ - if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) { - return NULL; - } - - /* if multiple encodings specified, detect automagically */ - if (CG(script_encoding_list_size) > 1) { - return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC); - } - - return CG(script_encoding_list)[0]; -} - -ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC) -{ - const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); - const zend_encoding *script_encoding = onetime_encoding ? 
onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C); - - if (!script_encoding) { - return FAILURE; - } - - /* judge input/output filter */ - LANG_SCNG(script_encoding) = script_encoding; - LANG_SCNG(input_filter) = NULL; - LANG_SCNG(output_filter) = NULL; - - if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) { - if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { - /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */ - LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; - LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script; - } else { - LANG_SCNG(input_filter) = NULL; - LANG_SCNG(output_filter) = NULL; - } - return SUCCESS; - } - - if (zend_multibyte_check_lexer_compatibility(internal_encoding)) { - LANG_SCNG(input_filter) = encoding_filter_script_to_internal; - LANG_SCNG(output_filter) = NULL; - } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { - LANG_SCNG(input_filter) = NULL; - LANG_SCNG(output_filter) = encoding_filter_script_to_internal; - } else { - /* both script and internal encodings are incompatible w/ flex */ - LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; - LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal; - } - - return 0; -} - -ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) -{ - const char *file_path = NULL; - char *buf; - size_t size, offset = 0; - - /* The shebang line was read, get the current position to obtain the buffer start */ - if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) { - if ((offset = ftell(file_handle->handle.fp)) == -1) { - offset = 0; - } - } - - if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) { - return FAILURE; - } - - zend_llist_add_element(&CG(open_files), file_handle); - if (file_handle->handle.stream.handle >= (void*)file_handle && 
file_handle->handle.stream.handle <= (void*)(file_handle+1)) { - zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files)); - size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle; - fh->handle.stream.handle = (void*)(((char*)fh) + diff); - file_handle->handle.stream.handle = fh->handle.stream.handle; - } - - /* Reset the scanner for scanning the new file */ - SCNG(yy_in) = file_handle; - SCNG(yy_start) = NULL; - - if (size != -1) { - if (CG(multibyte)) { - SCNG(script_org) = (unsigned char*)buf; - SCNG(script_org_size) = size; - SCNG(script_filtered) = NULL; - - zend_multibyte_set_filter(NULL TSRMLS_CC); - - if (SCNG(input_filter)) { - if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { - zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " - "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); - } - buf = (char*)SCNG(script_filtered); - size = SCNG(script_filtered_size); - } - } - SCNG(yy_start) = (unsigned char *)buf - offset; - yy_scan_buffer(buf, size TSRMLS_CC); - } else { - zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); - } - - BEGIN(INITIAL); - - if (file_handle->opened_path) { - file_path = file_handle->opened_path; - } else { - file_path = file_handle->filename; - } - - zend_set_compiled_filename(file_path TSRMLS_CC); - - if (CG(start_lineno)) { - CG(zend_lineno) = CG(start_lineno); - CG(start_lineno) = 0; - } else { - CG(zend_lineno) = 1; - } - - RESET_DOC_COMMENT(); - CG(increment_lineno) = 0; - return SUCCESS; -} -END_EXTERN_C() - - -ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC) -{ - zend_lex_state original_lex_state; - zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array)); - zend_op_array *original_active_op_array = CG(active_op_array); - 
zend_op_array *retval=NULL; - int compiler_result; - zend_bool compilation_successful=0; - znode retval_znode; - zend_bool original_in_compilation = CG(in_compilation); - - retval_znode.op_type = IS_CONST; - retval_znode.u.constant.type = IS_LONG; - retval_znode.u.constant.value.lval = 1; - Z_UNSET_ISREF(retval_znode.u.constant); - Z_SET_REFCOUNT(retval_znode.u.constant, 1); - - zend_save_lexical_state(&original_lex_state TSRMLS_CC); - - retval = op_array; /* success oriented */ - - if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) { - if (type==ZEND_REQUIRE) { - zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC); - zend_bailout(); - } else { - zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC); - } - compilation_successful=0; - } else { - init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); - CG(in_compilation) = 1; - CG(active_op_array) = op_array; - zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); - zend_init_compiler_context(TSRMLS_C); - compiler_result = zendparse(TSRMLS_C); - zend_do_return(&retval_znode, 0 TSRMLS_CC); - CG(in_compilation) = original_in_compilation; - if (compiler_result != 0) { /* parser error */ - zend_bailout(); - } - compilation_successful=1; - } - - if (retval) { - CG(active_op_array) = original_active_op_array; - if (compilation_successful) { - pass_two(op_array TSRMLS_CC); - zend_release_labels(0 TSRMLS_CC); - } else { - efree(op_array); - retval = NULL; - } - } - zend_restore_lexical_state(&original_lex_state TSRMLS_CC); - return retval; -} - - -zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC) -{ - zend_file_handle file_handle; - zval tmp; - zend_op_array *retval; - char *opened_path = NULL; - - if (filename->type != IS_STRING) { - tmp = *filename; - zval_copy_ctor(&tmp); - convert_to_string(&tmp); - filename = &tmp; - } - file_handle.filename = filename->value.str.val; - 
file_handle.free_filename = 0; - file_handle.type = ZEND_HANDLE_FILENAME; - file_handle.opened_path = NULL; - file_handle.handle.fp = NULL; - - retval = zend_compile_file(&file_handle, type TSRMLS_CC); - if (retval && file_handle.handle.stream.handle) { - int dummy = 1; - - if (!file_handle.opened_path) { - file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len); - } - - zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL); - - if (opened_path) { - efree(opened_path); - } - } - zend_destroy_file_handle(&file_handle TSRMLS_CC); - - if (filename==&tmp) { - zval_dtor(&tmp); - } - return retval; -} - -ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC) -{ - char *buf; - size_t size; - - /* enforce two trailing NULLs for flex... */ - if (IS_INTERNED(str->value.str.val)) { - char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD); - memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD); - str->value.str.val = tmp; - } else { - str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD); - } - - memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD); - - SCNG(yy_in) = NULL; - SCNG(yy_start) = NULL; - - buf = str->value.str.val; - size = str->value.str.len; - - if (CG(multibyte)) { - SCNG(script_org) = (unsigned char*)buf; - SCNG(script_org_size) = size; - SCNG(script_filtered) = NULL; - - zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); - - if (SCNG(input_filter)) { - if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { - zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " - "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); - } - buf = 
(char*)SCNG(script_filtered); - size = SCNG(script_filtered_size); - } - } - - yy_scan_buffer(buf, size TSRMLS_CC); - - zend_set_compiled_filename(filename TSRMLS_CC); - CG(zend_lineno) = 1; - CG(increment_lineno) = 0; - RESET_DOC_COMMENT(); - return SUCCESS; -} - - -ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) -{ - size_t offset = SCNG(yy_cursor) - SCNG(yy_start); - if (SCNG(input_filter)) { - size_t original_offset = offset, length = 0; - do { - unsigned char *p = NULL; - if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) { - return (size_t)-1; - } - efree(p); - if (length > original_offset) { - offset--; - } else if (length < original_offset) { - offset++; - } - } while (original_offset != length); - } - return offset; -} - - -zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC) -{ - zend_lex_state original_lex_state; - zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array)); - zend_op_array *original_active_op_array = CG(active_op_array); - zend_op_array *retval; - zval tmp; - int compiler_result; - zend_bool original_in_compilation = CG(in_compilation); - - if (source_string->value.str.len==0) { - efree(op_array); - return NULL; - } - - CG(in_compilation) = 1; - - tmp = *source_string; - zval_copy_ctor(&tmp); - convert_to_string(&tmp); - source_string = &tmp; - - zend_save_lexical_state(&original_lex_state TSRMLS_CC); - if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) { - efree(op_array); - retval = NULL; - } else { - zend_bool orig_interactive = CG(interactive); - - CG(interactive) = 0; - init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); - CG(interactive) = orig_interactive; - CG(active_op_array) = op_array; - zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); - zend_init_compiler_context(TSRMLS_C); - BEGIN(ST_IN_SCRIPTING); - compiler_result = zendparse(TSRMLS_C); - - if 
(SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - SCNG(script_filtered) = NULL; - } - - if (compiler_result != 0) { - CG(active_op_array) = original_active_op_array; - CG(unclean_shutdown)=1; - destroy_op_array(op_array TSRMLS_CC); - efree(op_array); - retval = NULL; - } else { - zend_do_return(NULL, 0 TSRMLS_CC); - CG(active_op_array) = original_active_op_array; - pass_two(op_array TSRMLS_CC); - zend_release_labels(0 TSRMLS_CC); - retval = op_array; - } - } - zend_restore_lexical_state(&original_lex_state TSRMLS_CC); - zval_dtor(&tmp); - CG(in_compilation) = original_in_compilation; - return retval; -} - - -BEGIN_EXTERN_C() -int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC) -{ - zend_lex_state original_lex_state; - zend_file_handle file_handle; - - file_handle.type = ZEND_HANDLE_FILENAME; - file_handle.filename = filename; - file_handle.free_filename = 0; - file_handle.opened_path = NULL; - zend_save_lexical_state(&original_lex_state TSRMLS_CC); - if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) { - zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC); - zend_restore_lexical_state(&original_lex_state TSRMLS_CC); - return FAILURE; - } - zend_highlight(syntax_highlighter_ini TSRMLS_CC); - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - SCNG(script_filtered) = NULL; - } - zend_destroy_file_handle(&file_handle TSRMLS_CC); - zend_restore_lexical_state(&original_lex_state TSRMLS_CC); - return SUCCESS; -} - -int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC) -{ - zend_lex_state original_lex_state; - zval tmp = *str; - - str = &tmp; - zval_copy_ctor(str); - zend_save_lexical_state(&original_lex_state TSRMLS_CC); - if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) { - zend_restore_lexical_state(&original_lex_state TSRMLS_CC); - return FAILURE; - } - BEGIN(INITIAL); - 
zend_highlight(syntax_highlighter_ini TSRMLS_CC); - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - SCNG(script_filtered) = NULL; - } - zend_restore_lexical_state(&original_lex_state TSRMLS_CC); - zval_dtor(str); - return SUCCESS; -} - -ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC) -{ - size_t length; - unsigned char *new_yy_start; - - /* convert and set */ - if (!SCNG(input_filter)) { - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - SCNG(script_filtered) = NULL; - } - SCNG(script_filtered_size) = 0; - length = SCNG(script_org_size); - new_yy_start = SCNG(script_org); - } else { - if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { - zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " - "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); - } - SCNG(script_filtered) = new_yy_start; - SCNG(script_filtered_size) = length; - } - - SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); - SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); - SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); - SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start)); - - SCNG(yy_start) = new_yy_start; -} - - -# define zend_copy_value(zendlval, yytext, yyleng) \ - if (SCNG(output_filter)) { \ - size_t sz = 0; \ - SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC); \ - zendlval->value.str.len = sz; \ - } else { \ - zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \ - zendlval->value.str.len = yyleng; \ - } - -static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC) -{ - register char *s, *t; - char *end; - - ZVAL_STRINGL(zendlval, str, len, 1); - - /* 
convert escape sequences */ - s = t = zendlval->value.str.val; - end = s+zendlval->value.str.len; - while (s= end) { - *t++ = '\\'; - break; - } - - switch(*s) { - case 'n': - *t++ = '\n'; - zendlval->value.str.len--; - break; - case 'r': - *t++ = '\r'; - zendlval->value.str.len--; - break; - case 't': - *t++ = '\t'; - zendlval->value.str.len--; - break; - case 'f': - *t++ = '\f'; - zendlval->value.str.len--; - break; - case 'v': - *t++ = '\v'; - zendlval->value.str.len--; - break; - case 'e': -#ifdef PHP_WIN32 - *t++ = VK_ESCAPE; -#else - *t++ = '\e'; -#endif - zendlval->value.str.len--; - break; - case '"': - case '`': - if (*s != quote_type) { - *t++ = '\\'; - *t++ = *s; - break; - } - case '\\': - case '$': - *t++ = *s; - zendlval->value.str.len--; - break; - case 'x': - case 'X': - if (ZEND_IS_HEX(*(s+1))) { - char hex_buf[3] = { 0, 0, 0 }; - - zendlval->value.str.len--; /* for the 'x' */ - - hex_buf[0] = *(++s); - zendlval->value.str.len--; - if (ZEND_IS_HEX(*(s+1))) { - hex_buf[1] = *(++s); - zendlval->value.str.len--; - } - *t++ = (char) strtol(hex_buf, NULL, 16); - } else { - *t++ = '\\'; - *t++ = *s; - } - break; - default: - /* check for an octal */ - if (ZEND_IS_OCT(*s)) { - char octal_buf[4] = { 0, 0, 0, 0 }; - - octal_buf[0] = *s; - zendlval->value.str.len--; - if (ZEND_IS_OCT(*(s+1))) { - octal_buf[1] = *(++s); - zendlval->value.str.len--; - if (ZEND_IS_OCT(*(s+1))) { - octal_buf[2] = *(++s); - zendlval->value.str.len--; - } - } - *t++ = (char) strtol(octal_buf, NULL, 8); - } else { - *t++ = '\\'; - *t++ = *s; - } - break; - } - } else { - *t++ = *s; - } - - if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { - CG(zend_lineno)++; - } - s++; - } - *t = 0; - if (SCNG(output_filter)) { - size_t sz = 0; - s = zendlval->value.str.val; - SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC); - zendlval->value.str.len = sz; - efree(s); - } -} - - -int lex_scan(zval *zendlval 
TSRMLS_DC) -{ -restart: - SCNG(yy_text) = YYCURSOR; - -yymore_restart: - - -{ - YYCTYPE yych; - unsigned int yyaccept = 0; - static void *yyctable[10] = { - &&yyc_ST_IN_SCRIPTING, - &&yyc_ST_LOOKING_FOR_PROPERTY, - &&yyc_ST_BACKQUOTE, - &&yyc_ST_DOUBLE_QUOTES, - &&yyc_ST_HEREDOC, - &&yyc_ST_LOOKING_FOR_VARNAME, - &&yyc_ST_VAR_OFFSET, - &&yyc_INITIAL, - &&yyc_ST_END_HEREDOC, - &&yyc_ST_NOWDOC, - }; - goto *yyctable[YYGETCONDITION()]; -/* *********************************** */ -yyc_INITIAL: - { - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 128, 128, 0, 0, 128, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 128, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }; - YYDEBUG(1, *YYCURSOR); - YYFILL(8); - yych = *YYCURSOR; - if (yych == '<') goto yy5; - YYDEBUG(3, *YYCURSOR); - ++YYCURSOR; -yy4: - YYDEBUG(4, *YYCURSOR); - yyleng = YYCURSOR - SCNG(yy_text); - { - if (YYCURSOR > YYLIMIT) { - return 0; - } - -inline_char_handler: - - while (1) { - YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR); - - YYCURSOR = ptr ? 
ptr + 1 : YYLIMIT; - - if (YYCURSOR < YYLIMIT) { - switch (*YYCURSOR) { - case '?': - if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */ - break; - } - continue; - case '%': - if (CG(asp_tags)) { - break; - } - continue; - case 's': - case 'S': - /* Probably NOT an opening PHP