From e55a0de4966b5be44ff68aa6c293a5ab03198bef Mon Sep 17 00:00:00 2001 From: Scott MacVicar Date: Wed, 26 Mar 2008 14:23:02 +0000 Subject: [PATCH] MFB 5.3: Rewrite scanner to be based on re2c instead of flex There are still changes in regards to parsing of Unicode encoded scripts to come. --- Zend/FlexLexer.h | 186 ---- Zend/Makefile.am | 4 +- Zend/Zend.m4 | 1 - Zend/configure.in | 1 - Zend/flex.skl | 1636 ---------------------------------- Zend/tests/bug38779.phpt | 3 + Zend/tests/bug42767.phpt | 7 + Zend/zend.c | 32 +- Zend/zend_compile.c | 12 +- Zend/zend_compile.h | 9 - Zend/zend_globals.h | 52 +- Zend/zend_globals_macros.h | 11 +- Zend/zend_highlight.c | 4 +- Zend/zend_ini_parser.y | 2 +- Zend/zend_ini_scanner.h | 1 - Zend/zend_ini_scanner.l | 221 ++--- Zend/zend_language_scanner.h | 12 +- Zend/zend_language_scanner.l | 457 +++++----- Zend/zend_stack.c | 11 +- Zend/zend_stream.c | 291 ++++-- Zend/zend_stream.h | 58 +- Zend/zend_strtod.c | 2 +- Zend/zend_vm_def.h | 2 +- Zend/zend_vm_execute.h | 8 +- 24 files changed, 712 insertions(+), 2311 deletions(-) delete mode 100644 Zend/FlexLexer.h delete mode 100644 Zend/flex.skl diff --git a/Zend/FlexLexer.h b/Zend/FlexLexer.h deleted file mode 100644 index fd65258570..0000000000 --- a/Zend/FlexLexer.h +++ /dev/null @@ -1,186 +0,0 @@ -// $Header$ - -// FlexLexer.h -- define interfaces for lexical analyzer classes generated -// by flex - -// Copyright (c) 1993 The Regents of the University of California. -// All rights reserved. -// -// This code is derived from software contributed to Berkeley by -// Kent Williams and Tom Epperly. -// -// Redistribution and use in source and binary forms with or without -// modification are permitted provided that: (1) source distributions retain -// this entire copyright notice and comment, and (2) distributions including -// binaries display the following acknowledgement: ``This product includes -// software developed by the University of California, Berkeley and its -// contributors'' in the documentation or other materials provided with the -// distribution and in all advertising materials mentioning features or use -// of this software. Neither the name of the University nor the names of -// its contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. - -// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -// This file defines FlexLexer, an abstract class which specifies the -// external interface provided to flex C++ lexer objects, and yyFlexLexer, -// which defines a particular lexer class. -// -// If you want to create multiple lexer classes, you use the -P flag -// to rename each yyFlexLexer to some other xxFlexLexer. You then -// include in your other sources once per lexer class: -// -// #undef yyFlexLexer -// #define yyFlexLexer xxFlexLexer -// #include -// -// #undef yyFlexLexer -// #define yyFlexLexer zzFlexLexer -// #include -// ... - -#ifndef FLEXLEXER_H -// Never included before - need to define base class. -#define FLEXLEXER_H -#include - -extern "C++" { - -struct yy_buffer_state; -typedef int yy_state_type; - -class FlexLexer { -public: - virtual ~FlexLexer() { } - - const char* YYText() { return yytext; } - int YYLeng() { return yyleng; } - - virtual void - yy_switch_to_buffer( struct yy_buffer_state* new_buffer ) = 0; - virtual struct yy_buffer_state* - yy_create_buffer( istream* s, int size ) = 0; - virtual void yy_delete_buffer( struct yy_buffer_state* b ) = 0; - virtual void yyrestart( istream* s ) = 0; - - virtual int yylex() = 0; - - // Call yylex with new input/output sources. - int yylex( istream* new_in, ostream* new_out = 0 ) - { - switch_streams( new_in, new_out ); - return yylex(); - } - - // Switch to new input/output streams. A nil stream pointer - // indicates "keep the current one". - virtual void switch_streams( istream* new_in = 0, - ostream* new_out = 0 ) = 0; - - int lineno() const { return yylineno; } - - int debug() const { return yy_flex_debug; } - void set_debug( int flag ) { yy_flex_debug = flag; } - -protected: - char* yytext; - int yyleng; - int yylineno; // only maintained if you use %option yylineno - int yy_flex_debug; // only has effect with -d or "%option debug" -}; - -} -#endif - -#if defined(yyFlexLexer) || ! defined(yyFlexLexerOnce) -// Either this is the first time through (yyFlexLexerOnce not defined), -// or this is a repeated include to define a different flavor of -// yyFlexLexer, as discussed in the flex man page. -#define yyFlexLexerOnce - -class yyFlexLexer : public FlexLexer { -public: - // arg_yyin and arg_yyout default to the cin and cout, but we - // only make that assignment when initializing in yylex(). - yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 ); - - virtual ~yyFlexLexer(); - - void yy_switch_to_buffer( struct yy_buffer_state* new_buffer ); - struct yy_buffer_state* yy_create_buffer( istream* s, int size ); - void yy_delete_buffer( struct yy_buffer_state* b ); - void yyrestart( istream* s ); - - virtual int yylex(); - virtual void switch_streams( istream* new_in, ostream* new_out ); - -protected: - virtual int LexerInput( char* buf, int max_size ); - virtual void LexerOutput( const char* buf, int size ); - virtual void LexerError( const char* msg ); - - void yyunput( int c, char* buf_ptr ); - int yyinput(); - - void yy_load_buffer_state(); - void yy_init_buffer( struct yy_buffer_state* b, istream* s ); - void yy_flush_buffer( struct yy_buffer_state* b ); - - int yy_start_stack_ptr; - int yy_start_stack_depth; - int* yy_start_stack; - - void yy_push_state( int new_state ); - void yy_pop_state(); - int yy_top_state(); - - yy_state_type yy_get_previous_state(); - yy_state_type yy_try_NUL_trans( yy_state_type current_state ); - int yy_get_next_buffer(); - - istream* yyin; // input source for default LexerInput - ostream* yyout; // output sink for default LexerOutput - - struct yy_buffer_state* yy_current_buffer; - - // yy_hold_char holds the character lost when yytext is formed. - char yy_hold_char; - - // Number of characters read into yy_ch_buf. - int yy_n_chars; - - // Points to current character in buffer. - char* yy_c_buf_p; - - int yy_init; // whether we need to initialize - int yy_start; // start state number - - // Flag which is used to allow yywrap()'s to do buffer switches - // instead of setting up a fresh yyin. A bit of a hack ... - int yy_did_buffer_switch_on_eof; - - // The following are not always needed, but may be depending - // on use of certain flex features (like REJECT or yymore()). - - yy_state_type yy_last_accepting_state; - char* yy_last_accepting_cpos; - - yy_state_type* yy_state_buf; - yy_state_type* yy_state_ptr; - - char* yy_full_match; - int* yy_full_state; - int yy_full_lp; - - int yy_lp; - int yy_looking_for_trail_begin; - - int yy_more_flag; - int yy_more_len; - int yy_more_offset; - int yy_prev_more_offset; -}; - -#endif diff --git a/Zend/Makefile.am b/Zend/Makefile.am index 92fdd79692..464cbe9881 100644 --- a/Zend/Makefile.am +++ b/Zend/Makefile.am @@ -31,7 +31,7 @@ zend_ini_scanner.lo: zend_ini_parser.h # Language parser/scanner rules zend_language_scanner.c: $(srcdir)/zend_language_scanner.l - $(LEX) -Pzend -S$(srcdir)/flex.skl -o$@ -i $(srcdir)/zend_language_scanner.l + $(RE2C) $(RE2C_FLAGS) --case-inverted -cbdFt $(srcdir)/zend_language_scanner_defs.h -o$@ $(srcdir)/zend_language_scanner.l zend_language_parser.h: zend_language_parser.c zend_language_parser.c: $(srcdir)/zend_language_parser.y @@ -43,7 +43,7 @@ zend_ini_parser.c: $(srcdir)/zend_ini_parser.y $(YACC) -p ini_ -v -d $(srcdir)/zend_ini_parser.y -o zend_ini_parser.c zend_ini_scanner.c: $(srcdir)/zend_ini_scanner.l - $(LEX) -Pini_ -S$(srcdir)/flex.skl -o$@ -i $(srcdir)/zend_ini_scanner.l + $(RE2C) $(RE2C_FLAGS) --case-inverted -cbdFt $(srcdir)/zend_ini_scanner_defs.h -o$@ $(srcdir)/zend_ini_scanner.l zend_ini_parser.h: zend_ini_parser.c diff --git a/Zend/Zend.m4 b/Zend/Zend.m4 index fe0110e341..bbb3411a4a 100644 --- a/Zend/Zend.m4 +++ b/Zend/Zend.m4 @@ -31,7 +31,6 @@ AC_DEFUN([LIBZEND_BASIC_CHECKS],[ AC_REQUIRE([AC_PROG_YACC]) AC_REQUIRE([AC_PROG_CC]) AC_REQUIRE([AC_PROG_CC_C_O]) -AC_REQUIRE([AC_PROG_LEX]) AC_REQUIRE([AC_HEADER_STDC]) LIBZEND_BISON_CHECK diff --git a/Zend/configure.in b/Zend/configure.in index 9cbd1e11ce..f53d8af6e5 100644 --- a/Zend/configure.in +++ b/Zend/configure.in @@ -7,7 +7,6 @@ AM_CONFIG_HEADER(zend_config.h) AM_SANITY_CHECK AM_MAINTAINER_MODE AC_PROG_CC -AM_PROG_LEX AM_PROG_CC_STDC ZEND_VERSION=$VERSION AC_ZEND_C_BIGENDIAN diff --git a/Zend/flex.skl b/Zend/flex.skl deleted file mode 100644 index 060a34bbee..0000000000 --- a/Zend/flex.skl +++ /dev/null @@ -1,1636 +0,0 @@ -/* A Lexical scanner generated by flex */ - -/* Scanner skeleton version: - * $Header$ - * vim:ft=lex: - */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 - -%- -#include -%* - - -/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ -#ifdef c_plusplus -#ifndef __cplusplus -#define __cplusplus -#endif -#endif - - -#ifdef __cplusplus - -#include -%+ -class istream; -%* -#if HAVE_UNISTD_H -#include -#endif - -/* Use prototypes in function declarations. */ -#define YY_USE_PROTOS - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -#if __STDC__ - -#define YY_USE_PROTOS -#define YY_USE_CONST - -#endif /* __STDC__ */ -#endif /* ! __cplusplus */ - -#ifdef __TURBOC__ - #pragma warn -rch - #pragma warn -use -#include -#include -#define YY_USE_CONST -#define YY_USE_PROTOS -#endif - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -#undef YY_USE_PROTOS -#define YY_USE_PROTOS - -#ifdef YY_USE_PROTOS -#define YY_PROTO(proto) proto -#else -#define YY_PROTO(proto) () -#endif - -/* Returned upon end-of-file. */ -#define YY_NULL 0 - -/* Promotes a possibly negative, possibly signed char to an unsigned - * integer for use as an array index. If the signed char is negative, - * we want to instead treat it as an 8-bit unsigned char, hence the - * double cast. - */ -#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) - -/* Enter a start condition. This macro really ought to take a parameter, - * but we do it the disgusting crufty way forced on us by the ()-less - * definition of BEGIN. - */ -#define BEGIN yy_start = 1 + 2 * - -/* Translate the current start state into a value that can be later handed - * to BEGIN to return to the state. The YYSTATE alias is for lex - * compatibility. - */ -#define YY_START ((yy_start - 1) / 2) -#define YYSTATE YY_START - -/* Action number for EOF rule of a given start state. */ -#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) - -/* Special action meaning "start processing a new file". */ -#define YY_NEW_FILE yyrestart( SCNG(yy_in) TSRMLS_CC ) - -#define YY_END_OF_BUFFER_CHAR 0 - -/* Size of default input buffer. */ -#define YY_BUF_SIZE 16384 - -typedef struct yy_buffer_state *YY_BUFFER_STATE; - -extern int yyleng; -%- -%* - - -#define EOB_ACT_CONTINUE_SCAN 0 -#define EOB_ACT_END_OF_FILE 1 -#define EOB_ACT_LAST_MATCH 2 - -/* The funky do-while in the following #define is used to turn the definition - * int a single C statement (which needs a semi-colon terminator). This - * avoids problems with code like: - * - * if ( condition_holds ) - * yyless( 5 ); - * else - * do_something_else(); - * - * Prior to using the do-while the compiler would get upset at the - * "else" because it interpreted the "if" statement as being all - * done when it reached the ';' after the yyless() call. - */ - -/* Return all but the first 'n' matched characters back to the input stream. */ - -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - *yy_cp = yy_hold_char; \ - yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ - YY_DO_BEFORE_ACTION; /* set up yytext again */ \ - } \ - while ( 0 ) - -#define unput(c) yyunput( c, yytext_ptr TSRMLS_CC ) - -/* The following is because we cannot portably get our hands on size_t - * (without autoconf's help, which isn't available because we want - * flex-generated scanners to compile on their own). - */ -typedef unsigned int yy_size_t; - - -struct yy_buffer_state - { -%- - struct _zend_file_handle *yy_input_file; -%+ - istream* yy_input_file; -%* - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - int yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. - */ - int yy_at_bol; - - /* Whether to try to fill the input buffer when we reach the - * end of it. - */ - int yy_fill_buffer; - - int yy_buffer_status; -#define YY_BUFFER_NEW 0 -#define YY_BUFFER_NORMAL 1 - /* When an EOF's been seen but there's still some text to process - * then we mark the buffer as YY_EOF_PENDING, to indicate that we - * shouldn't try reading from the input source any more. We might - * still have a bunch of tokens to match, though, because of - * possible backing-up. - * - * When we actually see the EOF, we change the status to "new" - * (via yyrestart()), so that the user can continue scanning by - * just pointing yyin at a new input file. - */ -#define YY_BUFFER_EOF_PENDING 2 - }; - -%- Standard (non-C++) definition -#define yy_current_buffer SCNG(current_buffer) -#define yy_hold_char SCNG(_yy_hold_char) -%* - -/* We provide macros for accessing buffer states in case in the - * future we want to put the buffer states in a more general - * "scanner state". - */ -#define YY_CURRENT_BUFFER yy_current_buffer - - -%- Standard (non-C++) definition -/* yy_hold_char holds the character lost when yytext is formed. */ - -#if 0 -static char yy_hold_char; - -static int yy_n_chars; /* number of characters read into yy_ch_buf */ -#endif - -/* Points to current character in buffer. */ -#define yy_c_buf_p SCNG(c_buf_p) -#define yy_init SCNG(init) -#define yy_start SCNG(start) - -#ifdef ZTS -#define TSRMLS_D void ***tsrm_ls -#define TSRMLS_DC , TSRMLS_D -#define TSRMLS_C tsrm_ls -#define TSRMLS_CC , TSRMLS_C -#else -#define TSRMLS_D -#define TSRMLS_DC -#define TSRMLS_C -#define TSRMLS_CC -#endif - -/* Flag which is used to allow yywrap()'s to do buffer switches - * instead of setting up a fresh yyin. A bit of a hack ... - */ -/* static int yy_did_buffer_switch_on_eof; */ -#define yy_did_buffer_switch_on_eof SCNG(_yy_did_buffer_switch_on_eof) - -void yyrestart YY_PROTO(( struct _zend_file_handle *input_file TSRMLS_DC )); - -void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer TSRMLS_DC )); -void yy_load_buffer_state YY_PROTO(( TSRMLS_D )); -YY_BUFFER_STATE yy_create_buffer YY_PROTO(( struct _zend_file_handle *file, int size TSRMLS_DC )); -void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b TSRMLS_DC )); -void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, struct _zend_file_handle *file TSRMLS_DC )); -void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b TSRMLS_DC )); -#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer TSRMLS_CC ) - -YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size TSRMLS_DC )); -YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str TSRMLS_DC )); -YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len TSRMLS_DC )); - -%* - -static void *yy_flex_alloc YY_PROTO(( yy_size_t )); -static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); -static void yy_flex_free YY_PROTO(( void * )); - -#define yy_new_buffer yy_create_buffer - -#define yy_set_interactive(is_interactive) \ - { \ - if ( ! yy_current_buffer ) \ - yy_current_buffer = yy_create_buffer( SCNG(yy_in), YY_BUF_SIZE TSRMLS_CC ); \ - yy_current_buffer->yy_is_interactive = is_interactive; \ - } - -#define yy_set_bol(at_bol) \ - { \ - if ( ! yy_current_buffer ) \ - yy_current_buffer = yy_create_buffer( SCNG(yy_in), YY_BUF_SIZE TSRMLS_CC ); \ - yy_current_buffer->yy_at_bol = at_bol; \ - } - -#define YY_AT_BOL() (yy_current_buffer->yy_at_bol) - -%% yytext/yyin/yyout/yy_state_type/yylineno etc. def's & init go here -#undef yyleng -#define yyleng SCNG(yy_leng) -#undef yytext -#define yytext SCNG(yy_text) -#undef yytext_ptr -#define yytext_ptr SCNG(yy_text) -#undef yyin -#define yyin SCNG(yy_in) -#undef yyout -#define yyout SCNG(yy_out) -#undef yy_last_accepting_state -#define yy_last_accepting_state SCNG(_yy_last_accepting_state) -#undef yy_last_accepting_cpos -#define yy_last_accepting_cpos SCNG(_yy_last_accepting_cpos) -#undef yy_more_flag -#define yy_more_flag SCNG(_yy_more_flag) -#undef yy_more_len -#define yy_more_len SCNG(_yy_more_len) - - -%- Standard (non-C++) definition -static yy_state_type yy_get_previous_state YY_PROTO(( TSRMLS_D )); -static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state TSRMLS_DC )); -static int yy_get_next_buffer YY_PROTO(( TSRMLS_D )); -static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); -%* - -#undef TSRMLS_D -#undef TSRMLS_DC -#undef TSRMLS_C -#undef TSRMLS_CC - -/* Done after the current pattern has been matched and before the - * corresponding action - sets up yytext. - */ -#define YY_DO_BEFORE_ACTION \ - yytext_ptr = yy_bp; \ -%% code to fiddle yytext and yyleng for yymore() goes here - yy_hold_char = *yy_cp; \ - *yy_cp = '\0'; \ -%% code to copy yytext_ptr to yytext[] goes here, if %array - yy_c_buf_p = yy_cp; - -#undef yyleng -#undef yytext -#undef yytext_ptr -#undef yyin -#undef yyout -#undef yy_last_accepting_state -#undef yy_last_accepting_cpos -#undef yy_more_flag -#undef yy_more_len - - -%% data tables for the DFA and the user's section 1 definitions go here - -/* Macros after this point can all be overridden by user definitions in - * section 1. - */ - -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int yywrap YY_PROTO(( void )); -#else -extern int yywrap YY_PROTO(( void )); -#endif -#endif - -%- -#ifndef YY_NO_UNPUT -static void yyunput YY_PROTO(( int c, char *buf_ptr TSRMLS_DC )); -#endif -%* - -#ifndef yytext_ptr -static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen YY_PROTO(( yyconst char * )); -#endif - -#ifndef YY_NO_INPUT -%- Standard (non-C++) definition -#ifdef __cplusplus -static int 3 YY_PROTO(( TSRMLS_D )); -#else -static int input YY_PROTO(( TSRMLS_D )); -#endif -%* -#endif - -#if YY_STACK_USED -#define yy_start_stack_ptr SCNG(yy_start_stack_ptr) -#define yy_start_stack_depth SCNG(yy_start_stack_depth) -#define yy_start_stack SCNG(yy_start_stack) -/* -static int yy_start_stack_ptr = 0; -static int yy_start_stack_depth = 0; -static int *yy_start_stack = 0; -*/ -#ifndef YY_NO_PUSH_STATE -static void yy_push_state YY_PROTO(( int new_state TSRMLS_DC )); -#endif -#ifndef YY_NO_POP_STATE -static void yy_pop_state YY_PROTO(( TSRMLS_D )); -#endif -#ifndef YY_NO_TOP_STATE -static int yy_top_state YY_PROTO(( TSRMLS_D )); -#endif - -#else -#define YY_NO_PUSH_STATE 1 -#define YY_NO_POP_STATE 1 -#define YY_NO_TOP_STATE 1 -#endif - -#ifdef YY_MALLOC_DECL -YY_MALLOC_DECL -#else -#if __STDC__ -#ifndef __cplusplus -#include -#endif -#else -/* Just try to get by without declaring the routines. This will fail - * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) - * or sizeof(void*) != sizeof(int). - */ -#endif -#endif - -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#define YY_READ_BUF_SIZE 8192 -#endif - -/* Copy whatever the last rule matched to the standard output. */ - -/* Zend file handle reading */ -#ifndef ECHO -#define ECHO /* There is no output */ -#endif - -#ifndef ECHO -%- Standard (non-C++) definition -/* This used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite(). - */ -#define ECHO (void) fwrite( yytext, yyleng, 1, SCNG(yy_out) ) -%+ C++ definition -#define ECHO LexerOutput( yytext, yyleng ) -%* -#endif - -/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, - * is returned in "result". - */ -#ifndef YY_INPUT -#define YY_INPUT(buf,result,max_size) \ -%% fread()/read() definition of YY_INPUT goes here unless we're doing C++ -%+ C++ definition - if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); -%* -#endif - -/* No semi-colon after return; correct usage is to write "yyterminate();" - - * we don't want an extra ';' after the "return" because that will cause - * some compilers to complain about unreachable statements. - */ -#ifndef yyterminate -#define yyterminate() return YY_NULL -#endif - -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif - -/* Report a fatal error. */ -#ifndef YY_FATAL_ERROR -%- -#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) -%+ -#define YY_FATAL_ERROR(msg) LexerError( msg ) -%* -#endif - - -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. - */ -#ifndef YY_DECL -%- Standard (non-C++) definition -#define YY_DECL int yylex YY_PROTO(( void )) -%+ C++ definition -#define YY_DECL int yyFlexLexer::yylex() -%* -#endif - -/* Code executed at the beginning of each rule, after yytext and yyleng - * have been set up. - */ -#ifndef YY_USER_ACTION -#define YY_USER_ACTION -#endif - -/* Code executed at the end of each rule. */ -#ifndef YY_BREAK -#define YY_BREAK break; -#endif - -%% YY_RULE_SETUP definition goes here - -YY_DECL - { - register yy_state_type yy_current_state; - register char *yy_cp, *yy_bp; - register int yy_act; - -%% user's declarations go here - - if ( yy_init ) - { - yy_init = 0; - -#ifdef YY_USER_INIT - YY_USER_INIT; -#endif - - if ( ! yy_start ) - yy_start = 1; /* first start state */ - -#if 0 - if ( ! SCNG(yy_in) ) -%- - SCNG(yy_in) = stdin; -%+ - SCNG(yy_in) = &cin; -%* - - if ( ! SCNG(yy_out) ) -%- - SCNG(yy_out) = stdout; -%+ - SCNG(yy_out) = &cout; -%* -#endif - - if ( ! yy_current_buffer ) - yy_current_buffer = - yy_create_buffer( SCNG(yy_in), YY_BUF_SIZE TSRMLS_CC ); - - yy_load_buffer_state(TSRMLS_C); - } - - while ( 1 ) /* loops until end-of-file is reached */ - { -%% yymore()-related code goes here - yy_cp = yy_c_buf_p; - - /* Support of yytext. */ - *yy_cp = yy_hold_char; - - /* yy_bp points to the position in yy_ch_buf of the start of - * the current run. - */ - yy_bp = yy_cp; - -%% code to set up and find next match goes here - -yy_find_action: -%% code to find the action number goes here - - YY_DO_BEFORE_ACTION; - -%% code for yylineno update goes here - -do_action: /* This label is used only to access EOF actions. */ - -%% debug code goes here - - switch ( yy_act ) - { /* beginning of action switch */ -%% actions go here - - case YY_END_OF_BUFFER: - { - /* Amount of text matched not including the EOB char. */ - int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; - - /* Undo the effects of YY_DO_BEFORE_ACTION. */ - *yy_cp = yy_hold_char; - - if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) - { - /* We're scanning a new file or input source. It's - * possible that this happened because the user - * just pointed yyin at a new source and called - * yylex(). If so, then we have to assure - * consistency between yy_current_buffer and our - * globals. Here is the right place to do so, because - * this is the first action (other than possibly a - * back-up) that will match for the new input source. - */ - SCNG(yy_n_chars) = yy_current_buffer->yy_n_chars; - yy_current_buffer->yy_input_file = SCNG(yy_in); - yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; - } - - /* Note that here we test for yy_c_buf_p "<=" to the position - * of the first EOB in the buffer, since yy_c_buf_p will - * already have been incremented past the NUL character - * (since all states make transitions on EOB to the - * end-of-buffer state). Contrast this with the test - * in input(). - */ - if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[SCNG(yy_n_chars)] ) - { /* This was really a NUL. */ - yy_state_type yy_next_state; - - yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state(TSRMLS_C); - - /* Okay, we're now positioned to make the NUL - * transition. We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we don't - * want to build jamming into it because then it - * will run more slowly). - */ - - yy_next_state = yy_try_NUL_trans( yy_current_state TSRMLS_CC ); - - yy_bp = yytext_ptr + YY_MORE_ADJ; - - if ( yy_next_state ) - { - /* Consume the NUL. */ - yy_cp = ++yy_c_buf_p; - yy_current_state = yy_next_state; - goto yy_match; - } - - else - { -%% code to do back-up for compressed tables and set up yy_cp goes here - goto yy_find_action; - } - } - - else switch ( yy_get_next_buffer(TSRMLS_C) ) - { - case EOB_ACT_END_OF_FILE: - { - yy_did_buffer_switch_on_eof = 0; - - if ( yywrap() ) - { - /* Note: because we've taken care in - * yy_get_next_buffer() to have set up - * yytext, we can now set up - * yy_c_buf_p so that if some total - * hoser (like flex itself) wants to - * call the scanner after we return the - * YY_NULL, it'll still work - another - * YY_NULL will get returned. - */ - yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; - - yy_act = YY_STATE_EOF(YY_START); - goto do_action; - } - - else - { - if ( ! yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; - } - break; - } - - case EOB_ACT_CONTINUE_SCAN: - yy_c_buf_p = - yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state(TSRMLS_C); - - yy_cp = yy_c_buf_p; - yy_bp = yytext_ptr + YY_MORE_ADJ; - goto yy_match; - - case EOB_ACT_LAST_MATCH: - yy_c_buf_p = - &yy_current_buffer->yy_ch_buf[SCNG(yy_n_chars)]; - - yy_current_state = yy_get_previous_state(TSRMLS_C); - - yy_cp = yy_c_buf_p; - yy_bp = yytext_ptr + YY_MORE_ADJ; - goto yy_find_action; - } - break; - } - - default: - YY_FATAL_ERROR( - "fatal flex scanner internal error--no action found" ); - } /* end of action switch */ - } /* end of scanning one token */ - } /* end of yylex */ - -%+ -yyFlexLexer::yyFlexLexer( istream* arg_yyin, ostream* arg_yyout ) - { - SCNG(yy_in) = arg_yyin; - SCNG(yy_out) = arg_yyout; - yy_c_buf_p = 0; - yy_init = 1; - yy_start = 0; - yy_flex_debug = 0; - yylineno = 1; // this will only get updated if %option yylineno - - yy_did_buffer_switch_on_eof = 0; - - yy_looking_for_trail_begin = 0; - yy_more_flag = 0; - yy_more_len = 0; - yy_more_offset = yy_prev_more_offset = 0; - - yy_start_stack_ptr = yy_start_stack_depth = 0; - yy_start_stack = 0; - - yy_current_buffer = 0; - -#ifdef YY_USES_REJECT - yy_state_buf = new yy_state_type[YY_BUF_SIZE + 2]; -#else - yy_state_buf = 0; -#endif - } - -yyFlexLexer::~yyFlexLexer() - { - delete yy_state_buf; - yy_delete_buffer( yy_current_buffer TSRMLS_CC ); - } - -void yyFlexLexer::switch_streams( istream* new_in, ostream* new_out ) - { - if ( new_in ) - { - yy_delete_buffer( SCNG(yy_current_buffer TSRMLS_CC ) ); - yy_switch_to_buffer( yy_create_buffer( new_in, YY_BUF_SIZE TSRMLS_CC ) TSRMLS_CC ); - } - - if ( new_out ) - SCNG(yy_out) = new_out; - } - -int yyFlexLexer::LexerInput( char* buf, int max_size ) - { - if ( SCNG(yy_in)->eof() || SCNG(yy_in)->fail() ) - return 0; - - if (yy_current_buffer->yy_is_interactive) { - SCNG(yy_in)->get( buf[0] ); - - if ( SCNG(yy_in)->eof() ) - return 0; - - if ( SCNG(yy_in)->bad() ) - return -1; - - return 1; - } else { - (void) SCNG(yy_in)->read( buf, max_size ); - - if ( SCNG(yy_in)->bad() ) - return -1; - else - return SCNG(yy_in)->gcount(); - } - } - -void yyFlexLexer::LexerOutput( const char* buf, int size ) - { - (void) SCNG(yy_out)->write( buf, size ); - } -%* - -/* yy_get_next_buffer - try to read in a new buffer - * - * Returns a code representing an action: - * EOB_ACT_LAST_MATCH - - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position - * EOB_ACT_END_OF_FILE - end of file - */ - -%- -static int yy_get_next_buffer(TSRMLS_D) -%+ -int yyFlexLexer::yy_get_next_buffer(TSRMLS_D) -%* - { - register char *dest = yy_current_buffer->yy_ch_buf; - register char *source = yytext_ptr; - register int number_to_move, i; - int ret_val; - - if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[SCNG(yy_n_chars) + 1] ) - YY_FATAL_ERROR( - "fatal flex scanner internal error--end of buffer missed" ); - - if ( yy_current_buffer->yy_fill_buffer == 0 ) - { /* Don't try to fill the buffer, so this is an EOF. */ - if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) - { - /* We matched a single character, the EOB, so - * treat this as a final EOF. - */ - return EOB_ACT_END_OF_FILE; - } - - else - { - /* We matched some text prior to the EOB, first - * process it. - */ - return EOB_ACT_LAST_MATCH; - } - } - - /* Try to read more data. */ - - /* First move last chars to start of buffer. */ - number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; - - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); - - if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - yy_current_buffer->yy_n_chars = SCNG(yy_n_chars) = 0; - - else - { - int num_to_read = - yy_current_buffer->yy_buf_size - number_to_move - 1; - - while ( num_to_read <= 0 ) - { /* Not enough room in the buffer - grow it. */ -#ifdef YY_USES_REJECT - YY_FATAL_ERROR( -"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); -#else - - /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = yy_current_buffer; - - int yy_c_buf_p_offset = - (int) (yy_c_buf_p - b->yy_ch_buf); - - if ( b->yy_is_our_buffer ) - { - int new_size = b->yy_buf_size * 2; - - if ( new_size <= 0 ) - b->yy_buf_size += b->yy_buf_size / 8; - else - b->yy_buf_size *= 2; - - b->yy_ch_buf = (char *) - /* Include room in for 2 EOB chars. */ - yy_flex_realloc( (void *) b->yy_ch_buf, - b->yy_buf_size + 2 ); - } - else - /* Can't grow it, we don't own it. */ - b->yy_ch_buf = 0; - - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( - "fatal error - scanner input buffer overflow" ); - - yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; - - num_to_read = yy_current_buffer->yy_buf_size - - number_to_move - 1; -#endif - } - - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; - - /* Read in more data. */ - YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), - SCNG(yy_n_chars), num_to_read ); - - yy_current_buffer->yy_n_chars = SCNG(yy_n_chars); - } - - if ( SCNG(yy_n_chars) == 0 ) - { - if ( number_to_move == YY_MORE_ADJ ) - { - ret_val = EOB_ACT_END_OF_FILE; - yyrestart( SCNG(yy_in) TSRMLS_CC ); - } - - else - { - ret_val = EOB_ACT_LAST_MATCH; - yy_current_buffer->yy_buffer_status = - YY_BUFFER_EOF_PENDING; - } - } - - else - ret_val = EOB_ACT_CONTINUE_SCAN; - - SCNG(yy_n_chars) += number_to_move; - yy_current_buffer->yy_ch_buf[SCNG(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; - yy_current_buffer->yy_ch_buf[SCNG(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; - - yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; - - return ret_val; - } - - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - -%- -static yy_state_type yy_get_previous_state(TSRMLS_D) -%+ -yy_state_type yyFlexLexer::yy_get_previous_state(TSRMLS_D) -%* - { - register yy_state_type yy_current_state; - register char *yy_cp; - -%% code to get the start state into yy_current_state goes here - - for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) - { -%% code to find the next state goes here - } - - return yy_current_state; - } - - -/* yy_try_NUL_trans - try to make a transition on the NUL character - * - * synopsis - * next_state = yy_try_NUL_trans( current_state ); - */ - -%- -#ifdef YY_USE_PROTOS -static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state TSRMLS_DC ) -#else -static yy_state_type yy_try_NUL_trans( yy_current_state TSRMLS_CC ) -yy_state_type yy_current_state; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif -%+ -yy_state_type yyFlexLexer::yy_try_NUL_trans( yy_state_type yy_current_state TSRMLS_DC ) -%* - { - register int yy_is_jam; -%% code to find the next state, and perhaps do backing up, goes here - - return yy_is_jam ? 0 : yy_current_state; - } - - -%- -#ifndef YY_NO_UNPUT -#ifdef YY_USE_PROTOS -static void yyunput( int c, register char *yy_bp TSRMLS_DC ) -#else -static void yyunput( c, yy_bp TSRMLS_CC ) -int c; -register char *yy_bp; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif -%+ -void yyFlexLexer::yyunput( int c, register char* yy_bp TSRMLS_DC ) -%* - { - register char *yy_cp = yy_c_buf_p; - - /* undo effects of setting up yytext */ - *yy_cp = yy_hold_char; - - if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) - { /* need to shift things up to make room */ - /* +2 for EOB chars. */ - register int number_to_move = SCNG(yy_n_chars) + 2; - register char *dest = &yy_current_buffer->yy_ch_buf[ - yy_current_buffer->yy_buf_size + 2]; - register char *source = - &yy_current_buffer->yy_ch_buf[number_to_move]; - - while ( source > yy_current_buffer->yy_ch_buf ) - *--dest = *--source; - - yy_cp += (int) (dest - source); - yy_bp += (int) (dest - source); - yy_current_buffer->yy_n_chars = - SCNG(yy_n_chars) = yy_current_buffer->yy_buf_size; - - if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) - YY_FATAL_ERROR( "flex scanner push-back overflow" ); - } - - *--yy_cp = (char) c; - -%% update yylineno here - - yytext_ptr = yy_bp; - yy_hold_char = *yy_cp; - yy_c_buf_p = yy_cp; - } -%- -#endif /* ifndef YY_NO_UNPUT */ -%* - - -%- -#ifdef __cplusplus -static int yyinput(TSRMLS_D) -#else -static int input(TSRMLS_C) -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif -%+ -int yyFlexLexer::yyinput(TSRMLS_D) -%* - { - int c; - - *yy_c_buf_p = yy_hold_char; - - if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) - { - /* yy_c_buf_p now points to the character we want to return. - * If this occurs *before* the EOB characters, then it's a - * valid NUL; if not, then we've hit the end of the buffer. - */ - if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[SCNG(yy_n_chars)] ) - /* This was really a NUL. */ - *yy_c_buf_p = '\0'; - - else - { /* need more input */ - int offset = yy_c_buf_p - yytext_ptr; - ++yy_c_buf_p; - - switch ( yy_get_next_buffer(TSRMLS_C) ) - { - case EOB_ACT_LAST_MATCH: - /* This happens because yy_g_n_b() - * sees that we've accumulated a - * token and flags that we need to - * try matching the token before - * proceeding. But for input(), - * there's no matching to consider. - * So convert the EOB_ACT_LAST_MATCH - * to EOB_ACT_END_OF_FILE. - */ - - /* Reset buffer status. */ - yyrestart( SCNG(yy_in) TSRMLS_CC ); - - /* fall through */ - - case EOB_ACT_END_OF_FILE: - { - if ( yywrap() ) - return EOF; - - if ( ! yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; -#ifdef __cplusplus - return yyinput(TSRMLS_C); -#else - return input(TSRMLS_C); -#endif - } - - case EOB_ACT_CONTINUE_SCAN: - yy_c_buf_p = yytext_ptr + offset; - break; - } - } - } - - c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */ - *yy_c_buf_p = '\0'; /* preserve yytext */ - yy_hold_char = *++yy_c_buf_p; - -%% update BOL and yylineno - - return c; - } - - -%- -#ifdef YY_USE_PROTOS -void yyrestart( struct _zend_file_handle *input_file TSRMLS_DC ) -#else -void yyrestart( input_file TSRMLS_CC ) -struct _zend_file_handle *input_file; -#endif -%+ -void yyFlexLexer::yyrestart( istream* input_file TSRMLS_DC ) -%* - { - if ( ! yy_current_buffer ) - yy_current_buffer = yy_create_buffer( SCNG(yy_in), YY_BUF_SIZE TSRMLS_CC ); - - yy_init_buffer( yy_current_buffer, input_file TSRMLS_CC ); - yy_load_buffer_state(TSRMLS_C); - } - - -%- -#ifdef YY_USE_PROTOS -void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer TSRMLS_DC) -#else -void yy_switch_to_buffer( new_buffer TSRMLS_CC) -YY_BUFFER_STATE new_buffer; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif -%+ -void yyFlexLexer::yy_switch_to_buffer( YY_BUFFER_STATE new_buffer TSRMLS_DC ) -%* - { - if ( yy_current_buffer == new_buffer ) - return; - - if ( yy_current_buffer ) - { - /* Flush out information for old buffer. */ - *yy_c_buf_p = yy_hold_char; - yy_current_buffer->yy_buf_pos = yy_c_buf_p; - yy_current_buffer->yy_n_chars = SCNG(yy_n_chars); - } - - yy_current_buffer = new_buffer; - yy_load_buffer_state(TSRMLS_C); - - /* We don't actually know whether we did this switch during - * EOF (yywrap()) processing, but the only time this flag - * is looked at is after yywrap() is called, so it's safe - * to go ahead and always set it. - */ - yy_did_buffer_switch_on_eof = 1; - } - - -%- -#ifdef YY_USE_PROTOS -void yy_load_buffer_state( TSRMLS_D ) -#else -void yy_load_buffer_state(TSRMLS_C) -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif -%+ -void yyFlexLexer::yy_load_buffer_state() -%* - { - SCNG(yy_n_chars) = yy_current_buffer->yy_n_chars; - yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; - SCNG(yy_in) = yy_current_buffer->yy_input_file; - yy_hold_char = *yy_c_buf_p; - } - - -%- -#ifdef YY_USE_PROTOS -YY_BUFFER_STATE yy_create_buffer( struct _zend_file_handle *file, int size TSRMLS_DC ) -#else -YY_BUFFER_STATE yy_create_buffer( file, size TSRMLS_CC ) -struct _zend_file_handle *file; -int size; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif -%+ -YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( istream* file, int size TSRMLS_DC ) -%* - { - YY_BUFFER_STATE b; - - b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_buf_size = size; - - /* yy_ch_buf has to be 2 characters longer than the size given because - * we need to put in 2 end-of-buffer characters. - */ - b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_is_our_buffer = 1; - - yy_init_buffer( b, file TSRMLS_CC ); - - return b; - } - - -%- -#ifdef YY_USE_PROTOS -void yy_delete_buffer( YY_BUFFER_STATE b TSRMLS_DC ) -#else -void yy_delete_buffer( b TSRMLS_CC ) -YY_BUFFER_STATE b; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif -%+ -void yyFlexLexer::yy_delete_buffer( YY_BUFFER_STATE b TSRMLS_DC ) -%* - { - if ( ! b ) - return; - - if ( b == yy_current_buffer ) - yy_current_buffer = (YY_BUFFER_STATE) 0; - - if ( b->yy_is_our_buffer ) - yy_flex_free( (void *) b->yy_ch_buf ); - - yy_flex_free( (void *) b ); - } - - -%- - -#ifdef YY_USE_PROTOS -void yy_init_buffer( YY_BUFFER_STATE b, struct _zend_file_handle *file TSRMLS_DC ) -#else -void yy_init_buffer( b, file TSRMLS_CC ) -YY_BUFFER_STATE b; -struct _zend_file_handle *file; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif - -%+ -extern "C" int isatty YY_PROTO(( int )); -void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file TSRMLS_DC ) -%* - - { - yy_flush_buffer( b TSRMLS_CC ); - - b->yy_input_file = file; - b->yy_fill_buffer = 1; - -%- -#if YY_ALWAYS_INTERACTIVE - b->yy_is_interactive = 1; -#else -#if YY_NEVER_INTERACTIVE - b->yy_is_interactive = 0; -#else - b->yy_is_interactive = file->handle.stream.interactive; -#endif -#endif -%+ - b->yy_is_interactive = (file == (istream *) &cin) ? 1 : 0; -%* - } - - -%- -#ifdef YY_USE_PROTOS -void yy_flush_buffer( YY_BUFFER_STATE b TSRMLS_DC ) -#else -void yy_flush_buffer( b TSRMLS_CC ) -YY_BUFFER_STATE b; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif - -%+ -void yyFlexLexer::yy_flush_buffer( YY_BUFFER_STATE b TSRMLS_DC ) -%* - { - if ( ! b ) - return; - - b->yy_n_chars = 0; - - /* We always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. - */ - b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; - b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; - - b->yy_buf_pos = &b->yy_ch_buf[0]; - - b->yy_at_bol = 1; - b->yy_buffer_status = YY_BUFFER_NEW; - - if ( b == yy_current_buffer ) - yy_load_buffer_state(TSRMLS_C); - } -%* - - -#ifndef YY_NO_SCAN_BUFFER -%- -#ifdef YY_USE_PROTOS -YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size TSRMLS_DC ) -#else -YY_BUFFER_STATE yy_scan_buffer( base, size TSRMLS_CC ) -char *base; -yy_size_t size; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif - { - YY_BUFFER_STATE b; - - if ( size < 2 || - base[size-2] != YY_END_OF_BUFFER_CHAR || - base[size-1] != YY_END_OF_BUFFER_CHAR ) - /* They forgot to leave room for the EOB's. */ - return 0; - - b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); - - b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ - b->yy_buf_pos = b->yy_ch_buf = base; - b->yy_is_our_buffer = 0; - b->yy_input_file = 0; - b->yy_n_chars = b->yy_buf_size; - b->yy_is_interactive = 0; - b->yy_at_bol = 1; - b->yy_fill_buffer = 0; - b->yy_buffer_status = YY_BUFFER_NEW; - - yy_switch_to_buffer( b TSRMLS_CC ); - - return b; - } -%* -#endif - - -#ifndef YY_NO_SCAN_STRING -%- -#ifdef YY_USE_PROTOS -YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str TSRMLS_DC ) -#else -YY_BUFFER_STATE yy_scan_string( yy_str TSRMLS_CC ) -yyconst char *yy_str; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif - { - int len; - for ( len = 0; yy_str[len]; ++len ) - ; - - return yy_scan_bytes( yy_str, len TSRMLS_CC ); - } -%* -#endif - - -#ifndef YY_NO_SCAN_BYTES -%- -#ifdef YY_USE_PROTOS -YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len TSRMLS_DC ) -#else -YY_BUFFER_STATE yy_scan_bytes( bytes, len TSRMLS_CC ) -yyconst char *bytes; -int len; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif - { - YY_BUFFER_STATE b; - char *buf; - yy_size_t n; - int i; - - /* Get memory for full buffer, including space for trailing EOB's. */ - n = len + 2; - buf = (char *) yy_flex_alloc( n ); - if ( ! buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); - - for ( i = 0; i < len; ++i ) - buf[i] = bytes[i]; - - buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; - - b = yy_scan_buffer( buf, n TSRMLS_CC); - if ( ! b ) - YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); - - /* It's okay to grow etc. this buffer, and we should throw it - * away when we're done. - */ - b->yy_is_our_buffer = 1; - - return b; - } -%* -#endif - - -#ifndef YY_NO_PUSH_STATE -%- -#ifdef YY_USE_PROTOS -static void yy_push_state( int new_state TSRMLS_DC ) -#else -static void yy_push_state( new_state TSRMLS_CC ) -int new_state; -#ifdef ZTS -void ***tsrm_ls; -#endif -#endif -%+ -void yyFlexLexer::yy_push_state( int new_state TSRMLS_DC ) -%* - { - if ( yy_start_stack_ptr >= yy_start_stack_depth ) - { - yy_size_t new_size; - - yy_start_stack_depth += YY_START_STACK_INCR; - new_size = yy_start_stack_depth * sizeof( int ); - - if ( ! yy_start_stack ) - yy_start_stack = (int *) yy_flex_alloc( new_size ); - - else - yy_start_stack = (int *) yy_flex_realloc( - (void *) yy_start_stack, new_size ); - - if ( ! yy_start_stack ) - YY_FATAL_ERROR( - "out of memory expanding start-condition stack" ); - } - - yy_start_stack[yy_start_stack_ptr++] = YY_START; - - BEGIN(new_state); - } -#endif - - -#ifndef YY_NO_POP_STATE -%- -static void yy_pop_state(TSRMLS_D) -%+ -void yyFlexLexer::yy_pop_state(TSRMLS_D) -%* - { - if ( --yy_start_stack_ptr < 0 ) - YY_FATAL_ERROR( "start-condition stack underflow" ); - - BEGIN(yy_start_stack[yy_start_stack_ptr]); - } -#endif - - -#ifndef YY_NO_TOP_STATE -%- -static int yy_top_state(TSRMLS_D) -%+ -int yyFlexLexer::yy_top_state() -%* - { - return yy_start_stack[yy_start_stack_ptr - 1]; - } -#endif - -#ifndef YY_EXIT_FAILURE -#define YY_EXIT_FAILURE 2 -#endif - -%- -#ifdef YY_USE_PROTOS -static void yy_fatal_error( yyconst char msg[] ) -#else -static void yy_fatal_error( msg ) -char msg[]; -#endif - { - (void) fprintf( stderr, "%s\n", msg ); - exit( YY_EXIT_FAILURE ); - } - -%+ - -void yyFlexLexer::LexerError( yyconst char msg[] ) - { - cerr << msg << '\n'; - exit( YY_EXIT_FAILURE ); - } -%* - - -/* Redefine yyless() so it works in section 3 code. */ - -#undef yyless -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - yytext[yyleng] = yy_hold_char; \ - yy_c_buf_p = yytext + n; \ - yy_hold_char = *yy_c_buf_p; \ - *yy_c_buf_p = '\0'; \ - yyleng = n; \ - } \ - while ( 0 ) - - -/* Internal utility routines. */ - -#ifndef yytext_ptr -#ifdef YY_USE_PROTOS -static void yy_flex_strncpy( char *s1, yyconst char *s2, int n ) -#else -static void yy_flex_strncpy( s1, s2, n ) -char *s1; -yyconst char *s2; -int n; -#endif - { - register int i; - for ( i = 0; i < n; ++i ) - s1[i] = s2[i]; - } -#endif - -#ifdef YY_NEED_STRLEN -#ifdef YY_USE_PROTOS -static int yy_flex_strlen( yyconst char *s ) -#else -static int yy_flex_strlen( s ) -yyconst char *s; -#endif - { - register int n; - for ( n = 0; s[n]; ++n ) - ; - - return n; - } -#endif - - -#ifdef YY_USE_PROTOS -static void *yy_flex_alloc( yy_size_t size ) -#else -static void *yy_flex_alloc( size ) -yy_size_t size; -#endif - { - return (void *) malloc( size ); - } - -#ifdef YY_USE_PROTOS -static void *yy_flex_realloc( void *ptr, yy_size_t size ) -#else -static void *yy_flex_realloc( ptr, size ) -void *ptr; -yy_size_t size; -#endif - { - /* The cast to (char *) in the following accommodates both - * implementations that use char* generic pointers, and those - * that use void* generic pointers. It works with the latter - * because both ANSI C and C++ allow castless assignment from - * any pointer type to void*, and deal with argument conversions - * as though doing an assignment. - */ - return (void *) realloc( (char *) ptr, size ); - } - -#ifdef YY_USE_PROTOS -static void yy_flex_free( void *ptr ) -#else -static void yy_flex_free( ptr ) -void *ptr; -#endif - { - free( ptr ); - } - -#if YY_MAIN -int main() - { - yylex(); - return 0; - } -#endif diff --git a/Zend/tests/bug38779.phpt b/Zend/tests/bug38779.phpt index bb62d38fa3..919a13c0ac 100644 --- a/Zend/tests/bug38779.phpt +++ b/Zend/tests/bug38779.phpt @@ -21,6 +21,9 @@ class Loader { function stream_eof() { return $this->position >= strlen($this->data); } + function stream_stat() { + return array('size' => strlen($this->data)); + } } stream_wrapper_register('Loader', 'Loader'); require 'Loader://qqq.php'; diff --git a/Zend/tests/bug42767.phpt b/Zend/tests/bug42767.phpt index 0de4dba5fb..484918cdb7 100644 --- a/Zend/tests/bug42767.phpt +++ b/Zend/tests/bug42767.phpt @@ -1,5 +1,12 @@ --TEST-- Bug #42767 (highlight_string() truncates trailing comments) +--INI-- +highlight.string = #DD0000 +highlight.comment = #FF8000 +highlight.keyword = #007700 +highlight.bg = #FFFFFF +highlight.default = #0000BB +highlight.html = #000000 --FILE-- #endif -static void scanner_globals_ctor(zend_scanner_globals *scanner_globals_p TSRMLS_DC) /* {{{ */ +static void ini_scanner_globals_ctor(zend_ini_scanner_globals *scanner_globals_p TSRMLS_DC) /* {{{ */ { - scanner_globals_p->c_buf_p = (char *) 0; - scanner_globals_p->init = 1; - scanner_globals_p->start = 0; - scanner_globals_p->current_buffer = NULL; - scanner_globals_p->yy_in = NULL; - scanner_globals_p->yy_out = NULL; - scanner_globals_p->_yy_more_flag = 0; - scanner_globals_p->_yy_more_len = 0; - scanner_globals_p->yy_start_stack_ptr = 0; - scanner_globals_p->yy_start_stack_depth = 0; - scanner_globals_p->yy_start_stack = 0; + memset(scanner_globals_p, 0, sizeof(*scanner_globals_p)); +} +/* }}} */ + +static void php_scanner_globals_ctor(zend_php_scanner_globals *scanner_globals_p TSRMLS_DC) /* {{{ */ +{ + memset(scanner_globals_p, 0, sizeof(*scanner_globals_p)); } /* }}} */ @@ -1022,8 +1018,8 @@ int zend_startup(zend_utility_functions *utility_functions, char **extensions, i extern ZEND_API ts_rsrc_id language_scanner_globals_id; extern ZEND_API ts_rsrc_id unicode_globals_id; #else - extern zend_scanner_globals ini_scanner_globals; - extern zend_scanner_globals language_scanner_globals; + extern zend_ini_scanner_globals ini_scanner_globals; + extern zend_php_scanner_globals language_scanner_globals; extern zend_unicode_globals unicode_globals; #endif TSRMLS_FETCH(); @@ -1104,8 +1100,8 @@ int zend_startup(zend_utility_functions *utility_functions, char **extensions, i ts_allocate_id(&unicode_globals_id, sizeof(zend_unicode_globals), (ts_allocate_ctor) unicode_globals_ctor, (ts_allocate_dtor) unicode_globals_dtor); ts_allocate_id(&compiler_globals_id, sizeof(zend_compiler_globals), (ts_allocate_ctor) compiler_globals_ctor, (ts_allocate_dtor) compiler_globals_dtor); ts_allocate_id(&executor_globals_id, sizeof(zend_executor_globals), (ts_allocate_ctor) executor_globals_ctor, (ts_allocate_dtor) executor_globals_dtor); - ts_allocate_id(&language_scanner_globals_id, sizeof(zend_scanner_globals), (ts_allocate_ctor) scanner_globals_ctor, NULL); - ts_allocate_id(&ini_scanner_globals_id, sizeof(zend_scanner_globals), (ts_allocate_ctor) scanner_globals_ctor, NULL); + ts_allocate_id(&language_scanner_globals_id, sizeof(zend_php_scanner_globals), (ts_allocate_ctor) php_scanner_globals_ctor, NULL); + ts_allocate_id(&ini_scanner_globals_id, sizeof(zend_ini_scanner_globals), (ts_allocate_ctor) ini_scanner_globals_ctor, NULL); compiler_globals = ts_resource(compiler_globals_id); executor_globals = ts_resource(executor_globals_id); tsrm_ls = ts_resource_ex(0, NULL); @@ -1123,8 +1119,8 @@ int zend_startup(zend_utility_functions *utility_functions, char **extensions, i *executor_globals->zend_constants = *GLOBAL_CONSTANTS_TABLE; #else unicode_globals_ctor(&unicode_globals TSRMLS_CC); - scanner_globals_ctor(&ini_scanner_globals TSRMLS_CC); - scanner_globals_ctor(&language_scanner_globals TSRMLS_CC); + ini_scanner_globals_ctor(&ini_scanner_globals TSRMLS_CC); + php_scanner_globals_ctor(&language_scanner_globals TSRMLS_CC); zend_set_default_compile_time_values(TSRMLS_C); EG(user_error_handler) = NULL; EG(user_exception_handler) = NULL; diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 70a836ae22..b821e53650 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -87,7 +87,7 @@ static void build_runtime_defined_function_key(zval *result, zend_uchar type, zs char *filename; uint filename_length; - char_pos_len = zend_sprintf(char_pos_buf, "%p", LANG_SCNG(_yy_last_accepting_cpos)); + char_pos_len = zend_sprintf(char_pos_buf, "%p", LANG_SCNG(yy_text)); if (CG(active_op_array)->filename) { filename = CG(active_op_array)->filename; } else { @@ -169,6 +169,14 @@ void zend_init_compiler_data_structures(TSRMLS_D) /* {{{ */ } /* }}} */ +ZEND_API void file_handle_dtor(zend_file_handle *fh) /* {{{ */ +{ + TSRMLS_FETCH(); + + zend_file_handle_dtor(fh TSRMLS_CC); +} +/* }}} */ + void init_compiler(TSRMLS_D) /* {{{ */ { CG(auto_globals_cache) = emalloc(sizeof(zval**) * zend_hash_num_elements(CG(auto_globals))); @@ -177,7 +185,7 @@ void init_compiler(TSRMLS_D) /* {{{ */ zend_init_rsrc_list(TSRMLS_C); zend_hash_init(&CG(filenames_table), 5, NULL, (dtor_func_t) free_estring, 0); zend_hash_init(&CG(script_encodings_table), 5, NULL, (dtor_func_t) free_estring, 0); - zend_llist_init(&CG(open_files), sizeof(zend_file_handle), (void (*)(void *)) zend_file_handle_dtor, 0); + zend_llist_init(&CG(open_files), sizeof(zend_file_handle), (void (*)(void *)) file_handle_dtor, 0); CG(unclean_shutdown) = 0; } /* }}} */ diff --git a/Zend/zend_compile.h b/Zend/zend_compile.h index bb2a79e8e3..fd6746caeb 100644 --- a/Zend/zend_compile.h +++ b/Zend/zend_compile.h @@ -554,7 +554,6 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC); ZEND_API void init_op_array(zend_op_array *op_array, zend_uchar type, int initial_ops_size TSRMLS_DC); ZEND_API void destroy_op_array(zend_op_array *op_array TSRMLS_DC); ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC); -ZEND_API void zend_file_handle_dtor(zend_file_handle *fh); ZEND_API int zend_cleanup_class_data(zend_class_entry **pce TSRMLS_DC); ZEND_API int zend_cleanup_function_data(zend_function *function TSRMLS_DC); ZEND_API int zend_cleanup_function_data_full(zend_function *function TSRMLS_DC); @@ -694,14 +693,6 @@ int zendlex(znode *zendlval TSRMLS_DC); #define ZEND_CT (1<<0) #define ZEND_RT (1<<1) - -#define ZEND_HANDLE_FILENAME 0 -#define ZEND_HANDLE_FD 1 -#define ZEND_HANDLE_FP 2 -#define ZEND_HANDLE_STDIOSTREAM 3 -#define ZEND_HANDLE_FSTREAM 4 -#define ZEND_HANDLE_STREAM 5 - #define ZEND_FETCH_STANDARD 0 #define ZEND_FETCH_ADD_LOCK (1<<0) #define ZEND_FETCH_MAKE_REF (1<<1) diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index e2f21e4284..7f6ad15d92 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -243,37 +243,45 @@ struct _zend_executor_globals { void *reserved[ZEND_MAX_RESERVED_RESOURCES]; }; -struct _zend_scanner_globals { +struct _zend_ini_scanner_globals { zend_file_handle *yy_in; zend_file_handle *yy_out; - int yy_leng; - char *yy_text; - struct yy_buffer_state *current_buffer; - char *c_buf_p; - int init; - int start; + + unsigned int yy_leng; + unsigned char *yy_start; + unsigned char *yy_text; + unsigned char *yy_cursor; + unsigned char *yy_marker; + unsigned char *yy_limit; + int yy_state; + zend_stack state_stack; + char *filename; int lineno; - char _yy_hold_char; - int yy_n_chars; - int _yy_did_buffer_switch_on_eof; - int _yy_last_accepting_state; /* Must be of the same type as yy_state_type, - * if for whatever reason it's no longer int! - */ - char *_yy_last_accepting_cpos; - int _yy_more_flag; - int _yy_more_len; - int yy_start_stack_ptr; - int yy_start_stack_depth; - int *yy_start_stack; + + /* Modes are: ZEND_INI_SCANNER_NORMAL, ZEND_INI_SCANNER_RAW */ + int scanner_mode; +}; + +struct _zend_php_scanner_globals { + zend_file_handle *yy_in; + zend_file_handle *yy_out; + + unsigned int yy_leng; + unsigned char *yy_start; + unsigned char *yy_text; + unsigned char *yy_cursor; + unsigned char *yy_marker; + unsigned char *yy_limit; + int yy_state; + zend_stack state_stack; + + zend_llist used_state_stacks; UConverter *input_conv; /* converter for flex input */ UConverter *output_conv; /* converter for data from flex output */ zend_bool encoding_checked; char* rest_str; int rest_len; - - /* For ini scanner. Modes are: ZEND_INI_SCANNER_NORMAL, ZEND_INI_SCANNER_RAW */ - int scanner_mode; }; struct _zend_unicode_globals { diff --git a/Zend/zend_globals_macros.h b/Zend/zend_globals_macros.h index c6fa40f8aa..9dc202400c 100644 --- a/Zend/zend_globals_macros.h +++ b/Zend/zend_globals_macros.h @@ -24,7 +24,8 @@ typedef struct _zend_compiler_globals zend_compiler_globals; typedef struct _zend_executor_globals zend_executor_globals; -typedef struct _zend_scanner_globals zend_scanner_globals; +typedef struct _zend_php_scanner_globals zend_php_scanner_globals; +typedef struct _zend_ini_scanner_globals zend_ini_scanner_globals; typedef struct _zend_unicode_globals zend_unicode_globals; BEGIN_EXTERN_C() @@ -50,21 +51,21 @@ extern ZEND_API zend_executor_globals executor_globals; /* Language Scanner */ #ifdef ZTS -# define LANG_SCNG(v) TSRMG(language_scanner_globals_id, zend_scanner_globals *, v) +# define LANG_SCNG(v) TSRMG(language_scanner_globals_id, zend_php_scanner_globals *, v) extern ZEND_API ts_rsrc_id language_scanner_globals_id; #else # define LANG_SCNG(v) (language_scanner_globals.v) -extern ZEND_API zend_scanner_globals language_scanner_globals; +extern ZEND_API zend_php_scanner_globals language_scanner_globals; #endif /* INI Scanner */ #ifdef ZTS -# define INI_SCNG(v) TSRMG(ini_scanner_globals_id, zend_scanner_globals *, v) +# define INI_SCNG(v) TSRMG(ini_scanner_globals_id, zend_ini_scanner_globals *, v) extern ZEND_API ts_rsrc_id ini_scanner_globals_id; #else # define INI_SCNG(v) (ini_scanner_globals.v) -extern ZEND_API zend_scanner_globals ini_scanner_globals; +extern ZEND_API zend_ini_scanner_globals ini_scanner_globals; #endif diff --git a/Zend/zend_highlight.c b/Zend/zend_highlight.c index 3a546f291e..cccebe208c 100644 --- a/Zend/zend_highlight.c +++ b/Zend/zend_highlight.c @@ -163,7 +163,7 @@ ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini } /* handler for trailing comments, see bug #42767 */ - if (LANG_SCNG(yy_leng) && LANG_SCNG(_yy_more_len)) { + if (LANG_SCNG(yy_leng) && LANG_SCNG(yy_text) < LANG_SCNG(yy_limit)) { if (last_color != syntax_highlighter_ini->highlight_comment) { if (last_color != syntax_highlighter_ini->highlight_html) { zend_printf(""); @@ -172,7 +172,7 @@ ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini zend_printf("", syntax_highlighter_ini->highlight_comment); } } - zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(_yy_more_len) TSRMLS_CC); + zend_html_puts(LANG_SCNG(yy_text), (LANG_SCNG(yy_limit) - LANG_SCNG(yy_text)) TSRMLS_CC); } if (last_color != syntax_highlighter_ini->highlight_html) { diff --git a/Zend/zend_ini_parser.y b/Zend/zend_ini_parser.y index 633d1480aa..2533cebe67 100644 --- a/Zend/zend_ini_parser.y +++ b/Zend/zend_ini_parser.y @@ -204,7 +204,7 @@ ZEND_API int zend_parse_ini_file(zend_file_handle *fh, zend_bool unbuffered_erro CG(ini_parser_unbuffered_errors) = unbuffered_errors; retval = ini_parse(TSRMLS_C); - zend_ini_close_file(fh TSRMLS_CC); + zend_file_handle_dtor(fh TSRMLS_CC); shutdown_ini_scanner(TSRMLS_C); diff --git a/Zend/zend_ini_scanner.h b/Zend/zend_ini_scanner.h index 1c0345b89f..a6ff91ef2c 100644 --- a/Zend/zend_ini_scanner.h +++ b/Zend/zend_ini_scanner.h @@ -31,7 +31,6 @@ int zend_ini_scanner_get_lineno(TSRMLS_D); char *zend_ini_scanner_get_filename(TSRMLS_D); int zend_ini_open_file_for_scanning(zend_file_handle *fh, int scanner_mode TSRMLS_DC); int zend_ini_prepare_string_for_scanning(char *str, int scanner_mode TSRMLS_DC); -void zend_ini_close_file(zend_file_handle *fh TSRMLS_DC); int ini_lex(zval *ini_lval TSRMLS_DC); void shutdown_ini_scanner(TSRMLS_D); END_EXTERN_C() diff --git a/Zend/zend_ini_scanner.l b/Zend/zend_ini_scanner.l index e809ce3731..1b4022bc74 100644 --- a/Zend/zend_ini_scanner.l +++ b/Zend/zend_ini_scanner.l @@ -1,4 +1,3 @@ -%{ /* +----------------------------------------------------------------------+ | Zend Engine | @@ -15,18 +14,41 @@ +----------------------------------------------------------------------+ | Authors: Zeev Suraski | | Jani Taskinen | + | Marcus Boerger | + | Nuno Lopes | + | Scott MacVicar | +----------------------------------------------------------------------+ */ /* $Id$ */ -#define DEBUG_CFG_SCANNER 0 -#define yyleng SCNG(yy_leng) -#define yytext SCNG(yy_text) -#define yytext_ptr SCNG(yy_text) -#define yyin SCNG(yy_in) -#define yyout SCNG(yy_out) +#if 0 +# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c) +#else +# define YYDEBUG(s, c) +#endif + +#include "zend_ini_scanner_defs.h" + +#define YYCTYPE unsigned char +#define YYFILL(n) { if (YYCURSOR >= YYLIMIT) return 0; } +#define YYCURSOR SCNG(yy_cursor) +#define YYLIMIT SCNG(yy_limit) +#define YYMARKER SCNG(yy_marker) + +#define YYGETCONDITION() SCNG(yy_state) +#define YYSETCONDITION(s) SCNG(yy_state) = s + +#define STATE(name) yyc##name + +/* emulate flex constructs */ +#define BEGIN(state) YYSETCONDITION(STATE(state)) +#define YYSTATE YYGETCONDITION() +#define yytext ((char*)SCNG(yy_text)) +#define yyleng SCNG(yy_leng) +#define yyless(x) YYCURSOR = yytext + x +/* #define yymore() goto yymore_restart */ /* How it works (for the core ini directives): * =========================================== @@ -47,7 +69,7 @@ * 5. User defined ini files (like .htaccess for apache) are parsed for each request and * stored in separate hash defined by SAPI. */ - + /* TODO: (ordered by importance :-) * =============================================================================== * @@ -58,46 +80,18 @@ * */ -/* These are not needed when yymore() is not used */ -/* -#define yy_last_accepting_state SCNG(_yy_last_accepting_state) -#define yy_last_accepting_cpos SCNG(_yy_last_accepting_cpos) -#define yy_more_flag SCNG(_yy_more_flag) -#define yy_more_len SCNG(_yy_more_len) -*/ - -%} - -%x ST_DOUBLE_QUOTES -%x ST_OFFSET -%x ST_RAW -%x ST_SECTION_RAW -%x ST_SECTION_VALUE -%x ST_VALUE -%x ST_VARNAME -%option stack - -%{ - #include #include "zend.h" #include "zend_globals.h" #include #include "zend_ini_scanner.h" -#define YY_DECL int ini_lex(zval *ini_lval TSRMLS_DC) - -#define YY_INPUT(buf, result, max_size) \ - if ( ((result = zend_stream_read(yyin, buf, max_size TSRMLS_CC)) == 0) \ - && zend_stream_ferror( yyin TSRMLS_CC) ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); - /* Globals Macros */ #define SCNG INI_SCNG #ifdef ZTS ZEND_API ts_rsrc_id ini_scanner_globals_id; #else -ZEND_API zend_scanner_globals ini_scanner_globals; +ZEND_API zend_ini_scanner_globals ini_scanner_globals; #endif /* Eat trailing whitespace + extra char */ @@ -110,7 +104,6 @@ ZEND_API zend_scanner_globals ini_scanner_globals; yytext[yyleng - 1] == ' ') \ ) { \ yyleng--; \ - yytext[yyleng]=0; \ } /* Eat trailing whitespace */ @@ -127,7 +120,30 @@ ZEND_API zend_scanner_globals ini_scanner_globals; return type; \ } -static char *ini_filename; +static void _yy_push_state(int new_state TSRMLS_DC) +{ + zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int)); + YYSETCONDITION(new_state); +} + +#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm) + +static void yy_pop_state(TSRMLS_D) +{ + int *stack_state; + zend_stack_top(&SCNG(state_stack), (void **) &stack_state); + YYSETCONDITION(*stack_state); + zend_stack_del_top(&SCNG(state_stack)); +} + +static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC) +{ + YYCURSOR = (YYCTYPE*)str; + SCNG(yy_start) = YYCURSOR; + YYLIMIT = YYCURSOR + len; +} + +#define ini_filename SCNG(filename) /* {{{ init_ini_scanner() */ @@ -135,9 +151,8 @@ static void init_ini_scanner(TSRMLS_D) { SCNG(lineno) = 1; SCNG(scanner_mode) = ZEND_INI_SCANNER_NORMAL; - SCNG(yy_start_stack_ptr) = 0; - SCNG(yy_start_stack_depth) = 0; - SCNG(current_buffer) = NULL; + zend_stack_init(&SCNG(state_stack)); + BEGIN(INITIAL); } /* }}} */ @@ -145,11 +160,7 @@ static void init_ini_scanner(TSRMLS_D) */ void shutdown_ini_scanner(TSRMLS_D) { - if (SCNG(yy_start_stack)) { - yy_flex_free(SCNG(yy_start_stack)); - SCNG(yy_start_stack) = NULL; - } - yy_delete_buffer(SCNG(current_buffer) TSRMLS_CC); + zend_stack_destroy(&SCNG(state_stack)); if (ini_filename) { free(ini_filename); } @@ -176,14 +187,17 @@ char *zend_ini_scanner_get_filename(TSRMLS_D) */ int zend_ini_open_file_for_scanning(zend_file_handle *fh, int scanner_mode TSRMLS_DC) { - if (FAILURE == zend_stream_fixup(fh TSRMLS_CC)) { + char *buf; + size_t size; + + if (zend_stream_fixup(fh, &buf, &size TSRMLS_CC) == FAILURE) { return FAILURE; } init_ini_scanner(TSRMLS_C); SCNG(scanner_mode) = scanner_mode; - yyin = fh; - yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE TSRMLS_CC) TSRMLS_CC); + SCNG(yy_in) = fh; + yy_scan_buffer(buf, size TSRMLS_CC); ini_filename = zend_strndup(fh->filename, strlen(fh->filename)); return SUCCESS; } @@ -197,21 +211,13 @@ int zend_ini_prepare_string_for_scanning(char *str, int scanner_mode TSRMLS_DC) init_ini_scanner(TSRMLS_C); SCNG(scanner_mode) = scanner_mode; - yyin = NULL; - yy_scan_buffer(str, len + 2 TSRMLS_CC); + SCNG(yy_in) = NULL; + yy_scan_buffer(str, len TSRMLS_CC); ini_filename = NULL; return SUCCESS; } /* }}} */ -/* {{{ zend_ini_close_file() -*/ -void zend_ini_close_file(zend_file_handle *fh TSRMLS_DC) -{ - zend_stream_close(fh); -} -/* }}} */ - /* {{{ zend_ini_escape_string() */ static void zend_ini_escape_string(zval *lval, char *str, int len, char quote_type TSRMLS_DC) @@ -272,8 +278,23 @@ static void zend_ini_escape_string(zval *lval, char *str, int len, char quote_ty } /* }}} */ -%} +int ini_lex(zval *ini_lval TSRMLS_DC) +{ +restart: + SCNG(yy_text) = YYCURSOR; + +/* yymore_restart: */ + /* detect EOF */ + if (YYCURSOR >= YYLIMIT) { + if (YYSTATE == STATE(ST_VALUE) || YYSTATE == STATE(ST_RAW)) { + BEGIN(INITIAL); + return 0; + } + return 0; + } +/*!re2c +re2c:yyfill:check = 0; LNUM [0-9]+ DNUM ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*) NUMBER [-]?{LNUM}|{DNUM} @@ -292,20 +313,12 @@ SINGLE_QUOTED_CHARS [^'] RAW_VALUE_CHARS [^=\n\r;] /* Allow using ${foobar} in sections, quoted strings and values */ -LITERAL_DOLLAR ("$"([^a-zA-Z0-9{]|("\\"{ANY_CHAR}))) -VALUE_CHARS ([^$= \t\n\r;&|~()!"']|{LITERAL_DOLLAR}) +LITERAL_DOLLAR ("$"([^a-zA-Z0-9{\000]|("\\"{ANY_CHAR}))) +VALUE_CHARS ([^$= \t\n\r;&|~()!"'\000]|{LITERAL_DOLLAR}) SECTION_VALUE_CHARS ([^$\n\r;"'\]\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR}) DOUBLE_QUOTES_CHARS ([^$"\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR}) -/* " */ - -%option nounput -%option noyywrap -%option noyylineno -%option noyy_top_state -%option never-interactive - -%% ++ := yyleng = YYCURSOR - SCNG(yy_text); "[" { /* Section start */ /* Enter section data lookup state */ @@ -320,9 +333,8 @@ DOUBLE_QUOTES_CHARS ([^$"\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR}) "'"{SINGLE_QUOTED_CHARS}+"'" { /* Raw string */ /* Eat leading and trailing single quotes */ if (yytext[0] == '\'' && yytext[yyleng - 1] == '\'') { - yytext++; + SCNG(yy_text)++; yyleng = yyleng - 2; - yytext[yyleng] = 0; } RETURN_TOKEN(TC_RAW, yytext, yyleng); } @@ -339,7 +351,7 @@ DOUBLE_QUOTES_CHARS ([^$"\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR}) /* Enter offset lookup state */ yy_push_state(ST_OFFSET TSRMLS_CC); - + RETURN_TOKEN(TC_OFFSET, yytext, yyleng); } @@ -374,7 +386,7 @@ DOUBLE_QUOTES_CHARS ([^$"\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR}) RETURN_TOKEN(TC_LABEL, yytext, yyleng); } -{TABS_AND_SPACES}*[=]{TABS_AND_SPACES}* { /* Start option value */ +{TABS_AND_SPACES}*[=]{TABS_AND_SPACES}* { /* Start option value */ if (SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW) { yy_push_state(ST_RAW TSRMLS_CC); } else { @@ -386,9 +398,9 @@ DOUBLE_QUOTES_CHARS ([^$"\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR}) {RAW_VALUE_CHARS}+ { /* Raw value, only used when SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW. */ /* Eat leading and trailing double quotes */ if (yytext[0] == '"' && yytext[yyleng - 1] == '"') { - yytext++; + SCNG(yy_text)++; yyleng = yyleng - 2; - yytext[yyleng] = 0; + yytext[yyleng] = 0; } RETURN_TOKEN(TC_RAW, yytext, yyleng); } @@ -420,7 +432,7 @@ DOUBLE_QUOTES_CHARS ([^$"\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR}) } [=] { /* Make = used in option value to trigger error */ - yyless(yyleng - 1); + yyless(0); BEGIN(INITIAL); return END_OF_LINE; } @@ -454,6 +466,7 @@ DOUBLE_QUOTES_CHARS ([^$"\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR}) {TABS_AND_SPACES}+ { /* eat whitespace */ + goto restart; } {TABS_AND_SPACES}*{NEWLINE} { @@ -467,52 +480,14 @@ DOUBLE_QUOTES_CHARS ([^$"\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR}) return END_OF_LINE; } -<> { /* End of option value (if EOF is reached before EOL */ +[^] { /* End of option value (if EOF is reached before EOL) */ BEGIN(INITIAL); - return END_OF_LINE; + return 0; } -<> { -#if DEBUG_CFG_SCANNER - while (YYSTATE != INITIAL) { - switch (YYSTATE) { - case INITIAL: - break; - - case ST_DOUBLE_QUOTES: - fprintf(stderr, "ERROR: Unterminated ini option value double quotes\n"); - break; - - case ST_OFFSET: - fprintf(stderr, "ERROR: Unterminated ini option offset\n"); - break; - - case ST_RAW: - fprintf(stderr, "ERROR: Unterminated raw ini option value\n"); - break; - - case ST_SECTION_RAW: - fprintf(stderr, "ERROR: Unterminated raw ini section value\n"); - break; - - case ST_SECTION_VALUE: - fprintf(stderr, "ERROR: Unterminated ini section value\n"); - break; - - case ST_VALUE: - fprintf(stderr, "ERROR: Unterminated ini option value\n"); - break; - - case ST_VARNAME: - fprintf(stderr, "ERROR: Unterminated ini variable\n"); - break; +<*>[^] { + return 0; +} - default: - fprintf(stderr, "BUG: Unknown state (%d)\n", YYSTATE); - break; - } - yy_pop_state(TSRMLS_C); - } -#endif - yyterminate(); +*/ } diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h index f467d9cb3d..444e270b7f 100644 --- a/Zend/zend_language_scanner.h +++ b/Zend/zend_language_scanner.h @@ -23,8 +23,15 @@ #define ZEND_SCANNER_H typedef struct _zend_lex_state { - YY_BUFFER_STATE buffer_state; - int state; + unsigned int yy_leng; + unsigned char *yy_start; + unsigned char *yy_text; + unsigned char *yy_cursor; + unsigned char *yy_marker; + unsigned char *yy_limit; + int yy_state; + zend_stack state_stack; + zend_file_handle *in; uint lineno; char *filename; @@ -38,7 +45,6 @@ typedef struct _zend_lex_state { } zend_lex_state; -void zend_fatal_scanner_error(char *); BEGIN_EXTERN_C() int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2); ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC); diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index 179127a495..c6248e178e 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -1,5 +1,3 @@ -%{ - /* +----------------------------------------------------------------------+ | Zend Engine | @@ -14,43 +12,24 @@ | obtain it through the world-wide-web, please send a note to | | license@zend.com so we can mail you a copy immediately. | +----------------------------------------------------------------------+ - | Authors: Andi Gutmans | + | Authors: Marcus Boerger | + | Nuno Lopes | + | Scott MacVicar | + | Flex version authors: | + | Andi Gutmans | | Zeev Suraski | +----------------------------------------------------------------------+ */ /* $Id$ */ -#define yyleng SCNG(yy_leng) -#define yytext SCNG(yy_text) -#define yytext_ptr SCNG(yy_text) -#define yyin SCNG(yy_in) -#define yyout SCNG(yy_out) -#define yy_last_accepting_state SCNG(_yy_last_accepting_state) -#define yy_last_accepting_cpos SCNG(_yy_last_accepting_cpos) -#define yy_more_flag SCNG(_yy_more_flag) -#define yy_more_len SCNG(_yy_more_len) - -%} - -%x ST_IN_SCRIPTING -%x ST_DOUBLE_QUOTES -%x ST_BACKQUOTE -%x ST_HEREDOC -%x ST_START_HEREDOC -%x ST_END_HEREDOC -%x ST_NOWDOC -%x ST_START_NOWDOC -%x ST_END_NOWDOC -%x ST_LOOKING_FOR_PROPERTY -%x ST_LOOKING_FOR_VARNAME -%x ST_VAR_OFFSET -%x ST_COMMENT -%x ST_DOC_COMMENT -%x ST_ONE_LINE_COMMENT -%option stack - -%{ +#if 0 +# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c) +#else +# define YYDEBUG(s, c) +#endif + +#include "zend_language_scanner_defs.h" #include #include "zend.h" @@ -68,6 +47,25 @@ #include "tsrm_virtual_cwd.h" #include "tsrm_config_common.h" +#define YYCTYPE unsigned char +#define YYFILL(n) { if (YYCURSOR >= YYLIMIT) return 0; } +#define YYCURSOR SCNG(yy_cursor) +#define YYLIMIT SCNG(yy_limit) +#define YYMARKER SCNG(yy_marker) + +#define YYGETCONDITION() SCNG(yy_state) +#define YYSETCONDITION(s) SCNG(yy_state) = s + +#define STATE(name) yyc##name + +/* emulate flex constructs */ +#define BEGIN(state) YYSETCONDITION(STATE(state)) +#define YYSTATE YYGETCONDITION() +#define yytext ((char*)SCNG(yy_text)) +#define yyleng SCNG(yy_leng) +#define yyless(x) YYCURSOR = yytext + x +#define yymore() goto yymore_restart + #ifdef HAVE_STDARG_H # include #endif @@ -76,31 +74,19 @@ # include #endif -#define YY_DECL int lex_scan(zval *zendlval TSRMLS_DC) - -#define ECHO { ZEND_WRITE( yytext, yyleng ); } - -#ifdef ZTS -# define MY_INPUT yyinput -#else -# define MY_INPUT input -#endif - - /* Globals Macros */ #define SCNG LANG_SCNG #ifdef ZTS ZEND_API ts_rsrc_id language_scanner_globals_id; #else -ZEND_API zend_scanner_globals language_scanner_globals; +ZEND_API zend_php_scanner_globals language_scanner_globals; #endif +/* #define YY_INPUT(buf, result, max_size) \ - if ( ((result = zend_unicode_yyinput(yyin, buf, max_size TSRMLS_CC)) == 0) \ - && zend_stream_ferror( yyin TSRMLS_CC) ) \ + if ( ((result = zend_unicode_yyinput(yyin, buf, max_size TSRMLS_CC)) == 0)) \ YY_FATAL_ERROR( "input in flex scanner failed" ); - -#define YY_FATAL_ERROR zend_fatal_scanner_error +*/ #define HANDLE_NEWLINES(s, l) \ do { \ @@ -121,32 +107,50 @@ do { \ } \ } - #define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7') #define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F')) +BEGIN_EXTERN_C() + +static void _yy_push_state(int new_state TSRMLS_DC) + { + zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int)); + YYSETCONDITION(new_state); +} + +#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm) -void zend_fatal_scanner_error(char *message) +static void yy_pop_state(TSRMLS_D) { - zend_error(E_COMPILE_ERROR, "%s", message); + int *stack_state; + zend_stack_top(&SCNG(state_stack), (void **) &stack_state); + YYSETCONDITION(*stack_state); + zend_stack_del_top(&SCNG(state_stack)); } -BEGIN_EXTERN_C() +static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC) +{ + YYCURSOR = (YYCTYPE*)str; + SCNG(yy_start) = YYCURSOR; + YYLIMIT = YYCURSOR + len; + } + void startup_scanner(TSRMLS_D) { CG(heredoc) = NULL; CG(heredoc_len) = 0; CG(doc_comment) = NULL_ZSTR; CG(doc_comment_len) = 0; - SCNG(yy_start_stack_ptr) = 0; - SCNG(yy_start_stack_depth) = 0; - SCNG(current_buffer) = NULL; SCNG(input_conv) = NULL; SCNG(output_conv) = NULL; SCNG(encoding_checked) = 0; SCNG(rest_str) = NULL; SCNG(rest_len) = 0; + + zend_llist_init(&SCNG(used_state_stacks), sizeof(zend_stack), (llist_dtor_func_t) zend_stack_destroy, 0); + zend_stack_init(&SCNG(state_stack)); + zend_llist_add_element(&SCNG(used_state_stacks), &SCNG(state_stack)); } @@ -156,10 +160,7 @@ void shutdown_scanner(TSRMLS_D) efree(CG(heredoc)); CG(heredoc_len)=0; } - if (SCNG(yy_start_stack)) { - yy_flex_free(SCNG(yy_start_stack)); - SCNG(yy_start_stack) = NULL; - } + zend_llist_destroy(&SCNG(used_state_stacks)); RESET_DOC_COMMENT(); if (SCNG(input_conv)) { @@ -177,14 +178,27 @@ void shutdown_scanner(TSRMLS_D) } SCNG(rest_len) = 0; } -END_EXTERN_C() +static int compare_stacks(zend_stack *stack1, zend_stack *stack2) +{ + return (stack1 == stack2); +} ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC) { - memcpy(&lex_state->buffer_state, &YY_CURRENT_BUFFER, sizeof(YY_BUFFER_STATE)); + lex_state->yy_leng = SCNG(yy_leng); + lex_state->yy_start = SCNG(yy_start); + lex_state->yy_text = SCNG(yy_text); + lex_state->yy_cursor = SCNG(yy_cursor); + lex_state->yy_marker = SCNG(yy_marker); + lex_state->yy_limit = SCNG(yy_limit); + + lex_state->state_stack = SCNG(state_stack); + zend_stack_init(&SCNG(state_stack)); + zend_llist_add_element(&SCNG(used_state_stacks), &SCNG(state_stack)); + lex_state->in = SCNG(yy_in); - lex_state->state = YYSTATE; + lex_state->yy_state = YYSTATE; lex_state->filename = zend_get_compiled_filename(TSRMLS_C); lex_state->lineno = CG(zend_lineno); @@ -202,17 +216,18 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC) ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) { - YY_BUFFER_STATE original_buffer_state = YY_CURRENT_BUFFER; + SCNG(yy_leng) = lex_state->yy_leng; + SCNG(yy_start) = lex_state->yy_start; + SCNG(yy_text) = lex_state->yy_text; + SCNG(yy_cursor) = lex_state->yy_cursor; + SCNG(yy_marker) = lex_state->yy_marker; + SCNG(yy_limit) = lex_state->yy_limit; - if (lex_state->buffer_state) { - yy_switch_to_buffer(lex_state->buffer_state TSRMLS_CC); - } else { - YY_CURRENT_BUFFER = NULL; - } + zend_llist_del_element(&SCNG(used_state_stacks), &SCNG(state_stack), (int (*)(void *, void *)) compare_stacks); + SCNG(state_stack) = lex_state->state_stack; - yy_delete_buffer(original_buffer_state TSRMLS_CC); SCNG(yy_in) = lex_state->in; - BEGIN(lex_state->state); + YYSETCONDITION(lex_state->yy_state); CG(zend_lineno) = lex_state->lineno; zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); zend_restore_compiled_script_encoding(lex_state->script_encoding TSRMLS_CC); @@ -233,57 +248,6 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) SCNG(rest_len) = lex_state->rest_len; } - -BEGIN_EXTERN_C() - - -ZEND_API void zend_file_handle_dtor(zend_file_handle *fh) -{ - TSRMLS_FETCH(); - - switch (fh->type) { - case ZEND_HANDLE_FP: - fclose(fh->handle.fp); - break; - case ZEND_HANDLE_STREAM: - if (fh->handle.stream.closer) { - fh->handle.stream.closer(fh->handle.stream.handle TSRMLS_CC); - } - break; - case ZEND_HANDLE_FILENAME: - /* We're only supposed to get here when destructing the used_files hash, - * which doesn't really contain open files, but references to their names/paths - */ - break; - } - if (fh->opened_path) { - efree(fh->opened_path); - fh->opened_path = NULL; - } - if (fh->free_filename && fh->filename) { - efree(fh->filename); - fh->filename = NULL; - } -} - - -int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2) -{ - if (fh1->type != fh2->type) { - return 0; - } - switch (fh1->type) { - case ZEND_HANDLE_FP: - return fh1->handle.fp==fh2->handle.fp; - break; - case ZEND_HANDLE_STREAM: - return fh1->handle.stream.handle == fh2->handle.stream.handle; - break; - } - return 0; -} - - ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) { zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles); @@ -556,6 +520,7 @@ ZEND_API int zend_prepare_scanner_converters(const char *onetime_encoding, int r if (zend_set_converter_encoding(&SCNG(input_conv), encoding) == FAILURE) { return FAILURE; } +#ifdef scottmac_0 if (run_time) { /* Convert rest of the buffer to unicode.runtime_encoding. */ YY_BUFFER_STATE b = YY_CURRENT_BUFFER; @@ -590,6 +555,7 @@ ZEND_API int zend_prepare_scanner_converters(const char *onetime_encoding, int r b->yy_ch_buf[b->yy_n_chars] = YY_END_OF_BUFFER_CHAR; b->yy_ch_buf[b->yy_n_chars+1] = YY_END_OF_BUFFER_CHAR; } +#endif encoding = "UTF-8"; } return zend_set_converter_encoding(&SCNG(output_conv), encoding); @@ -622,6 +588,7 @@ int zend_unicode_yyinput(zend_file_handle *file_handle, char *buf, size_t len TS int c = '*'; const char *src = buf; +#ifdef scottmac_0 /* Look of we have rest from previous call */ if (SCNG(rest_str)) { if (len >= SCNG(rest_len)) { @@ -637,18 +604,9 @@ int zend_unicode_yyinput(zend_file_handle *file_handle, char *buf, size_t len TS SCNG(rest_len) -= len; } } else { - if (file_handle->handle.stream.interactive) { - for (n = 0; n < sizeof(buf) && (c = zend_stream_getc(yyin TSRMLS_CC)) != EOF && c != '\n'; ++n) { - buf[n] = (char)c; - } - if (c == '\n') { - buf[n++] = (char) c; - } - } else { - n = zend_stream_read(file_handle, buf, len TSRMLS_CC); - } + n = zend_stream_read(file_handle, buf, len TSRMLS_CC); } - +#endif /* Don't make any conversions if unicode=off */ if (!UG(unicode)) { return n; @@ -709,20 +667,34 @@ int zend_unicode_yyinput(zend_file_handle *file_handle, char *buf, size_t len TS ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) { - char *file_path=NULL; + char *file_path=NULL, *buf; UErrorCode status = U_ZERO_ERROR; + size_t size; - if (FAILURE == zend_stream_fixup(file_handle TSRMLS_CC)) { + if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) { return FAILURE; } zend_llist_add_element(&CG(open_files), file_handle); + if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) { + zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files)); + size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle; + fh->handle.stream.handle = (void*)(((char*)fh) + diff); + file_handle->handle.stream.handle = fh->handle.stream.handle; + } + /* Reset the scanner for scanning the new file */ SCNG(yy_in) = file_handle; zend_prepare_scanner_converters(ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status), 0 TSRMLS_CC); - yy_switch_to_buffer(yy_create_buffer(SCNG(yy_in), YY_BUF_SIZE TSRMLS_CC) TSRMLS_CC); + + if (size != -1) { + /* Re-encode for Unicode if needed */ + yy_scan_buffer(buf, size TSRMLS_CC); + } else { + zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); + } BEGIN(INITIAL); @@ -869,7 +841,8 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D SCNG(yy_in)=NULL; zend_prepare_scanner_converters(encoding, 0 TSRMLS_CC); - yy_scan_buffer(Z_STRVAL_P(str), Z_STRLEN_P(str)+2 TSRMLS_CC); + /* Re-encode for Unicode if needed */ + yy_scan_buffer(Z_STRVAL_P(str), Z_STRLEN_P(str) TSRMLS_CC); zend_set_compiled_filename(filename TSRMLS_CC); zend_set_compiled_script_encoding((char*)ucnv_getName(SCNG(output_conv), &status) TSRMLS_CC); @@ -881,15 +854,7 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D ZEND_API int zend_get_scanned_file_offset(TSRMLS_D) { - if (yyin) { - int offset_in_buffer = (yy_c_buf_p - (YY_CURRENT_BUFFER)->yy_ch_buf); - int read_bytes = SCNG(yy_n_chars); - int offset_from_the_end = read_bytes - offset_in_buffer; - - return zend_stream_ftell(yyin TSRMLS_CC) - offset_from_the_end; - } else { - return -1; - } + return SCNG(yy_cursor) - SCNG(yy_start); } @@ -924,7 +889,7 @@ zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC) retval = NULL; } else { zend_bool orig_interactive = CG(interactive); - + CG(interactive) = 0; init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); CG(interactive) = orig_interactive; @@ -1349,17 +1314,34 @@ static void zend_scan_binary_single_string(zval *zendlval, char *str, int len TS *t = 0; } -%} +int lex_scan(zval *zendlval TSRMLS_DC) +{ +restart: + SCNG(yy_text) = YYCURSOR; + +yymore_restart: + + /* detect EOF */ + if (YYCURSOR >= YYLIMIT) { + /* special case */ + if (YYSTATE == STATE(ST_COMMENT) || YYSTATE == STATE(ST_DOC_COMMENT)) { + zend_error(E_COMPILE_WARNING,"Unterminated comment starting line %d", CG(comment_start_line)); + } + return 0; + } + +/*!re2c +re2c:yyfill:check = 0; LNUM [0-9]+ -DNUM ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*) +DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*) EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM}) HNUM "0x"[0-9a-fA-F]+ LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]* WHITESPACE [ \n\r\t]+ TABS_AND_SPACES [ \t]* TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@] -ANY_CHAR (.|[\n]) +ANY_CHAR [^] NEWLINE ("\r"|"\n"|"\r\n") /* @@ -1416,9 +1398,9 @@ HEREDOC_CHARS ("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_\x7f-\xff;\n\r][^\n\r]*)|({LABEL}[;][^\n\r]+))) -%option noyylineno -%option noyywrap -%% +/* compute yyleng before each rule */ + := yyleng = YYCURSOR - SCNG(yy_text); + "exit" { return T_EXIT; @@ -1569,6 +1551,11 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ return T_OBJECT_OPERATOR; } +{WHITESPACE}+ { + /* do nothing */ + goto restart; +} + "->" { return T_OBJECT_OPERATOR; } @@ -1587,6 +1574,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ {ANY_CHAR} { yyless(0); yy_pop_state(TSRMLS_C); + goto restart; } "::" { @@ -1849,7 +1837,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ "}" { RESET_DOC_COMMENT(); /* This is a temporary fix which is dependant on flex and it's implementation */ - if (yy_start_stack_ptr) { + if (!zend_stack_is_empty(&SCNG(state_stack))) { yy_pop_state(TSRMLS_C); } return '}'; @@ -1873,6 +1861,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ yyless(0); yy_pop_state(TSRMLS_C); yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); + goto restart; } @@ -1918,7 +1907,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ } } -0|([1-9][0-9]*) { /* Offset could be treated as a long */ +[0]|([1-9][0-9]*) { /* Offset could be treated as a long */ if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) { Z_LVAL_P(zendlval) = strtol(yytext, NULL, 10); Z_TYPE_P(zendlval) = IS_LONG; @@ -1943,7 +1932,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ return T_DNUMBER; } -"__CLASS__" { +'__CLASS__' { zstr class_name = NULL_ZSTR; if (CG(active_class_entry)) { @@ -1958,7 +1947,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ return T_CLASS_C; } -"__FUNCTION__" { +'__FUNCTION__' { zstr func_name = NULL_ZSTR; if (CG(active_op_array)) { @@ -1973,7 +1962,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ return T_FUNC_C; } -"__METHOD__" { +'__METHOD__' { zstr class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL_ZSTR; zstr func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL_ZSTR; size_t len = 0; @@ -2022,13 +2011,13 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ return T_METHOD_C; } -"__LINE__" { +'__LINE__' { Z_LVAL_P(zendlval) = CG(zend_lineno); Z_TYPE_P(zendlval) = IS_LONG; return T_LINE; } -"__FILE__" { +'__FILE__' { char *filename = zend_get_compiled_filename(TSRMLS_C); if (!filename) { @@ -2038,7 +2027,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ return T_FILE; } -"__DIR__" { +'__DIR__' { char *filename = zend_get_compiled_filename(TSRMLS_C); const size_t filename_len = strlen(filename); char *dirname; @@ -2065,7 +2054,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ return T_DIR; } -"__NAMESPACE__" { +'__NAMESPACE__' { if (CG(current_namespace)) { *zendlval = *CG(current_namespace); zval_copy_ctor(zendlval); @@ -2075,22 +2064,22 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ return T_NS_C; } -(([^<]|"<"[^?%s<]){1,400})|""" { + HANDLE_NEWLINES(yytext, yyleng); + Z_STRVAL_P(zendlval) = yytext; /* no copying - intentional */ Z_STRLEN_P(zendlval) = yyleng; Z_TYPE_P(zendlval) = IS_STRING; - HANDLE_NEWLINES(yytext, yyleng); - return T_INLINE_HTML; + BEGIN(ST_IN_SCRIPTING); + return T_OPEN_TAG; } -"" { - HANDLE_NEWLINES(yytext, yyleng); - if (CG(short_tags) || yyleng>2) { /* yyleng>2 means it's not */ +"<%=" { + if (CG(asp_tags)) { Z_STRVAL_P(zendlval) = yytext; /* no copying - intentional */ Z_STRLEN_P(zendlval) = yyleng; Z_TYPE_P(zendlval) = IS_STRING; BEGIN(ST_IN_SCRIPTING); - return T_OPEN_TAG; + return T_OPEN_TAG_WITH_ECHO; } else { Z_STRVAL_P(zendlval) = (char *) estrndup(yytext, yyleng); Z_STRLEN_P(zendlval) = yyleng; @@ -2100,8 +2089,8 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ } -"<%="|"""$"{LABEL} { - if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { - return 0; + +""#".+ {NEWLINE} { + if ((YYCTYPE*)yytext == SCNG(yy_start)) { + /* ignore first line when it's started with a # */ + goto restart; + } else { + goto inline_char_handler; } - return T_VARIABLE; } -%{ +{ANY_CHAR} { + + while (1) { + YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR); + + if (ptr == NULL) { + YYCURSOR = YYLIMIT; + yyleng = YYCURSOR - SCNG(yy_text); + break; + + } else { + YYCURSOR = ptr + 1; + + /* if it can be an opening tag, stop */ + if (ptr < YYLIMIT && (*YYCURSOR == '?' || *YYCURSOR == '%')) { + --YYCURSOR; + yyleng = YYCURSOR - SCNG(yy_text); + break; + } + } + } + +inline_char_handler: + + Z_STRVAL_P(zendlval) = (char *) estrndup(yytext, yyleng); + Z_STRLEN_P(zendlval) = yyleng; + Z_TYPE_P(zendlval) = IS_STRING; + HANDLE_NEWLINES(yytext, yyleng); + return T_INLINE_HTML; +} + /* Make sure a label character follows "->", otherwise there is no property * and "->" will be taken literally - */ %} + */ "$"{LABEL}"->"[a-zA-Z_\x7f-\xff] { yyless(yyleng - 3); yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); - if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { + if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-4), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { return 0; } if (UG(unicode) && !zend_check_and_normalize_identifier(zendlval)) { @@ -2168,13 +2203,22 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ return T_VARIABLE; } -%{ /* A [ always designates a variable offset, regardless of what follows - */ %} + */ "$"{LABEL}"[" { yyless(yyleng - 1); yy_push_state(ST_VAR_OFFSET TSRMLS_CC); + if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-2), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { + return 0; + } + if (UG(unicode) && !zend_check_and_normalize_identifier(zendlval)) { + return 0; + } + return T_VARIABLE; +} + +"$"{LABEL} { if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -2270,7 +2314,8 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ Z_STRVAL_P(zendlval) = yytext; /* no copying - intentional */ Z_STRLEN_P(zendlval) = yyleng-2; Z_TYPE_P(zendlval) = IS_STRING; - yyless(yyleng-2); + yyleng -= 2; + yyless(yyleng); BEGIN(ST_IN_SCRIPTING); return T_COMMENT; } else { @@ -2342,9 +2387,8 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ } -%{ /* ("{"*|"$"*) handles { or $ at the end of a string (or the entire contents) - */ %} + */ (["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) { if (UG(unicode)) { return zend_scan_unicode_escape_string(zendlval, yytext+1, yyleng-2, 0x22 /*'"'*/, T_CONSTANT_ENCAPSED_STRING TSRMLS_CC); @@ -2426,6 +2470,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ {ANY_CHAR} { yyless(0); BEGIN(ST_HEREDOC); + goto restart; } {LABEL}";"?[\n\r] { @@ -2445,18 +2490,17 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ BEGIN(ST_IN_SCRIPTING); return T_END_HEREDOC; } else { - yymore(); BEGIN(ST_HEREDOC); + yymore(); } } -%{ /* Match everything up to and including a possible ending label, so if the label * doesn't match, it's kept with the rest of the string * * {HEREDOC_NEWLINE}+ handles the case of more than one newline sequence that * couldn't be matched with HEREDOC_CHARS, because of the following label - */ %} + */ {HEREDOC_CHARS}*{HEREDOC_NEWLINE}+{LABEL}";"?[\n\r] { char *end = yytext + yyleng - 1; @@ -2477,7 +2521,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ /* Subtract the remaining label length. yyleng must include newline * before label, for zend_highlight/strip, tokenizer, etc. */ - yyleng -= CG(heredoc_len) - 1; + yyleng = yyleng - CG(heredoc_len) - 1; CG(increment_lineno) = 1; /* For newline before label */ BEGIN(ST_END_HEREDOC); @@ -2499,7 +2543,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ {ANY_CHAR} { Z_STRVAL_P(zendlval) = CG(heredoc); Z_STRLEN_P(zendlval) = CG(heredoc_len); - yytext = Z_STRVAL_P(zendlval); + SCNG(yy_text) = Z_STRVAL_P(zendlval); yyleng = Z_STRLEN_P(zendlval); CG(heredoc) = NULL; CG(heredoc_len) = 0; @@ -2525,15 +2569,15 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ } } -%{ /* "{"{2,}|"$"{2,} handles { before "{$" or literal $ before a variable or "${" * (("{"+|"$"+)["]) handles { or $ at the end of a string * * Same for backquotes and heredocs, except the second case doesn't apply to * heredocs. yyless(yyleng - 1) is used to correct taking one character too many - */ %} + */ {DOUBLE_QUOTES_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)["])) { yyless(yyleng - 1); + if (yytext[yyleng-1] == '"') --yyleng; if (CG(literal_type) == IS_UNICODE) { return zend_scan_unicode_escape_string(zendlval, yytext, yyleng, 0x22 /*'"'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); @@ -2565,14 +2609,13 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ } -%{ /* ({HEREDOC_NEWLINE}+({LABEL}";"?)?)? handles the possible case of newline * sequences, possibly followed by a label, that couldn't be matched with * HEREDOC_CHARS because of a following variable or "{$" * * This doesn't affect real ending labels, as they are followed by a newline, * which will result in a longer match for the correct rule if present - */ %} + */ {HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)? { if (CG(literal_type) == IS_UNICODE) { return zend_scan_unicode_escape_string(zendlval, yytext, yyleng, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); @@ -2594,9 +2637,8 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ } -%{ /* BEGIN nowdoc */ -%} + b?"<<<"{TABS_AND_SPACES}[']{LABEL}[']{NEWLINE} { int bprefix = (yytext[0] != '<') ? 1 : 0; char *s; @@ -2619,6 +2661,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ {ANY_CHAR} { yyless(0); BEGIN(ST_NOWDOC); + goto restart; } {LABEL}";"?[\r\n] { @@ -2639,9 +2682,9 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ } return T_ENCAPSED_AND_WHITESPACE; } else { + BEGIN(ST_NOWDOC); yyless(label_len); yymore(); - BEGIN(ST_NOWDOC); } } @@ -2665,7 +2708,7 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ /* Subtract the remaining label length. yyleng must include newline * before label, for zend_highlight/strip, tokenizer, etc. */ - yyleng -= CG(heredoc_len) - 1; + yyleng = yyleng - CG(heredoc_len) - 1; CG(increment_lineno) = 1; /* For newline before label */ BEGIN(ST_END_NOWDOC); @@ -2686,16 +2729,15 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ {ANY_CHAR} { Z_STRVAL_P(zendlval) = CG(heredoc); Z_STRLEN_P(zendlval) = CG(heredoc_len); - yytext = CG(heredoc); + SCNG(yy_text) = CG(heredoc); yyleng = CG(heredoc_len); CG(heredoc) = NULL; CG(heredoc_len) = 0; BEGIN(ST_IN_SCRIPTING); return T_END_NOWDOC; } -%{ + /* END nowdoc */ -%} ["] { BEGIN(ST_IN_SCRIPTING); @@ -2709,13 +2751,10 @@ NOWDOC_CHARS ({NEWLINE}*(([^a-zA-Z_\x7f-\xff\n\r][^\n\r]*)|({LABEL}[^a-zA-Z0-9_ } -<> { - zend_error(E_COMPILE_WARNING,"Unterminated comment starting line %d", CG(comment_start_line)); - return 0; -} - - - {ANY_CHAR} { zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); + goto restart; +} + +*/ } diff --git a/Zend/zend_stack.c b/Zend/zend_stack.c index 5610a1b303..f4a1e521c2 100644 --- a/Zend/zend_stack.c +++ b/Zend/zend_stack.c @@ -95,15 +95,16 @@ ZEND_API int zend_stack_is_empty(zend_stack *stack) /* {{{ */ ZEND_API int zend_stack_destroy(zend_stack *stack) /* {{{ */ { - register int i; - - for (i = 0; i < stack->top; i++) { - efree(stack->elements[i]); - } + int i; if (stack->elements) { + for (i = 0; i < stack->top; i++) { + efree(stack->elements[i]); + } + efree(stack->elements); } + return SUCCESS; } /* }}} */ diff --git a/Zend/zend_stream.c b/Zend/zend_stream.c index 191f56b30e..5e4ad412b9 100644 --- a/Zend/zend_stream.c +++ b/Zend/zend_stream.c @@ -13,6 +13,9 @@ | license@zend.com so we can mail you a copy immediately. | +----------------------------------------------------------------------+ | Authors: Wez Furlong | + | Scott MacVicar | + | Nuno Lopes | + | Marcus Boerger | +----------------------------------------------------------------------+ */ @@ -22,6 +25,12 @@ #include "zend.h" #include "zend_compile.h" +#include +#include +#if HAVE_SYS_MMAN_H +# include +#endif + ZEND_DLIMPORT int isatty(int fd); static size_t zend_stream_stdio_reader(void *handle, char *buf, size_t len TSRMLS_DC) /* {{{ */ @@ -32,17 +41,67 @@ static size_t zend_stream_stdio_reader(void *handle, char *buf, size_t len TSRML static void zend_stream_stdio_closer(void *handle TSRMLS_DC) /* {{{ */ { - if ((FILE*)handle != stdin) + if (handle && (FILE*)handle != stdin) { fclose((FILE*)handle); + } } /* }}} */ -static long zend_stream_stdio_fteller(void *handle TSRMLS_DC) /* {{{ */ +static size_t zend_stream_stdio_fsizer(void *handle TSRMLS_DC) /* {{{ */ { - return ftell((FILE*) handle); + struct stat buf; + if (handle && fstat(fileno((FILE*)handle), &buf) == 0) { + return buf.st_size; + } + return 0; } /* }}} */ +static void zend_stream_unmap(zend_stream *stream TSRMLS_DC) { /* {{{ */ +#if HAVE_MMAP + if (stream->mmap.map) { + munmap(stream->mmap.map, stream->mmap.len); + } else +#endif + if (stream->mmap.buf) { + efree(stream->mmap.buf); + } + stream->mmap.len = 0; + stream->mmap.pos = 0; + stream->mmap.map = 0; + stream->mmap.buf = 0; + stream->handle = stream->mmap.old_handle; +} /* }}} */ + +static void zend_stream_mmap_closer(zend_stream *stream TSRMLS_DC) /* {{{ */ +{ + zend_stream_unmap(stream TSRMLS_CC); + if (stream->mmap.old_closer && stream->handle) { + stream->mmap.old_closer(stream->handle TSRMLS_CC); + } +} /* }}} */ + +static inline int zend_stream_is_mmap(zend_file_handle *file_handle) { /* {{{ */ + return file_handle->type == ZEND_HANDLE_MAPPED; +} /* }}} */ + +static size_t zend_stream_fsize(zend_file_handle *file_handle TSRMLS_DC) /* {{{ */ +{ + struct stat buf; + + if (zend_stream_is_mmap(file_handle)) { + return file_handle->handle.stream.mmap.len; + } + if (file_handle->type == ZEND_HANDLE_STREAM || file_handle->type == ZEND_HANDLE_MAPPED) { + return file_handle->handle.stream.fsizer(file_handle->handle.stream.handle TSRMLS_CC); + } + if (file_handle->handle.fp && fstat(fileno(file_handle->handle.fp), &buf) == 0) { + return buf.st_size; + } + + return -1; +} /* }}} */ + ZEND_API int zend_stream_open(const char *filename, zend_file_handle *handle TSRMLS_DC) /* {{{ */ { if (zend_stream_open_function) { @@ -53,56 +112,25 @@ ZEND_API int zend_stream_open(const char *filename, zend_file_handle *handle TSR handle->handle.fp = zend_fopen(filename, &handle->opened_path); handle->filename = (char *)filename; handle->free_filename = 0; + memset(&handle->handle.stream.mmap, 0, sizeof(zend_mmap)); return (handle->handle.fp) ? SUCCESS : FAILURE; } /* }}} */ -ZEND_API int zend_stream_fixup(zend_file_handle *file_handle TSRMLS_DC) /* {{{ */ +ZEND_API int zend_stream_getc(zend_file_handle *file_handle TSRMLS_DC) /* {{{ */ { - switch (file_handle->type) { - case ZEND_HANDLE_FILENAME: - if (FAILURE == zend_stream_open(file_handle->filename, file_handle TSRMLS_CC)) { - return FAILURE; - } - break; - - case ZEND_HANDLE_FD: - file_handle->handle.fp = fdopen(file_handle->handle.fd, "rb"); - file_handle->type = ZEND_HANDLE_FP; - break; - - case ZEND_HANDLE_FP: - file_handle->handle.fp = file_handle->handle.fp; - break; - - case ZEND_HANDLE_STREAM: - /* nothing to do */ - return SUCCESS; - - default: - return FAILURE; - } - if (file_handle->type == ZEND_HANDLE_FP) { - if (!file_handle->handle.fp) { - return FAILURE; - } - - /* make compatible with stream */ - file_handle->handle.stream.handle = file_handle->handle.fp; - file_handle->handle.stream.reader = zend_stream_stdio_reader; - file_handle->handle.stream.closer = zend_stream_stdio_closer; - file_handle->handle.stream.fteller = zend_stream_stdio_fteller; - - file_handle->handle.stream.interactive = isatty(fileno((FILE *)file_handle->handle.stream.handle)); + char buf; + if (file_handle->handle.stream.reader(file_handle->handle.stream.handle, &buf, sizeof(buf) TSRMLS_CC)) { + return (int)buf; } - return SUCCESS; + return EOF; } /* }}} */ -ZEND_API size_t zend_stream_read(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC) /* {{{ */ +static size_t zend_stream_read(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC) /* {{{ */ { - if (file_handle->handle.stream.interactive) { + if (!zend_stream_is_mmap(file_handle) && file_handle->handle.stream.isatty) { int c = '*'; size_t n; @@ -112,13 +140,15 @@ ZEND_API size_t zend_stream_read(zend_file_handle *file_handle, char *buf, size_ Ascii value 4 is actually EOT character which is not defined anywhere in the LibC or else we can use instead of hardcoded 4. */ - for ( n = 0; n < len && (c = zend_stream_getc( file_handle TSRMLS_CC)) != EOF && c != 4 && c != '\n'; ++n ) + for (n = 0; n < len && (c = zend_stream_getc(file_handle TSRMLS_CC)) != EOF && c != 4 && c != '\n'; ++n) { #else - for ( n = 0; n < len && (c = zend_stream_getc( file_handle TSRMLS_CC)) != EOF && c != '\n'; ++n ) + for (n = 0; n < len && (c = zend_stream_getc(file_handle TSRMLS_CC)) != EOF && c != '\n'; ++n) { #endif - buf[n] = (char) c; - if ( c == '\n' ) - buf[n++] = (char) c; + buf[n] = (char)c; + } + if (c == '\n') { + buf[n++] = (char)c; + } return n; } @@ -126,26 +156,171 @@ ZEND_API size_t zend_stream_read(zend_file_handle *file_handle, char *buf, size_ } /* }}} */ -ZEND_API int zend_stream_getc(zend_file_handle *file_handle TSRMLS_DC) /* {{{ */ +ZEND_API int zend_stream_fixup(zend_file_handle *file_handle, char **buf, size_t *len TSRMLS_DC) /* {{{ */ { - char buf; + size_t size; + zend_stream_type old_type; - if (file_handle->handle.stream.reader(file_handle->handle.stream.handle, &buf, sizeof(buf) TSRMLS_CC)) { - return (int)buf; + if (file_handle->type == ZEND_HANDLE_FILENAME) { + if (zend_stream_open(file_handle->filename, file_handle TSRMLS_CC) == FAILURE) { + return FAILURE; + } } - return EOF; + + switch (file_handle->type) { + case ZEND_HANDLE_FD: + file_handle->type = ZEND_HANDLE_FP; + file_handle->handle.fp = fdopen(file_handle->handle.fd, "rb"); + /* no break; */ + case ZEND_HANDLE_FP: + if (!file_handle->handle.fp) { + return FAILURE; + } + memset(&file_handle->handle.stream.mmap, 0, sizeof(zend_mmap)); + file_handle->handle.stream.isatty = isatty(fileno((FILE *)file_handle->handle.stream.handle)) ? 1 : 0; + file_handle->handle.stream.reader = (zend_stream_reader_t)zend_stream_stdio_reader; + file_handle->handle.stream.closer = (zend_stream_closer_t)zend_stream_stdio_closer; + file_handle->handle.stream.fsizer = (zend_stream_fsizer_t)zend_stream_stdio_fsizer; + memset(&file_handle->handle.stream.mmap, 0, sizeof(file_handle->handle.stream.mmap)); + /* no break; */ + case ZEND_HANDLE_STREAM: + /* nothing to do */ + break; + + case ZEND_HANDLE_MAPPED: + file_handle->handle.stream.mmap.pos = 0; + *buf = file_handle->handle.stream.mmap.buf; + *len = file_handle->handle.stream.mmap.len; + return SUCCESS; + + default: + return FAILURE; + } + + size = zend_stream_fsize(file_handle TSRMLS_CC); + if (size == (size_t)-1) { + return FAILURE; + } + + old_type = file_handle->type; + file_handle->type = ZEND_HANDLE_STREAM; /* we might still be _FP but we need fsize() work */ + + if (old_type == ZEND_HANDLE_FP && !file_handle->handle.stream.isatty && size) { +#if HAVE_MMAP + if (file_handle->handle.fp && size) { + /* *buf[size] is zeroed automatically by the kernel */ + *buf = mmap(0, size + ZEND_MMAP_AHEAD, PROT_READ, MAP_PRIVATE, fileno(file_handle->handle.fp), 0); + if (*buf != MAP_FAILED) { + file_handle->handle.stream.mmap.len = size; + file_handle->handle.stream.mmap.map = *buf; + file_handle->handle.stream.mmap.buf = *buf; + goto return_mapped; + } + } +#endif + file_handle->handle.stream.mmap.map = 0; + file_handle->handle.stream.mmap.buf = *buf = safe_emalloc(1, size, ZEND_MMAP_AHEAD); + file_handle->handle.stream.mmap.len = zend_stream_read(file_handle, *buf, size TSRMLS_CC); + } else { + size_t read, remain = 4*1024; + *buf = emalloc(remain); + size = 0; + + while ((read = zend_stream_read(file_handle, *buf + size, remain TSRMLS_CC)) > 0) { + size += read; + remain -= read; + + if (remain == 0) { + *buf = safe_erealloc(*buf, size, 2, 0); + remain = size; + } + } + file_handle->handle.stream.mmap.map = 0; + file_handle->handle.stream.mmap.buf = *buf; + file_handle->handle.stream.mmap.len = size; + if (size && remain < ZEND_MMAP_AHEAD) { + *buf = safe_erealloc(*buf, size, 1, ZEND_MMAP_AHEAD); + } + } + + if (file_handle->handle.stream.mmap.len == 0) { + *buf = erealloc(*buf, ZEND_MMAP_AHEAD); + file_handle->handle.stream.mmap.buf = *buf; + } + + if (ZEND_MMAP_AHEAD) { + memset(file_handle->handle.stream.mmap.buf + file_handle->handle.stream.mmap.len, 0, ZEND_MMAP_AHEAD); + } + +return_mapped: + file_handle->type = ZEND_HANDLE_MAPPED; + file_handle->handle.stream.mmap.pos = 0; + file_handle->handle.stream.mmap.old_handle = file_handle->handle.stream.handle; + file_handle->handle.stream.mmap.old_closer = file_handle->handle.stream.closer; + file_handle->handle.stream.handle = &file_handle->handle.stream; + file_handle->handle.stream.closer = (zend_stream_closer_t)zend_stream_mmap_closer; + + *buf = file_handle->handle.stream.mmap.buf; + *len = file_handle->handle.stream.mmap.len; + + return SUCCESS; } /* }}} */ -ZEND_API int zend_stream_ferror(zend_file_handle *file_handle TSRMLS_DC) /* {{{ */ -{ - return 0; +ZEND_API void zend_file_handle_dtor(zend_file_handle *fh TSRMLS_DC) /* {{{ */ + { + switch (fh->type) { + case ZEND_HANDLE_FD: + /* nothing to do */ + break; + case ZEND_HANDLE_FP: + fclose(fh->handle.fp); + break; + case ZEND_HANDLE_STREAM: + case ZEND_HANDLE_MAPPED: + if (fh->handle.stream.closer && fh->handle.stream.handle) { + fh->handle.stream.closer(fh->handle.stream.handle TSRMLS_CC); + } + fh->handle.stream.handle = NULL; + break; + case ZEND_HANDLE_FILENAME: + /* We're only supposed to get here when destructing the used_files hash, + * which doesn't really contain open files, but references to their names/paths + */ + break; + } + if (fh->opened_path) { + efree(fh->opened_path); + fh->opened_path = NULL; + } + if (fh->free_filename && fh->filename) { + efree(fh->filename); + fh->filename = NULL; + } } /* }}} */ -ZEND_API long zend_stream_ftell(zend_file_handle *file_handle TSRMLS_DC) /* {{{ */ -{ - return file_handle->handle.stream.fteller(file_handle->handle.stream.handle TSRMLS_CC); +ZEND_API int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2) /* {{{ */ + { + if (fh1->type != fh2->type) { + return 0; + } + switch (fh1->type) { + case ZEND_HANDLE_FD: + return fh1->handle.fd == fh2->handle.fd; + case ZEND_HANDLE_FP: + return fh1->handle.fp == fh2->handle.fp; + case ZEND_HANDLE_STREAM: + return fh1->handle.stream.handle == fh2->handle.stream.handle; + case ZEND_HANDLE_MAPPED: + return (fh1->handle.stream.handle == &fh1->handle.stream && + fh2->handle.stream.handle == &fh2->handle.stream && + fh1->handle.stream.mmap.old_handle == fh2->handle.stream.mmap.old_handle) + || fh1->handle.stream.handle == fh2->handle.stream.handle; + default: + return 0; + } + return 0; } /* }}} */ diff --git a/Zend/zend_stream.h b/Zend/zend_stream.h index 61c5ecfebb..ad62f2963a 100644 --- a/Zend/zend_stream.h +++ b/Zend/zend_stream.h @@ -25,39 +25,55 @@ * These functions are private to the engine. * */ -typedef size_t (*zend_stream_reader_t)(void *handle, char *buf, size_t len TSRMLS_DC); -typedef void (*zend_stream_closer_t)(void *handle TSRMLS_DC); -typedef long (*zend_stream_fteller_t)(void *handle TSRMLS_DC); +typedef size_t (*zend_stream_fsizer_t)(void* handle TSRMLS_DC); +typedef size_t (*zend_stream_reader_t)(void* handle, char *buf, size_t len TSRMLS_DC); +typedef void (*zend_stream_closer_t)(void* handle TSRMLS_DC); + +#define ZEND_MMAP_AHEAD 32 + +typedef enum { + ZEND_HANDLE_FILENAME, + ZEND_HANDLE_FD, + ZEND_HANDLE_FP, + ZEND_HANDLE_STREAM, + ZEND_HANDLE_MAPPED, +} zend_stream_type; + +typedef struct _zend_mmap { + size_t len; + size_t pos; + void *map; + char *buf; + void *old_handle; + zend_stream_closer_t old_closer; +} zend_mmap; typedef struct _zend_stream { - void *handle; - zend_stream_reader_t reader; - zend_stream_closer_t closer; - zend_stream_fteller_t fteller; - int interactive; + void *handle; + int isatty; + zend_mmap mmap; + zend_stream_reader_t reader; + zend_stream_fsizer_t fsizer; + zend_stream_closer_t closer; } zend_stream; typedef struct _zend_file_handle { - zend_uchar type; - char *filename; - char *opened_path; + zend_stream_type type; + char *filename; + char *opened_path; union { - int fd; - FILE *fp; - zend_stream stream; + int fd; + FILE *fp; + zend_stream stream; } handle; zend_bool free_filename; } zend_file_handle; BEGIN_EXTERN_C() ZEND_API int zend_stream_open(const char *filename, zend_file_handle *handle TSRMLS_DC); -ZEND_API int zend_stream_ferror(zend_file_handle *file_handle TSRMLS_DC); -ZEND_API int zend_stream_getc(zend_file_handle *file_handle TSRMLS_DC); -ZEND_API size_t zend_stream_read(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC); -ZEND_API long zend_stream_ftell(zend_file_handle *file_handle TSRMLS_DC); -ZEND_API int zend_stream_fixup(zend_file_handle *file_handle TSRMLS_DC); +ZEND_API int zend_stream_fixup(zend_file_handle *file_handle, char **buf, size_t *len TSRMLS_DC); +ZEND_API void zend_file_handle_dtor(zend_file_handle *fh TSRMLS_DC); +ZEND_API int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2); END_EXTERN_C() -#define zend_stream_close(handle) zend_file_handle_dtor((handle)) - #endif diff --git a/Zend/zend_strtod.c b/Zend/zend_strtod.c index 78dc79be98..7f06b0e9ec 100644 --- a/Zend/zend_strtod.c +++ b/Zend/zend_strtod.c @@ -2730,7 +2730,7 @@ ZEND_API double zend_oct_strtod(const char *str, char **endptr) /* {{{ */ s++; while ((c = *s++)) { - if (c > '7') { + if (c < '0' || c > '7') { /* break and return the current value if the number is not well-formed * that's what Linux strtol() does */ diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index bd658a94d3..ab25403154 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -3176,7 +3176,7 @@ ZEND_VM_HANDLER(73, ZEND_INCLUDE_OR_EVAL, CONST|TMP|VAR|CV, ANY) new_op_array = zend_compile_file(&file_handle, (Z_LVAL(opline->op2.u.constant)==ZEND_INCLUDE_ONCE?ZEND_INCLUDE:ZEND_REQUIRE) TSRMLS_CC); zend_destroy_file_handle(&file_handle TSRMLS_CC); } else { - zend_file_handle_dtor(&file_handle); + zend_file_handle_dtor(&file_handle TSRMLS_CC); failure_retval=1; } } else { diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index 8013088595..9f3680cf76 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -1730,7 +1730,7 @@ static int ZEND_INCLUDE_OR_EVAL_SPEC_CONST_HANDLER(ZEND_OPCODE_HANDLER_ARGS) new_op_array = zend_compile_file(&file_handle, (Z_LVAL(opline->op2.u.constant)==ZEND_INCLUDE_ONCE?ZEND_INCLUDE:ZEND_REQUIRE) TSRMLS_CC); zend_destroy_file_handle(&file_handle TSRMLS_CC); } else { - zend_file_handle_dtor(&file_handle); + zend_file_handle_dtor(&file_handle TSRMLS_CC); failure_retval=1; } } else { @@ -5037,7 +5037,7 @@ static int ZEND_INCLUDE_OR_EVAL_SPEC_TMP_HANDLER(ZEND_OPCODE_HANDLER_ARGS) new_op_array = zend_compile_file(&file_handle, (Z_LVAL(opline->op2.u.constant)==ZEND_INCLUDE_ONCE?ZEND_INCLUDE:ZEND_REQUIRE) TSRMLS_CC); zend_destroy_file_handle(&file_handle TSRMLS_CC); } else { - zend_file_handle_dtor(&file_handle); + zend_file_handle_dtor(&file_handle TSRMLS_CC); failure_retval=1; } } else { @@ -8378,7 +8378,7 @@ static int ZEND_INCLUDE_OR_EVAL_SPEC_VAR_HANDLER(ZEND_OPCODE_HANDLER_ARGS) new_op_array = zend_compile_file(&file_handle, (Z_LVAL(opline->op2.u.constant)==ZEND_INCLUDE_ONCE?ZEND_INCLUDE:ZEND_REQUIRE) TSRMLS_CC); zend_destroy_file_handle(&file_handle TSRMLS_CC); } else { - zend_file_handle_dtor(&file_handle); + zend_file_handle_dtor(&file_handle TSRMLS_CC); failure_retval=1; } } else { @@ -22634,7 +22634,7 @@ static int ZEND_INCLUDE_OR_EVAL_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS) new_op_array = zend_compile_file(&file_handle, (Z_LVAL(opline->op2.u.constant)==ZEND_INCLUDE_ONCE?ZEND_INCLUDE:ZEND_REQUIRE) TSRMLS_CC); zend_destroy_file_handle(&file_handle TSRMLS_CC); } else { - zend_file_handle_dtor(&file_handle); + zend_file_handle_dtor(&file_handle TSRMLS_CC); failure_retval=1; } } else { -- 2.40.0