From a5fecda550c6a2d9422cf8b995ccb9ccd8bb47c8 Mon Sep 17 00:00:00 2001 From: Michael Meskes Date: Wed, 5 Oct 2005 14:58:36 +0000 Subject: [PATCH] Also synced the ecpg lexer with the backend lexer. --- src/interfaces/ecpg/ChangeLog | 4 + src/interfaces/ecpg/preproc/pgc.l | 148 ++++++++++++++++++++---------- 2 files changed, 106 insertions(+), 46 deletions(-) diff --git a/src/interfaces/ecpg/ChangeLog b/src/interfaces/ecpg/ChangeLog index c0b87a8907..4ad3725af0 100644 --- a/src/interfaces/ecpg/ChangeLog +++ b/src/interfaces/ecpg/ChangeLog @@ -1945,6 +1945,10 @@ Tue Oct 4 15:23:00 CEST 2005 - Synced parser. - Fixed another bug in check to report missing varchar pointer implementation. + +Wed Oct 5 16:57:42 CEST 2005 + + - Synced lexer. - Set ecpg library version to 5.1. - Set ecpg version to 4.1.1. diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index c86f2cdf1e..f72b7bf7d2 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -12,7 +12,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.136 2005/06/16 01:43:48 momjian Exp $ + * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.137 2005/10/05 14:58:36 meskes Exp $ * *------------------------------------------------------------------------- */ @@ -29,6 +29,8 @@ extern YYSTYPE yylval; static int xcdepth = 0; /* depth of nesting in slash-star comments */ static char *dolqstart; /* current $foo$ quote start string */ +bool escape_string_warning; +static bool warn_on_first_escape; /* * literalbuf is used to accumulate literal values when multiple rules @@ -44,6 +46,7 @@ static int literalalloc; /* current allocated buffer size */ static void addlit(char *ytext, int yleng); static void addlitchar (unsigned char); static void parse_include (void); +static void check_escape_warning(void); char *token_start; int state_before; @@ -111,48 +114,44 @@ static struct _if_value /* Bit string */ xbstart [bB]{quote} -xbstop {quote} xbinside [^']* -xbcat {quote}{whitespace_with_newline}{quote} -/* Hexadecimal number - */ +/* Hexadecimal number */ xhstart [xX]{quote} -xhstop {quote} xhinside [^']* -xhcat {quote}{whitespace_with_newline}{quote} -/* National character - */ +/* National character */ xnstart [nN]{quote} -/* C version of hex number - */ +/* Quoted string that allows backslash escapes */ +xestart [eE]{quote} + +/* C version of hex number */ xch 0[xX][0-9A-Fa-f]* /* Extended quote - * xqdouble implements embedded quote - * xqcat allows strings to cross input lines + * xqdouble implements embedded quote, '''' */ -quote ' xqstart {quote} -xqstop {quote} xqdouble {quote}{quote} xqinside [^\\']+ xqescape [\\][^0-7] xqoctesc [\\][0-7]{1,3} xqhexesc [\\]x[0-9A-Fa-f]{1,2} -xqcat {quote}{whitespace_with_newline}{quote} /* $foo$ style quotes ("dollar quoting") * The quoted string starts with $foo$ where "foo" is an optional string * in the form of an identifier, except that it may not contain "$", * and extends to the first occurrence of an identical string. * There is *no* processing of the quoted text. + * + * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim} + * fails to match its trailing "$". */ dolq_start [A-Za-z\200-\377_] dolq_cont [A-Za-z\200-\377_0-9] dolqdelim \$({dolq_start}{dolq_cont}*)?\$ +dolqfailed \${dolq_start}{dolq_cont}* dolqinside [^$]+ /* Double quote @@ -218,11 +217,16 @@ operator {op_chars}+ /* we no longer allow unary minus in numbers. * instead we pass it separately to parser. there it gets * coerced via doNegate() -- Leon aug 20 1999 + * + * {realfail1} and {realfail2} are added to prevent the need for scanner + * backup when the {real} rule fails to match completely. */ integer {digit}+ decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) -real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+)) +real ({integer}|{decimal})[Ee][-+]?{digit}+ +realfail1 ({integer}|{decimal})[Ee] +realfail2 ({integer}|{decimal})[Ee][-+] param \${integer} @@ -262,6 +266,11 @@ whitespace ({space}+|{comment}) horiz_whitespace ({horiz_space}|{comment}) whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*) +quote ' +quotestop {quote}{whitespace}* +quotecontinue {quote}{whitespace_with_newline}{quote} +quotefail {quote}{whitespace}*"-" + /* special characters for other dbms */ /* we have to react differently in compat mode */ informix_special [\$] @@ -343,6 +352,7 @@ cppline {space}*#(.*\\{space})*.*{newline} {xcinside} { ECHO; } {op_chars} { ECHO; } +\*+ { ECHO; } <> { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated /* comment"); } @@ -352,7 +362,9 @@ cppline {space}*#(.*\\{space})*.*{newline} startlit(); addlitchar('b'); } -{xbstop} { +{quotestop} | +{quotefail} { + yyless(1); BEGIN(SQL); if (literalbuf[strspn(literalbuf, "01") + 1] != '\0') mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string input."); @@ -362,8 +374,8 @@ cppline {space}*#(.*\\{space})*.*{newline} {xhinside} | {xbinside} { addlit(yytext, yyleng); } -{xhcat} | -{xbcat} { /* ignore */ } +{quotecontinue} | +{quotecontinue} { /* ignore */ } <> { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated bit string"); } {xhstart} { @@ -371,44 +383,71 @@ cppline {space}*#(.*\\{space})*.*{newline} BEGIN(xh); startlit(); addlitchar('x'); - } -{xhstop} { - yylval.str = mm_strdup(literalbuf); - return XCONST; - } + } +{quotestop} | +{quotefail} { + yyless(1); + BEGIN(SQL); + yylval.str = mm_strdup(literalbuf); + return XCONST; + } <> { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated hexadecimal integer"); } {xnstart} { /* National character. - * Need to remember type info to flow it forward into the parser. - * Not yet implemented. - thomas 2002-06-17 + * Transfer it as-is to the backend. */ token_start = yytext; BEGIN(xq); startlit(); } {xqstart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xq); - startlit(); - } -{xqstop} { - BEGIN(state_before); - yylval.str = mm_strdup(literalbuf); - return SCONST; - } + warn_on_first_escape = true; + token_start = yytext; + state_before = YYSTATE; + BEGIN(xq); + startlit(); + } +{xestart} { + warn_on_first_escape = false; + token_start = yytext; + state_before = YYSTATE; + BEGIN(xq); + startlit(); + } +{quotestop} | +{quotefail} { + yyless(1); + BEGIN(state_before); + yylval.str = mm_strdup(literalbuf); + return SCONST; + } {xqdouble} { addlitchar('\''); } {xqinside} { addlit(yytext, yyleng); } -{xqescape} { addlit(yytext, yyleng); } -{xqoctesc} { addlit(yytext, yyleng); } -{xqhexesc} { addlit(yytext, yyleng); } -{xqcat} { /* ignore */ } +{xqescape} { + check_escape_warning(); + addlit(yytext, yyleng); + } +{xqoctesc} { + check_escape_warning(); + addlit(yytext, yyleng); + } +{xqhexesc} { + check_escape_warning(); + addlit(yytext, yyleng); + } +{quotecontinue} { /* ignore */ } . { /* This is only needed for \ just before EOF */ addlitchar(yytext[0]); } <> { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated quoted string"); } +{dolqfailed} { + /* throw back all but the initial "$" */ + yyless(1); + /* and treat it as {other} */ + return yytext[0]; + } {dolqdelim} { token_start = yytext; dolqstart = mm_strdup(yytext); @@ -434,9 +473,8 @@ cppline {space}*#(.*\\{space})*.*{newline} yyless(yyleng-1); } } -{dolqinside} { - addlit(yytext, yyleng); - } +{dolqinside} { addlit(yytext, yyleng); } +{dolqfailed} { addlit(yytext, yyleng); } . { /* This is only needed for $ inside the quoted text */ addlitchar(yytext[0]); @@ -588,11 +626,21 @@ cppline {space}*#(.*\\{space})*.*{newline} {decimal} { yylval.str = mm_strdup(yytext); return FCONST; - } + } {real} { yylval.str = mm_strdup(yytext); return FCONST; - } + } +{realfail1} { + yyless(yyleng-1); + yylval.str = mm_strdup(yytext); + return FCONST; + } +{realfail2} { + yyless(yyleng-2); + yylval.str = mm_strdup(yytext); + return FCONST; + } :{identifier}((("->"|\.){identifier})|(\[{array}\]))* { yylval.str = mm_strdup(yytext+1); return(CVARIABLE); @@ -1189,3 +1237,11 @@ parse_include(void) BEGIN C; } + +static void +check_escape_warning(void) +{ + if (warn_on_first_escape && escape_string_warning) + mmerror (PARSE_ERROR, ET_WARNING, "nonstandard use of escape in a string literal"); + warn_on_first_escape = false; /* warn only once per string */ +} -- 2.40.0