From 61446e0927ab31bf4227c7eb3de95b72540f051a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 1 May 2002 17:12:08 +0000 Subject: [PATCH] Improve lexer's error reporting. You get the whole token mentioned now in parse error messages, not just the part scanned by the last flex rule. For example, select "foo" "bar"; used to draw ERROR: parser: parse error at or near """ which was rather unhelpful. Now it gives ERROR: parser: parse error at or near ""bar"" Also, error messages concerning bitstring literals and suchlike will quote the source text at you, not the processed internal form of the literal. --- src/backend/parser/scan.l | 117 ++++++++++++++++---------- src/backend/po/nls.mk | 2 +- src/test/regress/expected/strings.out | 2 +- 3 files changed, 73 insertions(+), 48 deletions(-) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index cb8610c87a..f59cd7b27b 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.92 2002/04/20 21:56:14 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.93 2002/05/01 17:12:07 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -56,6 +56,17 @@ static void addlit(char *ytext, int yleng); static void addlitchar(unsigned char ychar); static char *litbufdup(void); +/* + * When we parse a token that requires multiple lexer rules to process, + * we set token_start to point at the true start of the token, for use + * by yyerror(). yytext will point at just the text consumed by the last + * rule, so it's not very helpful (eg, it might contain just the last + * quote mark of a quoted identifier). But to avoid cluttering every rule + * with setting token_start, we allow token_start = NULL to denote that + * it's okay to use yytext. 
+ */ +static char *token_start; + /* Handles to the buffer that the lexer uses internally */ static YY_BUFFER_STATE scanbufhandle; static char *scanbuf; @@ -208,7 +219,7 @@ non_newline [^\n\r] comment ("--"{non_newline}*) -whitespace ({space}|{comment}) +whitespace ({space}+|{comment}) /* * SQL92 requires at least one newline in the whitespace separating @@ -235,9 +246,16 @@ other . */ %% + +%{ + /* code to execute during start of each call of yylex() */ + token_start = NULL; +%} + {whitespace} { /* ignore */ } {xcstart} { + token_start = yytext; xcdepth = 0; BEGIN(xc); /* Put back any characters past slash-star; see above */ @@ -252,7 +270,11 @@ other . <xc>{xcstop} { if (xcdepth <= 0) + { BEGIN(INITIAL); + /* reset token_start for next token */ + token_start = NULL; + } else xcdepth--; } @@ -261,9 +283,10 @@ other . <xc>{op_chars} { /* ignore */ } -<xc><<EOF>> { elog(ERROR, "Unterminated /* comment"); } +<xc><<EOF>> { yyerror("unterminated /* comment"); } {xbitstart} { + token_start = yytext; BEGIN(xbit); startlit(); addlitchar('b'); @@ -271,8 +294,7 @@ other . <xbit>{xbitstop} { BEGIN(INITIAL); if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0') - elog(ERROR, "invalid bit string input: '%s'", - literalbuf); + yyerror("invalid bit string input"); yylval.str = litbufdup(); return BITCONST; } @@ -284,9 +306,10 @@ other . <xbit>{xbitcat} { /* ignore */ } -<xbit><<EOF>> { elog(ERROR, "unterminated bit string literal"); } +<xbit><<EOF>> { yyerror("unterminated bit string literal"); } {xhstart} { + token_start = yytext; BEGIN(xh); startlit(); } @@ -303,14 +326,14 @@ other . || val != (long) ((int32) val) #endif ) - elog(ERROR, "Bad hexadecimal integer input '%s'", - literalbuf); + yyerror("bad hexadecimal integer input"); yylval.ival = val; return ICONST; } -<xh><<EOF>> { elog(ERROR, "Unterminated hexadecimal integer"); } +<xh><<EOF>> { yyerror("unterminated hexadecimal integer"); } {xqstart} { + token_start = yytext; BEGIN(xq); startlit(); } @@ -335,30 +358,31 @@ other . 
<xq>{xqcat} { /* ignore */ } -<xq><<EOF>> { elog(ERROR, "Unterminated quoted string"); } +<xq><<EOF>> { yyerror("unterminated quoted string"); } {xdstart} { + token_start = yytext; BEGIN(xd); startlit(); } <xd>{xdstop} { BEGIN(INITIAL); - if (strlen(literalbuf) == 0) - elog(ERROR, "zero-length delimited identifier"); - if (strlen(literalbuf) >= NAMEDATALEN) + if (literallen == 0) + yyerror("zero-length delimited identifier"); + if (literallen >= NAMEDATALEN) { -#ifdef MULTIBYTE int len; - len = pg_mbcliplen(literalbuf,strlen(literalbuf),NAMEDATALEN-1); - elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"", - literalbuf, len, literalbuf); - literalbuf[len] = '\0'; +#ifdef MULTIBYTE + len = pg_mbcliplen(literalbuf, literallen, + NAMEDATALEN-1); #else - elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"", - literalbuf, NAMEDATALEN-1, literalbuf); - literalbuf[NAMEDATALEN-1] = '\0'; + len = NAMEDATALEN-1; #endif + elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"", + literalbuf, len, literalbuf); + literalbuf[len] = '\0'; + literallen = len; } yylval.str = litbufdup(); return IDENT; @@ -369,7 +393,7 @@ other . <xd>{xdinside} { addlit(yytext, yyleng); } -<xd><<EOF>> { elog(ERROR, "Unterminated quoted identifier"); } +<xd><<EOF>> { yyerror("unterminated quoted identifier"); } {typecast} { return TYPECAST; } @@ -383,8 +407,8 @@ other . * character will match a prior rule, not this one. */ int nchars = yyleng; - char *slashstar = strstr((char*)yytext, "/*"); - char *dashdash = strstr((char*)yytext, "--"); + char *slashstar = strstr(yytext, "/*"); + char *dashdash = strstr(yytext, "--"); if (slashstar && dashdash) { @@ -395,7 +419,7 @@ other . else if (!slashstar) slashstar = dashdash; if (slashstar) - nchars = slashstar - ((char*)yytext); + nchars = slashstar - yytext; /* * For SQL92 compatibility, '+' and '-' cannot be the @@ -437,15 +461,15 @@ other . 
} /* Convert "!=" operator to "<>" for compatibility */ - if (strcmp((char*)yytext, "!=") == 0) + if (strcmp(yytext, "!=") == 0) yylval.str = pstrdup("<>"); else - yylval.str = pstrdup((char*)yytext); + yylval.str = pstrdup(yytext); return Op; } {param} { - yylval.ival = atol((char*)&yytext[1]); + yylval.ival = atol(yytext + 1); return PARAM; } @@ -454,7 +478,7 @@ other . char* endptr; errno = 0; - val = strtol((char *)yytext, &endptr, 10); + val = strtol(yytext, &endptr, 10); if (*endptr != '\0' || errno == ERANGE #ifdef HAVE_LONG_INT_64 /* if long > 32 bits, check for overflow of int4 */ @@ -463,28 +487,29 @@ other . ) { /* integer too large, treat it as a float */ - yylval.str = pstrdup((char*)yytext); + yylval.str = pstrdup(yytext); return FCONST; } yylval.ival = val; return ICONST; } {decimal} { - yylval.str = pstrdup((char*)yytext); + yylval.str = pstrdup(yytext); return FCONST; } {real} { - yylval.str = pstrdup((char*)yytext); + yylval.str = pstrdup(yytext); return FCONST; } {identifier} { ScanKeyword *keyword; + char *ident; int i; /* Is it a keyword? */ - keyword = ScanKeywordLookup((char*) yytext); + keyword = ScanKeywordLookup(yytext); if (keyword != NULL) return keyword->value; @@ -496,26 +521,25 @@ other . * which seems appropriate under SQL99 rules, whereas * the keyword comparison was NOT locale-dependent. 
*/ - for (i = 0; yytext[i]; i++) + ident = pstrdup(yytext); + for (i = 0; ident[i]; i++) { - if (isupper((unsigned char) yytext[i])) - yytext[i] = tolower((unsigned char) yytext[i]); + if (isupper((unsigned char) ident[i])) + ident[i] = tolower((unsigned char) ident[i]); } if (i >= NAMEDATALEN) { -#ifdef MULTIBYTE int len; - len = pg_mbcliplen(yytext,i,NAMEDATALEN-1); - elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"", - yytext, len, yytext); - yytext[len] = '\0'; +#ifdef MULTIBYTE + len = pg_mbcliplen(ident, i, NAMEDATALEN-1); #else - elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"", - yytext, NAMEDATALEN-1, yytext); - yytext[NAMEDATALEN-1] = '\0'; + len = NAMEDATALEN-1; #endif + elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"", + ident, len, ident); + ident[len] = '\0'; } - yylval.str = pstrdup((char*) yytext); + yylval.str = ident; return IDENT; } @@ -526,7 +550,8 @@ other . void yyerror(const char *message) { - elog(ERROR, "parser: %s at or near \"%s\"", message, yytext); + elog(ERROR, "parser: %s at or near \"%s\"", message, + token_start ? 
token_start : yytext); } diff --git a/src/backend/po/nls.mk b/src/backend/po/nls.mk index 75975029b3..8797d8527d 100644 --- a/src/backend/po/nls.mk +++ b/src/backend/po/nls.mk @@ -1,4 +1,4 @@ CATALOG_NAME := postgres AVAIL_LANGUAGES := cs de hu ru zh_CN zh_TW GETTEXT_FILES := + gettext-files -GETTEXT_TRIGGERS:= elog:2 postmaster_error +GETTEXT_TRIGGERS:= elog:2 postmaster_error yyerror diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 42df7c06df..ebfe8eeb66 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -17,7 +17,7 @@ SELECT 'first line' ' - next line' /* this comment is not allowed here */ ' - third line' AS "Illegal comment within continuation"; -ERROR: parser: parse error at or near "'" +ERROR: parser: parse error at or near "' - third line'" -- -- test conversions between various string types -- -- 2.40.0