From 043f9eb90afcc565a88834ff26ed127c5d31d103 Mon Sep 17 00:00:00 2001 From: "Thomas G. Lockhart" Date: Sun, 4 Aug 2002 06:36:18 +0000 Subject: [PATCH] Implement hex literal conversion to bit string literal. May not be the long-term solution (some continuing discussion with Peter E.) but better than the current mapping of a conversion to integer which I'd put in years ago before we had any bit string types at all. This is already supported in the bit string implementation elsewhere. --- src/backend/parser/scan.l | 74 +++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 4e22646c68..c8e13c382e 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.97 2002/06/22 02:04:45 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.98 2002/08/04 06:36:18 thomas Exp $ * *------------------------------------------------------------------------- */ @@ -60,7 +60,7 @@ static char *litbufdup(void); * When we parse a token that requires multiple lexer rules to process, * we set token_start to point at the true start of the token, for use * by yyerror(). yytext will point at just the text consumed by the last - * rule, so it's not very helpful (eg, it might contain just the last + * rule, so it's not very helpful (e.g., it might contain just the last * quote mark of a quoted identifier). But to avoid cluttering every rule * with setting token_start, we allow token_start = NULL to denote that * it's okay to use yytext. @@ -93,10 +93,10 @@ unsigned char unescape_single_char(unsigned char c); * and to eliminate parsing troubles for numeric strings. 
* Exclusive states: * bit string literal - * extended C-style comments - thomas 1997-07-12 - * delimited identifiers (double-quoted identifiers) - thomas 1997-10-27 - * hexadecimal numeric string - thomas 1997-11-16 - * quoted strings - thomas 1997-07-30 + * extended C-style comments + * delimited identifiers (double-quoted identifiers) + * hexadecimal numeric string + * quoted strings */ %x xb @@ -106,6 +106,13 @@ unsigned char unescape_single_char(unsigned char c); %x xq /* Bit string + * It is tempting to scan the string for only those characters + * which are allowed. However, this leads to silently swallowed + * characters if illegal characters are included in the string. + * For example, if xbinside is [01] then B'ABCD' is interpreted + * as a zero-length string, and the ABCD' is lost! + * Better to pass the string forward and let the input routines + * validate the contents. */ xbstart [bB]{quote} xbstop {quote} @@ -116,7 +123,7 @@ xbcat {quote}{whitespace_with_newline}{quote} */ xhstart [xX]{quote} xhstop {quote} -xhinside [^']+ +xhinside [^']* xhcat {quote}{whitespace_with_newline}{quote} /* National character @@ -244,7 +251,7 @@ other . * style of two adjacent single quotes "''" and in the Postgres/Java style * of escaped-quote "\'". * Other embedded escaped characters are matched explicitly and the leading - * backslash is dropped from the string. - thomas 1997-09-24 + * backslash is dropped from the string. * Note that xcstart must appear before operator, as explained above! * Also whitespace (comment) must appear before operator. */ @@ -291,8 +298,10 @@ other . {xbstart} { /* Binary bit type. - * Should be passing the type forward into the parser - * rather than trying to embed it into the string. + * At some point we should simply pass the string + * forward to the parser and label it there. + * In the meantime, place a leading "b" on the string + * to mark it for the input routine as a binary string. 
*/ token_start = yytext; BEGIN(xb); @@ -301,10 +310,8 @@ other . } <xb>{xbstop} { BEGIN(INITIAL); - if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0') - yyerror("invalid bit string input"); yylval.str = litbufdup(); - return BITCONST; + return BCONST; } <xh>{xhinside} | <xb>{xbinside} { @@ -314,44 +321,43 @@ other . <xb>{xbcat} { /* ignore */ } -<xb><<EOF>> { yyerror("unterminated bit string literal"); } - +<xb><<EOF>> { + yyerror("unterminated bit string literal"); + } {xhstart} { /* Hexadecimal bit type. - * Should be passing the type forward into the parser - * rather than trying to embed it into the string. + * At some point we should simply pass the string + * forward to the parser and label it there. + * In the meantime, place a leading "x" on the string + * to mark it for the input routine as a hex string. */ token_start = yytext; BEGIN(xh); startlit(); + addlitchar('x'); } <xh>{xhstop} { - long val; - char* endptr; - BEGIN(INITIAL); - errno = 0; - val = strtol(literalbuf, &endptr, 16); - if (*endptr != '\0' || errno == ERANGE -#ifdef HAVE_LONG_INT_64 - /* if long > 32 bits, check for overflow of int4 */ - || val != (long) ((int32) val) -#endif - ) - yyerror("bad hexadecimal integer input"); - yylval.ival = val; - return ICONST; + yylval.str = litbufdup(); + return XCONST; } -<xh><<EOF>> { yyerror("unterminated hexadecimal integer"); } +<xh><<EOF>> { yyerror("unterminated hexadecimal string literal"); } {xnstart} { /* National character. - * Need to remember type info to flow it forward into the parser. - * Not yet implemented. - thomas 2002-06-17 + * We will pass this along as a normal character string, + * but preceded with an internally-generated "NCHAR". */ + const ScanKeyword *keyword; + + /* This had better be a keyword! */ + keyword = ScanKeywordLookup("nchar"); + Assert(keyword != NULL); + yylval.keyword = keyword->name; token_start = yytext; BEGIN(xq); startlit(); + return keyword->value; } -- 2.40.0