*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.63 2000/01/26 05:56:43 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.64 2000/02/19 04:17:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* set up my input handler --- need one flavor for flex, one for lex */
#if defined(FLEX_SCANNER)
+
#define YY_NO_UNPUT
static int myinput(char* buf, int max);
#undef YY_INPUT
#define YY_INPUT(buf,result,max) {result = myinput(buf,max);}
-#else
+
+#else /* !FLEX_SCANNER */
+
#undef input
int input();
#undef unput
void unput(char);
+
#endif /* FLEX_SCANNER */
extern YYSTYPE yylval;
static void addlit(char *ytext, int yleng);
%}
-/* OK, here is a short description of lex/flex rules behavior.
+/*
+ * OK, here is a short description of lex/flex rules behavior.
* The longest pattern which matches an input string is always chosen.
* For equal-length patterns, the first occurring in the rules list is chosen.
- * INITIAL is the starting condition, to which all non-conditional rules apply.
- * When in an exclusive condition, only those rules defined for that condition apply.
+ * INITIAL is the starting state, to which all non-conditional rules apply.
+ * Exclusive states change parsing rules while the state is active. When in
+ * an exclusive state, only those rules defined for that state apply.
*
- * Exclusive states change parsing rules while the state is active.
- * There are exclusive states for quoted strings, extended comments,
- * and to eliminate parsing troubles for numeric strings.
+ * We use exclusive states for quoted strings, extended comments,
+ * and to eliminate parsing troubles for numeric strings.
* Exclusive states:
* <xb> binary numeric string - thomas 1997-11-16
* <xc> extended C-style comments - tgl 1997-07-12
* <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
* <xh> hexadecimal numeric string - thomas 1997-11-16
* <xq> quoted strings - tgl 1997-07-30
- *
- * The "extended comment" syntax closely resembles allowable operator syntax.
- * So, when in condition <xc>, only strings which would terminate the
- * "extended comment" trigger any action other than "ignore".
- * Be sure to match _any_ candidate comment, including those with appended
- * operator-like symbols. - thomas 1997-07-14
*/
%x xb
*/
xbstart [bB]{quote}
xbstop {quote}
-xbinside [^']*
-xbcat {quote}{space}*\n{space}*{quote}
+xbinside [^']+
+xbcat {quote}{whitespace_with_newline}{quote}
/* Hexadecimal number
*/
xhstart [xX]{quote}
xhstop {quote}
-xhinside [^']*
-xhcat {quote}{space}*\n{space}*{quote}
+xhinside [^']+
+xhcat {quote}{whitespace_with_newline}{quote}
/* Extended quote
* xqdouble implements SQL92 embedded quote
* xqcat allows strings to cross input lines
* Note: reduction of '' and \ sequences to output text is done in scanstr(),
- * not by rules here.
+ * not by rules here. But we do get rid of xqcat sequences here.
*/
quote '
xqstart {quote}
xqstop {quote}
xqdouble {quote}{quote}
-xqinside [^\\']*
+xqinside [^\\']+
xqliteral [\\](.|\n)
-xqcat {quote}{space}*\n{space}*{quote}
+xqcat {quote}{whitespace_with_newline}{quote}
/* Delimited quote
* Allows embedded spaces and other special characters into identifiers.
dquote \"
xdstart {dquote}
xdstop {dquote}
-xdinside [^"]*
+xdinside [^"]+
-/* Comments
+/* C-style comments
* Ignored by the scanner and parser.
+ *
+ * The "extended comment" syntax closely resembles allowable operator syntax.
+ * The tricky part here is to get lex to recognize a string starting with
+ * slash-star as a comment, when interpreting it as an operator would produce
+ * a longer match --- remember lex will prefer a longer match! So, we have
+ * to provide a special rule for xcline (a complete comment that could
+ * otherwise look like an operator), as well as append {op_and_self}* to
+ * xcstart so that it matches at least as much as {operator} would.
+ * Then the tie-breaker (first matching rule of same length) wins.
+ * There is still a problem if someone writes, e.g., slash-star-star-slash-plus.
+ * It'll be taken as an xcstart, rather than as an xcline followed by an
+ * operator, as one could wish. I don't see any way around that given lex's
+ * behavior; that someone will just have to write a space after the comment.
*/
-xcline [\/][\*].*[\*][\/]{space}*\n*
-xcstart [\/][\*]{op_and_self}*
-xcstop {op_and_self}*[\*][\/]({space}*|\n)
-xcinside [^*]*
-xcstar [^/]
+xcline \/\*{op_and_self}*\*\/
+xcstart \/\*{op_and_self}*
+xcstop \*+\/
+xcinside ([^*]+)|(\*+[^/])
digit [0-9]
letter [\200-\377_A-Za-z]
integer {digit}+
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
+real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
param \${integer}
-comment ("--"|"//").*
+/*
+ * In order to make the world safe for Windows and Mac clients as well as
+ * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
+ * sequence will be seen as two successive newlines, but that doesn't cause
+ * any problems. SQL92-style comments, which start with -- and extend to the
+ * next newline, are treated as equivalent to a single whitespace character.
+ *
+ * NOTE a fine point: if there is no newline following --, we will absorb
+ * everything to the end of the input as a comment. This is correct. Older
+ * versions of Postgres failed to recognize -- as a comment if the input
+ * did not end with a newline.
+ *
+ * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ */
space [ \t\n\r\f]
+horiz_space [ \t\f]
+newline [\n\r]
+non_newline [^\n\r]
+
+comment (("--"|"//"){non_newline}*)
+
+whitespace ({space}|{comment})
+
+/*
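+ * SQL92 requires at least one newline in the whitespace separating
+ * string literals that are to be concatenated. Silly, but who are we
+ * to argue? Note that {whitespace_with_newline} should not have * after
+ * it, because its definition already absorbs any run of whitespace and
+ * comments around the required newline, whereas {whitespace} matches only
+ * a single space character or comment and so should generally have a *
+ * after it.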
+ */
+
+horiz_whitespace ({horiz_space}|{comment})
+whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
+
other .
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
* of escaped-quote "\'".
* Other embedded escaped characters are matched explicitly and the leading
* backslash is dropped from the string. - thomas 1997-09-24
+ * Note that xcline must appear before xcstart, which must appear before
+ * operator, as explained above! Also whitespace (comment) must appear
+ * before operator.
*/
%%
-{comment} { /* ignore */ }
+{whitespace} { /* ignore */ }
{xcline} { /* ignore */ }
-<xc>{xcstar} |
{xcstart} { BEGIN(xc); }
<xc>{xcstop} { BEGIN(INITIAL); }
}
<xh>{xhcat} |
<xb>{xbcat} {
+ /* ignore */
}
{xhstart} {
addlit(yytext, yyleng);
}
<xq>{xqcat} {
+ /* ignore */
}
{self} { return yytext[0]; }
{operator} {
- if (strcmp((char*)yytext,"!=") == 0)
- yylval.str = pstrdup("<>"); /* compatability */
+ if (strcmp((char*)yytext, "!=") == 0)
+ yylval.str = pstrdup("<>"); /* compatibility */
else
yylval.str = pstrdup((char*)yytext);
return Op;
}
+
{param} {
yylval.ival = atoi((char*)&yytext[1]);
return PARAM;
}
-
{integer} {
char* endptr;
return IDENT;
}
}
-{space} { /* ignore */ }
{other} { return yytext[0]; }