2 /*-------------------------------------------------------------------------
5 * lexical scanner for ecpg
7 * This is a modified version of src/backend/parser/scan.l
10 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
15 * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.96 2002/07/01 06:56:10 meskes Exp $
17 *-------------------------------------------------------------------------
19 #include "postgres_fe.h"
22 #include <sys/types.h>
29 /* some versions of lex define this as a macro */
36 extern YYSTYPE yylval;
38 static int xcdepth = 0; /* depth of nesting in slash-star comments */
41 * literalbuf is used to accumulate literal values when multiple rules
42 * are needed to parse a single literal. Call startlit to reset buffer
43 * to empty, addlit to add text. Note that the buffer is permanently
44 * malloc'd to the largest size needed so far in the current run.
/*
 * State of the shared literal buffer. literalbuf starts out NULL and is
 * allocated elsewhere in this file before its first use.
 */
46 static char *literalbuf = NULL; /* expandable buffer */
47 static int literallen; /* actual current length */
48 static int literalalloc; /* current allocated buffer size */
/*
 * Empty the buffer without releasing it. The macro writes through
 * literalbuf, so the buffer must already be allocated when it is used.
 */
50 #define startlit() (literalbuf[0] = '\0', literallen = 0)
/* Append yleng bytes starting at ytext to the buffer. */
51 static void addlit(char *ytext, int yleng);
/* Append a single character to the buffer. */
52 static void addlitchar (unsigned char);
/*
 * Members of the saved-input record (struct _yy_buffer; its opening line
 * is not visible in this excerpt). The scanner actions fill one record
 * with YY_CURRENT_BUFFER before switching to another input.
 */
59 YY_BUFFER_STATE buffer;
/* Link to the record that was current before this one was saved. */
62 struct _yy_buffer *next;
/*
 * Capacity of the conditional stack below; the <xcond> action reports a
 * fatal error once preproc_tos reaches MAX_NESTED_IF-1.
 */
67 #define MAX_NESTED_IF 128
/* Index of the current entry in stacked_if_value. */
68 static short preproc_tos;
/*
 * One entry per conditional nesting level. The actions in this file read
 * and write the members "condition" and "else_branch" (their declarations
 * are not visible in this excerpt).
 */
70 static struct _if_value
74 } stacked_if_value[MAX_NESTED_IF];
79 %s C SQL incl def def_ident
82 * OK, here is a short description of lex/flex rules behavior.
83 * The longest pattern which matches an input string is always chosen.
84 * For equal-length patterns, the first occurring in the rules list is chosen.
85 * INITIAL is the starting state, to which all non-conditional rules apply.
86 * Exclusive states change parsing rules while the state is active. When in
87 * an exclusive state, only those rules defined for that state apply.
89 * We use exclusive states for quoted strings, extended comments,
90 * and to eliminate parsing troubles for numeric strings.
92 * <xbit> bit string literal
93 * <xc> extended C-style comments - thomas 1997-07-12
94 * <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
95 * <xh> hexadecimal numeric string - thomas 1997-11-16
96 * <xq> quoted strings - thomas 1997-07-30
111 xbitstart [bB]{quote}
114 xbitcat {quote}{whitespace_with_newline}{quote}
116 /* Hexadecimal number
121 xhcat {quote}{whitespace_with_newline}{quote}
123 /* C version of hex number
125 xch 0[xX][0-9A-Fa-f]*
128 * xqdouble implements SQL92 embedded quote
129 * xqcat allows strings to cross input lines
134 xqdouble {quote}{quote}
137 xqoctesc [\\][0-7]{1,3}
138 xqcat {quote}{whitespace_with_newline}{quote}
141 * Allows embedded spaces and other special characters into identifiers.
146 xddouble {dquote}{dquote}
149 /* special stuff for C strings */
153 xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
157 * The "extended comment" syntax closely resembles allowable operator syntax.
158 * The tricky part here is to get lex to recognize a string starting with
159 * slash-star as a comment, when interpreting it as an operator would produce
160 * a longer match --- remember lex will prefer a longer match! Also, if we
161 * have something like plus-slash-star, lex will think this is a 3-character
162 * operator whereas we want to see it as a + operator and a comment start.
163 * The solution is two-fold:
164 * 1. append {op_chars}* to xcstart so that it matches as much text as
165 * {operator} would. Then the tie-breaker (first matching rule of same
166 * length) ensures xcstart wins. We put back the extra stuff with yyless()
167 * in case it contains a star-slash that should terminate the comment.
168 * 2. In the operator rule, check for slash-star within the operator, and
169 * if found throw it back with yyless(). This handles the plus-slash-star
171 * SQL92-style comments, which start with dash-dash, have similar interactions
172 * with the operator rule.
/* Comment opener followed by any run of operator characters (rationale above). */
174 xcstart \/\*{op_chars}*
/* First character of an identifier: a byte in \200-\377, underscore, or an ASCII letter. */
179 letter [\200-\377_A-Za-z]
/* Later characters may additionally be ASCII digits. */
180 letter_or_digit [\200-\377_A-Za-z0-9]
/* One letter followed by any number of letter_or_digit characters. */
182 identifier {letter}{letter_or_digit}*
187 * "self" is the set of chars that should be returned as single-character
188 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
189 * which can be one or more characters long (but if a single-char token
190 * appears in the "self" set, it is not to be returned as an Op). Note
191 * that the sets overlap, but each has some chars that are not in the other.
193 * If you change either set, adjust the character lists appearing in the
194 * rule for "operator"!
196 self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=]
197 op_chars [\~\!\@\#\^\&\|\`\?\$\+\-\*\/\%\<\>\=]
200 /* We no longer allow unary minus in numbers.
201 * Instead we pass it separately to the parser, where it gets
202 * coerced via doNegate() -- Leon aug 20 1999
/* Digits with a decimal point; at least one digit is required on one side of it. */
206 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
/*
 * A decimal or plain-integer mantissa followed by a mandatory exponent
 * (E or e, optional sign, one or more digits).
 */
207 real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
212 * In order to make the world safe for Windows and Mac clients as well as
213 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
214 * sequence will be seen as two successive newlines, but that doesn't cause
215 * any problems. SQL92-style comments, which start with -- and extend to the
216 * next newline, are treated as equivalent to a single whitespace character.
218 * NOTE a fine point: if there is no newline following --, we will absorb
219 * everything to the end of the input as a comment. This is correct. Older
220 * versions of Postgres failed to recognize -- as a comment if the input
221 * did not end with a newline.
223 * XXX perhaps \f (formfeed) should be treated as a newline as well?
233 comment ("--"{non_newline}*)
235 whitespace ({space}+|{comment})
238 * SQL92 requires at least one newline in the whitespace separating
239 * string literals that are to be concatenated. Silly, but who are we
240 * to argue? Note that {whitespace_with_newline} should not have * after
241 * it, whereas {whitespace} should generally have a * after it...
244 horiz_whitespace ({horiz_space}|{comment})
245 whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
249 /* some stuff needed for ecpg */
/* Case-insensitive spellings of the ecpg directive keywords. */
250 exec [eE][xX][eE][cC]
252 define [dD][eE][fF][iI][nN][eE]
253 include [iI][nN][cC][lL][uU][dD][eE]
255 ifdef [iI][fF][dD][eE][fF]
256 ifndef [iI][fF][nN][dD][eE][fF]
257 else [eE][lL][sS][eE]
258 elif [eE][lL][iI][fF]
259 endif [eE][nN][dD][iI][fF]
/*
 * The "exec sql" prefix with optional {space} after each word. {sql} and
 * {space} are defined outside this excerpt.
 */
261 exec_sql {exec}{space}*{sql}{space}*
/* One to three decimal digits; there is no range check, so values above 255 also match. */
262 ipdigit ({digit}|{digit}{digit}|{digit}{digit}{digit})
/* Four ipdigit groups separated by literal dots. */
263 ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
265 /* Take care of cpp continuation lines */
/*
 * Optional leading {space}, a '#', any number of segments that end in a
 * backslash followed by one {space} character, then the rest of the line.
 * NOTE(review): this only spans continuation lines if {space} includes the
 * newline character -- confirm against the {space} definition.
 */
266 cppline {space}*#(.*\\{space})*.*
269 * Quoted strings must allow some special characters such as single-quote
271 * Embedded single-quotes are implemented both in the SQL92-standard
272 * style of two adjacent single quotes "''" and in the Postgres/Java style
273 * of escaped-quote "\'".
274 * Other embedded escaped characters are matched explicitly and the leading
275 * backslash is dropped from the string. - thomas 1997-09-24
276 * Note that xcstart must appear before operator, as explained above!
277 * Also whitespace (comment) must appear before operator.
283 /* code to execute during start of each call of yylex() */
287 <SQL>{whitespace} { /* ignore */ }
	/*
	 * Start of an extended comment: remember where the token begins and
	 * which start condition was active (presumably restored when the
	 * comment ends; that action is not visible in this excerpt).
	 */
290 token_start = yytext;
291 state_before = YYSTATE;
294 /* Put back any characters past slash-star; see above */
300 /* Put back any characters past slash-star; see above */
	/* Text inside a comment is copied to the output unchanged via ECHO. */
316 <xc>{xcinside} { ECHO; }
317 <xc>{op_chars} { ECHO; }
	/* Reaching end of input while still inside <xc> is reported as a parse error. */
319 <xc><<EOF>> { mmerror(PARSE_ERROR, ET_ERROR, "Unterminated /* comment"); }
/* Bit string literal actions; only part of the start and stop actions is visible here. */
322 token_start = yytext;
	/*
	 * Validate the accumulated text: strspn() yields the length of the
	 * leading run of '0'/'1' characters, and the byte one position past
	 * that run must be the terminator.
	 * NOTE(review): strspn() is applied from literalbuf[0] and the index
	 * then skips one further character. If the buffer holds only binary
	 * digits this tests the byte after the terminating NUL; if it holds a
	 * one-character prefix the span is zero and any non-empty literal
	 * fails. Compare with how the start action fills the buffer (not
	 * visible here); scanning from literalbuf + 1 may have been intended.
	 */
329 if (literalbuf[strspn(literalbuf, "01") + 1] != '\0')
330 mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string input.");
	/*
	 * NOTE(review): this hands out the shared buffer itself, whereas the
	 * quoted-string and delimited-identifier actions pass
	 * mm_strdup(literalbuf). The next startlit()/addlit() call will
	 * overwrite the value -- confirm that the parser copies it first.
	 */
331 yylval.str = literalbuf;
	/* Body text is appended to the literal buffer. */
336 <xbit>{xbitinside} { addlit(yytext, yyleng); }
	/* The quote/whitespace/quote separator between concatenated pieces adds nothing. */
338 <xbit>{xbitcat} { /* ignore */ }
339 <xbit><<EOF>> { mmerror(PARSE_ERROR, ET_ERROR, "Unterminated bit string"); }
/* Hexadecimal string literal actions; only part of them is visible here. */
342 token_start = yytext;
	/*
	 * Convert the accumulated digits as base 16. The input is rejected if
	 * anything is left unconverted, if strtol() reports a range error, or
	 * (where long is wider than 32 bits) if the value does not survive a
	 * round trip through int32.
	 * NOTE(review): the ERANGE test is only meaningful if errno is cleared
	 * before the call; that assignment is not visible in this excerpt.
	 */
352 val = strtol(literalbuf, &endptr, 16);
353 if (*endptr != '\0' || errno == ERANGE
354 #ifdef HAVE_LONG_INT_64
355 /* if long > 32 bits, check for overflow of int4 */
356 || val != (long) ((int32) val)
359 mmerror(PARSE_ERROR, ET_ERROR, "Bad hexadecimal integer input");
364 <xh><<EOF>> { mmerror(PARSE_ERROR, ET_ERROR, "Unterminated hexadecimal integer"); }
/* Quoted string actions; only part of the start and stop actions is visible here. */
367 token_start = yytext;
368 state_before = YYSTATE;
	/* The finished literal is handed on as a private copy of the buffer. */
374 yylval.str = mm_strdup(literalbuf);
	/*
	 * NOTE(review): this prints "MM: <literal>" on standard output each
	 * time the action runs. It looks like leftover debugging output --
	 * confirm and remove.
	 */
375 printf("MM: %s\n", yylval.str);
	/* Two adjacent quotes inside the string contribute a single quote character. */
378 <xq>{xqdouble} { addlitchar('\''); }
379 <xq>{xqinside} { addlit(yytext, yyleng); }
	/*
	 * Escape sequences are appended exactly as matched, backslash
	 * included; nothing is decoded or dropped here.
	 */
380 <xq>{xqescape} { addlit(yytext, yyleng); }
381 <xq>{xqoctesc} { addlit(yytext, yyleng); }
	/* The separator between concatenated string pieces adds nothing. */
382 <xq>{xqcat} { /* ignore */ }
384 <xq><<EOF>> { mmerror(PARSE_ERROR, ET_ERROR, "Unterminated quoted string"); }
/* Delimited identifier (<xd>) and C string (<xdc>) actions; partly visible. */
387 state_before = YYSTATE;
394 mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
	/*
	 * Identifiers of NAMEDATALEN or more characters are cut to
	 * NAMEDATALEN-1 characters and a warning is issued.
	 * NOTE(review): the sprintf() below writes the full, untruncated
	 * identifier plus the shortened copy into errortext with no length
	 * limit. The size of errortext is not visible here -- verify that a
	 * very long identifier cannot overflow it.
	 */
395 if (literallen >= NAMEDATALEN)
397 sprintf(errortext, "identifier \"%s\" will be truncated to \"%.*s\"",
398 literalbuf, NAMEDATALEN-1, literalbuf);
399 literalbuf[NAMEDATALEN-1] = '\0';
400 mmerror(PARSE_ERROR, ET_WARNING, errortext);
403 yylval.str = mm_strdup(literalbuf);
408 yylval.str = mm_strdup(literalbuf);
	/* Two adjacent double quotes contribute a single double-quote character. */
411 <xd>{xddouble} { addlitchar('"'); }
412 <xd>{xdinside} { addlit(yytext, yyleng); }
	/* Both states share one end-of-input error message. */
413 <xd,xdc><<EOF>> { mmerror(PARSE_ERROR, ET_ERROR, "Unterminated quoted identifier"); }
415 state_before = YYSTATE;
419 <xdc>{xdcinside} { addlit(yytext, yyleng); }
420 <SQL>{typecast} { return TYPECAST; }
422 * We may find a ';' inside a structure
423 * definition in a TYPE or VAR statement.
424 * This is not an EOL marker.
	/* The branch below is taken only for a ';' seen while struct_level is 0. */
426 if (yytext[0] == ';' && struct_level == 0)
432 * Check for embedded slash-star or dash-dash; those
433 * are comment starts, so operator must stop there.
434 * Note that slash-star or dash-dash at the first
435 * character will match a prior rule, not this one.
	/*
	 * Locate both kinds of comment start inside the matched text. After
	 * the checks that follow, slashstar is meant to address the earliest
	 * one found, and nchars is cut back to the text in front of it.
	 */
438 char *slashstar = strstr(yytext, "/*");
439 char *dashdash = strstr(yytext, "--");
441 if (slashstar && dashdash)
443 /* if both appear, take the first one */
444 if (slashstar > dashdash)
445 slashstar = dashdash;
448 slashstar = dashdash;
450 nchars = slashstar - yytext;
453 * For SQL92 compatibility, '+' and '-' cannot be the
454 * last char of a multi-char operator unless the operator
455 * contains chars that are not in SQL92 operators.
456 * The idea is to lex '=-' as two operators, but not
457 * to forbid operator names like '?-' that could not be
458 * sequences of SQL92 operators.
461 (yytext[nchars-1] == '+' ||
462 yytext[nchars-1] == '-'))
	/*
	 * Walk the characters before the trailing sign from right to left,
	 * looking for one that is in the literal list below. That list and
	 * the single-character list further down are hand-written copies of
	 * parts of the op_chars and self definitions and must be kept in
	 * step with them.
	 */
466 for (ic = nchars-2; ic >= 0; ic--)
468 if (strchr("~!@#^&|`?$%", yytext[ic]))
472 break; /* found a char that makes it OK */
473 nchars--; /* else remove the +/-, and check again */
478 /* Strip the unwanted chars from the token */
481 * If what we have left is only one char, and it's
482 * one of the characters matching "self", then
483 * return it as a character token the same way
484 * that the "self" rule would have.
487 strchr(",()[].;$:+-*/%^<>=", yytext[0]))
491 /* Convert "!=" operator to "<>" for compatibility */
492 if (strcmp(yytext, "!=") == 0)
493 yylval.str = mm_strdup("<>");
495 yylval.str = mm_strdup(yytext);
/*
 * Numeric token actions; the rule patterns are not visible in this excerpt.
 *
 * The first action skips the first matched character and converts the
 * rest with atol(), which cannot report a conversion error.
 * NOTE(review): presumably the skipped character is a parameter marker --
 * confirm against the pattern.
 */
499 yylval.ival = atol(yytext+1);
	/*
	 * Decimal integer: the conversion counts as failed if text is left
	 * over, if strtol() reports a range error, or (where long is wider
	 * than 32 bits) if the value does not fit in int32.
	 * NOTE(review): the ERANGE test relies on errno being cleared before
	 * the call; that assignment is not visible here.
	 */
507 val = strtol((char *)yytext, &endptr,10);
508 if (*endptr != '\0' || errno == ERANGE
509 #ifdef HAVE_LONG_INT_64
510 /* if long > 32 bits, check for overflow of int4 */
511 || val != (long) ((int32) val)
	/*
	 * The matched text is passed on as a duplicated string.
	 * NOTE(review): this looks like the fallback taken when the integer
	 * conversion above fails -- the branch structure is not visible, confirm.
	 */
516 yylval.str = mm_strdup(yytext);
	/* The remaining literal forms pass their matched text through as a duplicated string. */
523 yylval.str = mm_strdup(yytext);
527 yylval.str = mm_strdup(yytext);
531 yylval.str = mm_strdup(yytext);
534 <SQL>:{identifier}(("->"|\.){identifier})* {
	/*
	 * Colon followed by an identifier, optionally continued by member
	 * accesses written with an arrow or a dot. The leading colon is
	 * dropped from the value handed on (yytext+1).
	 */
535 yylval.str = mm_strdup(yytext+1);
	/* Identifier handling: keyword tables are consulted first, then the DEFINE list. */
539 ScanKeyword *keyword;
540 struct _defines *ptr;
542 /* Is it an SQL keyword? */
543 keyword = ScanKeywordLookup(yytext);
545 return keyword->value;
547 /* Is it an ECPG keyword? */
548 keyword = ScanECPGKeywordLookup( yytext);
550 return keyword->value;
552 /* How about a DEFINE? */
	/* The comparison against each defined name is an exact strcmp() match. */
553 for (ptr = defines; ptr; ptr = ptr->next)
555 if (strcmp(yytext, ptr->old) == 0)
557 struct _yy_buffer *yb;
	/*
	 * Record the current input before switching away from it: the
	 * active buffer, the line number and a copy of the file name. The
	 * record is linked in front of the existing yy_buffer chain.
	 */
559 yb = mm_alloc(sizeof(struct _yy_buffer));
561 yb->buffer = YY_CURRENT_BUFFER;
562 yb->lineno = yylineno;
563 yb->filename = mm_strdup(input_filename);
564 yb->next = yy_buffer;
	/* Continue scanning from the replacement text of the matching definition. */
568 yy_scan_string(ptr->new);
574 * None of the above. Return it as an identifier.
576 * The backend would attempt to truncate and case-fold
577 * the identifier, but I see no good reason for ecpg
578 * to do so; that's just another way that ecpg could get
579 * out of step with the backend.
583 yylval.str = mm_strdup( yytext);
	/* Any other single character is returned as its own character code. */
587 <SQL>{other} { return yytext[0]; }
588 <C>{exec_sql} { BEGIN SQL; return SQL_START; }
589 <C>{ccomment} { /* ignore */ }
	/*
	 * Hexadecimal constant in C code, converted with base 16.
	 * NOTE(review): unlike the integer conversions in the SQL rules, the
	 * condition below has no int32 range check, and the ERANGE test
	 * depends on errno having been cleared first (not visible here).
	 */
594 yylval.ival = strtol((char *)yytext,&endptr,16);
595 if (*endptr != '\0' || errno == ERANGE)
598 yylval.str = mm_strdup(yytext);
604 yylval.str = mm_strdup(yytext);
	/* Identifier in C code: the C keyword table is consulted first. */
608 ScanKeyword *keyword;
610 keyword = ScanCKeywordLookup(yytext);
611 if (keyword != NULL) {
612 return keyword->value;
	/*
	 * Otherwise try the DEFINE list. The statements below repeat the
	 * save-and-switch sequence used by the identifier action of the SQL
	 * state: save buffer, line number and a copy of the file name, link
	 * the record in front of yy_buffer, then scan the replacement text.
	 */
616 struct _defines *ptr;
618 for (ptr = defines; ptr; ptr = ptr->next)
620 if (strcmp(yytext, ptr->old) == 0)
622 struct _yy_buffer *yb;
624 yb = mm_alloc(sizeof(struct _yy_buffer));
626 yb->buffer = YY_CURRENT_BUFFER;
627 yb->lineno = yylineno;
628 yb->filename = mm_strdup(input_filename);
629 yb->next = yy_buffer;
633 yy_scan_string(ptr->new);
639 yylval.str = mm_strdup(yytext);
644 <C>";" { return(';'); }
	/* Single-character punctuation in C code is returned as its own character code. */
645 <C>"," { return(','); }
646 <C>"*" { return('*'); }
647 <C>"%" { return('%'); }
648 <C>"/" { return('/'); }
649 <C>"+" { return('+'); }
650 <C>"-" { return('-'); }
651 <C>"(" { return('('); }
652 <C>")" { return(')'); }
654 <C>\{ { return('{'); }
655 <C>\} { return('}'); }
656 <C>\[ { return('['); }
657 <C>\] { return(']'); }
658 <C>\= { return('='); }
	/*
	 * Multi-character operators map to S_* token codes. Where one is a
	 * prefix of another (the arrow and the arrow-star forms), the longer
	 * one wins because the longest match is always chosen.
	 */
659 <C>"->" { return(S_MEMBER); }
660 <C>">>" { return(S_RSHIFT); }
661 <C>"<<" { return(S_LSHIFT); }
662 <C>"||" { return(S_OR); }
663 <C>"&&" { return(S_AND); }
664 <C>"++" { return(S_INC); }
665 <C>"--" { return(S_DEC); }
666 <C>"==" { return(S_EQUAL); }
667 <C>"!=" { return(S_NEQUAL); }
668 <C>"+=" { return(S_ADD); }
669 <C>"-=" { return(S_SUB); }
670 <C>"*=" { return(S_MUL); }
671 <C>"/=" { return(S_DIV); }
672 <C>"%=" { return(S_MOD); }
673 <C>"->*" { return(S_MEMPOINT); }
674 <C>".*" { return(S_DOTPOINT); }
	/* Anything not matched by an earlier C-state rule is reported as S_ANYTHING. */
675 <C>{other} { return S_ANYTHING; }
677 <C>{exec_sql}{define}{space}* { BEGIN(def_ident); }
678 <C>{exec_sql}{include}{space}* { BEGIN(incl); }
	/*
	 * ifcond records which test was requested (TRUE for ifdef, FALSE for
	 * ifndef); the <xcond> rule then reads the symbol name.
	 */
680 <C,xskip>{exec_sql}{ifdef}{space}* { ifcond = TRUE; BEGIN(xcond); }
681 <C,xskip>{exec_sql}{ifndef}{space}* { ifcond = FALSE; BEGIN(xcond); }
683 <C,xskip>{exec_sql}{elif}{space}* { /* pop stack */
684 if ( preproc_tos == 0 ) {
685 mmerror(PARSE_ERROR, ET_FATAL, "Missing matching 'EXEC SQL IFDEF / EXEC SQL IFNDEF'");
687 else if ( stacked_if_value[preproc_tos].else_branch )
688 mmerror(PARSE_ERROR, ET_FATAL, "Missing 'EXEC SQL ENDIF;'");
	/* An elif is evaluated like an ifdef: the symbol name is read in <xcond>. */
692 ifcond = TRUE; BEGIN(xcond);
695 <C,xskip>{exec_sql}{else}{space}*";" { /* only exec sql endif pops the stack, so take care of duplicated 'else' */
	/*
	 * NOTE(review): unlike the elif and endif actions, this action has no
	 * preproc_tos == 0 check. For an else outside any conditional the
	 * assignment below reads stacked_if_value[preproc_tos-1] with index
	 * -1. Confirm and add a guard.
	 */
696 if ( stacked_if_value[preproc_tos].else_branch ) {
697 mmerror(PARSE_ERROR, ET_FATAL, "Duplicated 'EXEC SQL ELSE;'");
	/*
	 * Mark the level as being in its else part; it becomes active only if
	 * the enclosing level is active and the level was not active before.
	 */
700 stacked_if_value[preproc_tos].else_branch = TRUE;
701 stacked_if_value[preproc_tos].condition =
702 (stacked_if_value[preproc_tos-1].condition &&
703 ! stacked_if_value[preproc_tos].condition);
705 if ( stacked_if_value[preproc_tos].condition )
711 <C,xskip>{exec_sql}{endif}{space}*";" {
712 if ( preproc_tos == 0 )
713 mmerror(PARSE_ERROR, ET_FATAL, "Unmatched 'EXEC SQL ENDIF;'");
717 if ( stacked_if_value[preproc_tos].condition )
	/* While skipping, every other character is discarded. */
723 <xskip>{other} { /* ignore */ }
725 <xcond>{identifier}{space}*";" {
	/* Refuse to nest deeper than the fixed-size conditional stack allows. */
726 if ( preproc_tos >= MAX_NESTED_IF-1 ) {
727 mmerror(PARSE_ERROR, ET_FATAL, "Too many nested 'EXEC SQL IFDEF' conditions");
731 struct _defines *defptr;
734 /* skip the ";" and trailing whitespace. Note that yytext contains
735 at least one non-space character plus the ";" */
736 for ( i = strlen(yytext)-2;
737 i > 0 && isspace((unsigned char) yytext[i]);
	/*
	 * Linear search of the DEFINE list with an empty loop body; defptr is
	 * NULL afterwards when the symbol is not defined.
	 */
742 for ( defptr = defines; defptr != NULL &&
743 ( strcmp(yytext, defptr->old) != 0 ); defptr = defptr->next );
	/*
	 * Set up the level at preproc_tos: it is active when the symbol's
	 * definedness agrees with the requested test (ifcond) and the level
	 * below it is active. The statement that advances preproc_tos is not
	 * visible in this excerpt.
	 */
746 stacked_if_value[preproc_tos].else_branch = FALSE;
747 stacked_if_value[preproc_tos].condition =
748 ( (defptr ? ifcond : !ifcond) && stacked_if_value[preproc_tos-1].condition );
751 if ( stacked_if_value[preproc_tos].condition )
757 <def_ident>{identifier} {
	/* Keep a private copy of the name being defined. */
758 old = mm_strdup(yytext);
763 struct _defines *ptr, *this;
	/*
	 * If the name is already on the DEFINE list, its replacement text is
	 * overwritten with a copy of the accumulated literal.
	 * NOTE(review): whether the previous ptr->new is released before the
	 * assignment is not visible in this excerpt -- confirm.
	 */
765 for (ptr = defines; ptr != NULL; ptr = ptr->next)
767 if (strcmp(old, ptr->old) == 0)
770 /* ptr->new = mm_strdup(scanstr(literalbuf));*/
771 ptr->new = mm_strdup(literalbuf);
	/* A new entry is allocated and linked in front of the existing list. */
776 this = (struct _defines *) mm_alloc(sizeof(struct _defines));
778 /* initial definition */
780 this->new = mm_strdup(literalbuf);
781 this->next = defines;
	/* Every character other than ';' is accumulated as replacement text. */
787 <def>[^;] { addlit(yytext, yyleng); }
790 /* got the include file name */
791 struct _yy_buffer *yb;
792 struct _include_path *ip;
793 char inc_file[MAXPGPATH];
	/*
	 * Save the current input before switching to the included file. The
	 * file name pointer itself is stored here, not a copy; input_filename
	 * receives a freshly allocated string further down.
	 */
796 yb = mm_alloc(sizeof(struct _yy_buffer));
798 yb->buffer = YY_CURRENT_BUFFER;
799 yb->lineno = yylineno;
800 yb->filename = input_filename;
801 yb->next = yy_buffer;
806 * skip the ";" and trailing whitespace. Note that yytext contains
807 * at least one non-space character plus the ";"
809 for ( i = strlen(yytext)-2;
810 i > 0 && isspace((unsigned char) yytext[i]);
	/* Try each include directory in turn until a file has been opened. */
816 for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
	/*
	 * NOTE(review): this bound reserves 3 bytes beyond the two string
	 * lengths, but the longest string built below is path, '/', name,
	 * ".h" and the terminating NUL, which needs 4. When the two lengths
	 * add up to exactly MAXPGPATH-3 the check passes and the strcat()
	 * below writes one byte past the end of inc_file. Use "+ 4" here, or
	 * build the name with a bounded formatting call.
	 */
818 if (strlen(ip->path) + strlen(yytext) + 3 > MAXPGPATH)
820 fprintf(stderr, "Error: Path %s/%s is too long in line %d, skipping.\n", ip->path, yytext, yylineno);
823 sprintf (inc_file, "%s/%s", ip->path, yytext);
824 yyin = fopen( inc_file, "r" );
	/*
	 * Names that do not already end in ".h" get that suffix appended and
	 * are tried again. Presumably this happens only after the first
	 * fopen() failed; the guarding condition is not visible here.
	 */
827 if (strcmp(inc_file + strlen(inc_file) - 2, ".h"))
829 strcat(inc_file, ".h");
830 yyin = fopen( inc_file, "r" );
	/*
	 * NOTE(review): sprintf() into errortext is not length-limited and
	 * yytext comes straight from the input; the size of errortext is not
	 * visible here -- verify it cannot overflow.
	 */
836 sprintf(errortext, "Cannot open include file %s in line %d\n", yytext, yylineno);
837 mmerror(NO_INCLUDE_FILE, ET_FATAL, errortext);
	/* From here on the included file is the current input. */
840 input_filename = mm_strdup(inc_file);
841 yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE ));
843 output_line_number();
/*
 * NOTE(review): this appears to be the action run when an input buffer is
 * exhausted; the rule pattern is not visible in this excerpt.
 * With no saved input left, a conditional that is still open is an error.
 */
849 if (yy_buffer == NULL) {
850 if ( preproc_tos > 0 )
853 mmerror(PARSE_ERROR, ET_FATAL, "Missing 'EXEC SQL ENDIF;'");
859 struct _yy_buffer *yb = yy_buffer;
	/*
	 * Discard the finished buffer and resume the most recently saved
	 * input, restoring its line number.
	 */
865 yy_delete_buffer( YY_CURRENT_BUFFER );
866 yy_switch_to_buffer(yy_buffer->buffer);
868 yylineno = yy_buffer->lineno;
870 /* We have to output the filename only if we change files here */
871 i = strcmp(input_filename, yy_buffer->filename);
	/*
	 * The current name is released before the saved one is adopted; the
	 * include action sets input_filename from mm_strdup().
	 */
873 free(input_filename);
874 input_filename = yy_buffer->filename;
	/* Unlink the record that has just been restored. */
876 yy_buffer = yy_buffer->next;
	/* Presumably executed only when i is non-zero; the test is not visible here. */
880 output_line_number();
/*
 * Initialisation fragment (the enclosing function header is not visible in
 * this excerpt). It seeds the conditional-stack entry at preproc_tos and
 * makes sure the literal buffer exists.
 */
892 stacked_if_value[preproc_tos].condition = ifcond;
893 stacked_if_value[preproc_tos].else_branch = FALSE;
895 /* initialize literal buffer to a reasonable but expansible size */
896 if (literalbuf == NULL)
/*
 * NOTE(review): no check of the malloc() result is visible. startlit() and
 * the append helpers write through literalbuf unconditionally, so a NULL
 * return would be dereferenced.
 */
899 literalbuf = (char *) malloc(literalalloc);
/*
 * addlit -- append yleng bytes starting at ytext to the literal buffer.
 *
 * The buffer is enlarged first when literallen + yleng would not leave
 * room for the terminating NUL. The bytes are copied with memcpy() using
 * the explicit length, so ytext need not be NUL-terminated.
 */
907 addlit(char *ytext, int yleng)
909 /* enlarge buffer if needed */
910 if ((literallen+yleng) >= literalalloc)
914 while ((literallen+yleng) >= literalalloc);
/*
 * NOTE(review): the realloc() result is stored straight back into
 * literalbuf and no check is visible. On failure the old buffer is lost
 * and the memcpy() below would write through a NULL pointer.
 */
915 literalbuf = (char *) realloc(literalbuf, literalalloc);
917 /* append new data, add trailing null */
918 memcpy(literalbuf+literallen, ytext, yleng);
/* The statement that advances literallen is not visible in this excerpt. */
920 literalbuf[literallen] = '\0';
/*
 * addlitchar -- append the single character ychar to the literal buffer.
 *
 * The buffer is enlarged first when one more character would not leave
 * room for the terminating NUL.
 */
924 addlitchar(unsigned char ychar)
926 /* enlarge buffer if needed */
927 if ((literallen+1) >= literalalloc)
/*
 * NOTE(review): the realloc() result is stored straight back into
 * literalbuf and no check is visible. On failure the old buffer is lost
 * and the store below would write through a NULL pointer.
 */
930 literalbuf = (char *) realloc(literalbuf, literalalloc);
932 /* append new data, add trailing null */
933 literalbuf[literallen] = ychar;
/* The statement that advances literallen is not visible in this excerpt. */
935 literalbuf[literallen] = '\0';