2 /*-------------------------------------------------------------------------
5 * lexical scanner for ecpg
7 * This is a modified version of src/backend/parser/scan.l
10 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
11 * Portions Copyright (c) 1994, Regents of the University of California
15 * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.54 2000/03/15 19:09:10 meskes Exp $
17 *-------------------------------------------------------------------------
20 #include <sys/types.h>
26 #include "miscadmin.h"
27 #include "nodes/parsenodes.h"
28 #include "nodes/pg_list.h"
29 #include "parser/scansup.h"
32 #include "utils/builtins.h"
34 /* some versions of lex define this as a macro */
39 extern YYSTYPE yylval;
42 * literalbuf is used to accumulate literal values when multiple rules
43 * are needed to parse a single literal. Call startlit to reset buffer
44 * to empty, addlit to add text. Note that the buffer is permanently
45 * malloc'd to the largest size needed so far in the current run.
47 static char *literalbuf = NULL; /* expandable buffer */
48 static int literallen; /* actual current length */
49 static int literalalloc; /* current allocated buffer size */
51 #define startlit() (literalbuf[0] = '\0', literallen = 0)
52 static void addlit(char *ytext, int yleng);
56 struct _yy_buffer { YY_BUFFER_STATE buffer;
59 struct _yy_buffer * next;
64 #define MAX_NESTED_IF 128
65 static short preproc_tos;
67 static struct _if_value {
70 } stacked_if_value[MAX_NESTED_IF];
75 %s C SQL incl def def_ident
78 * OK, here is a short description of lex/flex rules behavior.
79 * The longest pattern which matches an input string is always chosen.
80 * For equal-length patterns, the first occurring in the rules list is chosen.
81 * INITIAL is the starting state, to which all non-conditional rules apply.
82 * Exclusive states change parsing rules while the state is active. When in
83 * an exclusive state, only those rules defined for that state apply.
85 * We use exclusive states for quoted strings, extended comments,
86 * and to eliminate parsing troubles for numeric strings.
88 * <xb> binary numeric string - thomas 1997-11-16
89 * <xc> extended C-style comments - tgl 1997-07-12
90 * <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
91 * <xh> hexadecimal numeric string - thomas 1997-11-16
92 * <xq> quoted strings - tgl 1997-07-30
110 xbcat {quote}{whitespace_with_newline}{quote}
112 /* Hexadecimal number
117 xhcat {quote}{whitespace_with_newline}{quote}
119 /* C version of hex number
121 xch 0[xX][0-9A-Fa-f]*
124 * xqdouble implements SQL92 embedded quote
125 * xqcat allows strings to cross input lines
130 xqdouble {quote}{quote}
133 xqcat {quote}{whitespace_with_newline}{quote}
136 * Allows embedded spaces and other special characters into identifiers.
143 /* special stuff for C strings */
147 xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
150 * The "extended comment" syntax closely resembles allowable operator syntax.
151 * The tricky part here is to get lex to recognize a string starting with
152 * slash-star as a comment, when interpreting it as an operator would produce
153 * a longer match --- remember lex will prefer a longer match! Also, if we
154 * have something like plus-slash-star, lex will think this is a 3-character operator whereas we want to see it as a + operator and a comment start.
155 * The solution is two-fold:
156 * 1. append {op_and_self}* to xcstart so that it matches as much text as
157 * {operator} would. Then the tie-breaker (first matching rule of same
158 * length) ensures xcstart wins. We put back the extra stuff with yyless()
159 * in case it contains a star-slash that should terminate the comment.
160 * 2. In the operator rule, check for slash-star within the operator, and
161 * if found throw it back with yyless(). This handles the plus-slash-star
163 * SQL92-style comments, which start with dash-dash, have similar interactions
164 * with the operator rule.
166 xcstart \/\*{op_and_self}*
168 xcinside ([^*]+)|(\*+[^/])
171 letter [\200-\377_A-Za-z]
172 letter_or_digit [\200-\377_A-Za-z0-9]
174 identifier {letter}{letter_or_digit}*
178 /* NB: if you change "self", fix the copy in the operator rule too! */
179 self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
180 op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
181 operator {op_and_self}+
183 /* we no longer allow unary minus in numbers.
184 * instead we pass it separately to parser. there it gets
185 * coerced via doNegate() -- Leon aug 20 1999
189 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
190 real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
195 * In order to make the world safe for Windows and Mac clients as well as
196 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
197 * sequence will be seen as two successive newlines, but that doesn't cause
198 * any problems. SQL92-style comments, which start with -- and extend to the
199 * next newline, are treated as equivalent to a single whitespace character.
201 * NOTE a fine point: if there is no newline following --, we will absorb
202 * everything to the end of the input as a comment. This is correct. Older
203 * versions of Postgres failed to recognize -- as a comment if the input
204 * did not end with a newline.
206 * XXX perhaps \f (formfeed) should be treated as a newline as well?
212 space_or_nl [ \t\r\f\n]
218 comment (("--"|"//"){non_newline}*)
220 whitespace ({space}|{comment})
223 * SQL92 requires at least one newline in the whitespace separating
224 * string literals that are to be concatenated. Silly, but who are we
225 * to argue? Note that {whitespace_with_newline} should not have * after
226 * it, whereas {whitespace} should generally have a * after it...
229 horiz_whitespace ({horiz_space}|{comment})
230 whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
234 /* some stuff needed for ecpg */
235 exec [eE][xX][eE][cC]
237 define [dD][eE][fF][iI][nN][eE]
238 include [iI][nN][cC][lL][uU][dD][eE]
240 ifdef [iI][fF][dD][eE][fF]
241 ifndef [iI][fF][nN][dD][eE][fF]
242 else [eE][lL][sS][eE]
243 elif [eE][lL][iI][fF]
244 endif [eE][nN][dD][iI][fF]
246 exec_sql {exec}{space_or_nl}*{sql}{space_or_nl}*
248 /* Take care of cpp continuation lines */
249 cppline {space}*#(.*\\{line_end})*.*
251 /* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
252 * AT&T lex does not properly handle C-style comments in this second lex block.
253 * So, put comments here. tgl - 1997-09-08
255 * Quoted strings must allow some special characters such as single-quote
257 * Embedded single-quotes are implemented both in the SQL92-standard
258 * style of two adjacent single quotes "''" and in the Postgres/Java style
259 * of escaped-quote "\'".
260 * Other embedded escaped characters are matched explicitly and the leading
261 * backslash is dropped from the string. - thomas 1997-09-24
262 * Note that xcstart must appear before operator, as explained above!
263 * Also whitespace (comment) must appear before operator.
267 <SQL>{whitespace} { /* ignore */ }
270 state_before = YYSTATE;
273 /* Put back any characters past slash-star; see above */
277 <xc>{xcstop} { ECHO; BEGIN(state_before); }
279 <xc>{xcinside} { ECHO; }
281 <xc><<EOF>> { mmerror(ET_ERROR, "Unterminated /* comment"); }
292 yylval.ival = strtol(literalbuf, &endptr, 2);
293 if (*endptr != '\0' || errno == ERANGE)
294 mmerror(ET_ERROR, "Bad binary integer input!");
297 <xb><<EOF>> { mmerror(ET_ERROR, "Unterminated binary integer"); }
301 addlit(yytext, yyleng);
304 <xb>{xbcat} { /* ignore */
316 yylval.ival = strtol(literalbuf, &endptr, 16);
317 if (*endptr != '\0' || errno == ERANGE)
318 mmerror(ET_ERROR, "Bad hexadecimal integer input");
322 <xh><<EOF>> { /* EOF inside a hexadecimal numeric string */ mmerror(ET_ERROR, "Unterminated hexadecimal integer"); }
325 state_before = YYSTATE;
331 yylval.str = mm_strdup(literalbuf);
337 addlit(yytext, yyleng);
343 <xq><<EOF>> { mmerror(ET_ERROR, "Unterminated quoted string"); }
346 state_before = YYSTATE;
352 yylval.str = mm_strdup(literalbuf);
356 addlit(yytext, yyleng);
358 <xd><<EOF>> { /* EOF inside a delimited (double-quoted) identifier */ mmerror(ET_ERROR, "Unterminated quoted identifier"); }
359 <SQL>{typecast} { return TYPECAST; }
361 * We may find a ';' inside a structure
362 * definition in a TYPE or VAR statement.
363 * This is not an EOL marker.
365 if (yytext[0] == ';' && struct_level == 0)
370 /* Check for embedded slash-star or dash-dash */
371 char *slashstar = strstr((char*)yytext, "/*");
372 char *dashdash = strstr((char*)yytext, "--");
374 if (slashstar && dashdash)
376 if (slashstar > dashdash)
377 slashstar = dashdash;
380 slashstar = dashdash;
384 int nchars = slashstar - ((char*)yytext);
386 /* If what we have left is only one char, and it's
387 * one of the characters matching "self", then
388 * return it as a character token the same way
389 * that the "self" rule would have.
392 strchr(",()[].;$:+-*/%^<>=|", yytext[0]))
396 if (strcmp((char*)yytext, "!=") == 0)
397 yylval.str = mm_strdup("<>"); /* compatability */
399 yylval.str = mm_strdup((char*)yytext);
403 yylval.ival = atol((char*)&yytext[1]);
410 yylval.ival = strtol((char *)yytext, &endptr,10);
411 if (*endptr != '\0' || errno == ERANGE)
414 yylval.str = mm_strdup((char*)yytext);
420 yylval.str = mm_strdup((char*)yytext);
424 yylval.str = mm_strdup((char*)yytext);
427 <SQL>:{identifier}(("->"|\.){identifier})* {
428 yylval.str = mm_strdup((char*)yytext+1);
433 ScanKeyword *keyword;
434 char lower_text[NAMEDATALEN];
436 /* this should leave the last byte set to '\0' */
437 strncpy(lower_text, yytext, NAMEDATALEN-1);
438 for(i = 0; lower_text[i]; i++)
439 if (isascii((unsigned char)lower_text[i]) && isupper(lower_text[i]))
440 lower_text[i] = tolower(lower_text[i]);
442 if (i >= NAMEDATALEN)
444 sprintf(errortext, "Identifier \"%s\" will be truncated to \"%.*s\"", yytext, NAMEDATALEN-1, yytext);
445 mmerror (ET_WARN, errortext);
446 yytext[NAMEDATALEN-1] = '\0';
449 keyword = ScanKeywordLookup((char*)lower_text);
450 if (keyword != NULL) {
451 return keyword->value;
455 keyword = ScanECPGKeywordLookup((char*)lower_text);
456 if (keyword != NULL) {
457 return keyword->value;
461 struct _defines *ptr;
463 for (ptr = defines; ptr; ptr = ptr->next)
465 if (strcmp(yytext, ptr->old) == 0)
467 struct _yy_buffer *yb;
469 yb = mm_alloc(sizeof(struct _yy_buffer));
471 yb->buffer = YY_CURRENT_BUFFER;
472 yb->lineno = yylineno;
473 yb->filename = mm_strdup(input_filename);
474 yb->next = yy_buffer;
478 yy_scan_string(ptr->new);
484 yylval.str = mm_strdup((char*)yytext);
490 <SQL>{other} { return yytext[0]; }
491 <C>{exec_sql} { BEGIN SQL; return SQL_START; }
492 <C>{ccomment} { /* ignore */ }
497 yylval.ival = strtol((char *)yytext,&endptr,16);
498 if (*endptr != '\0' || errno == ERANGE)
501 yylval.str = mm_strdup((char*)yytext);
507 yylval.str = mm_strdup((char*)yytext);
511 ScanKeyword *keyword;
513 keyword = ScanCKeywordLookup((char*)yytext);
514 if (keyword != NULL) {
515 return keyword->value;
519 struct _defines *ptr;
521 for (ptr = defines; ptr; ptr = ptr->next)
523 if (strcmp(yytext, ptr->old) == 0)
525 struct _yy_buffer *yb;
527 yb = mm_alloc(sizeof(struct _yy_buffer));
529 yb->buffer = YY_CURRENT_BUFFER;
530 yb->lineno = yylineno;
531 yb->filename = mm_strdup(input_filename);
532 yb->next = yy_buffer;
536 yy_scan_string(ptr->new);
542 yylval.str = mm_strdup((char*)yytext);
547 <C>";" { return(';'); }
548 <C>"," { return(','); }
549 <C>"*" { return('*'); }
550 <C>"%" { return('%'); }
551 <C>"/" { return('/'); }
552 <C>"+" { return('+'); }
553 <C>"-" { return('-'); }
554 <C>"(" { return('('); }
555 <C>")" { return(')'); }
556 <C>{space_or_nl} { ECHO; }
557 <C>\{ { return('{'); }
558 <C>\} { return('}'); }
559 <C>\[ { return('['); }
560 <C>\] { return(']'); }
561 <C>\= { return('='); }
562 <C>{other} { return S_ANYTHING; }
564 <C>{exec_sql}{define}{space_or_nl}* { BEGIN(def_ident); }
565 <C>{exec_sql}{include}{space_or_nl}* { BEGIN(incl); }
567 <C,xskip>{exec_sql}{ifdef}{space_or_nl}* { ifcond = TRUE; BEGIN(xcond); }
568 <C,xskip>{exec_sql}{ifndef}{space_or_nl}* { ifcond = FALSE; BEGIN(xcond); }
570 <C,xskip>{exec_sql}{elif}{space_or_nl}* { /* pop stack */
571 if ( preproc_tos == 0 ) {
572 mmerror(ET_FATAL, "Missing matching 'EXEC SQL IFDEF / EXEC SQL IFNDEF'");
574 else if ( stacked_if_value[preproc_tos].else_branch ) {
575 mmerror(ET_FATAL, "Missing 'EXEC SQL ENDIF;'");
581 ifcond = TRUE; BEGIN(xcond);
584 <C,xskip>{exec_sql}{else}{space_or_nl}*";" { /* only exec sql endif pops the stack, so take care of duplicated 'else' */
585 if ( stacked_if_value[preproc_tos].else_branch ) {
586 mmerror(ET_FATAL, "Duplicated 'EXEC SQL ELSE;'");
589 stacked_if_value[preproc_tos].else_branch = TRUE;
590 stacked_if_value[preproc_tos].condition =
591 (stacked_if_value[preproc_tos-1].condition &&
592 ! stacked_if_value[preproc_tos].condition);
594 if ( stacked_if_value[preproc_tos].condition ) {
602 <C,xskip>{exec_sql}{endif}{space_or_nl}*";" {
603 if ( preproc_tos == 0 ) {
604 mmerror(ET_FATAL, "Unmatched 'EXEC SQL ENDIF;'");
610 if ( stacked_if_value[preproc_tos].condition ) {
618 <xskip>{other} { /* ignore */ }
620 <xcond>{identifier}{space_or_nl}*";" {
621 if ( preproc_tos >= MAX_NESTED_IF-1 ) {
622 mmerror(ET_FATAL, "Too many nested 'EXEC SQL IFDEF' conditions");
625 struct _defines *defptr;
628 /* skip the ";" and trailing whitespace. Note that yytext contains
629 at least one non-space character plus the ";" */
630 for ( i = strlen(yytext)-2; i > 0 && isspace(yytext[i]); i-- ) {}
633 for ( defptr = defines; defptr != NULL &&
634 ( strcmp((char*)yytext, defptr->old) != 0 ); defptr = defptr->next );
637 stacked_if_value[preproc_tos].else_branch = FALSE;
638 stacked_if_value[preproc_tos].condition =
639 ( (defptr ? ifcond : !ifcond) && stacked_if_value[preproc_tos-1].condition );
642 if ( stacked_if_value[preproc_tos].condition ) {
650 <def_ident>{identifier} {
651 old = mm_strdup(yytext);
655 <def>{space_or_nl}*";" {
656 struct _defines *ptr, *this;
658 for (ptr = defines; ptr != NULL; ptr = ptr->next)
660 if (strcmp(old, ptr->old) == 0)
663 /* ptr->new = mm_strdup(scanstr(literalbuf));*/
664 ptr->new = mm_strdup(literalbuf);
669 this = (struct _defines *) mm_alloc(sizeof(struct _defines));
671 /* initial definition */
673 /* this->new = mm_strdup(scanstr(literalbuf));*/
674 this->new = mm_strdup(literalbuf);
675 this->next = defines;
682 addlit(yytext, yyleng);
685 <incl>[^";"]+";" { /* got the include file name */
686 struct _yy_buffer *yb;
687 struct _include_path *ip;
688 char inc_file[MAXPGPATH];
691 yb = mm_alloc(sizeof(struct _yy_buffer));
693 yb->buffer = YY_CURRENT_BUFFER;
694 yb->lineno = yylineno;
695 yb->filename = input_filename;
696 yb->next = yy_buffer;
700 /* skip the ";" and trailing whitespace. Note that yytext contains
701 at least one non-space character plus the ";" */
702 for ( i = strlen(yytext)-2; i > 0 && isspace(yytext[i]); i-- ) {}
706 for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
708 if (strlen(ip->path) + strlen(yytext) + 3 > MAXPGPATH)
710 fprintf(stderr, "Error: Path %s/%s is too long in line %d, skipping.\n", ip->path, yytext, yylineno);
713 sprintf (inc_file, "%s/%s", ip->path, yytext);
714 yyin = fopen( inc_file, "r" );
717 if (strcmp(inc_file + strlen(inc_file) - 2, ".h"))
719 strcat(inc_file, ".h");
720 yyin = fopen( inc_file, "r" );
727 fprintf(stderr, "Error: Cannot open include file %s in line %d\n", yytext, yylineno);
728 exit(NO_INCLUDE_FILE);
731 input_filename = mm_strdup(inc_file);
732 yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE ));
734 output_line_number();
740 if ( preproc_tos > 0 ) {
743 mmerror(ET_FATAL, "Missing 'EXEC SQL ENDIF;'");
746 if (yy_buffer == NULL)
750 struct _yy_buffer *yb = yy_buffer;
755 yy_delete_buffer( YY_CURRENT_BUFFER );
756 yy_switch_to_buffer(yy_buffer->buffer);
758 yylineno = yy_buffer->lineno;
760 free(input_filename);
761 input_filename = yy_buffer->filename;
763 yy_buffer = yy_buffer->next;
765 output_line_number();
776 stacked_if_value[preproc_tos].condition = ifcond;
777 stacked_if_value[preproc_tos].else_branch = FALSE;
779 /* initialize literal buffer to a reasonable but expansible size */
780 if (literalbuf == NULL)
783 literalbuf = (char *) malloc(literalalloc);
791 addlit(char *ytext, int yleng)
793 /* enlarge buffer if needed */
794 if ((literallen+yleng) >= literalalloc)
798 } while ((literallen+yleng) >= literalalloc);
799 literalbuf = (char *) realloc(literalbuf, literalalloc);
801 /* append data --- note we assume ytext is null-terminated */
802 memcpy(literalbuf+literallen, ytext, yleng+1);