2 /*-------------------------------------------------------------------------
4 * scan.l - Scanner for the PL/pgSQL procedural language
7 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
12 * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.45 2006/03/09 21:29:38 momjian Exp $
14 *-------------------------------------------------------------------------
19 #include "mb/pg_wchar.h"
22 /* No reason to constrain amount of data slurped */
23 #define YY_READ_BUF_SIZE 16777216
25 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
/*
 * The macro below rewrites every three-argument fprintf() call that follows
 * it into ereport(ERROR, ...): the file and fmt arguments are discarded and
 * only msg is reported, through a fixed %s format.
 * NOTE(review): presumably the flex-generated yy_fatal_error() is the only
 * such caller in this file -- confirm against the generated scanner.
 */
27 #define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg)))
29 /* Handles to the buffer that the lexer uses internally */
30 static YY_BUFFER_STATE scanbufhandle;
/* scanstr is what plpgsql_yyerror() passes to internalerrquery(). */
33 static const char *scanstr; /* original input string */
/*
 * scanner_functype is set by plpgsql_scanner_init() and returned as the very
 * first token of a scan; scanner_typereported records that this has happened.
 */
35 static int scanner_functype;
36 static bool scanner_typereported;
/* Single-slot pushback, filled by plpgsql_push_back_token(). */
37 static int pushback_token;
38 static bool have_pushback_token;
/* Single-slot lookahead, used when checking for the two-word RETURN NEXT token. */
39 static int lookahead_token;
40 static bool have_lookahead_token;
/* plpgsql_scanner_lineno() resumes its newline search at cur_line_start. */
41 static const char *cur_line_start;
/* NOTE(review): presumably the line number that cur_line_start lies on -- confirm. */
42 static int cur_line_num;
43 static char *dolqstart; /* current $foo$ quote start string */
/*
 * plpgsql_get_string_value() skips dolqlen leading bytes and drops
 * 2 * dolqlen bytes in total, so for a dollar-quoted token this is the byte
 * length of the $foo$ delimiter.
 * NOTE(review): presumably zero for ordinary quoted strings -- confirm.
 */
44 static int dolqlen; /* signal to plpgsql_get_string_value */
/* Reset on every scanner entry; set by the whitespace rule and by the end-of-comment rule. */
46 bool plpgsql_SpaceScanned = false;
50 %option never-interactive
54 %option prefix="plpgsql_base_yy"
56 %option case-insensitive
64 ident_start [A-Za-z\200-\377_]
65 ident_cont [A-Za-z\200-\377_0-9\$]
67 quoted_ident (\"[^\"]*\")+
69 identifier ({ident_start}{ident_cont}*|{quoted_ident})
75 /* $foo$ style quotes ("dollar quoting")
76 * copied straight from the backend SQL parser
78 dolq_start [A-Za-z\200-\377_]
79 dolq_cont [A-Za-z\200-\377_0-9]
80 dolqdelim \$({dolq_start}{dolq_cont}*)?\$
85 * Local variables in scanner to remember where
86 * a string or comment started
90 char *start_charpos = NULL;
93 * Reset the state when entering the scanner
97 plpgsql_SpaceScanned = false;
100 * On the first call to a new source report the
101 * function's type (T_FUNCTION or T_TRIGGER)
104 if (!scanner_typereported)
106 scanner_typereported = true;
107 return scanner_functype;
114 := { return K_ASSIGN; }
	/*
	 * Keyword rules.  Because of %option case-insensitive these match in
	 * any letter case.  Both := and = yield K_ASSIGN, and both elseif and
	 * elsif yield K_ELSIF.  The #option rule is anchored with ^ and so is
	 * only recognized at the start of a line.
	 */
115 = { return K_ASSIGN; }
116 \.\. { return K_DOTDOT; }
117 alias { return K_ALIAS; }
118 begin { return K_BEGIN; }
119 close { return K_CLOSE; }
120 constant { return K_CONSTANT; }
121 continue { return K_CONTINUE; }
122 cursor { return K_CURSOR; }
123 debug { return K_DEBUG; }
124 declare { return K_DECLARE; }
125 default { return K_DEFAULT; }
126 diagnostics { return K_DIAGNOSTICS; }
127 else { return K_ELSE; }
128 elseif { return K_ELSIF; }
129 elsif { return K_ELSIF; }
130 end { return K_END; }
131 exception { return K_EXCEPTION; }
132 execute { return K_EXECUTE; }
133 exit { return K_EXIT; }
134 fetch { return K_FETCH; }
135 for { return K_FOR; }
136 from { return K_FROM; }
137 get { return K_GET; }
140 info { return K_INFO; }
141 into { return K_INTO; }
143 log { return K_LOG; }
144 loop { return K_LOOP; }
145 next { return K_NEXT; }
146 not { return K_NOT; }
147 notice { return K_NOTICE; }
148 null { return K_NULL; }
149 open { return K_OPEN; }
151 perform { return K_PERFORM; }
152 raise { return K_RAISE; }
153 rename { return K_RENAME; }
154 result_oid { return K_RESULT_OID; }
155 return { return K_RETURN; }
156 reverse { return K_REVERSE; }
157 row_count { return K_ROW_COUNT; }
158 select { return K_SELECT; }
159 then { return K_THEN; }
161 type { return K_TYPE; }
162 warning { return K_WARNING; }
163 when { return K_WHEN; }
164 while { return K_WHILE; }
166 ^#option { return O_OPTION; }
167 dump { return O_DUMP; }
173 * We set plpgsql_error_lineno in each rule so that errors reported
174 * in the pl_comp.c subroutines will point to the right place.
178 plpgsql_error_lineno = plpgsql_scanner_lineno();
179 return plpgsql_parse_word(yytext); }
180 {identifier}{space}*\.{space}*{identifier} {
181 plpgsql_error_lineno = plpgsql_scanner_lineno();
182 return plpgsql_parse_dblword(yytext); }
183 {identifier}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier} {
184 plpgsql_error_lineno = plpgsql_scanner_lineno();
185 return plpgsql_parse_tripword(yytext); }
186 {identifier}{space}*%TYPE {
187 plpgsql_error_lineno = plpgsql_scanner_lineno();
188 return plpgsql_parse_wordtype(yytext); }
189 {identifier}{space}*\.{space}*{identifier}{space}*%TYPE {
190 plpgsql_error_lineno = plpgsql_scanner_lineno();
191 return plpgsql_parse_dblwordtype(yytext); }
192 {identifier}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier}{space}*%TYPE {
193 plpgsql_error_lineno = plpgsql_scanner_lineno();
194 return plpgsql_parse_tripwordtype(yytext); }
195 {identifier}{space}*%ROWTYPE {
196 plpgsql_error_lineno = plpgsql_scanner_lineno();
197 return plpgsql_parse_wordrowtype(yytext); }
198 {identifier}{space}*\.{space}*{identifier}{space}*%ROWTYPE {
199 plpgsql_error_lineno = plpgsql_scanner_lineno();
200 return plpgsql_parse_dblwordrowtype(yytext); }
202 plpgsql_error_lineno = plpgsql_scanner_lineno();
203 return plpgsql_parse_word(yytext); }
204 {param}{space}*\.{space}*{identifier} {
205 plpgsql_error_lineno = plpgsql_scanner_lineno();
206 return plpgsql_parse_dblword(yytext); }
207 {param}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier} {
208 plpgsql_error_lineno = plpgsql_scanner_lineno();
209 return plpgsql_parse_tripword(yytext); }
210 {param}{space}*%TYPE {
211 plpgsql_error_lineno = plpgsql_scanner_lineno();
212 return plpgsql_parse_wordtype(yytext); }
213 {param}{space}*\.{space}*{identifier}{space}*%TYPE {
214 plpgsql_error_lineno = plpgsql_scanner_lineno();
215 return plpgsql_parse_dblwordtype(yytext); }
216 {param}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier}{space}*%TYPE {
217 plpgsql_error_lineno = plpgsql_scanner_lineno();
218 return plpgsql_parse_tripwordtype(yytext); }
219 {param}{space}*%ROWTYPE {
220 plpgsql_error_lineno = plpgsql_scanner_lineno();
221 return plpgsql_parse_wordrowtype(yytext); }
222 {param}{space}*\.{space}*{identifier}{space}*%ROWTYPE {
223 plpgsql_error_lineno = plpgsql_scanner_lineno();
224 return plpgsql_parse_dblwordrowtype(yytext); }
226 {digit}+ { return T_NUMBER; }
229 plpgsql_error_lineno = plpgsql_scanner_lineno();
231 (errcode(ERRCODE_DATATYPE_MISMATCH),
232 errmsg("unterminated quoted identifier")));
236 * Ignore whitespace, but remember that some was seen
239 {space}+ { plpgsql_SpaceScanned = true; }
247 \/\* { start_lineno = plpgsql_scanner_lineno();
250 <IN_COMMENT>\*\/ { BEGIN(INITIAL); plpgsql_SpaceScanned = true; }
253 <IN_COMMENT><<EOF>> {
254 plpgsql_error_lineno = start_lineno;
256 (errcode(ERRCODE_DATATYPE_MISMATCH),
257 errmsg("unterminated comment")));
261 * Collect anything inside of ''s and return one STRING token
263 * Hacking yytext/yyleng here lets us avoid using yymore(), which is
264 * a win for performance. It's safe because we know the underlying
265 * input buffer is not changing.
269 start_lineno = plpgsql_scanner_lineno();
270 start_charpos = yytext;
274 /* for now, treat the same as a regular literal */
275 start_lineno = plpgsql_scanner_lineno();
276 start_charpos = yytext;
280 <IN_STRING>\\ { /* can only happen with \ at EOF */ }
283 /* tell plpgsql_get_string_value it's not a dollar quote */
285 /* adjust yytext/yyleng to describe whole string token */
286 yyleng += (yytext - start_charpos);
287 yytext = start_charpos;
291 <IN_STRING>[^'\\]+ { }
293 plpgsql_error_lineno = start_lineno;
295 (errcode(ERRCODE_DATATYPE_MISMATCH),
296 errmsg("unterminated string")));
300 start_lineno = plpgsql_scanner_lineno();
301 start_charpos = yytext;
302 dolqstart = pstrdup(yytext);
303 BEGIN(IN_DOLLARQUOTE);
305 <IN_DOLLARQUOTE>{dolqdelim} {
306 if (strcmp(yytext, dolqstart) == 0)
309 /* tell plpgsql_get_string_value it is a dollar quote */
311 /* adjust yytext/yyleng to describe whole string token */
312 yyleng += (yytext - start_charpos);
313 yytext = start_charpos;
320 * When we fail to match $...$ to dolqstart, transfer
321 * the $... part to the output, but put back the final
322 * $ for rescanning. Consider $delim$...$junk$delim$
327 <IN_DOLLARQUOTE>{dolqinside} { }
328 <IN_DOLLARQUOTE>. { /* needed for $ inside the quoted text */ }
329 <IN_DOLLARQUOTE><<EOF>> {
330 plpgsql_error_lineno = start_lineno;
332 (errcode(ERRCODE_DATATYPE_MISMATCH),
333 errmsg("unterminated dollar-quoted string")));
337 * Any unmatched character is returned as is
340 . { return yytext[0]; }
346 * This is the yylex routine called from outside. It exists to provide
347 * a pushback facility, as well as to allow us to parse syntax that
348 * requires more than one token of lookahead.
/*
 * Choose the next token.  A token handed back through
 * plpgsql_push_back_token() takes priority; next comes a token that an
 * earlier call already buffered as lookahead.
 */
355 if (have_pushback_token)
357 have_pushback_token = false;
358 cur_token = pushback_token;
360 else if (have_lookahead_token)
362 have_lookahead_token = false;
363 cur_token = lookahead_token;
368 /* Do we need to look ahead for a possible multiword token? */
371 /* RETURN NEXT must be reduced to a single token */
/* Read one token ahead with yylex() unless a lookahead token is already buffered. */
373 if (!have_lookahead_token)
375 lookahead_token = yylex();
376 have_lookahead_token = true;
/*
 * When the token after RETURN is K_NEXT, the buffered token is consumed and
 * the pair is reported as the single token K_RETURN_NEXT.
 * NOTE(review): for any other token have_lookahead_token appears to stay
 * true, so that token would be delivered by the next call -- confirm.
 */
378 if (lookahead_token == K_NEXT)
380 have_lookahead_token = false;
381 cur_token = K_RETURN_NEXT;
393 * Push back a single token to be re-read by next plpgsql_yylex() call.
396 plpgsql_push_back_token(int token)
/*
 * token: the token code to hand back.  It is stored in the single pushback
 * slot, which plpgsql_yylex() checks before its lookahead slot.
 * Only one token can be pending at a time: pushing a second one before the
 * first has been consumed is reported through elog(ERROR).
 */
398 if (have_pushback_token)
399 elog(ERROR, "cannot push back multiple tokens");
/* NOTE(review): the stores below rely on elog(ERROR) not returning -- confirm. */
400 pushback_token = token;
401 have_pushback_token = true;
405 * Report a syntax error.
408 plpgsql_yyerror(const char *message)
/*
 * message: caller-supplied text used as the leading %s of the reported error.
 * Both report variants carry ERRCODE_SYNTAX_ERROR, a cursor position
 * computed from the current token, and the original source text (scanstr).
 */
410 const char *loc = yytext;
/* Refresh the global line number so the report points at the current token. */
413 plpgsql_error_lineno = plpgsql_scanner_lineno();
415 /* in multibyte encodings, return index in characters not bytes */
/*
 * loc - scanbuf is the byte offset of the current token within scanbuf.
 * NOTE(review): the + 1 presumably converts the count of preceding
 * characters into the 1-based position internalerrposition() expects -- confirm.
 */
416 cursorpos = pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1;
/*
 * A token pointer resting on the flex end-of-buffer sentinel means there is
 * no token text to quote, so the message says "at end of input"; otherwise
 * the text starting at loc is quoted in the message.
 */
418 if (*loc == YY_END_OF_BUFFER_CHAR)
421 (errcode(ERRCODE_SYNTAX_ERROR),
422 /* translator: %s is typically "syntax error" */
423 errmsg("%s at end of input", message),
424 internalerrposition(cursorpos),
425 internalerrquery(scanstr)));
430 (errcode(ERRCODE_SYNTAX_ERROR),
431 /* translator: first %s is typically "syntax error" */
432 errmsg("%s at or near \"%s\"", message, loc),
433 internalerrposition(cursorpos),
434 internalerrquery(scanstr)));
439 * Get the line number at which the current token ends. This substitutes
440 * for flex's very poorly implemented yylineno facility.
442 * We assume that flex has written a '\0' over the character following the
443 * current token in scanbuf. So, we just have to count the '\n' characters
444 * before that. We optimize this a little by keeping track of the last
 * line start already found (cur_line_start), so earlier text is not rescanned.
448 plpgsql_scanner_lineno(void)
/*
 * Each strchr() hit moves cur_line_start to the byte just after that
 * newline, so a later call resumes from there instead of starting over at
 * the beginning of the buffer.  The loop ends when strchr() reaches a NUL
 * byte without finding another newline.
 */
452 while ((c = strchr(cur_line_start, '\n')) != NULL)
454 cur_line_start = c + 1;
461 * Called before any actual parsing is done
463 * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
464 * Although it is not fed directly to flex, we need the original string
465 * to cite in error messages.
468 plpgsql_scanner_init(const char *str, int functype)
/*
 * str: source text to scan; slen bytes of it are copied into a newly
 * allocated scanbuf, and flex scans that copy rather than str itself.
 * functype: stored in scanner_functype and returned as the first token of
 * the scan.
 * Also clears the pushback and lookahead slots and points the line counter
 * at the start of the new buffer.
 */
475 * Might be left over after ereport()
477 if (YY_CURRENT_BUFFER)
478 yy_delete_buffer(YY_CURRENT_BUFFER);
481 * Make a scan buffer with special termination needed by flex.
483 scanbuf = palloc(slen + 2);
484 memcpy(scanbuf, str, slen);
/* Two sentinel bytes follow the slen copied bytes, hence the slen + 2 allocation. */
485 scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
486 scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
491 scanner_functype = functype;
/* Re-arm the one-time report of the function type token. */
492 scanner_typereported = false;
/* Discard any token buffered by a previous scan. */
494 have_pushback_token = false;
495 have_lookahead_token = false;
497 cur_line_start = scanbuf;
501 * Hack: skip any initial newline, so that in the common coding layout
502 * CREATE FUNCTION ... AS '
504 * ' LANGUAGE plpgsql;
505 * we will think "line 1" is what the programmer thinks of as line 1.
508 if (*cur_line_start == '\r')
/*
 * NOTE(review): the carriage-return test comes before the newline test,
 * presumably so that both a CR LF pair and a lone LF are skipped -- confirm.
 */
510 if (*cur_line_start == '\n')
517 * Called after parsing is done to clean up after plpgsql_scanner_init()
520 plpgsql_scanner_finish(void)
/* Releases the flex buffer handle that plpgsql_scanner_init() obtained from yy_scan_buffer(). */
522 yy_delete_buffer(scanbufhandle);
527 * Called after a T_STRING token is read to get the string literal's value
528 * as a palloc'd string. (We make this a separate call because in many
529 * scenarios there's no need to get the decoded value.)
531 * Note: we expect the literal to be the most recently lexed token. This
532 * would not work well if we supported multiple-token pushback or if
533 * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
536 plpgsql_get_string_value(void)
544 /* Token is a $foo$...$foo$ string */
545 len = yyleng - 2 * dolqlen;
547 result = (char *) palloc(len + 1);
548 memcpy(result, yytext + dolqlen, len);
551 else if (*yytext == 'E' || *yytext == 'e')
553 /* Token is an E'...' string */
554 result = (char *) palloc(yyleng + 1); /* more than enough room */
556 for (cp = yytext + 2; *cp; cp++)
561 result[len++] = *cp++;
562 /* else it must be string end quote */
564 else if (*cp == '\\')
566 if (cp[1] != '\0') /* just a paranoid check */
567 result[len++] = *(++cp);
576 /* Token is a '...' string */
577 result = (char *) palloc(yyleng + 1); /* more than enough room */
579 for (cp = yytext + 1; *cp; cp++)
584 result[len++] = *cp++;
585 /* else it must be string end quote */
587 else if (*cp == '\\')
589 if (cp[1] != '\0') /* just a paranoid check */
590 result[len++] = *(++cp);