granicus.if.org Git - postgresql/blob - src/bin/psql/psqlscan.l
Split psql's lexer into two separate .l files for SQL and backslash cases.
[postgresql] / src / bin / psql / psqlscan.l
1 %top{
2 /*-------------------------------------------------------------------------
3  *
4  * psqlscan.l
5  *        lexical scanner for psql (and other frontend programs)
6  *
7  * This code is mainly needed to determine where the end of a SQL statement
8  * is: we are looking for semicolons that are not within quotes, comments,
9  * or parentheses.  The most reliable way to handle this is to borrow the
10  * backend's flex lexer rules, lock, stock, and barrel.  The rules below
11  * are (except for a few) the same as the backend's, but their actions are
12  * just ECHO whereas the backend's actions generally do other things.
13  *
14  * XXX The rules in this file must be kept in sync with the backend lexer!!!
15  *
16  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
17  *
18  * See psqlscan_int.h for additional commentary.
19  *
20  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
21  * Portions Copyright (c) 1994, Regents of the University of California
22  *
23  * IDENTIFICATION
24  *        src/bin/psql/psqlscan.l
25  *
26  *-------------------------------------------------------------------------
27  */
28 #include "postgres_fe.h"
29
30 #include "psqlscan.h"
31
32 #include "libpq-fe.h"
33 }
34
35 %{
36 #include "psqlscan_int.h"
37
38 /*
39  * Set the type of yyextra; we use it as a pointer back to the containing
40  * PsqlScanState.
41  */
42 #define YY_EXTRA_TYPE PsqlScanState
43
44
45 /* Return values from yylex() */
46 #define LEXRES_EOL                      0       /* end of input */
47 #define LEXRES_SEMI                     1       /* command-terminating semicolon found */
48 #define LEXRES_BACKSLASH        2       /* backslash command start */
49
50
51 static bool var_is_current_source(PsqlScanState state, const char *varname);
52 /* Rule actions use ECHO to hand the matched text to psqlscan_emit(); it relies on yylex()'s local cur_state. */
53 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
54
55 /*
56  * Work around a bug in flex 2.5.35: it emits a couple of functions that
57  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
58  * this would cause warnings.  Providing our own declarations should be
59  * harmless even when the bug gets fixed.
60  */
61 extern int      psql_yyget_column(yyscan_t yyscanner);
62 extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
63
64 %}
65
66 %option reentrant
67 %option 8bit
68 %option never-interactive
69 %option nodefault
70 %option noinput
71 %option nounput
72 %option noyywrap
73 %option warn
74 %option prefix="psql_yy"
75
76 /*
77  * All of the following definitions and rules should exactly match
78  * src/backend/parser/scan.l so far as the flex patterns are concerned.
79  * The rule bodies are just ECHO as opposed to what the backend does,
80  * however.  (But be sure to duplicate code that affects the lexing process,
81  * such as BEGIN().)  Also, psqlscan uses a single <<EOF>> rule whereas
82  * scan.l has a separate one for each exclusive state.
83  */
84
85 /*
86  * OK, here is a short description of lex/flex rules behavior.
87  * The longest pattern which matches an input string is always chosen.
88  * For equal-length patterns, the first occurring in the rules list is chosen.
89  * INITIAL is the starting state, to which all non-conditional rules apply.
90  * Exclusive states change parsing rules while the state is active.  When in
91  * an exclusive state, only those rules defined for that state apply.
92  *
93  * We use exclusive states for quoted strings, extended comments,
94  * and to eliminate parsing troubles for numeric strings.
95  * Exclusive states:
96  *  <xb> bit string literal
97  *  <xc> extended C-style comments
98  *  <xd> delimited identifiers (double-quoted identifiers)
99  *  <xh> hexadecimal numeric string
100  *  <xq> standard quoted strings
101  *  <xe> extended quoted strings (support backslash escape sequences)
102  *  <xdolq> $foo$ quoted strings
103  *  <xui> quoted identifier with Unicode escapes
104  *  <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
105  *  <xus> quoted string with Unicode escapes
106  *  <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
107  *
108  * Note: we intentionally don't mimic the backend's <xeu> state; we have
109  * no need to distinguish it from <xe> state, and no good way to get out
110  * of it in error cases.  The backend just throws yyerror() in those
111  * cases, but that's not an option here.
112  */
113
114 %x xb
115 %x xc
116 %x xd
117 %x xh
118 %x xe
119 %x xq
120 %x xdolq
121 %x xui
122 %x xuiend
123 %x xus
124 %x xusend
125
126 /*
127  * In order to make the world safe for Windows and Mac clients as well as
128  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
129  * sequence will be seen as two successive newlines, but that doesn't cause
130  * any problems.  Comments that start with -- and extend to the next
131  * newline are treated as equivalent to a single whitespace character.
132  *
133  * NOTE a fine point: if there is no newline following --, we will absorb
134  * everything to the end of the input as a comment.  This is correct.  Older
135  * versions of Postgres failed to recognize -- as a comment if the input
136  * did not end with a newline.
137  *
138  * XXX perhaps \f (formfeed) should be treated as a newline as well?
139  *
140  * XXX if you change the set of whitespace characters, fix scanner_isspace()
141  * to agree, and see also the plpgsql lexer.
142  */
143
144 space                   [ \t\n\r\f]
145 horiz_space             [ \t\f]
146 newline                 [\n\r]
147 non_newline             [^\n\r]
148
149 comment                 ("--"{non_newline}*)
150
151 whitespace              ({space}+|{comment})
152
153 /*
154  * SQL requires at least one newline in the whitespace separating
155  * string literals that are to be concatenated.  Silly, but who are we
156  * to argue?  Note that {whitespace_with_newline} should not have * after
157  * it, whereas {whitespace} should generally have a * after it...
158  */
159
160 special_whitespace              ({space}+|{comment}{newline})
161 horiz_whitespace                ({horiz_space}|{comment})
162 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
163
164 /*
165  * To ensure that {quotecontinue} can be scanned without having to back up
166  * if the full pattern isn't matched, we include trailing whitespace in
167  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
168  * except for {quote} followed by whitespace and just one "-" (not two,
169  * which would start a {comment}).  To cover that we have {quotefail}.
170  * The actions for {quotestop} and {quotefail} must throw back characters
171  * beyond the quote proper.
172  */
173 quote                   '
174 quotestop               {quote}{whitespace}*
175 quotecontinue   {quote}{whitespace_with_newline}{quote}
176 quotefail               {quote}{whitespace}*"-"
177
178 /* Bit string
179  * It is tempting to scan the string for only those characters
180  * which are allowed. However, this leads to silently swallowed
181  * characters if illegal characters are included in the string.
182  * For example, if xbinside is [01] then B'ABCD' is interpreted
183  * as a zero-length string, and the ABCD' is lost!
184  * Better to pass the string forward and let the input routines
185  * validate the contents.
186  */
187 xbstart                 [bB]{quote}
188 xbinside                [^']*
189
190 /* Hexadecimal number */
191 xhstart                 [xX]{quote}
192 xhinside                [^']*
193
194 /* National character */
195 xnstart                 [nN]{quote}
196
197 /* Quoted string that allows backslash escapes */
198 xestart                 [eE]{quote}
199 xeinside                [^\\']+
200 xeescape                [\\][^0-7]
201 xeoctesc                [\\][0-7]{1,3}
202 xehexesc                [\\]x[0-9A-Fa-f]{1,2}
203 xeunicode               [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
204 xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
205
206 /* Extended quote
207  * xqdouble implements embedded quote, ''''
208  */
209 xqstart                 {quote}
210 xqdouble                {quote}{quote}
211 xqinside                [^']+
212
213 /* $foo$ style quotes ("dollar quoting")
214  * The quoted string starts with $foo$ where "foo" is an optional string
215  * in the form of an identifier, except that it may not contain "$",
216  * and extends to the first occurrence of an identical string.
217  * There is *no* processing of the quoted text.
218  *
219  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
220  * fails to match its trailing "$".
221  */
222 dolq_start              [A-Za-z\200-\377_]
223 dolq_cont               [A-Za-z\200-\377_0-9]
224 dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
225 dolqfailed              \${dolq_start}{dolq_cont}*
226 dolqinside              [^$]+
227
228 /* Double quote
229  * Allows embedded spaces and other special characters into identifiers.
230  */
231 dquote                  \"
232 xdstart                 {dquote}
233 xdstop                  {dquote}
234 xddouble                {dquote}{dquote}
235 xdinside                [^"]+
236
237 /* Unicode escapes */
238 uescape                 [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
239 /* error rule to avoid backup */
240 uescapefail             [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
241
242 /* Quoted identifier with Unicode escapes */
243 xuistart                [uU]&{dquote}
244
245 /* Quoted string with Unicode escapes */
246 xusstart                [uU]&{quote}
247
248 /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
249 xustop1         {uescapefail}?
250 xustop2         {uescape}
251
252 /* error rule to avoid backup */
253 xufailed                [uU]&
254
255
256 /* C-style comments
257  *
258  * The "extended comment" syntax closely resembles allowable operator syntax.
259  * The tricky part here is to get lex to recognize a string starting with
260  * slash-star as a comment, when interpreting it as an operator would produce
261  * a longer match --- remember lex will prefer a longer match!  Also, if we
262  * have something like plus-slash-star, lex will think this is a 3-character
263  * operator whereas we want to see it as a + operator and a comment start.
264  * The solution is two-fold:
265  * 1. append {op_chars}* to xcstart so that it matches as much text as
266  *    {operator} would. Then the tie-breaker (first matching rule of same
267  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
268  *    in case it contains a star-slash that should terminate the comment.
269  * 2. In the operator rule, check for slash-star within the operator, and
270  *    if found throw it back with yyless().  This handles the plus-slash-star
271  *    problem.
272  * Dash-dash comments have similar interactions with the operator rule.
273  */
274 xcstart                 \/\*{op_chars}*
275 xcstop                  \*+\/
276 xcinside                [^*/]+
277
278 digit                   [0-9]
279 ident_start             [A-Za-z\200-\377_]
280 ident_cont              [A-Za-z\200-\377_0-9\$]
281
282 identifier              {ident_start}{ident_cont}*
283
284 /* Assorted special-case operators and operator-like tokens */
285 typecast                "::"
286 dot_dot                 \.\.
287 colon_equals    ":="
288 equals_greater  "=>"
289 less_equals             "<="
290 greater_equals  ">="
291 less_greater    "<>"
292 not_equals              "!="
293
294 /*
295  * "self" is the set of chars that should be returned as single-character
296  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
297  * which can be one or more characters long (but if a single-char token
298  * appears in the "self" set, it is not to be returned as an Op).  Note
299  * that the sets overlap, but each has some chars that are not in the other.
300  *
301  * If you change either set, adjust the character lists appearing in the
302  * rule for "operator"!
303  */
304 self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
305 op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
306 operator                {op_chars}+
307
308 /* We no longer allow unary minus in numbers.
309  * Instead we pass it separately to the parser, where it gets
310  * coerced via doNegate() -- Leon aug 20 1999
311  *
312  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
313  *
314  * {realfail1} and {realfail2} are added to prevent the need for scanner
315  * backup when the {real} rule fails to match completely.
316  */
317
318 integer                 {digit}+
319 decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
320 decimalfail             {digit}+\.\.
321 real                    ({integer}|{decimal})[Ee][-+]?{digit}+
322 realfail1               ({integer}|{decimal})[Ee]
323 realfail2               ({integer}|{decimal})[Ee][-+]
324
325 param                   \${integer}
326
327 /* psql-specific: characters allowed in variable names */
328 variable_char   [A-Za-z\200-\377_0-9]
329
330 other                   .
331
332 /*
333  * Dollar quoted strings are totally opaque, and no escaping is done on them.
334  * Other quoted strings must allow some special characters such as single-quote
335  *  and newline.
336  * Embedded single-quotes are implemented both in the SQL standard
337  *  style of two adjacent single quotes "''" and in the Postgres/Java style
338  *  of escaped-quote "\'".
339  * Other embedded escaped characters are matched explicitly and the leading
340  *  backslash is dropped from the string.
341  * Note that xcstart must appear before operator, as explained above!
342  *  Also whitespace (comment) must appear before operator.
343  */
344
345 %%
346
347 %{
348                 /* Declare some local variables inside yylex(), for convenience */
349                 PsqlScanState cur_state = yyextra;      /* also the name the ECHO macro refers to */
350                 PQExpBuffer output_buf = cur_state->output_buf;
351
352                 /*
353                  * Force flex into the state indicated by start_state.  This has a
354                  * couple of purposes: it lets some of the functions below set a
355                  * new starting state without ugly direct access to flex variables,
356                  * and it allows us to transition from one flex lexer to another
357                  * so that we can lex different parts of the source string using
358                  * separate lexers.
359                  */
360                 BEGIN(cur_state->start_state);  /* rules that return from yylex() store YY_START here first */
361 %}
362
363 {whitespace}    {
364                                         /*
365                                          * Note that the whitespace rule includes both true
366                                          * whitespace and single-line ("--" style) comments.
367                                          * We suppress whitespace at the start of the query
368                                          * buffer.  We also suppress all single-line comments,
369                                          * which is pretty dubious but is the historical
370                                          * behavior.
371                                          */
372                                         if (!(output_buf->len == 0 || yytext[0] == '-'))        /* '-' is not a {space} char, so a leading '-' means a "--" comment */
373                                                 ECHO;
374                                 }
375
376 {xcstart}               {
377                                         cur_state->xcdepth = 0;         /* entering the outermost comment level */
378                                         BEGIN(xc);
379                                         /* Put back any characters past slash-star; see above */
380                                         yyless(2);
381                                         ECHO;
382                                 }
383
384 <xc>{xcstart}   {
385                                         cur_state->xcdepth++;           /* a nested comment was opened */
386                                         /* Put back any characters past slash-star; see above */
387                                         yyless(2);
388                                         ECHO;
389                                 }
390
391 <xc>{xcstop}    {
392                                         if (cur_state->xcdepth <= 0)
393                                         {
394                                                 BEGIN(INITIAL);         /* this star-slash closes the outermost comment */
395                                         }
396                                         else
397                                                 cur_state->xcdepth--;   /* this star-slash closes a nested comment */
398                                         ECHO;
399                                 }
400
401 <xc>{xcinside}  {
402                                         ECHO;   /* run of characters other than '*' and '/' */
403                                 }
404
405 <xc>{op_chars}  {
406                                         ECHO;   /* single character; in practice a lone '/' or '*' that {xcinside} cannot match */
407                                 }
408
409 <xc>\*+                 {
410                                         ECHO;   /* run of '*' that is not part of a comment terminator */
411                                 }
412
413 {xbstart}               {
414                                         BEGIN(xb);      /* b'...' bit string literal */
415                                         ECHO;
416                                 }
417 <xb>{quotestop} |
418 <xb>{quotefail} {
419                                         yyless(1);      /* keep only the closing quote; rescan the whitespace or '-' after it */
420                                         BEGIN(INITIAL);
421                                         ECHO;
422                                 }
423 <xh>{xhinside}  |
424 <xb>{xbinside}  {
425                                         ECHO;   /* literal contents are passed through unvalidated */
426                                 }
427 <xh>{quotecontinue}     |
428 <xb>{quotecontinue}     {
429                                         ECHO;   /* quote, whitespace containing a newline, quote: the literal continues, so stay in this state */
430                                 }
431
432 {xhstart}               {
433                                         /* Hexadecimal string literal, x'...'.
434                                          * Only the switch to the <xh> start condition matters
435                                          * here; the matched text is echoed unchanged.  Any
436                                          * special marking of hex strings is presumably left
437                                          * to the backend lexer, which this rule set mirrors.
438                                          */
439                                         BEGIN(xh);
440                                         ECHO;
441                                 }
442 <xh>{quotestop} |
443 <xh>{quotefail} {
444                                         yyless(1);      /* keep only the closing quote; rescan the whitespace or '-' after it */
445                                         BEGIN(INITIAL);
446                                         ECHO;
447                                 }
448
449 {xnstart}               {
450                                         yyless(1);                              /* eat only 'n' this time; the quote is rescanned as an ordinary string start */
451                                         ECHO;
452                                 }
453
454 {xqstart}               {
455                                         if (cur_state->std_strings)
456                                                 BEGIN(xq);      /* standard string: no backslash escape rules apply */
457                                         else
458                                                 BEGIN(xe);      /* treat as an extended string with backslash escapes */
459                                         ECHO;
460                                 }
461 {xestart}               {
462                                         BEGIN(xe);      /* e'...' always uses the backslash-escape rules */
463                                         ECHO;
464                                 }
465 {xusstart}              {
466                                         BEGIN(xus);     /* u&'...' string with Unicode escapes */
467                                         ECHO;
468                                 }
469 <xq,xe>{quotestop}      |
470 <xq,xe>{quotefail} {
471                                         yyless(1);      /* keep only the closing quote; rescan the whitespace or '-' after it */
472                                         BEGIN(INITIAL);
473                                         ECHO;
474                                 }
475 <xus>{quotestop} |
476 <xus>{quotefail} {
477                                         yyless(1);      /* keep only the closing quote */
478                                         BEGIN(xusend);  /* an optional UESCAPE clause may still follow */
479                                         ECHO;
480                                 }
481 <xusend>{whitespace} {
482                                         ECHO;   /* whitespace between the string and a possible UESCAPE */
483                                 }
484 <xusend>{other} |
485 <xusend>{xustop1} {
486                                         yyless(0);      /* no complete UESCAPE clause: give back the whole match */
487                                         BEGIN(INITIAL); /* ... so it is rescanned by the ordinary rules */
488                                         ECHO;           /* yyleng is 0 after yyless(0), so this passes an empty string */
489                                 }
490 <xusend>{xustop2} {
491                                         BEGIN(INITIAL); /* complete UESCAPE 'c' clause consumed */
492                                         ECHO;
493                                 }
494 <xq,xe,xus>{xqdouble} {
495                                         ECHO;   /* doubled quote: an embedded quote, not the end of the string */
496                                 }
497 <xq,xus>{xqinside}  {
498                                         ECHO;   /* run of non-quote characters */
499                                 }
500 <xe>{xeinside}  {
501                                         ECHO;   /* run of characters that are neither quote nor backslash */
502                                 }
503 <xe>{xeunicode} {
504                                         ECHO;   /* complete \uXXXX or \UXXXXXXXX escape */
505                                 }
506 <xe>{xeunicodefail}     {
507                                         ECHO;   /* incomplete \u or \U escape; passed through as is */
508                                 }
509 <xe>{xeescape}  {
510                                         ECHO;   /* backslash plus one non-octal character; this is what keeps \' from ending the string */
511                                 }
512 <xe>{xeoctesc}  {
513                                         ECHO;   /* backslash plus one to three octal digits */
514                                 }
515 <xe>{xehexesc}  {
516                                         ECHO;   /* \x plus one or two hex digits */
517                                 }
518 <xq,xe,xus>{quotecontinue} {
519                                         ECHO;   /* quote, whitespace containing a newline, quote: the literal continues */
520                                 }
521 <xe>.                   {
522                                         /* This is only needed for \ just before EOF */
523                                         ECHO;
524                                 }
525
526 {dolqdelim}             {
527                                         cur_state->dolqstart = pg_strdup(yytext);       /* remember the opening delimiter; freed when the matching one is seen */
528                                         BEGIN(xdolq);
529                                         ECHO;
530                                 }
531 {dolqfailed}    {
532                                         /* throw back all but the initial "$" */
533                                         yyless(1);
534                                         ECHO;
535                                 }
536 <xdolq>{dolqdelim} {
537                                         if (strcmp(yytext, cur_state->dolqstart) == 0)  /* identical to the opening delimiter? */
538                                         {
539                                                 free(cur_state->dolqstart);
540                                                 cur_state->dolqstart = NULL;
541                                                 BEGIN(INITIAL);
542                                         }
543                                         else
544                                         {
545                                                 /*
546                                                  * When we fail to match $...$ to dolqstart, transfer
547                                                  * the $... part to the output, but put back the final
548                                                  * $ for rescanning.  Consider $delim$...$junk$delim$
549                                                  */
550                                                 yyless(yyleng-1);
551                                         }
552                                         ECHO;   /* emits the full delimiter on a match, or all but the final "$" otherwise */
553                                 }
554 <xdolq>{dolqinside} {
555                                         ECHO;   /* run of characters containing no "$" */
556                                 }
557 <xdolq>{dolqfailed} {
558                                         ECHO;   /* "$" plus identifier characters with no closing "$": ordinary text */
559                                 }
560 <xdolq>.                {
561                                         /* This is only needed for $ inside the quoted text */
562                                         ECHO;
563                                 }
564
565 {xdstart}               {
566                                         BEGIN(xd);      /* double-quoted identifier */
567                                         ECHO;
568                                 }
569 {xuistart}              {
570                                         BEGIN(xui);     /* u&"..." identifier with Unicode escapes */
571                                         ECHO;
572                                 }
573 <xd>{xdstop}    {
574                                         BEGIN(INITIAL);
575                                         ECHO;
576                                 }
577 <xui>{dquote} {
578                                         yyless(1);      /* the match is a single character, so nothing is actually put back */
579                                         BEGIN(xuiend);  /* an optional UESCAPE clause may still follow */
580                                         ECHO;
581                                 }
582 <xuiend>{whitespace} {
583                                         ECHO;   /* whitespace between the identifier and a possible UESCAPE */
584                                 }
585 <xuiend>{other} |
586 <xuiend>{xustop1} {
587                                         yyless(0);      /* no complete UESCAPE clause: give back the whole match */
588                                         BEGIN(INITIAL); /* ... so it is rescanned by the ordinary rules */
589                                         ECHO;           /* yyleng is 0 after yyless(0), so this passes an empty string */
590                                 }
591 <xuiend>{xustop2}       {
592                                         BEGIN(INITIAL); /* complete UESCAPE 'c' clause consumed */
593                                         ECHO;
594                                 }
595 <xd,xui>{xddouble}      {
596                                         ECHO;   /* doubled double-quote: embedded quote, not the end of the identifier */
597                                 }
598 <xd,xui>{xdinside}      {
599                                         ECHO;   /* run of characters other than double-quote */
600                                 }
601
602 {xufailed}      {
603                                         /* throw back all but the initial u/U */
604                                         yyless(1);
605                                         ECHO;
606                                 }
607
608 {typecast}              {
609                                         ECHO;   /* this and the fixed two-character tokens below are passed through unchanged */
610                                 }
611
612 {dot_dot}               {
613                                         ECHO;
614                                 }
615
616 {colon_equals}  {
617                                         ECHO;
618                                 }
619
620 {equals_greater} {
621                                         ECHO;
622                                 }
623
624 {less_equals}   {
625                                         ECHO;
626                                 }
627
628 {greater_equals} {
629                                         ECHO;
630                                 }
631
632 {less_greater}  {
633                                         ECHO;
634                                 }
635
636 {not_equals}    {
637                                         ECHO;
638                                 }
639
640         /*
641          * These rules are specific to psql --- they implement parenthesis
642          * counting and detection of command-ending semicolon.  These must
643          * appear before the {self} rule so that they take precedence over it.
644          */
645
646 "("                             {
647                                         cur_state->paren_depth++;
648                                         ECHO;
649                                 }
650
651 ")"                             {
652                                         if (cur_state->paren_depth > 0)         /* an unbalanced ")" must not drive the depth negative */
653                                                 cur_state->paren_depth--;
654                                         ECHO;
655                                 }
656
657 ";"                             {
658                                         ECHO;
659                                         if (cur_state->paren_depth == 0)        /* a ";" inside parentheses does not end the command */
660                                         {
661                                                 /* Terminate lexing temporarily */
662                                                 cur_state->start_state = YY_START;      /* re-applied by BEGIN() at the top of yylex() */
663                                                 return LEXRES_SEMI;
664                                         }
665                                 }
666
667         /*
668          * psql-specific rules to handle backslash commands and variable
669          * substitution.  We want these before {self}, also.
670          */
671
672 "\\"[;:]                {
673                                         /* Force a semicolon or colon into the query buffer */
674                                         psqlscan_emit(cur_state, yytext + 1, 1);        /* skip the backslash; pass only the ";" or ":" */
675                                 }
676
677 "\\"                    {
678                                         /* Terminate lexing temporarily */
679                                         cur_state->start_state = YY_START;      /* re-applied by BEGIN() at the top of yylex() */
680                                         return LEXRES_BACKSLASH;        /* note: the backslash itself is not echoed */
681                                 }
682
683 :{variable_char}+       {
684                                         /* Possible psql variable substitution */
685                                         char   *varname;
686                                         char   *value;
687
688                                         varname = psqlscan_extract_substring(cur_state,
689                                                                                                                  yytext + 1,
690                                                                                                                  yyleng - 1);   /* the text after the ':' */
691                                         if (cur_state->callbacks->get_variable) /* callback is optional; without it nothing is substituted */
692                                                 value = cur_state->callbacks->get_variable(varname,
693                                                                                                                                    false,
694                                                                                                                                    false);      /* NOTE(review): presumably "no escaping, not an identifier" -- confirm in psqlscan.h */
695                                         else
696                                                 value = NULL;
697
698                                         if (value)
699                                         {
700                                                 /* It is a variable, check for recursion */
701                                                 if (var_is_current_source(cur_state, varname))
702                                                 {
703                                                         /* Recursive expansion --- don't go there */
704                                                         cur_state->callbacks->write_error("skipping recursive expansion of variable \"%s\"\n",
705                                                                                                                           varname);
706                                                         /* Instead copy the string as is */
707                                                         ECHO;
708                                                 }
709                                                 else
710                                                 {
711                                                         /* OK, perform substitution */
712                                                         psqlscan_push_new_buffer(cur_state, value, varname);
713                                                         /* yy_scan_string already made buffer active */
714                                                 }
715                                                 free(value);    /* NOTE(review): assumes psqlscan_push_new_buffer() keeps its own copy -- confirm */
716                                         }
717                                         else
718                                         {
719                                                 /*
720                                                  * if the variable doesn't exist we'll copy the
721                                                  * string as is
722                                                  */
723                                                 ECHO;
724                                         }
725
726                                         free(varname);  /* released on every path */
727                                 }
728
729 :'{variable_char}+'     {    /* :'name' --- emit the named variable's escaped value, or the token itself if the lookup yields nothing */
730                                         psqlscan_escape_variable(cur_state, yytext, yyleng, false);    /* as_ident = false */
731                                 }
732
733 :\"{variable_char}+\"   {    /* :"name" --- same handling, but the escaping is requested with as_ident = true */
734                                         psqlscan_escape_variable(cur_state, yytext, yyleng, true);
735                                 }
736
737         /*
738          * These rules just avoid the need for scanner backup if one of the
739          * two rules above fails to match completely.  They accept a colon and an opening quote followed by any run of variable_char, with no closing quote required.
740          */
741
742 :'{variable_char}*      {
743                                         /* Throw back everything but the colon */
744                                         yyless(1);    /* the token is now just ":" */
745                                         ECHO;    /* emit the colon; the thrown-back text gets scanned again */
746                                 }
747
748 :\"{variable_char}*     {
749                                         /* Throw back everything but the colon */
750                                         yyless(1);    /* the token is now just ":" */
751                                         ECHO;    /* emit the colon; the thrown-back text gets scanned again */
752                                 }
753
754         /*
755          * Back to backend-compatible rules.
756          */
757
758 {self}                  {
759                                         ECHO;    /* copy the matched token through to the output buffer */
760                                 }
761
762 {operator}              {
763                                         /*
764                                          * Check for embedded slash-star or dash-dash; those
765                                          * are comment starts, so operator must stop there.
766                                          * Note that slash-star or dash-dash at the first
767                                          * character will match a prior rule, not this one.
768                                          */
769                                         int             nchars = yyleng;    /* how many leading chars we will keep as the operator */
770                                         char   *slashstar = strstr(yytext, "/*");
771                                         char   *dashdash = strstr(yytext, "--");
772
773                                         if (slashstar && dashdash)
774                                         {
775                                                 /* if both appear, take the first one */
776                                                 if (slashstar > dashdash)
777                                                         slashstar = dashdash;
778                                         }
779                                         else if (!slashstar)
780                                                 slashstar = dashdash;    /* may still be NULL if neither appears */
781                                         if (slashstar)
782                                                 nchars = slashstar - yytext;    /* keep only the chars before the comment start */
783
784                                         /*
785                                          * For SQL compatibility, '+' and '-' cannot be the
786                                          * last char of a multi-char operator unless the operator
787                                          * contains chars that are not in SQL operators.
788                                          * The idea is to lex '=-' as two operators, but not
789                                          * to forbid operator names like '?-' that could not be
790                                          * sequences of SQL operators.
791                                          */
792                                         while (nchars > 1 &&
793                                                    (yytext[nchars-1] == '+' ||
794                                                         yytext[nchars-1] == '-'))
795                                         {
796                                                 int             ic;
797
798                                                 for (ic = nchars-2; ic >= 0; ic--)    /* look through the chars preceding the trailing +/- */
799                                                 {
800                                                         if (strchr("~!@#^&|`?%", yytext[ic]))
801                                                                 break;
802                                                 }
803                                                 if (ic >= 0)
804                                                         break; /* found a char that makes it OK */
805                                                 nchars--; /* else remove the +/-, and check again */
806                                         }
807
808                                         if (nchars < yyleng)
809                                         {
810                                                 /* Strip the unwanted chars from the token */
811                                                 yyless(nchars);    /* the stripped chars go back to the input to be scanned again */
812                                         }
813                                         ECHO;    /* emit whatever part of the operator survived */
814                                 }
815
816 {param}                 {    /* from here through {other}, each rule just copies its token to the output with ECHO; the "fail" variants first push back trailing chars with yyless() */
817                                         ECHO;
818                                 }
819
820 {integer}               {
821                                         ECHO;
822                                 }
823 {decimal}               {
824                                         ECHO;
825                                 }
826 {decimalfail}   {
827                                         /* throw back the .., and treat as integer */
828                                         yyless(yyleng-2);
829                                         ECHO;
830                                 }
831 {real}                  {
832                                         ECHO;
833                                 }
834 {realfail1}             {
835                                         /*
836                                          * throw back the [Ee], and treat as {decimal}.  Note
837                                          * that it is possible the input is actually {integer},
838                                          * but since this case will almost certainly lead to a
839                                          * syntax error anyway, we don't bother to distinguish.
840                                          */
841                                         yyless(yyleng-1);
842                                         ECHO;
843                                 }
844 {realfail2}             {
845                                         /* throw back the [Ee][+-], and proceed as above */
846                                         yyless(yyleng-2);
847                                         ECHO;
848                                 }
849
850
851 {identifier}    {
852                                         ECHO;
853                                 }
854
855 {other}                 {
856                                         ECHO;
857                                 }
858
859         /*
860          * psql uses a single <<EOF>> rule, unlike the backend.
861          */
862
863 <<EOF>>                 {
864                                         if (cur_state->buffer_stack == NULL)    /* no variable expansion is in progress */
865                                         {
866                                                 cur_state->start_state = YY_START;    /* saved so psql_scan() can choose the result code and prompt */
867                                                 return LEXRES_EOL; /* end of input reached */
868                                         }
869
870                                         /*
871                                          * We were expanding a variable, so pop the inclusion
872                                          * stack and keep lexing
873                                          */
874                                         psqlscan_pop_buffer_stack(cur_state);
875                                         psqlscan_select_top_buffer(cur_state);    /* resume with the next stacked buffer, or the outer line buffer if the stack is now empty */
876                                 }
877
878 %%
879
880 /*
881  * Create a lexer working state struct.
882  *
883  * callbacks is a struct of function pointers that encapsulate some
884  * behavior we need from the surrounding program.  This struct must
885  * remain valid for the lifespan of the PsqlScanState.
886  */
887 PsqlScanState
888 psql_scan_create(const PsqlScanCallbacks *callbacks)
889 {
890         PsqlScanState state;
891
892         state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
893
894         state->callbacks = callbacks;
895
896         yylex_init_extra(state, &state->scanner);
897
898         psql_scan_reset(state);
899
900         return state;
901 }
902
903 /*
904  * Destroy a lexer working state struct, releasing all resources.
905  */
906 void
907 psql_scan_destroy(PsqlScanState state)
908 {
909         psql_scan_finish(state);
910
911         psql_scan_reset(state);
912
913         yylex_destroy(state->scanner);
914
915         free(state);
916 }
917
918 /*
919  * Set up to perform lexing of the given input line.
920  *
921  * The text at *line, extending for line_len bytes, will be scanned by
922  * subsequent calls to the psql_scan routines.  psql_scan_finish should
923  * be called when scanning is complete.  Note that the lexer retains
924  * a pointer to the storage at *line --- this string must not be altered
925  * or freed until after psql_scan_finish is called.
926  *
927  * encoding is the libpq identifier for the character encoding in use,
928  * and std_strings says whether standard_conforming_strings is on.
929  */
930 void
931 psql_scan_setup(PsqlScanState state,
932                                 const char *line, int line_len,
933                                 int encoding, bool std_strings)
934 {
935         /* Mustn't be scanning already */
936         Assert(state->scanbufhandle == NULL);
937         Assert(state->buffer_stack == NULL);
938
939         /* Do we need to hack the character set encoding? */
940         state->encoding = encoding;
941         state->safe_encoding = pg_valid_server_encoding_id(encoding);
942
943         /* Save standard-strings flag as well */
944         state->std_strings = std_strings;
945
946         /* Set up flex input buffer with appropriate translation and padding */
947         state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
948                                                                                                    &state->scanbuf);
949         state->scanline = line;
950
951         /* Set lookaside data in case we have to map unsafe encoding */
952         state->curline = state->scanbuf;
953         state->refline = state->scanline;
954 }
955
956 /*
957  * Do lexical analysis of SQL command text.
958  *
959  * The text previously passed to psql_scan_setup is scanned, and appended
960  * (possibly with transformation) to query_buf.
961  *
962  * The return value indicates the condition that stopped scanning:
963  *
964  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
965  * transferred to query_buf.)  The command accumulated in query_buf should
966  * be executed, then clear query_buf and call again to scan the remainder
967  * of the line.
968  *
969  * PSCAN_BACKSLASH: found a backslash that starts a special command.
970  * Any previous data on the line has been transferred to query_buf.
971  * The caller will typically next apply a separate flex lexer to scan
972  * the special command.
973  *
974  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
975  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
976  *
977  * PSCAN_EOL: the end of the line was reached, and there is no lexical
978  * reason to consider the command incomplete.  The caller may or may not
979  * choose to send it.  *prompt is set to the appropriate prompt type if
980  * the caller chooses to collect more input.
981  *
982  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
983  * be called next, then the cycle may be repeated with a fresh input line.
984  *
985  * In all cases, *prompt is set to an appropriate prompt type code for the
986  * next line-input operation.
987  */
988 PsqlScanResult
989 psql_scan(PsqlScanState state,
990                   PQExpBuffer query_buf,
991                   promptStatus_t *prompt)
992 {
993         PsqlScanResult result;
994         int                     lexresult;
995
996         /* Must be scanning already */
997         Assert(state->scanbufhandle != NULL);
998
999         /* Set current output target */
1000         state->output_buf = query_buf;
1001
1002         /* Set input source */
1003         if (state->buffer_stack != NULL)
1004                 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1005         else
1006                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1007
1008         /* And lex. */
1009         lexresult = yylex(state->scanner);
1010
1011         /*
1012          * Check termination state and return appropriate result info.
1013          */
1014         switch (lexresult)
1015         {
1016                 case LEXRES_EOL:                /* end of input */
1017                         switch (state->start_state)
1018                         {
1019                                 case INITIAL:
1020                                 case xuiend:    /* we treat these like INITIAL */
1021                                 case xusend:
1022                                         if (state->paren_depth > 0)
1023                                         {
1024                                                 result = PSCAN_INCOMPLETE;
1025                                                 *prompt = PROMPT_PAREN;
1026                                         }
1027                                         else if (query_buf->len > 0)
1028                                         {
1029                                                 result = PSCAN_EOL;
1030                                                 *prompt = PROMPT_CONTINUE;
1031                                         }
1032                                         else
1033                                         {
1034                                                 /* never bother to send an empty buffer */
1035                                                 result = PSCAN_INCOMPLETE;
1036                                                 *prompt = PROMPT_READY;
1037                                         }
1038                                         break;
1039                                 case xb:
1040                                         result = PSCAN_INCOMPLETE;
1041                                         *prompt = PROMPT_SINGLEQUOTE;
1042                                         break;
1043                                 case xc:
1044                                         result = PSCAN_INCOMPLETE;
1045                                         *prompt = PROMPT_COMMENT;
1046                                         break;
1047                                 case xd:
1048                                         result = PSCAN_INCOMPLETE;
1049                                         *prompt = PROMPT_DOUBLEQUOTE;
1050                                         break;
1051                                 case xh:
1052                                         result = PSCAN_INCOMPLETE;
1053                                         *prompt = PROMPT_SINGLEQUOTE;
1054                                         break;
1055                                 case xe:
1056                                         result = PSCAN_INCOMPLETE;
1057                                         *prompt = PROMPT_SINGLEQUOTE;
1058                                         break;
1059                                 case xq:
1060                                         result = PSCAN_INCOMPLETE;
1061                                         *prompt = PROMPT_SINGLEQUOTE;
1062                                         break;
1063                                 case xdolq:
1064                                         result = PSCAN_INCOMPLETE;
1065                                         *prompt = PROMPT_DOLLARQUOTE;
1066                                         break;
1067                                 case xui:
1068                                         result = PSCAN_INCOMPLETE;
1069                                         *prompt = PROMPT_DOUBLEQUOTE;
1070                                         break;
1071                                 case xus:
1072                                         result = PSCAN_INCOMPLETE;
1073                                         *prompt = PROMPT_SINGLEQUOTE;
1074                                         break;
1075                                 default:
1076                                         /* can't get here */
1077                                         fprintf(stderr, "invalid YY_START\n");
1078                                         exit(1);
1079                         }
1080                         break;
1081                 case LEXRES_SEMI:               /* semicolon */
1082                         result = PSCAN_SEMICOLON;
1083                         *prompt = PROMPT_READY;
1084                         break;
1085                 case LEXRES_BACKSLASH:  /* backslash */
1086                         result = PSCAN_BACKSLASH;
1087                         *prompt = PROMPT_READY;
1088                         break;
1089                 default:
1090                         /* can't get here */
1091                         fprintf(stderr, "invalid yylex result\n");
1092                         exit(1);
1093         }
1094
1095         return result;
1096 }
1097
1098 /*
1099  * Clean up after scanning a string.  This flushes any unread input and
1100  * releases resources (but not the PsqlScanState itself).  Note however
1101  * that this does not reset the lexer scan state; that can be done by
1102  * psql_scan_reset(), which is an orthogonal operation.
1103  *
1104  * It is legal to call this when not scanning anything (makes it easier
1105  * to deal with error recovery).
1106  */
1107 void
1108 psql_scan_finish(PsqlScanState state)
1109 {
1110         /* Drop any incomplete variable expansions. */
1111         while (state->buffer_stack != NULL)
1112                 psqlscan_pop_buffer_stack(state);
1113
1114         /* Done with the outer scan buffer, too */
1115         if (state->scanbufhandle)
1116                 yy_delete_buffer(state->scanbufhandle, state->scanner);
1117         state->scanbufhandle = NULL;
1118         if (state->scanbuf)
1119                 free(state->scanbuf);
1120         state->scanbuf = NULL;
1121 }
1122
1123 /*
1124  * Reset lexer scanning state to start conditions.  This is appropriate
1125  * for executing \r psql commands (or any other time that we discard the
1126  * prior contents of query_buf).  It is not, however, necessary to do this
1127  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1128  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1129  * conditions are returned.
1130  *
1131  * Note that this is unrelated to flushing unread input; that task is
1132  * done by psql_scan_finish().
1133  */
1134 void
1135 psql_scan_reset(PsqlScanState state)
1136 {
1137         state->start_state = INITIAL;
1138         state->paren_depth = 0;
1139         state->xcdepth = 0;                     /* not really necessary */
1140         if (state->dolqstart)
1141                 free(state->dolqstart);
1142         state->dolqstart = NULL;
1143 }
1144
1145 /*
1146  * Reselect this lexer (psqlscan.l) after using another one.
1147  *
1148  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1149  * state, because we'd never switch to another lexer in a different state.
1150  * However, we don't want to reset e.g. paren_depth, so this can't be
1151  * the same as psql_scan_reset().
1152  *
1153  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1154  * must be a superset of this.
1155  *
1156  * Note: it seems likely that other lexers could just assign INITIAL for
1157  * themselves, since that probably has the value zero in every flex-generated
1158  * lexer.  But let's not assume that.
1159  */
1160 void
1161 psql_scan_reselect_sql_lexer(PsqlScanState state)
1162 {
1163         state->start_state = INITIAL;
1164 }
1165
1166 /*
1167  * Return true if lexer is currently in an "inside quotes" state.
1168  *
1169  * This is pretty grotty but is needed to preserve the old behavior
1170  * that mainloop.c drops blank lines not inside quotes without even
1171  * echoing them.
1172  */
1173 bool
1174 psql_scan_in_quote(PsqlScanState state)
1175 {
1176         return state->start_state != INITIAL;
1177 }
1178
1179 /*
1180  * Push the given string onto the stack of stuff to scan.
1181  *
1182  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1183  */
1184 void
1185 psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1186                                                  const char *varname)
1187 {
1188         StackElem  *stackelem;
1189
1190         stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1191
1192         /*
1193          * In current usage, the passed varname points at the current flex
1194          * input buffer; we must copy it before calling psqlscan_prepare_buffer()
1195          * because that will change the buffer state.
1196          */
1197         stackelem->varname = varname ? pg_strdup(varname) : NULL;
1198
1199         stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1200                                                                                          &stackelem->bufstring);
1201         state->curline = stackelem->bufstring;
1202         if (state->safe_encoding)
1203         {
1204                 stackelem->origstring = NULL;
1205                 state->refline = stackelem->bufstring;
1206         }
1207         else
1208         {
1209                 stackelem->origstring = pg_strdup(newstr);
1210                 state->refline = stackelem->origstring;
1211         }
1212         stackelem->next = state->buffer_stack;
1213         state->buffer_stack = stackelem;
1214 }
1215
1216 /*
1217  * Pop the topmost buffer stack item (there must be one!)
1218  *
1219  * NB: after this, the flex input state is unspecified; caller must
1220  * switch to an appropriate buffer to continue lexing.
1221  * See psqlscan_select_top_buffer().
1222  */
1223 void
1224 psqlscan_pop_buffer_stack(PsqlScanState state)
1225 {
1226         StackElem  *stackelem = state->buffer_stack;
1227
1228         state->buffer_stack = stackelem->next;
1229         yy_delete_buffer(stackelem->buf, state->scanner);
1230         free(stackelem->bufstring);
1231         if (stackelem->origstring)
1232                 free(stackelem->origstring);
1233         if (stackelem->varname)
1234                 free(stackelem->varname);
1235         free(stackelem);
1236 }
1237
1238 /*
1239  * Select the topmost surviving buffer as the active input.
1240  */
1241 void
1242 psqlscan_select_top_buffer(PsqlScanState state)
1243 {
1244         StackElem  *stackelem = state->buffer_stack;
1245
1246         if (stackelem != NULL)
1247         {
1248                 yy_switch_to_buffer(stackelem->buf, state->scanner);
1249                 state->curline = stackelem->bufstring;
1250                 state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1251         }
1252         else
1253         {
1254                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1255                 state->curline = state->scanbuf;
1256                 state->refline = state->scanline;
1257         }
1258 }
1259
1260 /*
1261  * Check if specified variable name is the source for any string
1262  * currently being scanned
1263  */
1264 static bool
1265 var_is_current_source(PsqlScanState state, const char *varname)
1266 {
1267         StackElem  *stackelem;
1268
1269         for (stackelem = state->buffer_stack;
1270                  stackelem != NULL;
1271                  stackelem = stackelem->next)
1272         {
1273                 if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1274                         return true;
1275         }
1276         return false;
1277 }
1278
1279 /*
1280  * Set up a flex input buffer to scan the given data.  We always make a
1281  * copy of the data.  If working in an unsafe encoding, the copy has
1282  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1283  *
1284  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1285  */
1286 YY_BUFFER_STATE
1287 psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1288                                                 char **txtcopy)
1289 {
1290         char       *newtxt;
1291
1292         /* Flex wants two \0 characters after the actual data */
1293         newtxt = pg_malloc(len + 2);
1294         *txtcopy = newtxt;
1295         newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1296
1297         if (state->safe_encoding)
1298                 memcpy(newtxt, txt, len);
1299         else
1300         {
1301                 /* Gotta do it the hard way */
1302                 int             i = 0;
1303
1304                 while (i < len)
1305                 {
1306                         int             thislen = PQmblen(txt + i, state->encoding);
1307
1308                         /* first byte should always be okay... */
1309                         newtxt[i] = txt[i];
1310                         i++;
1311                         while (--thislen > 0 && i < len)
1312                                 newtxt[i++] = (char) 0xFF;
1313                 }
1314         }
1315
1316         return yy_scan_buffer(newtxt, len + 2, state->scanner);
1317 }
1318
1319 /*
1320  * psqlscan_emit() --- body for ECHO macro
1321  *
1322  * NB: this must be used for ALL and ONLY the text copied from the flex
1323  * input data.  If you pass it something that is not part of the yytext
1324  * string, you are making a mistake.  Internally generated text can be
1325  * appended directly to state->output_buf.
1326  */
1327 void
1328 psqlscan_emit(PsqlScanState state, const char *txt, int len)
1329 {
1330         PQExpBuffer output_buf = state->output_buf;
1331
1332         if (state->safe_encoding)
1333                 appendBinaryPQExpBuffer(output_buf, txt, len);
1334         else
1335         {
1336                 /* Gotta do it the hard way */
1337                 const char *reference = state->refline;
1338                 int             i;
1339
1340                 reference += (txt - state->curline);
1341
1342                 for (i = 0; i < len; i++)
1343                 {
1344                         char    ch = txt[i];
1345
1346                         if (ch == (char) 0xFF)
1347                                 ch = reference[i];
1348                         appendPQExpBufferChar(output_buf, ch);
1349                 }
1350         }
1351 }
1352
1353 /*
1354  * psqlscan_extract_substring --- fetch value of (part of) the current token
1355  *
1356  * This is like psqlscan_emit(), except that the data is returned as a
1357  * malloc'd string rather than being pushed directly to state->output_buf.
1358  */
1359 char *
1360 psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
1361 {
1362         char       *result = (char *) pg_malloc(len + 1);
1363
1364         if (state->safe_encoding)
1365                 memcpy(result, txt, len);
1366         else
1367         {
1368                 /* Gotta do it the hard way */
1369                 const char *reference = state->refline;
1370                 int             i;
1371
1372                 reference += (txt - state->curline);
1373
1374                 for (i = 0; i < len; i++)
1375                 {
1376                         char    ch = txt[i];
1377
1378                         if (ch == (char) 0xFF)
1379                                 ch = reference[i];
1380                         result[i] = ch;
1381                 }
1382         }
1383         result[len] = '\0';
1384         return result;
1385 }
1386
1387 /*
1388  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1389  *
1390  * If the variable name is found, escape its value using the appropriate
1391  * quoting method and emit the value to output_buf.  (Since the result is
1392  * surely quoted, there is never any reason to rescan it.)  If we don't
1393  * find the variable or escaping fails, emit the token as-is.
1394  */
1395 void
1396 psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1397                                                  bool as_ident)
1398 {
1399         char       *varname;
1400         char       *value;
1401
1402         /* Variable lookup. */
1403         varname = psqlscan_extract_substring(state, txt + 2, len - 3);
1404         if (state->callbacks->get_variable)
1405                 value = state->callbacks->get_variable(varname, true, as_ident);
1406         else
1407                 value = NULL;
1408         free(varname);
1409
1410         if (value)
1411         {
1412                 /* Emit the suitably-escaped value */
1413                 appendPQExpBufferStr(state->output_buf, value);
1414                 free(value);
1415         }
1416         else
1417         {
1418                 /* Emit original token as-is */
1419                 psqlscan_emit(state, txt, len);
1420         }
1421 }