]> granicus.if.org Git - postgresql/blob - src/bin/psql/psqlscan.l
Provide Assert() for frontend code.
[postgresql] / src / bin / psql / psqlscan.l
1 %{
2 /*-------------------------------------------------------------------------
3  *
4  * psqlscan.l
5  *        lexical scanner for psql
6  *
7  * This code is mainly needed to determine where the end of a SQL statement
8  * is: we are looking for semicolons that are not within quotes, comments,
9  * or parentheses.  The most reliable way to handle this is to borrow the
10  * backend's flex lexer rules, lock, stock, and barrel.  The rules below
11  * are (except for a few) the same as the backend's, but their actions are
12  * just ECHO whereas the backend's actions generally do other things.
13  *
14  * XXX The rules in this file must be kept in sync with the backend lexer!!!
15  *
16  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
17  *
18  * The most difficult aspect of this code is that we need to work in multibyte
19  * encodings that are not ASCII-safe.  A "safe" encoding is one in which each
20  * byte of a multibyte character has the high bit set (it's >= 0x80).  Since
21  * all our lexing rules treat all high-bit-set characters alike, we don't
22  * really need to care whether such a byte is part of a sequence or not.
23  * In an "unsafe" encoding, we still expect the first byte of a multibyte
24  * sequence to be >= 0x80, but later bytes might not be.  If we scan such
25  * a sequence as-is, the lexing rules could easily be fooled into matching
26  * such bytes to ordinary ASCII characters.  Our solution for this is to
27  * substitute 0xFF for each non-first byte within the data presented to flex.
28  * The flex rules will then pass the FF's through unmolested.  The emit()
29  * subroutine is responsible for looking back to the original string and
30  * replacing FF's with the corresponding original bytes.
31  *
32  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
33  * Portions Copyright (c) 1994, Regents of the University of California
34  *
35  * IDENTIFICATION
36  *        src/bin/psql/psqlscan.l
37  *
38  *-------------------------------------------------------------------------
39  */
40 #include "postgres_fe.h"
41
42 #include "psqlscan.h"
43
44 #include <ctype.h>
45
46 #include "common.h"
47 #include "settings.h"
48 #include "variables.h"
49
50
51 /*
52  * We use a stack of flex buffers to handle substitution of psql variables.
53  * Each stacked buffer contains the as-yet-unread text from one psql variable.
54  * When we pop the stack all the way, we resume reading from the outer buffer
55  * identified by scanbufhandle.
56  */
57 typedef struct StackElem
58 {
59         YY_BUFFER_STATE buf;            /* flex input control structure */
60         char       *bufstring;          /* data actually being scanned by flex */
61         char       *origstring;         /* copy of original data, if needed (presumably so emit() can restore bytes replaced by 0xFF -- confirm) */
62         char       *varname;            /* name of variable providing data, or NULL */
63         struct StackElem *next;         /* link to the next entry of the buffer stack */
64 } StackElem;
65
66 /*
67  * All working state of the lexer must be stored in PsqlScanStateData
68  * between calls.  This allows us to have multiple open lexer operations,
69  * which is needed for nested include files.  The lexer itself is not
70  * recursive, but it must be re-entrant.
71  */
72 typedef struct PsqlScanStateData
73 {
74         StackElem  *buffer_stack;       /* stack of variable expansion buffers */
75         /*
76          * These variables always refer to the outer buffer, never to any
77          * stacked variable-expansion buffer.
78          */
79         YY_BUFFER_STATE scanbufhandle;  /* flex handle identifying the outer buffer */
80         char       *scanbuf;            /* start of outer-level input buffer */
81         const char *scanline;           /* current input line at outer level */
82
83         /* safe_encoding, curline, refline are used by emit() to replace FFs */
84         int                     encoding;               /* encoding being used now */
85         bool            safe_encoding;  /* is current encoding "safe"? */
86         const char *curline;            /* actual flex input string for cur buf */
87         const char *refline;            /* original data for cur buffer */
88
89         /*
90          * All this state lives across successive input lines, until explicitly
91          * reset by psql_scan_reset.
92          */
93         int                     start_state;    /* saved YY_START */
94         int                     paren_depth;    /* depth of nesting in parentheses */
95         int                     xcdepth;                /* depth of nesting in slash-star comments; 0 while in the outermost comment */
96         char       *dolqstart;          /* current $foo$ quote start string; set to NULL when the quote is closed */
97 } PsqlScanStateData;
98
99 static PsqlScanState cur_state; /* current state while active */
100
101 static PQExpBuffer output_buf;  /* current output buffer */
102
103 /* these variables do not need to be saved across calls */
104 static enum slash_option_type option_type;
105 static char *option_quote;
106 static int      unquoted_option_chars;
107 static int      backtick_start_offset;
108
109
110 /* Return values from yylex() */
111 #define LEXRES_EOL                      0       /* end of input */
112 #define LEXRES_SEMI                     1       /* command-terminating semicolon found */
113 #define LEXRES_BACKSLASH        2       /* backslash command start */
114 #define LEXRES_OK                       3       /* OK completion of backslash argument */
115
116
117 int     yylex(void);
118
119 static void evaluate_backtick(void);
120 static void push_new_buffer(const char *newstr, const char *varname);
121 static void pop_buffer_stack(PsqlScanState state);
122 static bool var_is_current_source(PsqlScanState state, const char *varname);
123 static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
124                                                                           char **txtcopy);
125 static void emit(const char *txt, int len);
126 static char *extract_substring(const char *txt, int len);
127 static void escape_variable(bool as_ident);
128
129 #define ECHO emit(yytext, yyleng)
130
131 %}
132
133 %option 8bit
134 %option never-interactive
135 %option nodefault
136 %option noinput
137 %option nounput
138 %option noyywrap
139 %option warn
140
141 /*
142  * All of the following definitions and rules should exactly match
143  * src/backend/parser/scan.l so far as the flex patterns are concerned.
144  * The rule bodies are just ECHO as opposed to what the backend does,
145  * however.  (But be sure to duplicate code that affects the lexing process,
146  * such as BEGIN().)  Also, psqlscan uses a single <<EOF>> rule whereas
147  * scan.l has a separate one for each exclusive state.
148  */
149
150 /*
151  * OK, here is a short description of lex/flex rules behavior.
152  * The longest pattern which matches an input string is always chosen.
153  * For equal-length patterns, the first occurring in the rules list is chosen.
154  * INITIAL is the starting state, to which all non-conditional rules apply.
155  * Exclusive states change parsing rules while the state is active.  When in
156  * an exclusive state, only those rules defined for that state apply.
157  *
158  * We use exclusive states for quoted strings, extended comments,
159  * and to eliminate parsing troubles for numeric strings.
160  * Exclusive states:
161  *  <xb> bit string literal
162  *  <xc> extended C-style comments
163  *  <xd> delimited identifiers (double-quoted identifiers)
164  *  <xh> hexadecimal numeric string
165  *  <xq> standard quoted strings
166  *  <xe> extended quoted strings (support backslash escape sequences)
167  *  <xdolq> $foo$ quoted strings
168  *  <xui> quoted identifier with Unicode escapes
169  *  <xus> quoted string with Unicode escapes
170  *
171  * Note: we intentionally don't mimic the backend's <xeu> state; we have
172  * no need to distinguish it from <xe> state, and no good way to get out
173  * of it in error cases.  The backend just throws yyerror() in those
174  * cases, but that's not an option here.
175  */
176
177 %x xb
178 %x xc
179 %x xd
180 %x xh
181 %x xe
182 %x xq
183 %x xdolq
184 %x xui
185 %x xus
186 /* Additional exclusive states for psql only: lex backslash commands */
187 %x xslashcmd
188 %x xslashargstart
189 %x xslasharg
190 %x xslashquote
191 %x xslashbackquote
192 %x xslashdquote
193 %x xslashwholeline
194 %x xslashend
195
196 /*
197  * In order to make the world safe for Windows and Mac clients as well as
198  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
199  * sequence will be seen as two successive newlines, but that doesn't cause
200  * any problems.  Comments that start with -- and extend to the next
201  * newline are treated as equivalent to a single whitespace character.
202  *
203  * NOTE a fine point: if there is no newline following --, we will absorb
204  * everything to the end of the input as a comment.  This is correct.  Older
205  * versions of Postgres failed to recognize -- as a comment if the input
206  * did not end with a newline.
207  *
208  * XXX perhaps \f (formfeed) should be treated as a newline as well?
209  *
210  * XXX if you change the set of whitespace characters, fix scanner_isspace()
211  * to agree, and see also the plpgsql lexer.
212  */
213
214 space                   [ \t\n\r\f]
215 horiz_space             [ \t\f]
216 newline                 [\n\r]
217 non_newline             [^\n\r]
218
219 comment                 ("--"{non_newline}*)
220
221 whitespace              ({space}+|{comment})
222
223 /*
224  * SQL requires at least one newline in the whitespace separating
225  * string literals that are to be concatenated.  Silly, but who are we
226  * to argue?  Note that {whitespace_with_newline} should not have * after
227  * it, whereas {whitespace} should generally have a * after it...
228  */
229
230 special_whitespace              ({space}+|{comment}{newline})
231 horiz_whitespace                ({horiz_space}|{comment})
232 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
233
234 /*
235  * To ensure that {quotecontinue} can be scanned without having to back up
236  * if the full pattern isn't matched, we include trailing whitespace in
237  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
238  * except for {quote} followed by whitespace and just one "-" (not two,
239  * which would start a {comment}).  To cover that we have {quotefail}.
240  * The actions for {quotestop} and {quotefail} must throw back characters
241  * beyond the quote proper.
242  */
243 quote                   '
244 quotestop               {quote}{whitespace}*
245 quotecontinue   {quote}{whitespace_with_newline}{quote}
246 quotefail               {quote}{whitespace}*"-"
247
248 /* Bit string
249  * It is tempting to scan the string for only those characters
250  * which are allowed. However, this leads to silently swallowed
251  * characters if illegal characters are included in the string.
252  * For example, if xbinside is [01] then B'ABCD' is interpreted
253  * as a zero-length string, and the ABCD' is lost!
254  * Better to pass the string forward and let the input routines
255  * validate the contents.
256  */
257 xbstart                 [bB]{quote}
258 xbinside                [^']*
259
260 /* Hexadecimal number */
261 xhstart                 [xX]{quote}
262 xhinside                [^']*
263
264 /* National character */
265 xnstart                 [nN]{quote}
266
267 /* Quoted string that allows backslash escapes */
268 xestart                 [eE]{quote}
269 xeinside                [^\\']+
270 xeescape                [\\][^0-7]
271 xeoctesc                [\\][0-7]{1,3}
272 xehexesc                [\\]x[0-9A-Fa-f]{1,2}
273 xeunicode               [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
274 xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
275
276 /* Extended quote
277  * xqdouble implements embedded quote, ''''
278  */
279 xqstart                 {quote}
280 xqdouble                {quote}{quote}
281 xqinside                [^']+
282
283 /* $foo$ style quotes ("dollar quoting")
284  * The quoted string starts with $foo$ where "foo" is an optional string
285  * in the form of an identifier, except that it may not contain "$",
286  * and extends to the first occurrence of an identical string.
287  * There is *no* processing of the quoted text.
288  *
289  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
290  * fails to match its trailing "$".
291  */
292 dolq_start              [A-Za-z\200-\377_]
293 dolq_cont               [A-Za-z\200-\377_0-9]
294 dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
295 dolqfailed              \${dolq_start}{dolq_cont}*
296 dolqinside              [^$]+
297
298 /* Double quote
299  * Allows embedded spaces and other special characters into identifiers.
300  */
301 dquote                  \"
302 xdstart                 {dquote}
303 xdstop                  {dquote}
304 xddouble                {dquote}{dquote}
305 xdinside                [^"]+
306
307 /* Unicode escapes */
308 uescape                 [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
309 /* error rule to avoid backup */
310 uescapefail             ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
311
312 /* Quoted identifier with Unicode escapes */
313 xuistart                [uU]&{dquote}
314 xuistop1                {dquote}{whitespace}*{uescapefail}?
315 xuistop2                {dquote}{whitespace}*{uescape}
316
317 /* Quoted string with Unicode escapes */
318 xusstart                [uU]&{quote}
319 xusstop1                {quote}{whitespace}*{uescapefail}?
320 xusstop2                {quote}{whitespace}*{uescape}
321
322 /* error rule to avoid backup */
323 xufailed                [uU]&
324
325
326 /* C-style comments
327  *
328  * The "extended comment" syntax closely resembles allowable operator syntax.
329  * The tricky part here is to get lex to recognize a string starting with
330  * slash-star as a comment, when interpreting it as an operator would produce
331  * a longer match --- remember lex will prefer a longer match!  Also, if we
332  * have something like plus-slash-star, lex will think this is a 3-character
333  * operator whereas we want to see it as a + operator and a comment start.
334  * The solution is two-fold:
335  * 1. append {op_chars}* to xcstart so that it matches as much text as
336  *    {operator} would. Then the tie-breaker (first matching rule of same
337  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
338  *    in case it contains a star-slash that should terminate the comment.
339  * 2. In the operator rule, check for slash-star within the operator, and
340  *    if found throw it back with yyless().  This handles the plus-slash-star
341  *    problem.
342  * Dash-dash comments have similar interactions with the operator rule.
343  */
344 xcstart                 \/\*{op_chars}*
345 xcstop                  \*+\/
346 xcinside                [^*/]+
347
348 digit                   [0-9]
349 ident_start             [A-Za-z\200-\377_]
350 ident_cont              [A-Za-z\200-\377_0-9\$]
351
352 identifier              {ident_start}{ident_cont}*
353
354 typecast                "::"
355 dot_dot                 \.\.
356 colon_equals    ":="
357
358 /*
359  * "self" is the set of chars that should be returned as single-character
360  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
361  * which can be one or more characters long (but if a single-char token
362  * appears in the "self" set, it is not to be returned as an Op).  Note
363  * that the sets overlap, but each has some chars that are not in the other.
364  *
365  * If you change either set, adjust the character lists appearing in the
366  * rule for "operator"!
367  */
368 self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
369 op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
370 operator                {op_chars}+
371
372 /* We no longer allow unary minus in numbers.
373  * Instead we pass it separately to the parser, where it gets
374  * coerced via doNegate() -- Leon aug 20 1999
375  *
376  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
377  *
378  * {realfail1} and {realfail2} are added to prevent the need for scanner
379  * backup when the {real} rule fails to match completely.
380  */
381
382 integer                 {digit}+
383 decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
384 decimalfail             {digit}+\.\.
385 real                    ({integer}|{decimal})[Ee][-+]?{digit}+
386 realfail1               ({integer}|{decimal})[Ee]
387 realfail2               ({integer}|{decimal})[Ee][-+]
388
389 param                   \${integer}
390
391 /* psql-specific: characters allowed in variable names */
392 variable_char   [A-Za-z\200-\377_0-9]
393
394 other                   .
395
396 /*
397  * Dollar quoted strings are totally opaque, and no escaping is done on them.
398  * Other quoted strings must allow some special characters such as single-quote
399  *  and newline.
400  * Embedded single-quotes are implemented both in the SQL standard
401  *  style of two adjacent single quotes "''" and in the Postgres/Java style
402  *  of escaped-quote "\'".
403  * Other embedded escaped characters are matched explicitly (the remark that the
404  *  leading backslash is dropped does not apply here: these rules just ECHO).
405  * Note that xcstart must appear before operator, as explained above!
406  *  Also whitespace (comment) must appear before operator.
407  */
408
409 %%
410
411 {whitespace}    {
412                                         /*
413                                          * Note that the whitespace rule includes both true
414                                          * whitespace and single-line ("--" style) comments.
415                                          * We suppress whitespace at the start of the query
416                                          * buffer.  We also suppress all single-line comments,
417                                          * which is pretty dubious but is the historical
418                                          * behavior.
419                                          */
420                                         if (!(output_buf->len == 0 || yytext[0] == '-'))       /* {space} has no '-', so a leading '-' means a "--" comment */
421                                                 ECHO;
422                                 }
423
424 {xcstart}               {
425                                         cur_state->xcdepth = 0;         /* entering the outermost comment */
426                                         BEGIN(xc);
427                                         /* Put back any characters past slash-star; see above */
428                                         yyless(2);
429                                         ECHO;           /* after yyless(2) only the slash-star is emitted */
430                                 }
431
432 <xc>{xcstart}   {
433                                         cur_state->xcdepth++;           /* a nested comment opens */
434                                         /* Put back any characters past slash-star; see above */
435                                         yyless(2);
436                                         ECHO;
437                                 }
438
439 <xc>{xcstop}    {
440                                         if (cur_state->xcdepth <= 0)    /* this star-slash closes the outermost comment */
441                                         {
442                                                 BEGIN(INITIAL);
443                                         }
444                                         else
445                                                 cur_state->xcdepth--;   /* closes a nested comment only; stay in <xc> */
446                                         ECHO;
447                                 }
448
449 <xc>{xcinside}  {
450                                         ECHO;
451                                 }
452
453 <xc>{op_chars}  {
454                                         ECHO;
455                                 }
456
457 <xc>\*+                 {
458                                         ECHO;
459                                 }
460
461 {xbstart}               {
462                                         BEGIN(xb);
463                                         ECHO;
464                                 }
465 <xb>{quotestop} |
466 <xb>{quotefail} {
467                                         yyless(1);
468                                         BEGIN(INITIAL);
469                                         ECHO;
470                                 }
471 <xh>{xhinside}  |
472 <xb>{xbinside}  {
473                                         ECHO;
474                                 }
475 <xh>{quotecontinue}     |
476 <xb>{quotecontinue}     {
477                                         ECHO;
478                                 }
479
480 {xhstart}               {
481                                         /* Hexadecimal bit type.
482                                          * NOTE(review): the old remark here about placing a
483                                          * leading "x" on the string does not match this action,
484                                          * which only enters <xh> and echoes the matched text
485                                          * (already starting with x/X); presumably a backend relic.
486                                          */
487                                         BEGIN(xh);
488                                         ECHO;
489                                 }
490 <xh>{quotestop} |
491 <xh>{quotefail} {
492                                         yyless(1);              /* keep only the quote; throw back whatever followed it */
493                                         BEGIN(INITIAL);
494                                         ECHO;
495                                 }
496
497 {xnstart}               {
498                                         yyless(1);                              /* eat only 'n' this time */
499                                         ECHO;
500                                 }
501
502 {xqstart}               {
503                                         if (standard_strings())
504                                                 BEGIN(xq);
505                                         else
506                                                 BEGIN(xe);
507                                         ECHO;
508                                 }
509 {xestart}               {
510                                         BEGIN(xe);
511                                         ECHO;
512                                 }
513 {xusstart}              {
514                                         BEGIN(xus);
515                                         ECHO;
516                                 }
517 <xq,xe>{quotestop}      |
518 <xq,xe>{quotefail} {
519                                         yyless(1);
520                                         BEGIN(INITIAL);
521                                         ECHO;
522                                 }
523 <xus>{xusstop1} {
524                                         yyless(1);
525                                         BEGIN(INITIAL);
526                                         ECHO;
527                                 }
528 <xus>{xusstop2} {
529                                         BEGIN(INITIAL);
530                                         ECHO;
531                                 }
532 <xq,xe,xus>{xqdouble} {
533                                         ECHO;
534                                 }
535 <xq,xus>{xqinside}  {
536                                         ECHO;
537                                 }
538 <xe>{xeinside}  {
539                                         ECHO;
540                                 }
541 <xe>{xeunicode} {
542                                         ECHO;
543                                 }
544 <xe>{xeunicodefail}     {
545                                         ECHO;
546                                 }
547 <xe>{xeescape}  {
548                                         ECHO;
549                                 }
550 <xe>{xeoctesc}  {
551                                         ECHO;
552                                 }
553 <xe>{xehexesc}  {
554                                         ECHO;
555                                 }
556 <xq,xe,xus>{quotecontinue} {
557                                         ECHO;
558                                 }
559 <xe>.                   {
560                                         /* This is only needed for \ just before EOF */
561                                         ECHO;
562                                 }
563
564 {dolqdelim}             {
565                                         cur_state->dolqstart = pg_strdup(yytext);       /* remember the opening delimiter for comparison below */
566                                         BEGIN(xdolq);
567                                         ECHO;
568                                 }
569 {dolqfailed}    {
570                                         /* throw back all but the initial "$" */
571                                         yyless(1);
572                                         ECHO;
573                                 }
574 <xdolq>{dolqdelim} {
575                                         if (strcmp(yytext, cur_state->dolqstart) == 0)  /* same delimiter as the opener: the quote ends */
576                                         {
577                                                 free(cur_state->dolqstart);
578                                                 cur_state->dolqstart = NULL;
579                                                 BEGIN(INITIAL);
580                                         }
581                                         else
582                                         {
583                                                 /*
584                                                  * When we fail to match $...$ to dolqstart, transfer
585                                                  * the $... part to the output, but put back the final
586                                                  * $ for rescanning.  Consider $delim$...$junk$delim$
587                                                  */
588                                                 yyless(yyleng-1);
589                                         }
590                                         ECHO;
591                                 }
592 <xdolq>{dolqinside} {
593                                         ECHO;
594                                 }
595 <xdolq>{dolqfailed} {
596                                         ECHO;
597                                 }
598 <xdolq>.                {
599                                         /* This is only needed for $ inside the quoted text */
600                                         ECHO;
601                                 }
602
603 {xdstart}               {
604                                         BEGIN(xd);
605                                         ECHO;
606                                 }
607 {xuistart}              {
608                                         BEGIN(xui);
609                                         ECHO;
610                                 }
611 <xd>{xdstop}    {
612                                         BEGIN(INITIAL);
613                                         ECHO;
614                                 }
615 <xui>{xuistop1} {
616                                         yyless(1);
617                                         BEGIN(INITIAL);
618                                         ECHO;
619                                 }
620 <xui>{xuistop2} {
621                                         BEGIN(INITIAL);
622                                         ECHO;
623                                 }
624 <xd,xui>{xddouble}      {
625                                         ECHO;
626                                 }
627 <xd,xui>{xdinside}      {
628                                         ECHO;
629                                 }
630
631 {xufailed}      {
632                                         /* throw back all but the initial u/U */
633                                         yyless(1);
634                                         ECHO;
635                                 }
636
637 {typecast}              {
638                                         ECHO;
639                                 }
640
641 {dot_dot}               {
642                                         ECHO;
643                                 }
644
645 {colon_equals}  {
646                                         ECHO;
647                                 }
648
649         /*
650          * These rules are specific to psql --- they implement parenthesis
651          * counting and detection of command-ending semicolon.  These must
652          * appear before the {self} rule so that they take precedence over it.
653          */
654
655 "("                             {
656                                         cur_state->paren_depth++;
657                                         ECHO;
658                                 }
659
660 ")"                             {
661                                         if (cur_state->paren_depth > 0)         /* an unmatched ")" must not drive the depth negative */
662                                                 cur_state->paren_depth--;
663                                         ECHO;
664                                 }
665
666 ";"                             {
667                                         ECHO;
668                                         if (cur_state->paren_depth == 0)        /* a semicolon inside parentheses does not end the command */
669                                         {
670                                                 /* Terminate lexing temporarily */
671                                                 return LEXRES_SEMI;
672                                         }
673                                 }
674
675         /*
676          * psql-specific rules to handle backslash commands and variable
677          * substitution.  We want these before {self}, also.
678          */
679
680 "\\"[;:]                {
681                                         /* Force a semicolon or colon into the query buffer */
682                                         emit(yytext + 1, 1);    /* skip the backslash; emit just the one character after it */
683                                 }
684
685 "\\"                    {
686                                         /* Terminate lexing temporarily */
687                                         return LEXRES_BACKSLASH;
688                                 }
689
690 :{variable_char}+       {
691                                         /* Possible psql variable substitution */
692                                         char   *varname;
693                                         const char *value;
694
695                                         varname = extract_substring(yytext + 1, yyleng - 1);    /* skip the leading colon */
696                                         value = GetVariable(pset.vars, varname);
697
698                                         if (value)
699                                         {
700                                                 /* It is a variable, check for recursion */
701                                                 if (var_is_current_source(cur_state, varname))
702                                                 {
703                                                         /* Recursive expansion --- don't go there */
704                                                         psql_error("skipping recursive expansion of variable \"%s\"\n",
705                                                                            varname);
706                                                         /* Instead copy the string as is */
707                                                         ECHO;
708                                                 }
709                                                 else
710                                                 {
711                                                         /* OK, perform substitution */
712                                                         push_new_buffer(value, varname);
713                                                         /* yy_scan_string already made buffer active */
714                                                 }
715                                         }
716                                         else
717                                         {
718                                                 /*
719                                                  * if the variable doesn't exist we'll copy the
720                                                  * string as is
721                                                  */
722                                                 ECHO;
723                                         }
724
725                                         free(varname);          /* release the name obtained from extract_substring() on every path */
726                                 }
727
:'{variable_char}+'	{
					/* :'name' form */
					escape_variable(false);
				}

:\"{variable_char}+\"	{
					/* :"name" form */
					escape_variable(true);
				}

	/*
	 * The two rules below exist only so that the scanner never needs to
	 * back up when one of the two rules above matches just partway.
	 */

:'{variable_char}*	{
					/* Keep only the colon; hand the rest back to the input */
					yyless(1);
					ECHO;
				}

:\"{variable_char}*	{
					/* Keep only the colon; hand the rest back to the input */
					yyless(1);
					ECHO;
				}
752
753         /*
754          * Back to backend-compatible rules.
755          */
756
757 {self}                  {
758                                         ECHO;
759                                 }
760
761 {operator}              {
762                                         /*
763                                          * Check for embedded slash-star or dash-dash; those
764                                          * are comment starts, so operator must stop there.
765                                          * Note that slash-star or dash-dash at the first
766                                          * character will match a prior rule, not this one.
767                                          */
768                                         int             nchars = yyleng;
769                                         char   *slashstar = strstr(yytext, "/*");
770                                         char   *dashdash = strstr(yytext, "--");
771
772                                         if (slashstar && dashdash)
773                                         {
774                                                 /* if both appear, take the first one */
775                                                 if (slashstar > dashdash)
776                                                         slashstar = dashdash;
777                                         }
778                                         else if (!slashstar)
779                                                 slashstar = dashdash;
780                                         if (slashstar)
781                                                 nchars = slashstar - yytext;
782
783                                         /*
784                                          * For SQL compatibility, '+' and '-' cannot be the
785                                          * last char of a multi-char operator unless the operator
786                                          * contains chars that are not in SQL operators.
787                                          * The idea is to lex '=-' as two operators, but not
788                                          * to forbid operator names like '?-' that could not be
789                                          * sequences of SQL operators.
790                                          */
791                                         while (nchars > 1 &&
792                                                    (yytext[nchars-1] == '+' ||
793                                                         yytext[nchars-1] == '-'))
794                                         {
795                                                 int             ic;
796
797                                                 for (ic = nchars-2; ic >= 0; ic--)
798                                                 {
799                                                         if (strchr("~!@#^&|`?%", yytext[ic]))
800                                                                 break;
801                                                 }
802                                                 if (ic >= 0)
803                                                         break; /* found a char that makes it OK */
804                                                 nchars--; /* else remove the +/-, and check again */
805                                         }
806
807                                         if (nchars < yyleng)
808                                         {
809                                                 /* Strip the unwanted chars from the token */
810                                                 yyless(nchars);
811                                         }
812                                         ECHO;
813                                 }
814
{param}			{ ECHO; }

{integer}		{ ECHO; }
{decimal}		{ ECHO; }
{decimalfail}	{
					/* give back the trailing "..", leaving an integer */
					yyless(yyleng - 2);
					ECHO;
				}
{real}			{ ECHO; }
{realfail1}		{
					/*
					 * Give back the [Ee] and treat what is left as
					 * {decimal}.  The input might really be {integer}, but
					 * this case will almost certainly end in a syntax error
					 * anyway, so we don't bother to tell them apart.
					 */
					yyless(yyleng - 1);
					ECHO;
				}
{realfail2}		{
					/* give back the [Ee][+-], then proceed as for realfail1 */
					yyless(yyleng - 2);
					ECHO;
				}


{identifier}	{ ECHO; }

{other}			{ ECHO; }
856                                 }
857
858
859         /*
860          * Everything from here down is psql-specific.
861          */
862
<<EOF>>			{
					StackElem  *top;

					/* With no expansion buffers stacked, input is finished */
					if (cur_state->buffer_stack == NULL)
						return LEXRES_EOL;

					/*
					 * Otherwise a variable expansion just ran out: pop the
					 * inclusion stack and go on lexing whatever is below it.
					 */
					pop_buffer_stack(cur_state);

					top = cur_state->buffer_stack;
					if (top == NULL)
					{
						/* back to the outermost scan buffer */
						yy_switch_to_buffer(cur_state->scanbufhandle);
						cur_state->curline = cur_state->scanbuf;
						cur_state->refline = cur_state->scanline;
					}
					else
					{
						/* resume the enclosing variable expansion */
						yy_switch_to_buffer(top->buf);
						cur_state->curline = top->bufstring;
						if (top->origstring)
							cur_state->refline = top->origstring;
						else
							cur_state->refline = top->bufstring;
					}
				}
889
890         /*
891          * Exclusive lexer states to handle backslash command lexing
892          */
893
<xslashcmd>{
	/* whitespace or a backslash ends the command name; all else is eaten */

{space}|"\\"	{
					/* leave the terminator unread */
					yyless(0);
					return LEXRES_OK;
				}

{other}			{
					ECHO;
				}

}
905
<xslashargstart>{
	/*
	 * Skip whitespace ahead of an argument, then move to xslasharg state.
	 * "|" is tested here rather than there because it is special only as
	 * the first character of an argument.
	 */

{space}+		{ }

"|"				{
					if (option_type != OT_FILEPIPE)
					{
						/* not special here: rescan it as ordinary text */
						yyless(0);
						BEGIN(xslasharg);
					}
					else
					{
						/* handle as in the whole-string case */
						ECHO;
						BEGIN(xslashwholeline);
					}
				}

{other}			{
					/* first character of the argument: rescan in xslasharg */
					yyless(0);
					BEGIN(xslasharg);
				}

}
936
<xslasharg>{
	/*
	 * Default handling for the text of a slash command's argument.
	 *
	 * Note: unquoted_option_chars is the count of characters at the end of
	 * the argument that were not subject to any form of quoting;
	 * psql_scan_slash_option needs it to strip trailing semicolons safely.
	 */

{space}|"\\"	{
					/*
					 * Unquoted whitespace ends the argument, and a backslash
					 * ends the command or begins the next one.  Neither is
					 * eaten here.
					 *
					 * XXX as a result, options containing unquoted
					 * backslashes can't conveniently be accepted, so option
					 * processing that encourages use of backslashes is
					 * rather broken.
					 */
					yyless(0);
					return LEXRES_OK;
				}

{quote}			{
					unquoted_option_chars = 0;
					*option_quote = '\'';
					BEGIN(xslashquote);
				}

"`"				{
					/* note where in the output the backticked text begins */
					backtick_start_offset = output_buf->len;
					unquoted_option_chars = 0;
					*option_quote = '`';
					BEGIN(xslashbackquote);
				}

{dquote}		{
					ECHO;
					unquoted_option_chars = 0;
					*option_quote = '"';
					BEGIN(xslashdquote);
				}

:{variable_char}+	{
					/* Candidate psql variable reference */
					if (option_type != OT_NO_EVAL)
					{
						char	   *name;
						const char *val;

						name = extract_substring(yytext + 1, yyleng - 1);
						val = GetVariable(pset.vars, name);
						free(name);

						/*
						 * The value is emitted as-is, with no further
						 * examination.  That matches the pre-8.0 code, though
						 * not the treatment of variables outside backslash
						 * commands.  No recursion guard is needed here.
						 */
						if (val == NULL)
							ECHO;
						else
							appendPQExpBufferStr(output_buf, val);

						*option_quote = ':';
					}
					else
						ECHO;
					unquoted_option_chars = 0;
				}

:'{variable_char}+'	{
					if (option_type != OT_NO_EVAL)
					{
						escape_variable(false);
						*option_quote = ':';
					}
					else
						ECHO;
					unquoted_option_chars = 0;
				}


:\"{variable_char}+\"	{
					if (option_type != OT_NO_EVAL)
					{
						escape_variable(true);
						*option_quote = ':';
					}
					else
						ECHO;
					unquoted_option_chars = 0;
				}

:'{variable_char}*	{
					/* Keep only the colon; hand the rest back to the input */
					unquoted_option_chars++;
					yyless(1);
					ECHO;
				}

:\"{variable_char}*	{
					/* Keep only the colon; hand the rest back to the input */
					unquoted_option_chars++;
					yyless(1);
					ECHO;
				}

{other}			{
					/* ordinary unquoted character */
					unquoted_option_chars++;
					ECHO;
				}

}
1053
<xslashquote>{
	/*
	 * Inside single quotes: text is copied literally, apart from '' and
	 * backslash sequences.
	 */

{quote}			{
					/* closing quote: resume normal argument processing */
					BEGIN(xslasharg);
				}

{xqdouble}		{
					/* a doubled quote yields one quote character */
					appendPQExpBufferChar(output_buf, '\'');
				}

"\\n"			{
					appendPQExpBufferChar(output_buf, '\n');
				}
"\\t"			{
					appendPQExpBufferChar(output_buf, '\t');
				}
"\\b"			{
					appendPQExpBufferChar(output_buf, '\b');
				}
"\\r"			{
					appendPQExpBufferChar(output_buf, '\r');
				}
"\\f"			{
					appendPQExpBufferChar(output_buf, '\f');
				}

{xeoctesc}		{
					/* octal escape: the digits begin right after the backslash */
					long		code = strtol(yytext + 1, NULL, 8);

					appendPQExpBufferChar(output_buf, (char) code);
				}

{xehexesc}		{
					/* hex escape: the digits begin two characters in */
					long		code = strtol(yytext + 2, NULL, 16);

					appendPQExpBufferChar(output_buf, (char) code);
				}

"\\".			{
					/* any other escaped character stands for itself */
					emit(&yytext[1], 1);
				}

{other}|\n		{
					ECHO;
				}

}
1087
<xslashbackquote>{
	/*
	 * Inside backticks: copy everything up to the next backquote, then
	 * evaluate.
	 *
	 * XXX Possible future behavioral change: substitute for :VARIABLE?
	 */

"`"				{
					/* NO_EVAL mode leaves the command unevaluated */
					if (option_type == OT_NO_EVAL)
					{
						/* nothing to do */
					}
					else
						evaluate_backtick();
					BEGIN(xslasharg);
				}

{other}|\n		{
					ECHO;
				}

}
1105
<xslashdquote>{
	/* inside double quotes: copy verbatim, the double quotes included */

{dquote}		{
					/* emit the closing quote, then resume normal processing */
					ECHO;
					BEGIN(xslasharg);
				}

{other}|\n		{
					ECHO;
				}

}
1117
<xslashwholeline>{
	/* copy everything up to the end of the input line, */
	/* except that leading whitespace is suppressed */

{space}+		{
					/* whitespace is leading while nothing has been output */
					if (output_buf->len > 0)
					{
						ECHO;
					}
				}

{other}			{
					ECHO;
				}

}
1130
<xslashend>{
	/* at the end of a command a double backslash is eaten; nothing else is */

"\\\\"			{
					return LEXRES_OK;
				}

{other}|\n		{
					/* leave this character unread */
					yyless(0);
					return LEXRES_OK;
				}

}
1142
1143 %%
1144
1145 /*
1146  * Create a lexer working state struct.
1147  */
1148 PsqlScanState
1149 psql_scan_create(void)
1150 {
1151         PsqlScanState state;
1152
1153         state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
1154
1155         psql_scan_reset(state);
1156
1157         return state;
1158 }
1159
1160 /*
1161  * Destroy a lexer working state struct, releasing all resources.
1162  */
1163 void
1164 psql_scan_destroy(PsqlScanState state)
1165 {
1166         psql_scan_finish(state);
1167
1168         psql_scan_reset(state);
1169
1170         free(state);
1171 }
1172
1173 /*
1174  * Set up to perform lexing of the given input line.
1175  *
1176  * The text at *line, extending for line_len bytes, will be scanned by
1177  * subsequent calls to the psql_scan routines.  psql_scan_finish should
1178  * be called when scanning is complete.  Note that the lexer retains
1179  * a pointer to the storage at *line --- this string must not be altered
1180  * or freed until after psql_scan_finish is called.
1181  */
1182 void
1183 psql_scan_setup(PsqlScanState state,
1184                                 const char *line, int line_len)
1185 {
1186         /* Mustn't be scanning already */
1187         Assert(state->scanbufhandle == NULL);
1188         Assert(state->buffer_stack == NULL);
1189
1190         /* Do we need to hack the character set encoding? */
1191         state->encoding = pset.encoding;
1192         state->safe_encoding = pg_valid_server_encoding_id(state->encoding);
1193
1194         /* needed for prepare_buffer */
1195         cur_state = state;
1196
1197         /* Set up flex input buffer with appropriate translation and padding */
1198         state->scanbufhandle = prepare_buffer(line, line_len,
1199                                                                                   &state->scanbuf);
1200         state->scanline = line;
1201
1202         /* Set lookaside data in case we have to map unsafe encoding */
1203         state->curline = state->scanbuf;
1204         state->refline = state->scanline;
1205 }
1206
1207 /*
1208  * Do lexical analysis of SQL command text.
1209  *
1210  * The text previously passed to psql_scan_setup is scanned, and appended
1211  * (possibly with transformation) to query_buf.
1212  *
1213  * The return value indicates the condition that stopped scanning:
1214  *
1215  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
1216  * transferred to query_buf.)  The command accumulated in query_buf should
1217  * be executed, then clear query_buf and call again to scan the remainder
1218  * of the line.
1219  *
1220  * PSCAN_BACKSLASH: found a backslash that starts a psql special command.
1221  * Any previous data on the line has been transferred to query_buf.
1222  * The caller will typically next call psql_scan_slash_command(),
1223  * perhaps psql_scan_slash_option(), and psql_scan_slash_command_end().
1224  *
1225  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1226  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
1227  *
1228  * PSCAN_EOL: the end of the line was reached, and there is no lexical
1229  * reason to consider the command incomplete.  The caller may or may not
1230  * choose to send it.  *prompt is set to the appropriate prompt type if
1231  * the caller chooses to collect more input.
1232  *
1233  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1234  * be called next, then the cycle may be repeated with a fresh input line.
1235  *
1236  * In all cases, *prompt is set to an appropriate prompt type code for the
1237  * next line-input operation.
1238  */
1239 PsqlScanResult
1240 psql_scan(PsqlScanState state,
1241                   PQExpBuffer query_buf,
1242                   promptStatus_t *prompt)
1243 {
1244         PsqlScanResult result;
1245         int                     lexresult;
1246
1247         /* Must be scanning already */
1248         Assert(state->scanbufhandle != NULL);
1249
1250         /* Set up static variables that will be used by yylex */
1251         cur_state = state;
1252         output_buf = query_buf;
1253
1254         if (state->buffer_stack != NULL)
1255                 yy_switch_to_buffer(state->buffer_stack->buf);
1256         else
1257                 yy_switch_to_buffer(state->scanbufhandle);
1258
1259         BEGIN(state->start_state);
1260
1261         /* And lex. */
1262         lexresult = yylex();
1263
1264         /* Update static vars back to the state struct */
1265         state->start_state = YY_START;
1266
1267         /*
1268          * Check termination state and return appropriate result info.
1269          */
1270         switch (lexresult)
1271         {
1272                 case LEXRES_EOL:                /* end of input */
1273                         switch (state->start_state)
1274                         {
1275                                 /* This switch must cover all non-slash-command states. */
1276                                 case INITIAL:
1277                                         if (state->paren_depth > 0)
1278                                         {
1279                                                 result = PSCAN_INCOMPLETE;
1280                                                 *prompt = PROMPT_PAREN;
1281                                         }
1282                                         else if (query_buf->len > 0)
1283                                         {
1284                                                 result = PSCAN_EOL;
1285                                                 *prompt = PROMPT_CONTINUE;
1286                                         }
1287                                         else
1288                                         {
1289                                                 /* never bother to send an empty buffer */
1290                                                 result = PSCAN_INCOMPLETE;
1291                                                 *prompt = PROMPT_READY;
1292                                         }
1293                                         break;
1294                                 case xb:
1295                                         result = PSCAN_INCOMPLETE;
1296                                         *prompt = PROMPT_SINGLEQUOTE;
1297                                         break;
1298                                 case xc:
1299                                         result = PSCAN_INCOMPLETE;
1300                                         *prompt = PROMPT_COMMENT;
1301                                         break;
1302                                 case xd:
1303                                         result = PSCAN_INCOMPLETE;
1304                                         *prompt = PROMPT_DOUBLEQUOTE;
1305                                         break;
1306                                 case xh:
1307                                         result = PSCAN_INCOMPLETE;
1308                                         *prompt = PROMPT_SINGLEQUOTE;
1309                                         break;
1310                                 case xe:
1311                                         result = PSCAN_INCOMPLETE;
1312                                         *prompt = PROMPT_SINGLEQUOTE;
1313                                         break;
1314                                 case xq:
1315                                         result = PSCAN_INCOMPLETE;
1316                                         *prompt = PROMPT_SINGLEQUOTE;
1317                                         break;
1318                                 case xdolq:
1319                                         result = PSCAN_INCOMPLETE;
1320                                         *prompt = PROMPT_DOLLARQUOTE;
1321                                         break;
1322                                 case xui:
1323                                         result = PSCAN_INCOMPLETE;
1324                                         *prompt = PROMPT_DOUBLEQUOTE;
1325                                         break;
1326                                 case xus:
1327                                         result = PSCAN_INCOMPLETE;
1328                                         *prompt = PROMPT_SINGLEQUOTE;
1329                                         break;
1330                                 default:
1331                                         /* can't get here */
1332                                         fprintf(stderr, "invalid YY_START\n");
1333                                         exit(1);
1334                         }
1335                         break;
1336                 case LEXRES_SEMI:               /* semicolon */
1337                         result = PSCAN_SEMICOLON;
1338                         *prompt = PROMPT_READY;
1339                         break;
1340                 case LEXRES_BACKSLASH:  /* backslash */
1341                         result = PSCAN_BACKSLASH;
1342                         *prompt = PROMPT_READY;
1343                         break;
1344                 default:
1345                         /* can't get here */
1346                         fprintf(stderr, "invalid yylex result\n");
1347                         exit(1);
1348         }
1349
1350         return result;
1351 }
1352
1353 /*
1354  * Clean up after scanning a string.  This flushes any unread input and
1355  * releases resources (but not the PsqlScanState itself).  Note however
1356  * that this does not reset the lexer scan state; that can be done by
1357  * psql_scan_reset(), which is an orthogonal operation.
1358  *
1359  * It is legal to call this when not scanning anything (makes it easier
1360  * to deal with error recovery).
1361  */
1362 void
1363 psql_scan_finish(PsqlScanState state)
1364 {
1365         /* Drop any incomplete variable expansions. */
1366         while (state->buffer_stack != NULL)
1367                 pop_buffer_stack(state);
1368
1369         /* Done with the outer scan buffer, too */
1370         if (state->scanbufhandle)
1371                 yy_delete_buffer(state->scanbufhandle);
1372         state->scanbufhandle = NULL;
1373         if (state->scanbuf)
1374                 free(state->scanbuf);
1375         state->scanbuf = NULL;
1376 }
1377
1378 /*
1379  * Reset lexer scanning state to start conditions.  This is appropriate
1380  * for executing \r psql commands (or any other time that we discard the
1381  * prior contents of query_buf).  It is not, however, necessary to do this
1382  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1383  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1384  * conditions are returned.
1385  *
1386  * Note that this is unrelated to flushing unread input; that task is
1387  * done by psql_scan_finish().
1388  */
1389 void
1390 psql_scan_reset(PsqlScanState state)
1391 {
1392         state->start_state = INITIAL;
1393         state->paren_depth = 0;
1394         state->xcdepth = 0;                     /* not really necessary */
1395         if (state->dolqstart)
1396                 free(state->dolqstart);
1397         state->dolqstart = NULL;
1398 }
1399
1400 /*
1401  * Return true if lexer is currently in an "inside quotes" state.
1402  *
1403  * This is pretty grotty but is needed to preserve the old behavior
1404  * that mainloop.c drops blank lines not inside quotes without even
1405  * echoing them.
1406  */
1407 bool
1408 psql_scan_in_quote(PsqlScanState state)
1409 {
1410         return state->start_state != INITIAL;
1411 }
1412
1413 /*
1414  * Scan the command name of a psql backslash command.  This should be called
1415  * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
1416  * has been consumed through the leading backslash.
1417  *
1418  * The return value is a malloc'd copy of the command name, as parsed off
1419  * from the input.
1420  */
1421 char *
1422 psql_scan_slash_command(PsqlScanState state)
1423 {
1424         PQExpBufferData mybuf;
1425
1426         /* Must be scanning already */
1427         Assert(state->scanbufhandle != NULL);
1428
1429         /* Build a local buffer that we'll return the data of */
1430         initPQExpBuffer(&mybuf);
1431
1432         /* Set up static variables that will be used by yylex */
1433         cur_state = state;
1434         output_buf = &mybuf;
1435
1436         if (state->buffer_stack != NULL)
1437                 yy_switch_to_buffer(state->buffer_stack->buf);
1438         else
1439                 yy_switch_to_buffer(state->scanbufhandle);
1440
1441         BEGIN(xslashcmd);
1442
1443         /* And lex. */
1444         yylex();
1445
1446         /* There are no possible errors in this lex state... */
1447
1448         return mybuf.data;
1449 }
1450
1451 /*
1452  * Parse off the next argument for a backslash command, and return it as a
1453  * malloc'd string.  If there are no more arguments, returns NULL.
1454  *
1455  * type tells what processing, if any, to perform on the option string;
1456  * for example, if it's a SQL identifier, we want to downcase any unquoted
1457  * letters.
1458  *
1459  * if quote is not NULL, *quote is set to 0 if no quoting was found, else
1460  * the last quote symbol used in the argument.
1461  *
1462  * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
1463  * be taken as part of the option string will be stripped.
1464  *
1465  * NOTE: the only possible syntax errors for backslash options are unmatched
1466  * quotes, which are detected when we run out of input.  Therefore, on a
1467  * syntax error we just throw away the string and return NULL; there is no
1468  * need to worry about flushing remaining input.
1469  */
1470 char *
1471 psql_scan_slash_option(PsqlScanState state,
1472                                            enum slash_option_type type,
1473                                            char *quote,
1474                                            bool semicolon)
1475 {
1476         PQExpBufferData mybuf;
1477         int                     lexresult PG_USED_FOR_ASSERTS_ONLY;
1478         char            local_quote;
1479
1480         /* Must be scanning already */
1481         Assert(state->scanbufhandle != NULL);
1482
1483         if (quote == NULL)
1484                 quote = &local_quote;
1485         *quote = 0;
1486
1487         /* Build a local buffer that we'll return the data of */
1488         initPQExpBuffer(&mybuf);
1489
1490         /* Set up static variables that will be used by yylex */
1491         cur_state = state;
1492         output_buf = &mybuf;
1493         option_type = type;
1494         option_quote = quote;
1495         unquoted_option_chars = 0;
1496
1497         if (state->buffer_stack != NULL)
1498                 yy_switch_to_buffer(state->buffer_stack->buf);
1499         else
1500                 yy_switch_to_buffer(state->scanbufhandle);
1501
1502         if (type == OT_WHOLE_LINE)
1503                 BEGIN(xslashwholeline);
1504         else
1505                 BEGIN(xslashargstart);
1506
1507         /* And lex. */
1508         lexresult = yylex();
1509
1510         /*
1511          * Check the lex result: we should have gotten back either LEXRES_OK
1512          * or LEXRES_EOL (the latter indicating end of string).  If we were inside
1513          * a quoted string, as indicated by YY_START, EOL is an error.
1514          */
1515         Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
1516
1517         switch (YY_START)
1518         {
1519                 case xslashargstart:
1520                         /* empty arg */
1521                         break;
1522                 case xslasharg:
1523                         /* Strip any unquoted trailing semi-colons if requested */
1524                         if (semicolon)
1525                         {
1526                                 while (unquoted_option_chars-- > 0 &&
1527                                            mybuf.len > 0 &&
1528                                            mybuf.data[mybuf.len - 1] == ';')
1529                                 {
1530                                         mybuf.data[--mybuf.len] = '\0';
1531                                 }
1532                         }
1533
1534                         /*
1535                          * If SQL identifier processing was requested, then we strip out
1536                          * excess double quotes and downcase unquoted letters.
1537                          * Doubled double-quotes become output double-quotes, per spec.
1538                          *
1539                          * Note that a string like FOO"BAR"BAZ will be converted to
1540                          * fooBARbaz; this is somewhat inconsistent with the SQL spec,
1541                          * which would have us parse it as several identifiers.  But
1542                          * for psql's purposes, we want a string like "foo"."bar" to
1543                          * be treated as one option, so there's little choice.
1544                          */
1545                         if (type == OT_SQLID || type == OT_SQLIDHACK)
1546                         {
1547                                 bool            inquotes = false;
1548                                 char       *cp = mybuf.data;
1549
1550                                 while (*cp)
1551                                 {
1552                                         if (*cp == '"')
1553                                         {
1554                                                 if (inquotes && cp[1] == '"')
1555                                                 {
1556                                                         /* Keep the first quote, remove the second */
1557                                                         cp++;
1558                                                 }
1559                                                 inquotes = !inquotes;
1560                                                 /* Collapse out quote at *cp */
1561                                                 memmove(cp, cp + 1, strlen(cp));
1562                                                 mybuf.len--;
1563                                                 /* do not advance cp */
1564                                         }
1565                                         else
1566                                         {
1567                                                 if (!inquotes && type == OT_SQLID)
1568                                                         *cp = pg_tolower((unsigned char) *cp);
1569                                                 cp += PQmblen(cp, pset.encoding);
1570                                         }
1571                                 }
1572                         }
1573                         break;
1574                 case xslashquote:
1575                 case xslashbackquote:
1576                 case xslashdquote:
1577                         /* must have hit EOL inside quotes */
1578                         psql_error("unterminated quoted string\n");
1579                         termPQExpBuffer(&mybuf);
1580                         return NULL;
1581                 case xslashwholeline:
1582                         /* always okay */
1583                         break;
1584                 default:
1585                         /* can't get here */
1586                         fprintf(stderr, "invalid YY_START\n");
1587                         exit(1);
1588         }
1589
1590         /*
1591          * An unquoted empty argument isn't possible unless we are at end of
1592          * command.  Return NULL instead.
1593          */
1594         if (mybuf.len == 0 && *quote == 0)
1595         {
1596                 termPQExpBuffer(&mybuf);
1597                 return NULL;
1598         }
1599
1600         /* Else return the completed string. */
1601         return mybuf.data;
1602 }
1603
1604 /*
1605  * Eat up any unused \\ to complete a backslash command.
1606  */
1607 void
1608 psql_scan_slash_command_end(PsqlScanState state)
1609 {
1610         /* Must be scanning already */
1611         Assert(state->scanbufhandle != NULL);
1612
1613         /* Set up static variables that will be used by yylex */
1614         cur_state = state;
1615         output_buf = NULL;
1616
1617         if (state->buffer_stack != NULL)
1618                 yy_switch_to_buffer(state->buffer_stack->buf);
1619         else
1620                 yy_switch_to_buffer(state->scanbufhandle);
1621
1622         BEGIN(xslashend);
1623
1624         /* And lex. */
1625         yylex();
1626
1627         /* There are no possible errors in this lex state... */
1628 }
1629
1630 /*
1631  * Evaluate a backticked substring of a slash command's argument.
1632  *
1633  * The portion of output_buf starting at backtick_start_offset is evaluated
1634  * as a shell command and then replaced by the command's output.
1635  */
1636 static void
1637 evaluate_backtick(void)
1638 {
1639         char       *cmd = output_buf->data + backtick_start_offset;
1640         PQExpBufferData cmd_output;
1641         FILE       *fd;
1642         bool            error = false;
1643         char            buf[512];
1644         size_t          result;
1645
1646         initPQExpBuffer(&cmd_output);
1647
1648         fd = popen(cmd, PG_BINARY_R);
1649         if (!fd)
1650         {
1651                 psql_error("%s: %s\n", cmd, strerror(errno));
1652                 error = true;
1653         }
1654
1655         if (!error)
1656         {
1657                 do
1658                 {
1659                         result = fread(buf, 1, sizeof(buf), fd);
1660                         if (ferror(fd))
1661                         {
1662                                 psql_error("%s: %s\n", cmd, strerror(errno));
1663                                 error = true;
1664                                 break;
1665                         }
1666                         appendBinaryPQExpBuffer(&cmd_output, buf, result);
1667                 } while (!feof(fd));
1668         }
1669
1670         if (fd && pclose(fd) == -1)
1671         {
1672                 psql_error("%s: %s\n", cmd, strerror(errno));
1673                 error = true;
1674         }
1675
1676         if (PQExpBufferDataBroken(cmd_output))
1677         {
1678                 psql_error("%s: out of memory\n", cmd);
1679                 error = true;
1680         }
1681
1682         /* Now done with cmd, delete it from output_buf */
1683         output_buf->len = backtick_start_offset;
1684         output_buf->data[output_buf->len] = '\0';
1685
1686         /* If no error, transfer result to output_buf */
1687         if (!error)
1688         {
1689                 /* strip any trailing newline */
1690                 if (cmd_output.len > 0 &&
1691                         cmd_output.data[cmd_output.len - 1] == '\n')
1692                         cmd_output.len--;
1693                 appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
1694         }
1695
1696         termPQExpBuffer(&cmd_output);
1697 }
1698
1699 /*
1700  * Push the given string onto the stack of stuff to scan.
1701  *
1702  * cur_state must point to the active PsqlScanState.
1703  *
1704  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1705  */
1706 static void
1707 push_new_buffer(const char *newstr, const char *varname)
1708 {
1709         StackElem  *stackelem;
1710
1711         stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1712
1713         /*
1714          * In current usage, the passed varname points at the current flex
1715          * input buffer; we must copy it before calling prepare_buffer()
1716          * because that will change the buffer state.
1717          */
1718         stackelem->varname = varname ? pg_strdup(varname) : NULL;
1719
1720         stackelem->buf = prepare_buffer(newstr, strlen(newstr),
1721                                                                         &stackelem->bufstring);
1722         cur_state->curline = stackelem->bufstring;
1723         if (cur_state->safe_encoding)
1724         {
1725                 stackelem->origstring = NULL;
1726                 cur_state->refline = stackelem->bufstring;
1727         }
1728         else
1729         {
1730                 stackelem->origstring = pg_strdup(newstr);
1731                 cur_state->refline = stackelem->origstring;
1732         }
1733         stackelem->next = cur_state->buffer_stack;
1734         cur_state->buffer_stack = stackelem;
1735 }
1736
1737 /*
1738  * Pop the topmost buffer stack item (there must be one!)
1739  *
1740  * NB: after this, the flex input state is unspecified; caller must
1741  * switch to an appropriate buffer to continue lexing.
1742  */
1743 static void
1744 pop_buffer_stack(PsqlScanState state)
1745 {
1746         StackElem  *stackelem = state->buffer_stack;
1747
1748         state->buffer_stack = stackelem->next;
1749         yy_delete_buffer(stackelem->buf);
1750         free(stackelem->bufstring);
1751         if (stackelem->origstring)
1752                 free(stackelem->origstring);
1753         if (stackelem->varname)
1754                 free(stackelem->varname);
1755         free(stackelem);
1756 }
1757
1758 /*
1759  * Check if specified variable name is the source for any string
1760  * currently being scanned
1761  */
1762 static bool
1763 var_is_current_source(PsqlScanState state, const char *varname)
1764 {
1765         StackElem  *stackelem;
1766
1767         for (stackelem = state->buffer_stack;
1768                  stackelem != NULL;
1769                  stackelem = stackelem->next)
1770         {
1771                 if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1772                         return true;
1773         }
1774         return false;
1775 }
1776
1777 /*
1778  * Set up a flex input buffer to scan the given data.  We always make a
1779  * copy of the data.  If working in an unsafe encoding, the copy has
1780  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1781  *
1782  * cur_state must point to the active PsqlScanState.
1783  *
1784  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1785  */
1786 static YY_BUFFER_STATE
1787 prepare_buffer(const char *txt, int len, char **txtcopy)
1788 {
1789         char       *newtxt;
1790
1791         /* Flex wants two \0 characters after the actual data */
1792         newtxt = pg_malloc(len + 2);
1793         *txtcopy = newtxt;
1794         newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1795
1796         if (cur_state->safe_encoding)
1797                 memcpy(newtxt, txt, len);
1798         else
1799         {
1800                 /* Gotta do it the hard way */
1801                 int             i = 0;
1802
1803                 while (i < len)
1804                 {
1805                         int             thislen = PQmblen(txt + i, cur_state->encoding);
1806
1807                         /* first byte should always be okay... */
1808                         newtxt[i] = txt[i];
1809                         i++;
1810                         while (--thislen > 0 && i < len)
1811                                 newtxt[i++] = (char) 0xFF;
1812                 }
1813         }
1814
1815         return yy_scan_buffer(newtxt, len + 2);
1816 }
1817
1818 /*
1819  * emit() --- body for ECHO macro
1820  *
1821  * NB: this must be used for ALL and ONLY the text copied from the flex
1822  * input data.  If you pass it something that is not part of the yytext
1823  * string, you are making a mistake.  Internally generated text can be
1824  * appended directly to output_buf.
1825  */
1826 static void
1827 emit(const char *txt, int len)
1828 {
1829         if (cur_state->safe_encoding)
1830                 appendBinaryPQExpBuffer(output_buf, txt, len);
1831         else
1832         {
1833                 /* Gotta do it the hard way */
1834                 const char *reference = cur_state->refline;
1835                 int             i;
1836
1837                 reference += (txt - cur_state->curline);
1838
1839                 for (i = 0; i < len; i++)
1840                 {
1841                         char    ch = txt[i];
1842
1843                         if (ch == (char) 0xFF)
1844                                 ch = reference[i];
1845                         appendPQExpBufferChar(output_buf, ch);
1846                 }
1847         }
1848 }
1849
1850 /*
1851  * extract_substring --- fetch the true value of (part of) the current token
1852  *
1853  * This is like emit(), except that the data is returned as a malloc'd string
1854  * rather than being pushed directly to output_buf.
1855  */
1856 static char *
1857 extract_substring(const char *txt, int len)
1858 {
1859         char       *result = (char *) pg_malloc(len + 1);
1860
1861         if (cur_state->safe_encoding)
1862                 memcpy(result, txt, len);
1863         else
1864         {
1865                 /* Gotta do it the hard way */
1866                 const char *reference = cur_state->refline;
1867                 int             i;
1868
1869                 reference += (txt - cur_state->curline);
1870
1871                 for (i = 0; i < len; i++)
1872                 {
1873                         char    ch = txt[i];
1874
1875                         if (ch == (char) 0xFF)
1876                                 ch = reference[i];
1877                         result[i] = ch;
1878                 }
1879         }
1880         result[len] = '\0';
1881         return result;
1882 }
1883
1884 /*
1885  * escape_variable --- process :'VARIABLE' or :"VARIABLE"
1886  *
1887  * If the variable name is found, escape its value using the appropriate
1888  * quoting method and emit the value to output_buf.  (Since the result is
1889  * surely quoted, there is never any reason to rescan it.)  If we don't
1890  * find the variable or the escaping function fails, emit the token as-is.
1891  */
1892 static void
1893 escape_variable(bool as_ident)
1894 {
1895         char       *varname;
1896         const char *value;
1897
1898         /* Variable lookup. */
1899         varname = extract_substring(yytext + 2, yyleng - 3);
1900         value = GetVariable(pset.vars, varname);
1901         free(varname);
1902
1903         /* Escaping. */
1904         if (value)
1905         {
1906                 if (!pset.db)
1907                         psql_error("can't escape without active connection\n");
1908                 else
1909                 {
1910                         char   *escaped_value;
1911
1912                         if (as_ident)
1913                                 escaped_value =
1914                                         PQescapeIdentifier(pset.db, value, strlen(value));
1915                         else
1916                                 escaped_value =
1917                                         PQescapeLiteral(pset.db, value, strlen(value));
1918
1919                         if (escaped_value == NULL)
1920                         {
1921                                 const char *error = PQerrorMessage(pset.db);
1922
1923                                 psql_error("%s", error);
1924                         }
1925                         else
1926                         {
1927                                 appendPQExpBufferStr(output_buf, escaped_value);
1928                                 PQfreemem(escaped_value);
1929                                 return;
1930                         }
1931                 }
1932         }
1933
1934         /*
1935          * If we reach this point, some kind of error has occurred.  Emit the
1936          * original text into the output buffer.
1937          */
1938         emit(yytext, yyleng);
1939 }