2 /*-------------------------------------------------------------------------
5 * lexical scanner for psql
7 * This code is mainly needed to determine where the end of a SQL statement
8 * is: we are looking for semicolons that are not within quotes, comments,
9 * or parentheses. The most reliable way to handle this is to borrow the
10 * backend's flex lexer rules, lock, stock, and barrel. The rules below
11 * are (except for a few) the same as the backend's, but their actions are
12 * just ECHO whereas the backend's actions generally do other things.
14 * XXX The rules in this file must be kept in sync with the backend lexer!!!
16 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
18 * The most difficult aspect of this code is that we need to work in multibyte
19 * encodings that are not ASCII-safe. A "safe" encoding is one in which each
20 * byte of a multibyte character has the high bit set (it's >= 0x80). Since
21 * all our lexing rules treat all high-bit-set characters alike, we don't
22 * really need to care whether such a byte is part of a sequence or not.
23 * In an "unsafe" encoding, we still expect the first byte of a multibyte
24 * sequence to be >= 0x80, but later bytes might not be. If we scan such
25 * a sequence as-is, the lexing rules could easily be fooled into matching
26 * such bytes to ordinary ASCII characters. Our solution for this is to
27 * substitute 0xFF for each non-first byte within the data presented to flex.
28 * The flex rules will then pass the FF's through unmolested. The emit()
29 * subroutine is responsible for looking back to the original string and
30 * replacing FF's with the corresponding original bytes.
32 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
33 * Portions Copyright (c) 1994, Regents of the University of California
36 * src/bin/psql/psqlscan.l
38 *-------------------------------------------------------------------------
40 #include "postgres_fe.h"
48 #include "variables.h"
52 * We use a stack of flex buffers to handle substitution of psql variables.
53 * Each stacked buffer contains the as-yet-unread text from one psql variable.
54 * When we pop the stack all the way, we resume reading from the outer buffer
55 * identified by scanbufhandle.
57 typedef struct StackElem
59 YY_BUFFER_STATE buf; /* flex input control structure */
60 char *bufstring; /* data actually being scanned by flex */
61 char *origstring; /* copy of original data, if needed */
62 char *varname; /* name of variable providing data, or NULL */
63 struct StackElem *next;
67 * All working state of the lexer must be stored in PsqlScanStateData
68 * between calls. This allows us to have multiple open lexer operations,
69 * which is needed for nested include files. The lexer itself is not
70 * recursive, but it must be re-entrant.
72 typedef struct PsqlScanStateData
74 StackElem *buffer_stack; /* stack of variable expansion buffers */
76 * These variables always refer to the outer buffer, never to any
77 * stacked variable-expansion buffer.
79 YY_BUFFER_STATE scanbufhandle;
80 char *scanbuf; /* start of outer-level input buffer */
81 const char *scanline; /* current input line at outer level */
83 /* safe_encoding, curline, refline are used by emit() to replace FFs */
84 int encoding; /* encoding being used now */
85 bool safe_encoding; /* is current encoding "safe"? */
86 const char *curline; /* actual flex input string for cur buf */
87 const char *refline; /* original data for cur buffer */
90 * All this state lives across successive input lines, until explicitly
91 * reset by psql_scan_reset.
93 int start_state; /* saved YY_START */
94 int paren_depth; /* depth of nesting in parentheses */
95 int xcdepth; /* depth of nesting in slash-star comments */
96 char *dolqstart; /* current $foo$ quote start string */
99 static PsqlScanState cur_state; /* current state while active */
101 static PQExpBuffer output_buf; /* current output buffer */
103 /* these variables do not need to be saved across calls */
104 static enum slash_option_type option_type;
105 static char *option_quote;
106 static int unquoted_option_chars;
107 static int backtick_start_offset;
110 /* Return values from yylex() */
111 #define LEXRES_EOL 0 /* end of input */
112 #define LEXRES_SEMI 1 /* command-terminating semicolon found */
113 #define LEXRES_BACKSLASH 2 /* backslash command start */
114 #define LEXRES_OK 3 /* OK completion of backslash argument */
119 static void evaluate_backtick(void);
120 static void push_new_buffer(const char *newstr, const char *varname);
121 static void pop_buffer_stack(PsqlScanState state);
122 static bool var_is_current_source(PsqlScanState state, const char *varname);
123 static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
125 static void emit(const char *txt, int len);
126 static char *extract_substring(const char *txt, int len);
127 static void escape_variable(bool as_ident);
129 #define ECHO emit(yytext, yyleng)
134 %option never-interactive
142 * All of the following definitions and rules should exactly match
143 * src/backend/parser/scan.l so far as the flex patterns are concerned.
144 * The rule bodies are just ECHO as opposed to what the backend does,
145 * however. (But be sure to duplicate code that affects the lexing process,
146 * such as BEGIN().) Also, psqlscan uses a single <<EOF>> rule whereas
147 * scan.l has a separate one for each exclusive state.
151 * OK, here is a short description of lex/flex rules behavior.
152 * The longest pattern which matches an input string is always chosen.
153 * For equal-length patterns, the first occurring in the rules list is chosen.
154 * INITIAL is the starting state, to which all non-conditional rules apply.
155 * Exclusive states change parsing rules while the state is active. When in
156 * an exclusive state, only those rules defined for that state apply.
158 * We use exclusive states for quoted strings, extended comments,
159 * and to eliminate parsing troubles for numeric strings.
161 * <xb> bit string literal
162 * <xc> extended C-style comments
163 * <xd> delimited identifiers (double-quoted identifiers)
164 * <xh> hexadecimal numeric string
165 * <xq> standard quoted strings
166 * <xe> extended quoted strings (support backslash escape sequences)
167 * <xdolq> $foo$ quoted strings
168 * <xui> quoted identifier with Unicode escapes
169 * <xus> quoted string with Unicode escapes
171 * Note: we intentionally don't mimic the backend's <xeu> state; we have
172 * no need to distinguish it from <xe> state, and no good way to get out
173 * of it in error cases. The backend just throws yyerror() in those
174 * cases, but that's not an option here.
186 /* Additional exclusive states for psql only: lex backslash commands */
197 * In order to make the world safe for Windows and Mac clients as well as
198 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
199 * sequence will be seen as two successive newlines, but that doesn't cause
200 * any problems. Comments that start with -- and extend to the next
201 * newline are treated as equivalent to a single whitespace character.
203 * NOTE a fine point: if there is no newline following --, we will absorb
204 * everything to the end of the input as a comment. This is correct. Older
205 * versions of Postgres failed to recognize -- as a comment if the input
206 * did not end with a newline.
208 * XXX perhaps \f (formfeed) should be treated as a newline as well?
210 * XXX if you change the set of whitespace characters, fix scanner_isspace()
211 * to agree, and see also the plpgsql lexer.
219 comment ("--"{non_newline}*)
221 whitespace ({space}+|{comment})
224 * SQL requires at least one newline in the whitespace separating
225 * string literals that are to be concatenated. Silly, but who are we
226 * to argue? Note that {whitespace_with_newline} should not have * after
227 * it, whereas {whitespace} should generally have a * after it...
230 special_whitespace ({space}+|{comment}{newline})
231 horiz_whitespace ({horiz_space}|{comment})
232 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
235 * To ensure that {quotecontinue} can be scanned without having to back up
236 * if the full pattern isn't matched, we include trailing whitespace in
237 * {quotestop}. This matches all cases where {quotecontinue} fails to match,
238 * except for {quote} followed by whitespace and just one "-" (not two,
239 * which would start a {comment}). To cover that we have {quotefail}.
240 * The actions for {quotestop} and {quotefail} must throw back characters
241 * beyond the quote proper.
244 quotestop {quote}{whitespace}*
245 quotecontinue {quote}{whitespace_with_newline}{quote}
246 quotefail {quote}{whitespace}*"-"
249 * It is tempting to scan the string for only those characters
250 * which are allowed. However, this leads to silently swallowed
251 * characters if illegal characters are included in the string.
252 * For example, if xbinside is [01] then B'ABCD' is interpreted
253 * as a zero-length string, and the ABCD' is lost!
254 * Better to pass the string forward and let the input routines
255 * validate the contents.
260 /* Hexadecimal number */
264 /* National character */
267 /* Quoted string that allows backslash escapes */
271 xeoctesc [\\][0-7]{1,3}
272 xehexesc [\\]x[0-9A-Fa-f]{1,2}
273 xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
274 xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
277 * xqdouble implements embedded quote, ''''
280 xqdouble {quote}{quote}
283 /* $foo$ style quotes ("dollar quoting")
284 * The quoted string starts with $foo$ where "foo" is an optional string
285 * in the form of an identifier, except that it may not contain "$",
286 * and extends to the first occurrence of an identical string.
287 * There is *no* processing of the quoted text.
289 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
290 * fails to match its trailing "$".
292 dolq_start [A-Za-z\200-\377_]
293 dolq_cont [A-Za-z\200-\377_0-9]
294 dolqdelim \$({dolq_start}{dolq_cont}*)?\$
295 dolqfailed \${dolq_start}{dolq_cont}*
299 * Allows embedded spaces and other special characters in identifiers.
304 xddouble {dquote}{dquote}
307 /* Unicode escapes */
308 uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
309 /* error rule to avoid backup */
310 uescapefail ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
312 /* Quoted identifier with Unicode escapes */
313 xuistart [uU]&{dquote}
314 xuistop1 {dquote}{whitespace}*{uescapefail}?
315 xuistop2 {dquote}{whitespace}*{uescape}
317 /* Quoted string with Unicode escapes */
318 xusstart [uU]&{quote}
319 xusstop1 {quote}{whitespace}*{uescapefail}?
320 xusstop2 {quote}{whitespace}*{uescape}
322 /* error rule to avoid backup */
328 * The "extended comment" syntax closely resembles allowable operator syntax.
329 * The tricky part here is to get lex to recognize a string starting with
330 * slash-star as a comment, when interpreting it as an operator would produce
331 * a longer match --- remember lex will prefer a longer match! Also, if we
332 * have something like plus-slash-star, lex will think this is a 3-character
333 * operator whereas we want to see it as a + operator and a comment start.
334 * The solution is two-fold:
335 * 1. append {op_chars}* to xcstart so that it matches as much text as
336 * {operator} would. Then the tie-breaker (first matching rule of same
337 * length) ensures xcstart wins. We put back the extra stuff with yyless()
338 * in case it contains a star-slash that should terminate the comment.
339 * 2. In the operator rule, check for slash-star within the operator, and
340 * if found throw it back with yyless(). This handles the plus-slash-star
342 * Dash-dash comments have similar interactions with the operator rule.
344 xcstart \/\*{op_chars}*
349 ident_start [A-Za-z\200-\377_]
350 ident_cont [A-Za-z\200-\377_0-9\$]
352 identifier {ident_start}{ident_cont}*
359 * "self" is the set of chars that should be returned as single-character
360 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
361 * which can be one or more characters long (but if a single-char token
362 * appears in the "self" set, it is not to be returned as an Op). Note
363 * that the sets overlap, but each has some chars that are not in the other.
365 * If you change either set, adjust the character lists appearing in the
366 * rule for "operator"!
368 self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
369 op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
372 /* We no longer allow unary minus in numbers.
373 * Instead we pass it separately to the parser. There it gets
374 * coerced via doNegate() -- Leon Aug 20 1999
376 * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
378 * {realfail1} and {realfail2} are added to prevent the need for scanner
379 * backup when the {real} rule fails to match completely.
383 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
384 decimalfail {digit}+\.\.
385 real ({integer}|{decimal})[Ee][-+]?{digit}+
386 realfail1 ({integer}|{decimal})[Ee]
387 realfail2 ({integer}|{decimal})[Ee][-+]
391 /* psql-specific: characters allowed in variable names */
392 variable_char [A-Za-z\200-\377_0-9]
397 * Dollar quoted strings are totally opaque, and no escaping is done on them.
398 * Other quoted strings must allow some special characters such as single-quote
400 * Embedded single-quotes are implemented both in the SQL standard
401 * style of two adjacent single quotes "''" and in the Postgres/Java style
402 * of escaped-quote "\'".
403 * Other embedded escaped characters are matched explicitly and the leading
404 * backslash is dropped from the string.
405 * Note that xcstart must appear before operator, as explained above!
406 * Also whitespace (comment) must appear before operator.
413 * Note that the whitespace rule includes both true
414 * whitespace and single-line ("--" style) comments.
415 * We suppress whitespace at the start of the query
416 * buffer. We also suppress all single-line comments,
417 * which is pretty dubious but is the historical
420 if (!(output_buf->len == 0 || yytext[0] == '-'))
425 cur_state->xcdepth = 0;
427 /* Put back any characters past slash-star; see above */
433 cur_state->xcdepth++;
434 /* Put back any characters past slash-star; see above */
440 if (cur_state->xcdepth <= 0)
445 cur_state->xcdepth--;
475 <xh>{quotecontinue} |
476 <xb>{quotecontinue} {
481 /* Hexadecimal bit type.
482 * At some point we should simply pass the string
483 * forward to the parser and label it there.
484 * In the meantime, place a leading "x" on the string
485 * to mark it for the input routine as a hex string.
498 yyless(1); /* eat only 'n' this time */
503 if (standard_strings())
532 <xq,xe,xus>{xqdouble} {
544 <xe>{xeunicodefail} {
556 <xq,xe,xus>{quotecontinue} {
560 /* This is only needed for \ just before EOF */
565 cur_state->dolqstart = pg_strdup(yytext);
570 /* throw back all but the initial "$" */
575 if (strcmp(yytext, cur_state->dolqstart) == 0)
577 free(cur_state->dolqstart);
578 cur_state->dolqstart = NULL;
584 * When we fail to match $...$ to dolqstart, transfer
585 * the $... part to the output, but put back the final
586 * $ for rescanning. Consider $delim$...$junk$delim$
592 <xdolq>{dolqinside} {
595 <xdolq>{dolqfailed} {
599 /* This is only needed for $ inside the quoted text */
632 /* throw back all but the initial u/U */
650 * These rules are specific to psql --- they implement parenthesis
651 * counting and detection of command-ending semicolon. These must
652 * appear before the {self} rule so that they take precedence over it.
656 cur_state->paren_depth++;
661 if (cur_state->paren_depth > 0)
662 cur_state->paren_depth--;
668 if (cur_state->paren_depth == 0)
670 /* Terminate lexing temporarily */
676 * psql-specific rules to handle backslash commands and variable
677 * substitution. We want these before {self}, also.
681 /* Force a semicolon or colon into the query buffer */
686 /* Terminate lexing temporarily */
687 return LEXRES_BACKSLASH;
691 /* Possible psql variable substitution */
695 varname = extract_substring(yytext + 1, yyleng - 1);
696 value = GetVariable(pset.vars, varname);
700 /* It is a variable, check for recursion */
701 if (var_is_current_source(cur_state, varname))
703 /* Recursive expansion --- don't go there */
704 psql_error("skipping recursive expansion of variable \"%s\"\n",
706 /* Instead copy the string as is */
711 /* OK, perform substitution */
712 push_new_buffer(value, varname);
713 /* yy_scan_string already made buffer active */
719 * if the variable doesn't exist we'll copy the
728 :'{variable_char}+' {
729 escape_variable(false);
732 :\"{variable_char}+\" {
733 escape_variable(true);
737 * These rules just avoid the need for scanner backup if one of the
738 * two rules above fails to match completely.
742 /* Throw back everything but the colon */
747 :\"{variable_char}* {
748 /* Throw back everything but the colon */
754 * Back to backend-compatible rules.
763 * Check for embedded slash-star or dash-dash; those
764 * are comment starts, so operator must stop there.
765 * Note that slash-star or dash-dash at the first
766 * character will match a prior rule, not this one.
769 char *slashstar = strstr(yytext, "/*");
770 char *dashdash = strstr(yytext, "--");
772 if (slashstar && dashdash)
774 /* if both appear, take the first one */
775 if (slashstar > dashdash)
776 slashstar = dashdash;
779 slashstar = dashdash;
781 nchars = slashstar - yytext;
784 * For SQL compatibility, '+' and '-' cannot be the
785 * last char of a multi-char operator unless the operator
786 * contains chars that are not in SQL operators.
787 * The idea is to lex '=-' as two operators, but not
788 * to forbid operator names like '?-' that could not be
789 * sequences of SQL operators.
792 (yytext[nchars-1] == '+' ||
793 yytext[nchars-1] == '-'))
797 for (ic = nchars-2; ic >= 0; ic--)
799 if (strchr("~!@#^&|`?%", yytext[ic]))
803 break; /* found a char that makes it OK */
804 nchars--; /* else remove the +/-, and check again */
809 /* Strip the unwanted chars from the token */
826 /* throw back the .., and treat as integer */
835 * throw back the [Ee], and treat as {decimal}. Note
836 * that it is possible the input is actually {integer},
837 * but since this case will almost certainly lead to a
838 * syntax error anyway, we don't bother to distinguish.
844 /* throw back the [Ee][+-], and proceed as above */
860 * Everything from here down is psql-specific.
864 StackElem *stackelem = cur_state->buffer_stack;
866 if (stackelem == NULL)
867 return LEXRES_EOL; /* end of input reached */
870 * We were expanding a variable, so pop the inclusion
871 * stack and keep lexing
873 pop_buffer_stack(cur_state);
875 stackelem = cur_state->buffer_stack;
876 if (stackelem != NULL)
878 yy_switch_to_buffer(stackelem->buf);
879 cur_state->curline = stackelem->bufstring;
880 cur_state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
884 yy_switch_to_buffer(cur_state->scanbufhandle);
885 cur_state->curline = cur_state->scanbuf;
886 cur_state->refline = cur_state->scanline;
891 * Exclusive lexer states to handle backslash command lexing
895 /* command name ends at whitespace or backslash; eat all else */
908 * Discard any whitespace before argument, then go to xslasharg state.
909 * An exception is that "|" is only special at start of argument, so we
916 if (option_type == OT_FILEPIPE)
918 /* treat like whole-string case */
920 BEGIN(xslashwholeline);
924 /* vertical bar is not special otherwise */
939 * Default processing of text in a slash command's argument.
941 * Note: unquoted_option_chars counts the number of characters at the
942 * end of the argument that were not subject to any form of quoting.
943 * psql_scan_slash_option needs this to strip trailing semicolons safely.
948 * Unquoted space is end of arg; do not eat. Likewise
949 * backslash is end of command or next command, do not eat
951 * XXX this means we can't conveniently accept options
952 * that include unquoted backslashes; therefore, option
953 * processing that encourages use of backslashes is rather
961 *option_quote = '\'';
962 unquoted_option_chars = 0;
967 backtick_start_offset = output_buf->len;
969 unquoted_option_chars = 0;
970 BEGIN(xslashbackquote);
976 unquoted_option_chars = 0;
981 /* Possible psql variable substitution */
982 if (option_type == OT_NO_EVAL)
989 varname = extract_substring(yytext + 1, yyleng - 1);
990 value = GetVariable(pset.vars, varname);
994 * The variable value is just emitted without any
995 * further examination. This is consistent with the
996 * pre-8.0 code behavior, if not with the way that
997 * variables are handled outside backslash commands.
998 * Note that we needn't guard against recursion here.
1001 appendPQExpBufferStr(output_buf, value);
1005 *option_quote = ':';
1007 unquoted_option_chars = 0;
1010 :'{variable_char}+' {
1011 if (option_type == OT_NO_EVAL)
1015 escape_variable(false);
1016 *option_quote = ':';
1018 unquoted_option_chars = 0;
1022 :\"{variable_char}+\" {
1023 if (option_type == OT_NO_EVAL)
1027 escape_variable(true);
1028 *option_quote = ':';
1030 unquoted_option_chars = 0;
1033 :'{variable_char}* {
1034 /* Throw back everything but the colon */
1036 unquoted_option_chars++;
1040 :\"{variable_char}* {
1041 /* Throw back everything but the colon */
1043 unquoted_option_chars++;
1048 unquoted_option_chars++;
1056 * single-quoted text: copy literally except for '' and backslash
1060 {quote} { BEGIN(xslasharg); }
1062 {xqdouble} { appendPQExpBufferChar(output_buf, '\''); }
1064 "\\n" { appendPQExpBufferChar(output_buf, '\n'); }
1065 "\\t" { appendPQExpBufferChar(output_buf, '\t'); }
1066 "\\b" { appendPQExpBufferChar(output_buf, '\b'); }
1067 "\\r" { appendPQExpBufferChar(output_buf, '\r'); }
1068 "\\f" { appendPQExpBufferChar(output_buf, '\f'); }
1072 appendPQExpBufferChar(output_buf,
1073 (char) strtol(yytext + 1, NULL, 8));
1078 appendPQExpBufferChar(output_buf,
1079 (char) strtol(yytext + 2, NULL, 16));
1082 "\\". { emit(yytext + 1, 1); }
1084 {other}|\n { ECHO; }
1090 * backticked text: copy everything until next backquote, then evaluate.
1092 * XXX Possible future behavioral change: substitute for :VARIABLE?
1096 /* In NO_EVAL mode, don't evaluate the command */
1097 if (option_type != OT_NO_EVAL)
1098 evaluate_backtick();
1102 {other}|\n { ECHO; }
1107 /* double-quoted text: copy verbatim, including the double quotes */
1114 {other}|\n { ECHO; }
1119 /* copy everything until end of input line */
1120 /* but suppress leading whitespace */
1123 if (output_buf->len > 0)
1132 /* at end of command, eat a double backslash, but not anything else */
1134 "\\\\" { return LEXRES_OK; }
1146 * Create a lexer working state struct.
1149 psql_scan_create(void)
1151 PsqlScanState state;
1153 state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
1155 psql_scan_reset(state);
1161 * Destroy a lexer working state struct, releasing all resources.
1164 psql_scan_destroy(PsqlScanState state)
1166 psql_scan_finish(state);
1168 psql_scan_reset(state);
1174 * Set up to perform lexing of the given input line.
1176 * The text at *line, extending for line_len bytes, will be scanned by
1177 * subsequent calls to the psql_scan routines. psql_scan_finish should
1178 * be called when scanning is complete. Note that the lexer retains
1179 * a pointer to the storage at *line --- this string must not be altered
1180 * or freed until after psql_scan_finish is called.
1183 psql_scan_setup(PsqlScanState state,
1184 const char *line, int line_len)
1186 /* Mustn't be scanning already */
1187 Assert(state->scanbufhandle == NULL);
1188 Assert(state->buffer_stack == NULL);
1190 /* Do we need to hack the character set encoding? */
1191 state->encoding = pset.encoding;
1192 state->safe_encoding = pg_valid_server_encoding_id(state->encoding);
1194 /* needed for prepare_buffer */
1197 /* Set up flex input buffer with appropriate translation and padding */
1198 state->scanbufhandle = prepare_buffer(line, line_len,
1200 state->scanline = line;
1202 /* Set lookaside data in case we have to map unsafe encoding */
1203 state->curline = state->scanbuf;
1204 state->refline = state->scanline;
1208 * Do lexical analysis of SQL command text.
1210 * The text previously passed to psql_scan_setup is scanned, and appended
1211 * (possibly with transformation) to query_buf.
1213 * The return value indicates the condition that stopped scanning:
1215 * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1216 * transferred to query_buf.) The command accumulated in query_buf should
1217 * be executed, then clear query_buf and call again to scan the remainder
1220 * PSCAN_BACKSLASH: found a backslash that starts a psql special command.
1221 * Any previous data on the line has been transferred to query_buf.
1222 * The caller will typically next call psql_scan_slash_command(),
1223 * perhaps psql_scan_slash_option(), and psql_scan_slash_command_end().
1225 * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1226 * incomplete SQL command. *prompt is set to the appropriate prompt type.
1228 * PSCAN_EOL: the end of the line was reached, and there is no lexical
1229 * reason to consider the command incomplete. The caller may or may not
1230 * choose to send it. *prompt is set to the appropriate prompt type if
1231 * the caller chooses to collect more input.
1233 * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1234 * be called next, then the cycle may be repeated with a fresh input line.
1236 * In all cases, *prompt is set to an appropriate prompt type code for the
1237 * next line-input operation.
1240 psql_scan(PsqlScanState state,
1241 PQExpBuffer query_buf,
1242 promptStatus_t *prompt)
1244 PsqlScanResult result;
1247 /* Must be scanning already */
1248 Assert(state->scanbufhandle != NULL);
1250 /* Set up static variables that will be used by yylex */
1252 output_buf = query_buf;
1254 if (state->buffer_stack != NULL)
1255 yy_switch_to_buffer(state->buffer_stack->buf);
1257 yy_switch_to_buffer(state->scanbufhandle);
1259 BEGIN(state->start_state);
1262 lexresult = yylex();
1264 /* Update static vars back to the state struct */
1265 state->start_state = YY_START;
1268 * Check termination state and return appropriate result info.
1272 case LEXRES_EOL: /* end of input */
1273 switch (state->start_state)
1275 /* This switch must cover all non-slash-command states. */
1277 if (state->paren_depth > 0)
1279 result = PSCAN_INCOMPLETE;
1280 *prompt = PROMPT_PAREN;
1282 else if (query_buf->len > 0)
1285 *prompt = PROMPT_CONTINUE;
1289 /* never bother to send an empty buffer */
1290 result = PSCAN_INCOMPLETE;
1291 *prompt = PROMPT_READY;
1295 result = PSCAN_INCOMPLETE;
1296 *prompt = PROMPT_SINGLEQUOTE;
1299 result = PSCAN_INCOMPLETE;
1300 *prompt = PROMPT_COMMENT;
1303 result = PSCAN_INCOMPLETE;
1304 *prompt = PROMPT_DOUBLEQUOTE;
1307 result = PSCAN_INCOMPLETE;
1308 *prompt = PROMPT_SINGLEQUOTE;
1311 result = PSCAN_INCOMPLETE;
1312 *prompt = PROMPT_SINGLEQUOTE;
1315 result = PSCAN_INCOMPLETE;
1316 *prompt = PROMPT_SINGLEQUOTE;
1319 result = PSCAN_INCOMPLETE;
1320 *prompt = PROMPT_DOLLARQUOTE;
1323 result = PSCAN_INCOMPLETE;
1324 *prompt = PROMPT_DOUBLEQUOTE;
1327 result = PSCAN_INCOMPLETE;
1328 *prompt = PROMPT_SINGLEQUOTE;
1331 /* can't get here */
1332 fprintf(stderr, "invalid YY_START\n");
1336 case LEXRES_SEMI: /* semicolon */
1337 result = PSCAN_SEMICOLON;
1338 *prompt = PROMPT_READY;
1340 case LEXRES_BACKSLASH: /* backslash */
1341 result = PSCAN_BACKSLASH;
1342 *prompt = PROMPT_READY;
1345 /* can't get here */
1346 fprintf(stderr, "invalid yylex result\n");
1354 * Clean up after scanning a string. This flushes any unread input and
1355 * releases resources (but not the PsqlScanState itself). Note however
1356 * that this does not reset the lexer scan state; that can be done by
1357 * psql_scan_reset(), which is an orthogonal operation.
1359 * It is legal to call this when not scanning anything (makes it easier
1360 * to deal with error recovery).
1363 psql_scan_finish(PsqlScanState state)
1365 /* Drop any incomplete variable expansions. */
1366 while (state->buffer_stack != NULL)
1367 pop_buffer_stack(state);
1369 /* Done with the outer scan buffer, too */
1370 if (state->scanbufhandle)
1371 yy_delete_buffer(state->scanbufhandle);
1372 state->scanbufhandle = NULL;
1374 free(state->scanbuf);
1375 state->scanbuf = NULL;
1379 * Reset lexer scanning state to start conditions. This is appropriate
1380 * for executing \r psql commands (or any other time that we discard the
1381 * prior contents of query_buf). It is not, however, necessary to do this
1382 * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1383 * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1384 * conditions are returned.
1386 * Note that this is unrelated to flushing unread input; that task is
1387 * done by psql_scan_finish().
1390 psql_scan_reset(PsqlScanState state)
1392 state->start_state = INITIAL;
1393 state->paren_depth = 0;
1394 state->xcdepth = 0; /* not really necessary */
1395 if (state->dolqstart)
1396 free(state->dolqstart);
1397 state->dolqstart = NULL;
1401 * Return true if lexer is currently in an "inside quotes" state.
1403 * This is pretty grotty but is needed to preserve the old behavior
1404 * that mainloop.c drops blank lines not inside quotes without even
1408 psql_scan_in_quote(PsqlScanState state)
1410 return state->start_state != INITIAL;
1414 * Scan the command name of a psql backslash command. This should be called
1415 * after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input
1416 * has been consumed through the leading backslash.
1418 * The return value is a malloc'd copy of the command name, as parsed off
1422 psql_scan_slash_command(PsqlScanState state)
1424 PQExpBufferData mybuf;
1426 /* Must be scanning already */
1427 Assert(state->scanbufhandle != NULL);
1429 /* Build a local buffer that we'll return the data of */
1430 initPQExpBuffer(&mybuf);
1432 /* Set up static variables that will be used by yylex */
1434 output_buf = &mybuf;
1436 if (state->buffer_stack != NULL)
1437 yy_switch_to_buffer(state->buffer_stack->buf);
1439 yy_switch_to_buffer(state->scanbufhandle);
1446 /* There are no possible errors in this lex state... */
1452 * Parse off the next argument for a backslash command, and return it as a
1453 * malloc'd string. If there are no more arguments, returns NULL.
1455 * type tells what processing, if any, to perform on the option string;
1456 * for example, if it's a SQL identifier, we want to downcase any unquoted
1459 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
1460 * the last quote symbol used in the argument.
1462 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
1463 * be taken as part of the option string will be stripped.
1465 * NOTE: the only possible syntax errors for backslash options are unmatched
1466 * quotes, which are detected when we run out of input. Therefore, on a
1467 * syntax error we just throw away the string and return NULL; there is no
1468 * need to worry about flushing remaining input.
1471 psql_scan_slash_option(PsqlScanState state,
1472 enum slash_option_type type,
1476 PQExpBufferData mybuf;
1477 int lexresult PG_USED_FOR_ASSERTS_ONLY;
1480 /* Must be scanning already */
1481 Assert(state->scanbufhandle != NULL);
1484 quote = &local_quote;
1487 /* Build a local buffer that we'll return the data of */
1488 initPQExpBuffer(&mybuf);
1490 /* Set up static variables that will be used by yylex */
1492 output_buf = &mybuf;
1494 option_quote = quote;
1495 unquoted_option_chars = 0;
1497 if (state->buffer_stack != NULL)
1498 yy_switch_to_buffer(state->buffer_stack->buf);
1500 yy_switch_to_buffer(state->scanbufhandle);
1502 if (type == OT_WHOLE_LINE)
1503 BEGIN(xslashwholeline);
1505 BEGIN(xslashargstart);
1508 lexresult = yylex();
1511 * Check the lex result: we should have gotten back either LEXRES_OK
1512 * or LEXRES_EOL (the latter indicating end of string). If we were inside
1513 * a quoted string, as indicated by YY_START, EOL is an error.
1515 Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
1519 case xslashargstart:
1523 /* Strip any unquoted trailing semi-colons if requested */
1526 while (unquoted_option_chars-- > 0 &&
1528 mybuf.data[mybuf.len - 1] == ';')
1530 mybuf.data[--mybuf.len] = '\0';
1535 * If SQL identifier processing was requested, then we strip out
1536 * excess double quotes and downcase unquoted letters.
1537 * Doubled double-quotes become output double-quotes, per spec.
1539 * Note that a string like FOO"BAR"BAZ will be converted to
1540 * fooBARbaz; this is somewhat inconsistent with the SQL spec,
1541 * which would have us parse it as several identifiers. But
1542 * for psql's purposes, we want a string like "foo"."bar" to
1543 * be treated as one option, so there's little choice.
1545 if (type == OT_SQLID || type == OT_SQLIDHACK)
1547 bool inquotes = false;
1548 char *cp = mybuf.data;
1554 if (inquotes && cp[1] == '"')
1556 /* Keep the first quote, remove the second */
1559 inquotes = !inquotes;
1560 /* Collapse out quote at *cp */
1561 memmove(cp, cp + 1, strlen(cp));
1563 /* do not advance cp */
1567 if (!inquotes && type == OT_SQLID)
1568 *cp = pg_tolower((unsigned char) *cp);
1569 cp += PQmblen(cp, pset.encoding);
1575 case xslashbackquote:
1577 /* must have hit EOL inside quotes */
1578 psql_error("unterminated quoted string\n");
1579 termPQExpBuffer(&mybuf);
1581 case xslashwholeline:
1585 /* can't get here */
1586 fprintf(stderr, "invalid YY_START\n");
1591 * An unquoted empty argument isn't possible unless we are at end of
1592 * command. Return NULL instead.
1594 if (mybuf.len == 0 && *quote == 0)
1596 termPQExpBuffer(&mybuf);
1600 /* Else return the completed string. */
1605 * Eat up any unused \\ to complete a backslash command.
1608 psql_scan_slash_command_end(PsqlScanState state)
1610 /* Must be scanning already */
1611 Assert(state->scanbufhandle != NULL);
1613 /* Set up static variables that will be used by yylex */
1617 if (state->buffer_stack != NULL)
1618 yy_switch_to_buffer(state->buffer_stack->buf);
1620 yy_switch_to_buffer(state->scanbufhandle);
1627 /* There are no possible errors in this lex state... */
1631 * Evaluate a backticked substring of a slash command's argument.
1633 * The portion of output_buf starting at backtick_start_offset is evaluated
1634 * as a shell command and then replaced by the command's output.
1637 evaluate_backtick(void)
1639 char *cmd = output_buf->data + backtick_start_offset;
1640 PQExpBufferData cmd_output;
1646 initPQExpBuffer(&cmd_output);
1648 fd = popen(cmd, PG_BINARY_R);
1651 psql_error("%s: %s\n", cmd, strerror(errno));
1659 result = fread(buf, 1, sizeof(buf), fd);
1662 psql_error("%s: %s\n", cmd, strerror(errno));
1666 appendBinaryPQExpBuffer(&cmd_output, buf, result);
1667 } while (!feof(fd));
1670 if (fd && pclose(fd) == -1)
1672 psql_error("%s: %s\n", cmd, strerror(errno));
1676 if (PQExpBufferDataBroken(cmd_output))
1678 psql_error("%s: out of memory\n", cmd);
1682 /* Now done with cmd, delete it from output_buf */
1683 output_buf->len = backtick_start_offset;
1684 output_buf->data[output_buf->len] = '\0';
1686 /* If no error, transfer result to output_buf */
1689 /* strip any trailing newline */
1690 if (cmd_output.len > 0 &&
1691 cmd_output.data[cmd_output.len - 1] == '\n')
1693 appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
1696 termPQExpBuffer(&cmd_output);
1700 * Push the given string onto the stack of stuff to scan.
1702 * cur_state must point to the active PsqlScanState.
1704 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1707 push_new_buffer(const char *newstr, const char *varname)
1709 StackElem *stackelem;
1711 stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1714 * In current usage, the passed varname points at the current flex
1715 * input buffer; we must copy it before calling prepare_buffer()
1716 * because that will change the buffer state.
1718 stackelem->varname = varname ? pg_strdup(varname) : NULL;
1720 stackelem->buf = prepare_buffer(newstr, strlen(newstr),
1721 &stackelem->bufstring);
1722 cur_state->curline = stackelem->bufstring;
1723 if (cur_state->safe_encoding)
1725 stackelem->origstring = NULL;
1726 cur_state->refline = stackelem->bufstring;
1730 stackelem->origstring = pg_strdup(newstr);
1731 cur_state->refline = stackelem->origstring;
1733 stackelem->next = cur_state->buffer_stack;
1734 cur_state->buffer_stack = stackelem;
1738 * Pop the topmost buffer stack item (there must be one!)
1740 * NB: after this, the flex input state is unspecified; caller must
1741 * switch to an appropriate buffer to continue lexing.
1744 pop_buffer_stack(PsqlScanState state)
1746 StackElem *stackelem = state->buffer_stack;
1748 state->buffer_stack = stackelem->next;
1749 yy_delete_buffer(stackelem->buf);
1750 free(stackelem->bufstring);
1751 if (stackelem->origstring)
1752 free(stackelem->origstring);
1753 if (stackelem->varname)
1754 free(stackelem->varname);
1759 * Check if specified variable name is the source for any string
1760 * currently being scanned
1763 var_is_current_source(PsqlScanState state, const char *varname)
1765 StackElem *stackelem;
1767 for (stackelem = state->buffer_stack;
1769 stackelem = stackelem->next)
1771 if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1778 * Set up a flex input buffer to scan the given data. We always make a
1779 * copy of the data. If working in an unsafe encoding, the copy has
1780 * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1782 * cur_state must point to the active PsqlScanState.
1784 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1786 static YY_BUFFER_STATE
1787 prepare_buffer(const char *txt, int len, char **txtcopy)
1791 /* Flex wants two \0 characters after the actual data */
1792 newtxt = pg_malloc(len + 2);
1794 newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1796 if (cur_state->safe_encoding)
1797 memcpy(newtxt, txt, len);
1800 /* Gotta do it the hard way */
1805 int thislen = PQmblen(txt + i, cur_state->encoding);
1807 /* first byte should always be okay... */
1810 while (--thislen > 0 && i < len)
1811 newtxt[i++] = (char) 0xFF;
1815 return yy_scan_buffer(newtxt, len + 2);
1819 * emit() --- body for ECHO macro
1821 * NB: this must be used for ALL and ONLY the text copied from the flex
1822 * input data. If you pass it something that is not part of the yytext
1823 * string, you are making a mistake. Internally generated text can be
1824 * appended directly to output_buf.
1827 emit(const char *txt, int len)
1829 if (cur_state->safe_encoding)
1830 appendBinaryPQExpBuffer(output_buf, txt, len);
1833 /* Gotta do it the hard way */
1834 const char *reference = cur_state->refline;
1837 reference += (txt - cur_state->curline);
1839 for (i = 0; i < len; i++)
1843 if (ch == (char) 0xFF)
1845 appendPQExpBufferChar(output_buf, ch);
1851 * extract_substring --- fetch the true value of (part of) the current token
1853 * This is like emit(), except that the data is returned as a malloc'd string
1854 * rather than being pushed directly to output_buf.
1857 extract_substring(const char *txt, int len)
1859 char *result = (char *) pg_malloc(len + 1);
1861 if (cur_state->safe_encoding)
1862 memcpy(result, txt, len);
1865 /* Gotta do it the hard way */
1866 const char *reference = cur_state->refline;
1869 reference += (txt - cur_state->curline);
1871 for (i = 0; i < len; i++)
1875 if (ch == (char) 0xFF)
1885 * escape_variable --- process :'VARIABLE' or :"VARIABLE"
1887 * If the variable name is found, escape its value using the appropriate
1888 * quoting method and emit the value to output_buf. (Since the result is
1889 * surely quoted, there is never any reason to rescan it.) If we don't
1890 * find the variable or the escaping function fails, emit the token as-is.
1893 escape_variable(bool as_ident)
1898 /* Variable lookup. */
1899 varname = extract_substring(yytext + 2, yyleng - 3);
1900 value = GetVariable(pset.vars, varname);
1907 psql_error("can't escape without active connection\n");
1910 char *escaped_value;
1914 PQescapeIdentifier(pset.db, value, strlen(value));
1917 PQescapeLiteral(pset.db, value, strlen(value));
1919 if (escaped_value == NULL)
1921 const char *error = PQerrorMessage(pset.db);
1923 psql_error("%s", error);
1927 appendPQExpBufferStr(output_buf, escaped_value);
1928 PQfreemem(escaped_value);
1935 * If we reach this point, some kind of error has occurred. Emit the
1936 * original text into the output buffer.
1938 emit(yytext, yyleng);