2 /*-------------------------------------------------------------------------
5 * lexical scanner for psql (and other frontend programs)
7 * This code is mainly needed to determine where the end of a SQL statement
8 * is: we are looking for semicolons that are not within quotes, comments,
9 * or parentheses. The most reliable way to handle this is to borrow the
10 * backend's flex lexer rules, lock, stock, and barrel. The rules below
11 * are (except for a few) the same as the backend's, but their actions are
12 * just ECHO whereas the backend's actions generally do other things.
14 * XXX The rules in this file must be kept in sync with the backend lexer!!!
16 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
18 * See psqlscan_int.h for additional commentary.
20 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
21 * Portions Copyright (c) 1994, Regents of the University of California
24 * src/bin/psql/psqlscan.l
26 *-------------------------------------------------------------------------
28 #include "postgres_fe.h"
36 #include "psqlscan_int.h"
39 * Set the type of yyextra; we use it as a pointer back to the containing
42 #define YY_EXTRA_TYPE PsqlScanState
45 /* Return values from yylex() */
46 #define LEXRES_EOL 0 /* end of input */
47 #define LEXRES_SEMI 1 /* command-terminating semicolon found */
48 #define LEXRES_BACKSLASH 2 /* backslash command start */
51 static bool var_is_current_source(PsqlScanState state, const char *varname);
53 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
56 * Work around a bug in flex 2.5.35: it emits a couple of functions that
57 * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
58 * this would cause warnings. Providing our own declarations should be
59 * harmless even when the bug gets fixed.
61 extern int psql_yyget_column(yyscan_t yyscanner);
62 extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
68 %option never-interactive
74 %option prefix="psql_yy"
77 * All of the following definitions and rules should exactly match
78 * src/backend/parser/scan.l so far as the flex patterns are concerned.
79 * The rule bodies are just ECHO as opposed to what the backend does,
80 * however. (But be sure to duplicate code that affects the lexing process,
81 * such as BEGIN().) Also, psqlscan uses a single <<EOF>> rule whereas
82 * scan.l has a separate one for each exclusive state.
86 * OK, here is a short description of lex/flex rules behavior.
87 * The longest pattern which matches an input string is always chosen.
88 * For equal-length patterns, the first occurring in the rules list is chosen.
89 * INITIAL is the starting state, to which all non-conditional rules apply.
90 * Exclusive states change parsing rules while the state is active. When in
91 * an exclusive state, only those rules defined for that state apply.
93 * We use exclusive states for quoted strings, extended comments,
94 * and to eliminate parsing troubles for numeric strings.
96 * <xb> bit string literal
97 * <xc> extended C-style comments
98 * <xd> delimited identifiers (double-quoted identifiers)
99 * <xh> hexadecimal numeric string
100 * <xq> standard quoted strings
101 * <xe> extended quoted strings (support backslash escape sequences)
102 * <xdolq> $foo$ quoted strings
103 * <xui> quoted identifier with Unicode escapes
104 * <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
105 * <xus> quoted string with Unicode escapes
106 * <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
108 * Note: we intentionally don't mimic the backend's <xeu> state; we have
109 * no need to distinguish it from <xe> state, and no good way to get out
110 * of it in error cases. The backend just throws yyerror() in those
111 * cases, but that's not an option here.
127 * In order to make the world safe for Windows and Mac clients as well as
128 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
129 * sequence will be seen as two successive newlines, but that doesn't cause
130 * any problems. Comments that start with -- and extend to the next
131 * newline are treated as equivalent to a single whitespace character.
133 * NOTE a fine point: if there is no newline following --, we will absorb
134 * everything to the end of the input as a comment. This is correct. Older
135 * versions of Postgres failed to recognize -- as a comment if the input
136 * did not end with a newline.
138 * XXX perhaps \f (formfeed) should be treated as a newline as well?
140 * XXX if you change the set of whitespace characters, fix scanner_isspace()
141 * to agree, and see also the plpgsql lexer.
149 comment ("--"{non_newline}*)
151 whitespace ({space}+|{comment})
154 * SQL requires at least one newline in the whitespace separating
155 * string literals that are to be concatenated. Silly, but who are we
156 * to argue? Note that {whitespace_with_newline} should not have * after
157 * it, whereas {whitespace} should generally have a * after it...
160 special_whitespace ({space}+|{comment}{newline})
161 horiz_whitespace ({horiz_space}|{comment})
162 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
165 * To ensure that {quotecontinue} can be scanned without having to back up
166 * if the full pattern isn't matched, we include trailing whitespace in
167 * {quotestop}. This matches all cases where {quotecontinue} fails to match,
168 * except for {quote} followed by whitespace and just one "-" (not two,
169 * which would start a {comment}). To cover that we have {quotefail}.
170 * The actions for {quotestop} and {quotefail} must throw back characters
171 * beyond the quote proper.
174 quotestop {quote}{whitespace}*
175 quotecontinue {quote}{whitespace_with_newline}{quote}
176 quotefail {quote}{whitespace}*"-"
179 * It is tempting to scan the string for only those characters
180 * which are allowed. However, this leads to silently swallowed
181 * characters if illegal characters are included in the string.
182 * For example, if xbinside is [01] then B'ABCD' is interpreted
183 * as a zero-length string, and the ABCD' is lost!
184 * Better to pass the string forward and let the input routines
185 * validate the contents.
190 /* Hexadecimal number */
194 /* National character */
197 /* Quoted string that allows backslash escapes */
201 xeoctesc [\\][0-7]{1,3}
202 xehexesc [\\]x[0-9A-Fa-f]{1,2}
203 xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
204 xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
207 * xqdouble implements embedded quote, ''''
210 xqdouble {quote}{quote}
213 /* $foo$ style quotes ("dollar quoting")
214 * The quoted string starts with $foo$ where "foo" is an optional string
215 * in the form of an identifier, except that it may not contain "$",
216 * and extends to the first occurrence of an identical string.
217 * There is *no* processing of the quoted text.
219 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
220 * fails to match its trailing "$".
222 dolq_start [A-Za-z\200-\377_]
223 dolq_cont [A-Za-z\200-\377_0-9]
224 dolqdelim \$({dolq_start}{dolq_cont}*)?\$
225 dolqfailed \${dolq_start}{dolq_cont}*
229 * Allows embedded spaces and other special characters into identifiers.
234 xddouble {dquote}{dquote}
237 /* Unicode escapes */
238 uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
239 /* error rule to avoid backup */
240 uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
242 /* Quoted identifier with Unicode escapes */
243 xuistart [uU]&{dquote}
245 /* Quoted string with Unicode escapes */
246 xusstart [uU]&{quote}
248 /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
249 xustop1 {uescapefail}?
252 /* error rule to avoid backup */
258 * The "extended comment" syntax closely resembles allowable operator syntax.
259 * The tricky part here is to get lex to recognize a string starting with
260 * slash-star as a comment, when interpreting it as an operator would produce
261 * a longer match --- remember lex will prefer a longer match! Also, if we
262 * have something like plus-slash-star, lex will think this is a 3-character
263 * operator whereas we want to see it as a + operator and a comment start.
264 * The solution is two-fold:
265 * 1. append {op_chars}* to xcstart so that it matches as much text as
266 * {operator} would. Then the tie-breaker (first matching rule of same
267 * length) ensures xcstart wins. We put back the extra stuff with yyless()
268 * in case it contains a star-slash that should terminate the comment.
269 * 2. In the operator rule, check for slash-star within the operator, and
270 * if found throw it back with yyless(). This handles the plus-slash-star
272 * Dash-dash comments have similar interactions with the operator rule.
274 xcstart \/\*{op_chars}*
279 ident_start [A-Za-z\200-\377_]
280 ident_cont [A-Za-z\200-\377_0-9\$]
282 identifier {ident_start}{ident_cont}*
284 /* Assorted special-case operators and operator-like tokens */
295 * "self" is the set of chars that should be returned as single-character
296 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
297 * which can be one or more characters long (but if a single-char token
298 * appears in the "self" set, it is not to be returned as an Op). Note
299 * that the sets overlap, but each has some chars that are not in the other.
301 * If you change either set, adjust the character lists appearing in the
302 * rule for "operator"!
304 self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
305 op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
308 /* we no longer allow unary minus in numbers.
309 * instead we pass it separately to parser. there it gets
310 * coerced via doNegate() -- Leon aug 20 1999
312 * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
314 * {realfail1} and {realfail2} are added to prevent the need for scanner
315 * backup when the {real} rule fails to match completely.
319 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
320 decimalfail {digit}+\.\.
321 real ({integer}|{decimal})[Ee][-+]?{digit}+
322 realfail1 ({integer}|{decimal})[Ee]
323 realfail2 ({integer}|{decimal})[Ee][-+]
327 /* psql-specific: characters allowed in variable names */
328 variable_char [A-Za-z\200-\377_0-9]
333 * Dollar quoted strings are totally opaque, and no escaping is done on them.
334 * Other quoted strings must allow some special characters such as single-quote
336 * Embedded single-quotes are implemented both in the SQL standard
337 * style of two adjacent single quotes "''" and in the Postgres/Java style
338 * of escaped-quote "\'".
339 * Other embedded escaped characters are matched explicitly and the leading
340 * backslash is dropped from the string.
341 * Note that xcstart must appear before operator, as explained above!
342 * Also whitespace (comment) must appear before operator.
348 /* Declare some local variables inside yylex(), for convenience */
349 PsqlScanState cur_state = yyextra;
350 PQExpBuffer output_buf = cur_state->output_buf;
353 * Force flex into the state indicated by start_state. This has a
354 * couple of purposes: it lets some of the functions below set a
355 * new starting state without ugly direct access to flex variables,
356 * and it allows us to transition from one flex lexer to another
357 * so that we can lex different parts of the source string using
360 BEGIN(cur_state->start_state);
365 * Note that the whitespace rule includes both true
366 * whitespace and single-line ("--" style) comments.
367 * We suppress whitespace at the start of the query
368 * buffer. We also suppress all single-line comments,
369 * which is pretty dubious but is the historical
372 if (!(output_buf->len == 0 || yytext[0] == '-'))
377 cur_state->xcdepth = 0;
379 /* Put back any characters past slash-star; see above */
385 cur_state->xcdepth++;
386 /* Put back any characters past slash-star; see above */
392 if (cur_state->xcdepth <= 0)
397 cur_state->xcdepth--;
427 <xh>{quotecontinue} |
428 <xb>{quotecontinue} {
433 /* Hexadecimal bit type.
434 * At some point we should simply pass the string
435 * forward to the parser and label it there.
436 * In the meantime, place a leading "x" on the string
437 * to mark it for the input routine as a hex string.
450 yyless(1); /* eat only 'n' this time */
455 if (cur_state->std_strings)
481 <xusend>{whitespace} {
494 <xq,xe,xus>{xqdouble} {
506 <xe>{xeunicodefail} {
518 <xq,xe,xus>{quotecontinue} {
522 /* This is only needed for \ just before EOF */
527 cur_state->dolqstart = pg_strdup(yytext);
532 /* throw back all but the initial "$" */
537 if (strcmp(yytext, cur_state->dolqstart) == 0)
539 free(cur_state->dolqstart);
540 cur_state->dolqstart = NULL;
546 * When we fail to match $...$ to dolqstart, transfer
547 * the $... part to the output, but put back the final
548 * $ for rescanning. Consider $delim$...$junk$delim$
554 <xdolq>{dolqinside} {
557 <xdolq>{dolqfailed} {
561 /* This is only needed for $ inside the quoted text */
582 <xuiend>{whitespace} {
603 /* throw back all but the initial u/U */
641 * These rules are specific to psql --- they implement parenthesis
642 * counting and detection of command-ending semicolon. These must
643 * appear before the {self} rule so that they take precedence over it.
647 cur_state->paren_depth++;
652 if (cur_state->paren_depth > 0)
653 cur_state->paren_depth--;
659 if (cur_state->paren_depth == 0)
661 /* Terminate lexing temporarily */
662 cur_state->start_state = YY_START;
668 * psql-specific rules to handle backslash commands and variable
669 * substitution. We want these before {self}, also.
673 /* Force a semicolon or colon into the query buffer */
674 psqlscan_emit(cur_state, yytext + 1, 1);
678 /* Terminate lexing temporarily */
679 cur_state->start_state = YY_START;
680 return LEXRES_BACKSLASH;
684 /* Possible psql variable substitution */
688 varname = psqlscan_extract_substring(cur_state,
691 if (cur_state->callbacks->get_variable)
692 value = cur_state->callbacks->get_variable(varname,
700 /* It is a variable, check for recursion */
701 if (var_is_current_source(cur_state, varname))
703 /* Recursive expansion --- don't go there */
704 cur_state->callbacks->write_error("skipping recursive expansion of variable \"%s\"\n",
706 /* Instead copy the string as is */
711 /* OK, perform substitution */
712 psqlscan_push_new_buffer(cur_state, value, varname);
713 /* yy_scan_string already made buffer active */
720 * if the variable doesn't exist we'll copy the
729 :'{variable_char}+' {
730 psqlscan_escape_variable(cur_state, yytext, yyleng, false);
733 :\"{variable_char}+\" {
734 psqlscan_escape_variable(cur_state, yytext, yyleng, true);
738 * These rules just avoid the need for scanner backup if one of the
739 * two rules above fails to match completely.
743 /* Throw back everything but the colon */
748 :\"{variable_char}* {
749 /* Throw back everything but the colon */
755 * Back to backend-compatible rules.
764 * Check for embedded slash-star or dash-dash; those
765 * are comment starts, so operator must stop there.
766 * Note that slash-star or dash-dash at the first
767 * character will match a prior rule, not this one.
770 char *slashstar = strstr(yytext, "/*");
771 char *dashdash = strstr(yytext, "--");
773 if (slashstar && dashdash)
775 /* if both appear, take the first one */
776 if (slashstar > dashdash)
777 slashstar = dashdash;
780 slashstar = dashdash;
782 nchars = slashstar - yytext;
785 * For SQL compatibility, '+' and '-' cannot be the
786 * last char of a multi-char operator unless the operator
787 * contains chars that are not in SQL operators.
788 * The idea is to lex '=-' as two operators, but not
789 * to forbid operator names like '?-' that could not be
790 * sequences of SQL operators.
793 (yytext[nchars-1] == '+' ||
794 yytext[nchars-1] == '-'))
798 for (ic = nchars-2; ic >= 0; ic--)
800 if (strchr("~!@#^&|`?%", yytext[ic]))
804 break; /* found a char that makes it OK */
805 nchars--; /* else remove the +/-, and check again */
810 /* Strip the unwanted chars from the token */
827 /* throw back the .., and treat as integer */
836 * throw back the [Ee], and treat as {decimal}. Note
837 * that it is possible the input is actually {integer},
838 * but since this case will almost certainly lead to a
839 * syntax error anyway, we don't bother to distinguish.
845 /* throw back the [Ee][+-], and proceed as above */
860 * psql uses a single <<EOF>> rule, unlike the backend.
864 if (cur_state->buffer_stack == NULL)
866 cur_state->start_state = YY_START;
867 return LEXRES_EOL; /* end of input reached */
871 * We were expanding a variable, so pop the inclusion
872 * stack and keep lexing
874 psqlscan_pop_buffer_stack(cur_state);
875 psqlscan_select_top_buffer(cur_state);
881 * Create a lexer working state struct.
883 * callbacks is a struct of function pointers that encapsulate some
884 * behavior we need from the surrounding program. This struct must
885 * remain valid for the lifespan of the PsqlScanState.
888 psql_scan_create(const PsqlScanCallbacks *callbacks)
/* Build a new lexer state: allocate it, attach the callbacks, set up flex. */
/* pg_malloc0 presumably returns zero-filled memory; confirm against its definition. */
892 state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
/* Only the pointer is stored here; the callbacks struct is not copied. */
894 state->callbacks = callbacks;
/* Create the flex scanner with state as its extra data (retrieved as yyextra in yylex). */
896 yylex_init_extra(state, &state->scanner);
/* Put start_state, paren_depth, xcdepth and dolqstart into their starting values. */
898 psql_scan_reset(state);
904 * Destroy a lexer working state struct, releasing all resources.
907 psql_scan_destroy(PsqlScanState state)
/* Tear down in three steps: input buffers, lexer state, then the flex scanner. */
/* Drops stacked variable-expansion buffers and the outer scan buffer, if any. */
909 psql_scan_finish(state);
/* Clears the lexer state; this also frees a saved dollar-quote delimiter. */
911 psql_scan_reset(state);
/* Release the flex scanner object created alongside the state. */
913 yylex_destroy(state->scanner);
919 * Set up to perform lexing of the given input line.
921 * The text at *line, extending for line_len bytes, will be scanned by
922 * subsequent calls to the psql_scan routines. psql_scan_finish should
923 * be called when scanning is complete. Note that the lexer retains
924 * a pointer to the storage at *line --- this string must not be altered
925 * or freed until after psql_scan_finish is called.
927 * encoding is the libpq identifier for the character encoding in use,
928 * and std_strings says whether standard_conforming_strings is on.
931 psql_scan_setup(PsqlScanState state,
932 const char *line, int line_len,
933 int encoding, bool std_strings)
/* Record the encoding and string-syntax settings, then build the flex input buffer for line. */
935 /* Mustn't be scanning already */
936 Assert(state->scanbufhandle == NULL);
937 Assert(state->buffer_stack == NULL);
939 /* Do we need to hack the character set encoding? */
940 state->encoding = encoding;
/* safe_encoding later selects between the plain-copy paths and the 0xFF-masking paths in this file. */
941 state->safe_encoding = pg_valid_server_encoding_id(encoding);
943 /* Save standard-strings flag as well */
944 state->std_strings = std_strings;
946 /* Set up flex input buffer with appropriate translation and padding */
947 state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
949 state->scanline = line;
/* The pointer to line is kept as-is (no copy is made by the assignment above). */
951 /* Set lookaside data in case we have to map unsafe encoding */
/* curline is the text flex actually scans; refline is the text used to recover masked bytes. */
952 state->curline = state->scanbuf;
953 state->refline = state->scanline;
957 * Do lexical analysis of SQL command text.
959 * The text previously passed to psql_scan_setup is scanned, and appended
960 * (possibly with transformation) to query_buf.
962 * The return value indicates the condition that stopped scanning:
964 * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
965 * transferred to query_buf.) The command accumulated in query_buf should
966 * be executed, then clear query_buf and call again to scan the remainder
969 * PSCAN_BACKSLASH: found a backslash that starts a special command.
970 * Any previous data on the line has been transferred to query_buf.
971 * The caller will typically next apply a separate flex lexer to scan
972 * the special command.
974 * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
975 * incomplete SQL command. *prompt is set to the appropriate prompt type.
977 * PSCAN_EOL: the end of the line was reached, and there is no lexical
978 * reason to consider the command incomplete. The caller may or may not
979 * choose to send it. *prompt is set to the appropriate prompt type if
980 * the caller chooses to collect more input.
982 * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
983 * be called next, then the cycle may be repeated with a fresh input line.
985 * In all cases, *prompt is set to an appropriate prompt type code for the
986 * next line-input operation.
989 psql_scan(PsqlScanState state,
990 PQExpBuffer query_buf,
991 promptStatus_t *prompt)
/* Run the lexer over the current input, appending to query_buf, and translate */
/* the value returned by yylex into a scan result plus a prompt type. */
993 PsqlScanResult result;
996 /* Must be scanning already */
997 Assert(state->scanbufhandle != NULL);
999 /* Set current output target */
1000 state->output_buf = query_buf;
1002 /* Set input source */
/* A buffer left on the stack (a pending variable expansion) is resumed in */
/* preference to the outer scan buffer. */
1003 if (state->buffer_stack != NULL)
1004 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1006 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
/* yylex returns one of the LEXRES_ codes handled below. */
1009 lexresult = yylex(state->scanner);
1012 * Check termination state and return appropriate result info.
1016 case LEXRES_EOL: /* end of input */
/* The lexer stores its start condition in start_state before returning, so */
/* that value tells us what construct, if any, was left open. */
1017 switch (state->start_state)
1020 case xuiend: /* we treat these like INITIAL */
/* An unclosed parenthesis means the command cannot be complete yet. */
1022 if (state->paren_depth > 0)
1024 result = PSCAN_INCOMPLETE;
1025 *prompt = PROMPT_PAREN;
1027 else if (query_buf->len > 0)
1030 *prompt = PROMPT_CONTINUE;
1034 /* never bother to send an empty buffer */
1035 result = PSCAN_INCOMPLETE;
1036 *prompt = PROMPT_READY;
/* Every other recognized start state reports an incomplete command; only the */
/* prompt type differs from one state to the next. */
1040 result = PSCAN_INCOMPLETE;
1041 *prompt = PROMPT_SINGLEQUOTE;
1044 result = PSCAN_INCOMPLETE;
1045 *prompt = PROMPT_COMMENT;
1048 result = PSCAN_INCOMPLETE;
1049 *prompt = PROMPT_DOUBLEQUOTE;
1052 result = PSCAN_INCOMPLETE;
1053 *prompt = PROMPT_SINGLEQUOTE;
1056 result = PSCAN_INCOMPLETE;
1057 *prompt = PROMPT_SINGLEQUOTE;
1060 result = PSCAN_INCOMPLETE;
1061 *prompt = PROMPT_SINGLEQUOTE;
1064 result = PSCAN_INCOMPLETE;
1065 *prompt = PROMPT_DOLLARQUOTE;
1068 result = PSCAN_INCOMPLETE;
1069 *prompt = PROMPT_DOUBLEQUOTE;
1072 result = PSCAN_INCOMPLETE;
1073 *prompt = PROMPT_SINGLEQUOTE;
1076 /* can't get here */
1077 fprintf(stderr, "invalid YY_START\n");
/* The two remaining lexer results map one-to-one onto scan results, both */
/* with the ready prompt. */
1081 case LEXRES_SEMI: /* semicolon */
1082 result = PSCAN_SEMICOLON;
1083 *prompt = PROMPT_READY;
1085 case LEXRES_BACKSLASH: /* backslash */
1086 result = PSCAN_BACKSLASH;
1087 *prompt = PROMPT_READY;
1090 /* can't get here */
1091 fprintf(stderr, "invalid yylex result\n");
1099 * Clean up after scanning a string. This flushes any unread input and
1100 * releases resources (but not the PsqlScanState itself). Note however
1101 * that this does not reset the lexer scan state; that can be done by
1102 * psql_scan_reset(), which is an orthogonal operation.
1104 * It is legal to call this when not scanning anything (makes it easier
1105 * to deal with error recovery).
1108 psql_scan_finish(PsqlScanState state)
/* Release all input buffers; start_state, paren_depth and the like are left alone. */
1110 /* Drop any incomplete variable expansions. */
1111 while (state->buffer_stack != NULL)
1112 psqlscan_pop_buffer_stack(state);
1114 /* Done with the outer scan buffer, too */
1115 if (state->scanbufhandle)
1116 yy_delete_buffer(state->scanbufhandle, state->scanner);
1117 state->scanbufhandle = NULL;
/* scanbuf is presumably the text copy backing the flex buffer; confirm against */
/* psql_scan_setup. Both pointers end up NULL, so a repeated call releases nothing twice. */
1119 free(state->scanbuf);
1120 state->scanbuf = NULL;
1124 * Reset lexer scanning state to start conditions. This is appropriate
1125 * for executing \r psql commands (or any other time that we discard the
1126 * prior contents of query_buf). It is not, however, necessary to do this
1127 * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1128 * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1129 * conditions are returned.
1131 * Note that this is unrelated to flushing unread input; that task is
1132 * done by psql_scan_finish().
1135 psql_scan_reset(PsqlScanState state)
/* Return the lexer state fields to their starting values; input buffers are not touched. */
1137 state->start_state = INITIAL;
1138 state->paren_depth = 0;
1139 state->xcdepth = 0; /* not really necessary */
/* Free a saved dollar-quote opening delimiter, if one is pending, and forget it. */
1140 if (state->dolqstart)
1141 free(state->dolqstart);
1142 state->dolqstart = NULL;
1146 * Reselect this lexer (psqlscan.l) after using another one.
1148 * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1149 * state, because we'd never switch to another lexer in a different state.
1150 * However, we don't want to reset e.g. paren_depth, so this can't be
1151 * the same as psql_scan_reset().
1153 * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1154 * must be a superset of this.
1156 * Note: it seems likely that other lexers could just assign INITIAL for
1157 * themselves, since that probably has the value zero in every flex-generated
1158 * lexer. But let's not assume that.
1161 psql_scan_reselect_sql_lexer(PsqlScanState state)
/* Only start_state is changed; paren_depth, xcdepth and dolqstart keep their values. */
1163 state->start_state = INITIAL;
1167 * Return true if lexer is currently in an "inside quotes" state.
1169 * This is pretty grotty but is needed to preserve the old behavior
1170 * that mainloop.c drops blank lines not inside quotes without even
1174 psql_scan_in_quote(PsqlScanState state)
/* The test is true for every start condition other than INITIAL, not only for */
/* the quote states. */
1176 return state->start_state != INITIAL;
1180 * Push the given string onto the stack of stuff to scan.
1182 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1185 psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1186 const char *varname)
/* Create a stack element that scans newstr, remember which variable (if any) */
/* it came from, and make it the top of state->buffer_stack. */
1188 StackElem *stackelem;
1190 stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1193 * In current usage, the passed varname points at the current flex
1194 * input buffer; we must copy it before calling psqlscan_prepare_buffer()
1195 * because that will change the buffer state.
1197 stackelem->varname = varname ? pg_strdup(varname) : NULL;
1199 stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1200 &stackelem->bufstring);
/* curline always tracks the text flex is scanning. */
1201 state->curline = stackelem->bufstring;
/* In a safe encoding the buffer text is an unmodified copy and serves as its own */
/* reference; otherwise bytes may have been masked, so a private copy of the */
/* original string is kept for recovering them. */
1202 if (state->safe_encoding)
1204 stackelem->origstring = NULL;
1205 state->refline = stackelem->bufstring;
1209 stackelem->origstring = pg_strdup(newstr);
1210 state->refline = stackelem->origstring;
/* Link the new element in at the head of the stack. */
1212 stackelem->next = state->buffer_stack;
1213 state->buffer_stack = stackelem;
1217 * Pop the topmost buffer stack item (there must be one!)
1219 * NB: after this, the flex input state is unspecified; caller must
1220 * switch to an appropriate buffer to continue lexing.
1221 * See psqlscan_select_top_buffer().
1224 psqlscan_pop_buffer_stack(PsqlScanState state)
/* Unlink the top stack element and release what it owns. */
1226 StackElem *stackelem = state->buffer_stack;
/* stackelem is dereferenced without a NULL check, so the stack must not be empty here. */
1228 state->buffer_stack = stackelem->next;
/* Delete the flex buffer before freeing the text it was scanning. */
1229 yy_delete_buffer(stackelem->buf, state->scanner);
1230 free(stackelem->bufstring);
/* origstring and varname are both optional (either may be NULL). */
1231 if (stackelem->origstring)
1232 free(stackelem->origstring);
1233 if (stackelem->varname)
1234 free(stackelem->varname);
1239 * Select the topmost surviving buffer as the active input.
1242 psqlscan_select_top_buffer(PsqlScanState state)
/* Make flex read from whatever is now on top, and point curline and refline at */
/* the matching text. */
1244 StackElem *stackelem = state->buffer_stack;
/* A stacked variable-expansion buffer takes priority over the outer scan buffer. */
1246 if (stackelem != NULL)
1248 yy_switch_to_buffer(stackelem->buf, state->scanner);
1249 state->curline = stackelem->bufstring;
/* origstring is NULL when the element was pushed in a safe encoding; the buffer */
/* text is then its own reference. */
1250 state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
/* Stack is empty: go back to the outer scan buffer and its lookaside pointers. */
1254 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1255 state->curline = state->scanbuf;
1256 state->refline = state->scanline;
1261 * Check if specified variable name is the source for any string
1262 * currently being scanned
1265 var_is_current_source(PsqlScanState state, const char *varname)
/* Walk the buffer stack from the top, looking for an element recorded under varname. */
1267 StackElem *stackelem;
1269 for (stackelem = state->buffer_stack;
1271 stackelem = stackelem->next)
/* An element may have been pushed without a variable name, hence the NULL guard. */
1273 if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1280 * Set up a flex input buffer to scan the given data. We always make a
1281 * copy of the data. If working in an unsafe encoding, the copy has
1282 * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1284 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1287 psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1292 /* Flex wants two \0 characters after the actual data */
/* Hence the allocation of len + 2 bytes and the two terminator stores below. */
1293 newtxt = pg_malloc(len + 2);
1295 newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
/* Safe encodings are copied verbatim. */
1297 if (state->safe_encoding)
1298 memcpy(newtxt, txt, len);
1301 /* Gotta do it the hard way */
/* PQmblen gives the byte length of the character starting at txt + i. */
1306 int thislen = PQmblen(txt + i, state->encoding);
1308 /* first byte should always be okay... */
/* The trailing bytes of the character are overwritten with 0xFF; the i < len */
/* bound stops the fill at the end of the data even if the reported length runs past it. */
1311 while (--thislen > 0 && i < len)
1312 newtxt[i++] = (char) 0xFF;
/* The size handed to flex includes the two terminator bytes. */
1316 return yy_scan_buffer(newtxt, len + 2, state->scanner);
1320 * psqlscan_emit() --- body for ECHO macro
1322 * NB: this must be used for ALL and ONLY the text copied from the flex
1323 * input data. If you pass it something that is not part of the yytext
1324 * string, you are making a mistake. Internally generated text can be
1325 * appended directly to state->output_buf.
1328 psqlscan_emit(PsqlScanState state, const char *txt, int len)
/* Append len bytes starting at txt to the current output buffer. */
1330 PQExpBuffer output_buf = state->output_buf;
/* In a safe encoding the scanned text is unmodified and can be appended in one call. */
1332 if (state->safe_encoding)
1333 appendBinaryPQExpBuffer(output_buf, txt, len);
1336 /* Gotta do it the hard way */
1337 const char *reference = state->refline;
/* txt is taken to point into the text that starts at curline, so the same offset */
/* locates the corresponding bytes in the reference text. */
1340 reference += (txt - state->curline);
1342 for (i = 0; i < len; i++)
/* 0xFF is the mask byte written by psqlscan_prepare_buffer. */
/* NOTE(review): the replacement from reference presumably follows this test; confirm. */
1346 if (ch == (char) 0xFF)
1348 appendPQExpBufferChar(output_buf, ch);
1354 * psqlscan_extract_substring --- fetch value of (part of) the current token
1356 * This is like psqlscan_emit(), except that the data is returned as a
1357 * malloc'd string rather than being pushed directly to state->output_buf.
1360 psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
/* Copy len bytes starting at txt into newly allocated memory. */
/* One extra byte is allocated, presumably for a terminating NUL; confirm. */
1362 char *result = (char *) pg_malloc(len + 1);
/* In a safe encoding the scanned text is unmodified and can be copied directly. */
1364 if (state->safe_encoding)
1365 memcpy(result, txt, len);
1368 /* Gotta do it the hard way */
1369 const char *reference = state->refline;
/* Same offset trick as in psqlscan_emit: txt is relative to curline, so the */
/* offset also indexes the reference text. */
1372 reference += (txt - state->curline);
1374 for (i = 0; i < len; i++)
/* 0xFF is the mask byte written by psqlscan_prepare_buffer. */
/* NOTE(review): the replacement from reference presumably follows this test; confirm. */
1378 if (ch == (char) 0xFF)
1388 * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1390 * If the variable name is found, escape its value using the appropriate
1391 * quoting method and emit the value to output_buf. (Since the result is
1392 * surely quoted, there is never any reason to rescan it.) If we don't
1393 * find the variable or escaping fails, emit the token as-is.
1396 psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1402 /* Variable lookup. */
/* The token is a colon, an opening quote, the name, and a closing quote: skip */
/* two bytes at the front and drop three in total to isolate the name. */
1403 varname = psqlscan_extract_substring(state, txt + 2, len - 3);
/* The get_variable callback is optional. */
1404 if (state->callbacks->get_variable)
1405 value = state->callbacks->get_variable(varname, true, as_ident);
1412 /* Emit the suitably-escaped value */
/* value is appended directly to output_buf rather than through psqlscan_emit, */
/* since it does not come from the flex input text. */
1413 appendPQExpBufferStr(state->output_buf, value);
1418 /* Emit original token as-is */
1419 psqlscan_emit(state, txt, len);