2 /*-------------------------------------------------------------------------
5 * lexical scanner for psql backslash commands
7 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
9 * See psqlscan_int.h for additional commentary.
11 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
12 * Portions Copyright (c) 1994, Regents of the University of California
15 * src/bin/psql/psqlscanslash.l
17 *-------------------------------------------------------------------------
19 #include "postgres_fe.h"
21 #include "psqlscanslash.h"
27 #include "psqlscan_int.h"
30 * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
31 * doesn't presently make use of that argument, so just declare it as int.
36 * Set the type of yyextra; we use it as a pointer back to the containing
39 #define YY_EXTRA_TYPE PsqlScanState
42 * These variables do not need to be saved across calls. Yeah, it's a bit
43 * of a hack, but putting them into PsqlScanStateData would be klugy too.
/*
 * Kind of option currently being lexed.  The rule actions compare it with
 * OT_FILEPIPE and OT_NO_EVAL to decide how to treat the input.
 */
45 static enum slash_option_type option_type;
/*
 * Destination for the quote character seen in the argument.  It is pointed
 * at the quote output by psql_scan_slash_option before yylex runs, and a
 * rule action stores a single-quote character through it.
 */
46 static char *option_quote;
/*
 * Number of characters at the end of the argument that were not subject to
 * quoting.  Rule actions either reset it to 0 or increment it;
 * psql_scan_slash_option uses it to bound trailing-semicolon stripping.
 */
47 static int unquoted_option_chars;
/*
 * Value of output_buf->len recorded just before entering xslashbackquote.
 * evaluate_backtick treats the buffer contents from this offset onward as
 * the shell command and later truncates the buffer back to it.
 */
48 static int backtick_start_offset;
51 /* Return values from yylex() */
52 #define LEXRES_EOL 0 /* end of input */
53 #define LEXRES_OK 1 /* OK completion of backslash argument */
56 static void evaluate_backtick(PsqlScanState state);
58 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
61 * Work around a bug in flex 2.5.35: it emits a couple of functions that
62 * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
63 * this would cause warnings. Providing our own declarations should be
64 * harmless even when the bug gets fixed.
66 extern int slash_yyget_column(yyscan_t yyscanner);
67 extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
71 /* Except for the prefix, these options should match psqlscan.l */
75 %option never-interactive
81 %option prefix="slash_yy"
84 * OK, here is a short description of lex/flex rules behavior.
85 * The longest pattern which matches an input string is always chosen.
86 * For equal-length patterns, the first occurring in the rules list is chosen.
87 * INITIAL is the starting state, to which all non-conditional rules apply.
88 * Exclusive states change parsing rules while the state is active. When in
89 * an exclusive state, only those rules defined for that state apply.
92 /* Exclusive states for lexing backslash commands */
103 * Assorted character class definitions that should match psqlscan.l.
107 xeoctesc [\\][0-7]{1,3}
108 xehexesc [\\]x[0-9A-Fa-f]{1,2}
109 xqdouble {quote}{quote}
111 variable_char [A-Za-z\200-\377_0-9]
118 /* Declare some local variables inside yylex(), for convenience */
119 PsqlScanState cur_state = yyextra;
120 PQExpBuffer output_buf = cur_state->output_buf;
123 * Force flex into the state indicated by start_state. This has a
124 * couple of purposes: it lets some of the functions below set a new
125 * starting state without ugly direct access to flex variables, and it
126 * allows us to transition from one flex lexer to another so that we
127 * can lex different parts of the source string using separate lexers.
129 BEGIN(cur_state->start_state);
133 * We don't really expect to be invoked in the INITIAL state in this
134 * lexer; but if we are, just spit data to the output_buf until EOF.
140 * Exclusive lexer states to handle backslash command lexing
144 /* command name ends at whitespace or backslash; eat all else */
148 cur_state->start_state = YY_START;
158 * Discard any whitespace before argument, then go to xslasharg state.
159 * An exception is that "|" is only special at start of argument, so we
166 if (option_type == OT_FILEPIPE)
168 /* treat like whole-string case */
170 BEGIN(xslashwholeline);
174 /* vertical bar is not special otherwise */
189 * Default processing of text in a slash command's argument.
191 * Note: unquoted_option_chars counts the number of characters at the
192 * end of the argument that were not subject to any form of quoting.
193 * psql_scan_slash_option needs this to strip trailing semicolons safely.
198 * Unquoted space is end of arg; do not eat. Likewise
199 * backslash is end of command or next command, do not eat
201 * XXX this means we can't conveniently accept options
202 * that include unquoted backslashes; therefore, option
203 * processing that encourages use of backslashes is rather
207 cur_state->start_state = YY_START;
212 *option_quote = '\'';
213 unquoted_option_chars = 0;
218 backtick_start_offset = output_buf->len;
220 unquoted_option_chars = 0;
221 BEGIN(xslashbackquote);
227 unquoted_option_chars = 0;
232 /* Possible psql variable substitution */
233 if (option_type == OT_NO_EVAL ||
234 cur_state->callbacks->get_variable == NULL)
241 varname = psqlscan_extract_substring(cur_state,
244 value = cur_state->callbacks->get_variable(varname,
250 * The variable value is just emitted without any
251 * further examination. This is consistent with the
252 * pre-8.0 code behavior, if not with the way that
253 * variables are handled outside backslash commands.
254 * Note that we needn't guard against recursion here.
258 appendPQExpBufferStr(output_buf, value);
266 unquoted_option_chars = 0;
269 :'{variable_char}+' {
270 if (option_type == OT_NO_EVAL)
274 psqlscan_escape_variable(cur_state, yytext, yyleng, false);
277 unquoted_option_chars = 0;
281 :\"{variable_char}+\" {
282 if (option_type == OT_NO_EVAL)
286 psqlscan_escape_variable(cur_state, yytext, yyleng, true);
289 unquoted_option_chars = 0;
293 /* Throw back everything but the colon */
295 unquoted_option_chars++;
299 :\"{variable_char}* {
300 /* Throw back everything but the colon */
302 unquoted_option_chars++;
307 unquoted_option_chars++;
315 * single-quoted text: copy literally except for '' and backslash
319 {quote} { BEGIN(xslasharg); }
321 {xqdouble} { appendPQExpBufferChar(output_buf, '\''); }
323 "\\n" { appendPQExpBufferChar(output_buf, '\n'); }
324 "\\t" { appendPQExpBufferChar(output_buf, '\t'); }
325 "\\b" { appendPQExpBufferChar(output_buf, '\b'); }
326 "\\r" { appendPQExpBufferChar(output_buf, '\r'); }
327 "\\f" { appendPQExpBufferChar(output_buf, '\f'); }
331 appendPQExpBufferChar(output_buf,
332 (char) strtol(yytext + 1, NULL, 8));
337 appendPQExpBufferChar(output_buf,
338 (char) strtol(yytext + 2, NULL, 16));
341 "\\". { psqlscan_emit(cur_state, yytext + 1, 1); }
349 * backticked text: copy everything until next backquote, then evaluate.
351 * XXX Possible future behavioral change: substitute for :VARIABLE?
355 /* In NO_EVAL mode, don't evaluate the command */
356 if (option_type != OT_NO_EVAL)
357 evaluate_backtick(cur_state);
366 /* double-quoted text: copy verbatim, including the double quotes */
378 /* copy everything until end of input line */
379 /* but suppress leading whitespace */
382 if (output_buf->len > 0)
391 /* at end of command, eat a double backslash, but not anything else */
394 cur_state->start_state = YY_START;
400 cur_state->start_state = YY_START;
407 if (cur_state->buffer_stack == NULL)
409 cur_state->start_state = YY_START;
410 return LEXRES_EOL; /* end of input reached */
414 * We were expanding a variable, so pop the inclusion
415 * stack and keep lexing
417 psqlscan_pop_buffer_stack(cur_state);
418 psqlscan_select_top_buffer(cur_state);
424 * Scan the command name of a psql backslash command. This should be called
425 * after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input
426 * has been consumed through the leading backslash.
428 * The return value is a malloc'd copy of the command name, as parsed off
/*
 * Lexes the command name into a local PQExpBuffer: output_buf is pointed at
 * mybuf, the input buffer is selected, the lexer is started in xslashcmd and
 * run once, and the SQL lexer is reselected afterwards.
 */
432 psql_scan_slash_command(PsqlScanState state)
434 PQExpBufferData mybuf;
436 /* Must be scanning already */
437 Assert(state->scanbufhandle != NULL);
439 /* Build a local buffer that we'll return the data of */
440 initPQExpBuffer(&mybuf);
442 /* Set current output target */
443 state->output_buf = &mybuf;
445 /* Set input source */
/*
 * A non-NULL buffer_stack supplies its top buffer as input; the end-of-input
 * rule pops that stack after a variable expansion has been consumed.
 * NOTE(review): the scanbufhandle call below is presumably the fallback
 * branch for an empty stack -- confirm against the full file.
 */
446 if (state->buffer_stack != NULL)
447 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
449 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
452 * Set lexer start state. Note that this is sufficient to switch
453 * state->scanner over to using the tables in this lexer file.
455 state->start_state = xslashcmd;
/* The first yylex argument (YYSTYPE) is not used by this lexer, hence NULL. */
458 yylex(NULL, state->scanner);
460 /* There are no possible errors in this lex state... */
463 * In case the caller returns to using the regular SQL lexer, reselect the
464 * appropriate initial state.
466 psql_scan_reselect_sql_lexer(state);
472 * Parse off the next argument for a backslash command, and return it as a
473 * malloc'd string. If there are no more arguments, returns NULL.
475 * type tells what processing, if any, to perform on the option string;
476 * for example, if it's a SQL identifier, we want to downcase any unquoted
479 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
480 * the last quote symbol used in the argument.
482 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
483 * be taken as part of the option string will be stripped.
485 * NOTE: the only possible syntax errors for backslash options are unmatched
486 * quotes, which are detected when we run out of input. Therefore, on a
487 * syntax error we just throw away the string and return NULL; there is no
488 * need to worry about flushing remaining input.
/*
 * Lexes one backslash-command option into a local PQExpBuffer (mybuf).
 * Visible steps: point option_quote at the quote output, zero
 * unquoted_option_chars, select the input buffer, choose the start state
 * (xslashwholeline for OT_WHOLE_LINE), run yylex once, reselect the SQL
 * lexer, then post-process mybuf (semicolon stripping, SQL identifier
 * handling, final-state checks, empty-argument check).
 */
491 psql_scan_slash_option(PsqlScanState state,
492 enum slash_option_type type,
496 PQExpBufferData mybuf;
/* lexresult is only read by the Assert further down. */
497 int lexresult PG_USED_FOR_ASSERTS_ONLY;
501 /* Must be scanning already */
502 Assert(state->scanbufhandle != NULL);
/*
 * NOTE(review): presumably executed only when the caller supplied no quote
 * pointer, so that *quote can be written by the lexer and read below --
 * confirm against the full file.
 */
505 quote = &local_quote;
508 /* Build a local buffer that we'll return the data of */
509 initPQExpBuffer(&mybuf);
511 /* Set up static variables that will be used by yylex */
513 option_quote = quote;
514 unquoted_option_chars = 0;
516 /* Set current output target */
517 state->output_buf = &mybuf;
519 /* Set input source */
/*
 * A non-NULL buffer_stack supplies its top buffer as input.
 * NOTE(review): the scanbufhandle call is presumably the fallback branch.
 */
520 if (state->buffer_stack != NULL)
521 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
523 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
525 /* Set lexer start state */
526 if (type == OT_WHOLE_LINE)
527 state->start_state = xslashwholeline;
529 state->start_state = xslashargstart;
/* The first yylex argument (YYSTYPE) is not used by this lexer, hence NULL. */
532 lexresult = yylex(NULL, state->scanner);
534 /* Save final state for a moment... */
/*
 * Rule actions store YY_START into start_state, so this value tells which
 * lexer state was active when yylex stopped.  It is captured before the
 * reselect call below.
 */
535 final_state = state->start_state;
538 * In case the caller returns to using the regular SQL lexer, reselect the
539 * appropriate initial state.
541 psql_scan_reselect_sql_lexer(state);
544 * Check the lex result: we should have gotten back either LEXRES_OK
545 * or LEXRES_EOL (the latter indicating end of string). If we were inside
546 * a quoted string, as indicated by final_state, EOL is an error.
548 Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
556 /* Strip any unquoted trailing semi-colons if requested */
/*
 * unquoted_option_chars bounds the loop: at most that many trailing bytes
 * are examined, so a semicolon that came from quoted text is never removed.
 */
559 while (unquoted_option_chars-- > 0 &&
561 mybuf.data[mybuf.len - 1] == ';')
563 mybuf.data[--mybuf.len] = '\0';
568 * If SQL identifier processing was requested, then we strip out
569 * excess double quotes and downcase unquoted letters.
570 * Doubled double-quotes become output double-quotes, per spec.
572 * Note that a string like FOO"BAR"BAZ will be converted to
573 * fooBARbaz; this is somewhat inconsistent with the SQL spec,
574 * which would have us parse it as several identifiers. But
575 * for psql's purposes, we want a string like "foo"."bar" to
576 * be treated as one option, so there's little choice.
578 if (type == OT_SQLID || type == OT_SQLIDHACK)
580 bool inquotes = false;
581 char *cp = mybuf.data;
587 if (inquotes && cp[1] == '"')
589 /* Keep the first quote, remove the second */
592 inquotes = !inquotes;
593 /* Collapse out quote at *cp */
594 memmove(cp, cp + 1, strlen(cp));
596 /* do not advance cp */
600 if (!inquotes && type == OT_SQLID)
601 *cp = pg_tolower((unsigned char) *cp);
602 cp += PQmblen(cp, state->encoding);
608 case xslashbackquote:
610 /* must have hit EOL inside quotes */
611 state->callbacks->write_error("unterminated quoted string\n");
612 termPQExpBuffer(&mybuf);
614 case xslashwholeline:
619 fprintf(stderr, "invalid YY_START\n");
624 * An unquoted empty argument isn't possible unless we are at end of
625 * command. Return NULL instead.
627 if (mybuf.len == 0 && *quote == 0)
629 termPQExpBuffer(&mybuf);
633 /* Else return the completed string. */
638 * Eat up any unused \\ to complete a backslash command.
/*
 * Runs the lexer once in the xslashend state with no output buffer
 * (output_buf is set to NULL), then reselects the SQL lexer.
 */
641 psql_scan_slash_command_end(PsqlScanState state)
643 /* Must be scanning already */
644 Assert(state->scanbufhandle != NULL);
646 /* Set current output target */
647 state->output_buf = NULL; /* we won't output anything */
649 /* Set input source */
/*
 * A non-NULL buffer_stack supplies its top buffer as input.
 * NOTE(review): the scanbufhandle call is presumably the fallback branch
 * for an empty stack -- confirm against the full file.
 */
650 if (state->buffer_stack != NULL)
651 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
653 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
655 /* Set lexer start state */
656 state->start_state = xslashend;
/* The first yylex argument (YYSTYPE) is not used by this lexer, hence NULL. */
659 yylex(NULL, state->scanner);
661 /* There are no possible errors in this lex state... */
664 * We expect the caller to return to using the regular SQL lexer, so
665 * reselect the appropriate initial state.
667 psql_scan_reselect_sql_lexer(state);
671 * Evaluate a backticked substring of a slash command's argument.
673 * The portion of output_buf starting at backtick_start_offset is evaluated
674 * as a shell command and then replaced by the command's output.
/*
 * Runs cmd through popen, gathers what fread returns into cmd_output,
 * truncates output_buf back to backtick_start_offset, and appends the
 * gathered output there.  Failures of popen, fread and pclose, and a broken
 * cmd_output buffer, are reported through state->callbacks->write_error.
 */
677 evaluate_backtick(PsqlScanState state)
679 PQExpBuffer output_buf = state->output_buf;
/*
 * cmd points into output_buf itself (no copy is made), so it is only
 * meaningful until output_buf is truncated further down.
 */
680 char *cmd = output_buf->data + backtick_start_offset;
681 PQExpBufferData cmd_output;
687 initPQExpBuffer(&cmd_output);
689 fd = popen(cmd, PG_BINARY_R);
692 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
/* Reads in chunks of sizeof(buf) bytes; result is the byte count appended below. */
700 result = fread(buf, 1, sizeof(buf), fd);
703 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
707 appendBinaryPQExpBuffer(&cmd_output, buf, result);
/* fd is tested first, which keeps pclose from being called on a NULL stream. */
711 if (fd && pclose(fd) == -1)
713 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
717 if (PQExpBufferDataBroken(cmd_output))
719 state->callbacks->write_error("%s: out of memory\n", cmd);
723 /* Now done with cmd, delete it from output_buf */
/* All uses of cmd, including the error messages above, precede this truncation. */
724 output_buf->len = backtick_start_offset;
725 output_buf->data[output_buf->len] = '\0';
727 /* If no error, transfer result to output_buf */
730 /* strip any trailing newline */
731 if (cmd_output.len > 0 &&
732 cmd_output.data[cmd_output.len - 1] == '\n')
734 appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
737 termPQExpBuffer(&cmd_output);