2 /*-------------------------------------------------------------------------
5 * lexical scanner for psql backslash commands
7 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
9 * See psqlscan_int.h for additional commentary.
11 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
12 * Portions Copyright (c) 1994, Regents of the University of California
15 * src/bin/psql/psqlscanslash.l
17 *-------------------------------------------------------------------------
19 #include "postgres_fe.h"
21 #include "psqlscanslash.h"
27 #include "psqlscan_int.h"
30 * Set the type of yyextra; we use it as a pointer back to the containing
33 #define YY_EXTRA_TYPE PsqlScanState
36 * These variables do not need to be saved across calls. Yeah, it's a bit
37 * of a hack, but putting them into PsqlScanStateData would be klugy too.
/*
 * Kind of processing requested for the option being scanned; the rule
 * actions compare it against OT_FILEPIPE and OT_NO_EVAL.
 */
39 static enum slash_option_type option_type;
/*
 * Location through which the quote character found in the current option
 * is reported; the quoting rule stores a single-quote character there.
 */
40 static char *option_quote;
/*
 * Number of characters at the end of the option that were not quoted in
 * any way; quoting and substitution rules reset it to 0, plain text
 * increments it.
 */
41 static int unquoted_option_chars;
/*
 * Value of output_buf->len captured when a backtick section begins;
 * evaluate_backtick uses it to find the command text inside output_buf.
 */
42 static int backtick_start_offset;
45 /* Return values from yylex() */
46 #define LEXRES_EOL 0 /* end of input */
47 #define LEXRES_OK 1 /* OK completion of backslash argument */
50 static void evaluate_backtick(PsqlScanState state);
52 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
55 * Work around a bug in flex 2.5.35: it emits a couple of functions that
56 * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
57 * this would cause warnings. Providing our own declarations should be
58 * harmless even when the bug gets fixed.
60 extern int slash_yyget_column(yyscan_t yyscanner);
61 extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
67 %option never-interactive
73 %option prefix="slash_yy"
76 * OK, here is a short description of lex/flex rules behavior.
77 * The longest pattern which matches an input string is always chosen.
78 * For equal-length patterns, the first occurring in the rules list is chosen.
79 * INITIAL is the starting state, to which all non-conditional rules apply.
80 * Exclusive states change parsing rules while the state is active. When in
81 * an exclusive state, only those rules defined for that state apply.
84 /* Exclusive states for lexing backslash commands */
95 * Assorted character class definitions that should match psqlscan.l.
99 xeoctesc [\\][0-7]{1,3}
100 xehexesc [\\]x[0-9A-Fa-f]{1,2}
101 xqdouble {quote}{quote}
103 variable_char [A-Za-z\200-\377_0-9]
110 /* Declare some local variables inside yylex(), for convenience */
111 PsqlScanState cur_state = yyextra;
112 PQExpBuffer output_buf = cur_state->output_buf;
115 * Force flex into the state indicated by start_state. This has a
116 * couple of purposes: it lets some of the functions below set a
117 * new starting state without ugly direct access to flex variables,
118 * and it allows us to transition from one flex lexer to another
119 * so that we can lex different parts of the source string using
122 BEGIN(cur_state->start_state);
126 * We don't really expect to be invoked in the INITIAL state in this
127 * lexer; but if we are, just spit data to the output_buf until EOF.
133 * Exclusive lexer states to handle backslash command lexing
137 /* command name ends at whitespace or backslash; eat all else */
141 cur_state->start_state = YY_START;
151 * Discard any whitespace before argument, then go to xslasharg state.
152 * An exception is that "|" is only special at start of argument, so we
159 if (option_type == OT_FILEPIPE)
161 /* treat like whole-string case */
163 BEGIN(xslashwholeline);
167 /* vertical bar is not special otherwise */
182 * Default processing of text in a slash command's argument.
184 * Note: unquoted_option_chars counts the number of characters at the
185 * end of the argument that were not subject to any form of quoting.
186 * psql_scan_slash_option needs this to strip trailing semicolons safely.
191 * Unquoted space is end of arg; do not eat. Likewise
192 * backslash is end of command or next command, do not eat
194 * XXX this means we can't conveniently accept options
195 * that include unquoted backslashes; therefore, option
196 * processing that encourages use of backslashes is rather
200 cur_state->start_state = YY_START;
205 *option_quote = '\'';
206 unquoted_option_chars = 0;
211 backtick_start_offset = output_buf->len;
213 unquoted_option_chars = 0;
214 BEGIN(xslashbackquote);
220 unquoted_option_chars = 0;
225 /* Possible psql variable substitution */
226 if (option_type == OT_NO_EVAL ||
227 cur_state->callbacks->get_variable == NULL)
234 varname = psqlscan_extract_substring(cur_state,
237 value = cur_state->callbacks->get_variable(varname,
243 * The variable value is just emitted without any
244 * further examination. This is consistent with the
245 * pre-8.0 code behavior, if not with the way that
246 * variables are handled outside backslash commands.
247 * Note that we needn't guard against recursion here.
251 appendPQExpBufferStr(output_buf, value);
259 unquoted_option_chars = 0;
262 :'{variable_char}+' {
263 if (option_type == OT_NO_EVAL)
267 psqlscan_escape_variable(cur_state, yytext, yyleng, false);
270 unquoted_option_chars = 0;
274 :\"{variable_char}+\" {
275 if (option_type == OT_NO_EVAL)
279 psqlscan_escape_variable(cur_state, yytext, yyleng, true);
282 unquoted_option_chars = 0;
286 /* Throw back everything but the colon */
288 unquoted_option_chars++;
292 :\"{variable_char}* {
293 /* Throw back everything but the colon */
295 unquoted_option_chars++;
300 unquoted_option_chars++;
308 * single-quoted text: copy literally except for '' and backslash
312 {quote} { BEGIN(xslasharg); }
314 {xqdouble} { appendPQExpBufferChar(output_buf, '\''); }
316 "\\n" { appendPQExpBufferChar(output_buf, '\n'); }
317 "\\t" { appendPQExpBufferChar(output_buf, '\t'); }
318 "\\b" { appendPQExpBufferChar(output_buf, '\b'); }
319 "\\r" { appendPQExpBufferChar(output_buf, '\r'); }
320 "\\f" { appendPQExpBufferChar(output_buf, '\f'); }
324 appendPQExpBufferChar(output_buf,
325 (char) strtol(yytext + 1, NULL, 8));
330 appendPQExpBufferChar(output_buf,
331 (char) strtol(yytext + 2, NULL, 16));
334 "\\". { psqlscan_emit(cur_state, yytext + 1, 1); }
342 * backticked text: copy everything until next backquote, then evaluate.
344 * XXX Possible future behavioral change: substitute for :VARIABLE?
348 /* In NO_EVAL mode, don't evaluate the command */
349 if (option_type != OT_NO_EVAL)
350 evaluate_backtick(cur_state);
359 /* double-quoted text: copy verbatim, including the double quotes */
371 /* copy everything until end of input line */
372 /* but suppress leading whitespace */
375 if (output_buf->len > 0)
384 /* at end of command, eat a double backslash, but not anything else */
387 cur_state->start_state = YY_START;
393 cur_state->start_state = YY_START;
400 * psql uses a single <<EOF>> rule, unlike the backend.
404 if (cur_state->buffer_stack == NULL)
406 cur_state->start_state = YY_START;
407 return LEXRES_EOL; /* end of input reached */
411 * We were expanding a variable, so pop the inclusion
412 * stack and keep lexing
414 psqlscan_pop_buffer_stack(cur_state);
415 psqlscan_select_top_buffer(cur_state);
421 * Scan the command name of a psql backslash command. This should be called
422 * after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input
423 * has been consumed through the leading backslash.
425 * The return value is a malloc'd copy of the command name, as parsed off
429 psql_scan_slash_command(PsqlScanState state)
/*
 * Lexes one backslash command name: the scanner is run in the xslashcmd
 * start state with the local buffer mybuf attached as its output target.
 */
431 PQExpBufferData mybuf;
433 /* Must be scanning already */
434 Assert(state->scanbufhandle != NULL);
436 /* Build a local buffer that we'll return the data of */
437 initPQExpBuffer(&mybuf);
439 /* Set current output target */
440 state->output_buf = &mybuf;
442 /* Set input source */
/*
 * The buffer referenced by buffer_stack is selected when that stack is
 * non-NULL; scanbufhandle is the other possible input.
 */
443 if (state->buffer_stack != NULL)
444 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
446 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
449 * Set lexer start state. Note that this is sufficient to switch
450 * state->scanner over to using the tables in this lexer file.
452 state->start_state = xslashcmd;
/* Run the lexer; its return value is not examined here */
455 yylex(state->scanner);
457 /* There are no possible errors in this lex state... */
460 * In case the caller returns to using the regular SQL lexer, reselect the
461 * appropriate initial state.
463 psql_scan_reselect_sql_lexer(state);
469 * Parse off the next argument for a backslash command, and return it as a
470 * malloc'd string. If there are no more arguments, returns NULL.
472 * type tells what processing, if any, to perform on the option string;
473 * for example, if it's a SQL identifier, we want to downcase any unquoted
476 * If quote is not NULL, *quote is set to 0 if no quoting was found, or else
477 * to the last quote symbol used in the argument.
479 * If semicolon is true, unquoted trailing semicolon(s) that would otherwise
480 * be taken as part of the option string will be stripped.
482 * NOTE: the only possible syntax errors for backslash options are unmatched
483 * quotes, which are detected when we run out of input. Therefore, on a
484 * syntax error we just throw away the string and return NULL; there is no
485 * need to worry about flushing remaining input.
488 psql_scan_slash_option(PsqlScanState state,
489 enum slash_option_type type,
493 PQExpBufferData mybuf;
494 int lexresult PG_USED_FOR_ASSERTS_ONLY;
/*
 * Lexes one option of a backslash command into the local buffer mybuf,
 * then post-processes the text according to type.  lexresult is read
 * only by the Assert further down, hence its assert-only marking.
 */
498 /* Must be scanning already */
499 Assert(state->scanbufhandle != NULL);
/*
 * Redirect quote to a local variable so that it can be dereferenced later.
 * NOTE(review): the guarding condition is not visible here; presumably
 * this happens only when the caller passed NULL -- confirm.
 */
502 quote = &local_quote;
505 /* Build a local buffer that we'll return the data of */
506 initPQExpBuffer(&mybuf);
508 /* Set up static variables that will be used by yylex */
510 option_quote = quote;
511 unquoted_option_chars = 0;
513 /* Set current output target */
514 state->output_buf = &mybuf;
516 /* Set input source */
/*
 * The buffer referenced by buffer_stack is selected when that stack is
 * non-NULL; scanbufhandle is the other possible input.
 */
517 if (state->buffer_stack != NULL)
518 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
520 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
522 /* Set lexer start state */
/* OT_WHOLE_LINE starts in xslashwholeline; xslashargstart is the other start state */
523 if (type == OT_WHOLE_LINE)
524 state->start_state = xslashwholeline;
526 state->start_state = xslashargstart;
/* Run the lexer and keep its result for the sanity check below */
529 lexresult = yylex(state->scanner);
531 /* Save final state for a moment... */
532 final_state = state->start_state;
535 * In case the caller returns to using the regular SQL lexer, reselect the
536 * appropriate initial state.
538 psql_scan_reselect_sql_lexer(state);
541 * Check the lex result: we should have gotten back either LEXRES_OK
542 * or LEXRES_EOL (the latter indicating end of string). If we were inside
543 * a quoted string, as indicated by final_state, EOL is an error.
545 Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
553 /* Strip any unquoted trailing semicolons if requested */
/*
 * The loop is bounded by unquoted_option_chars, so only characters that
 * the lexer counted as unquoted can be removed from the end of mybuf.
 */
556 while (unquoted_option_chars-- > 0 &&
558 mybuf.data[mybuf.len - 1] == ';')
560 mybuf.data[--mybuf.len] = '\0';
565 * If SQL identifier processing was requested, then we strip out
566 * excess double quotes and downcase unquoted letters.
567 * Doubled double-quotes become output double-quotes, per spec.
569 * Note that a string like FOO"BAR"BAZ will be converted to
570 * fooBARbaz; this is somewhat inconsistent with the SQL spec,
571 * which would have us parse it as several identifiers. But
572 * for psql's purposes, we want a string like "foo"."bar" to
573 * be treated as one option, so there's little choice.
575 if (type == OT_SQLID || type == OT_SQLIDHACK)
577 bool inquotes = false;
578 char *cp = mybuf.data;
584 if (inquotes && cp[1] == '"')
586 /* Keep the first quote, remove the second */
589 inquotes = !inquotes;
590 /* Collapse out quote at *cp */
591 memmove(cp, cp + 1, strlen(cp));
593 /* do not advance cp */
597 if (!inquotes && type == OT_SQLID)
598 *cp = pg_tolower((unsigned char) *cp);
599 cp += PQmblen(cp, state->encoding);
605 case xslashbackquote:
607 /* must have hit EOL inside quotes */
608 state->callbacks->write_error("unterminated quoted string\n");
609 termPQExpBuffer(&mybuf);
611 case xslashwholeline:
616 fprintf(stderr, "invalid YY_START\n");
621 * An unquoted empty argument isn't possible unless we are at end of
622 * command. Return NULL instead.
624 if (mybuf.len == 0 && *quote == 0)
626 termPQExpBuffer(&mybuf);
630 /* Else return the completed string. */
635 * Eat up any unused \\ to complete a backslash command.
638 psql_scan_slash_command_end(PsqlScanState state)
/*
 * Runs the scanner in the xslashend start state with no output buffer
 * attached, then reselects the SQL lexer state.
 */
640 /* Must be scanning already */
641 Assert(state->scanbufhandle != NULL);
643 /* Set current output target */
644 state->output_buf = NULL; /* we won't output anything */
646 /* Set input source */
/*
 * The buffer referenced by buffer_stack is selected when that stack is
 * non-NULL; scanbufhandle is the other possible input.
 */
647 if (state->buffer_stack != NULL)
648 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
650 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
652 /* Set lexer start state */
653 state->start_state = xslashend;
/* Run the lexer; its return value is not examined here */
656 yylex(state->scanner);
658 /* There are no possible errors in this lex state... */
661 * We expect the caller to return to using the regular SQL lexer, so
662 * reselect the appropriate initial state.
664 psql_scan_reselect_sql_lexer(state);
668 * Evaluate a backticked substring of a slash command's argument.
670 * The portion of output_buf starting at backtick_start_offset is evaluated
671 * as a shell command and then replaced by the command's output.
674 evaluate_backtick(PsqlScanState state)
/*
 * Passes the text stored in output_buf from backtick_start_offset onward
 * to popen(), collects what the command writes into cmd_output, and then
 * puts that output where the command text used to be.
 */
676 PQExpBuffer output_buf = state->output_buf;
/*
 * cmd points into output_buf itself, so it is usable only until the
 * buffer is truncated further down.
 */
677 char *cmd = output_buf->data + backtick_start_offset;
678 PQExpBufferData cmd_output;
684 initPQExpBuffer(&cmd_output);
/* Start the command; its output is read back through the stream fd */
686 fd = popen(cmd, PG_BINARY_R);
689 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
/* Read up to sizeof(buf) bytes at a time */
697 result = fread(buf, 1, sizeof(buf), fd);
700 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
/* Append exactly the number of bytes that fread returned */
704 appendBinaryPQExpBuffer(&cmd_output, buf, result);
/* fd is tested first so that pclose is not called when popen failed */
708 if (fd && pclose(fd) == -1)
710 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
/* A buffer flagged as broken is reported as an out-of-memory failure */
714 if (PQExpBufferDataBroken(cmd_output))
716 state->callbacks->write_error("%s: out of memory\n", cmd);
720 /* Now done with cmd, delete it from output_buf */
721 output_buf->len = backtick_start_offset;
722 output_buf->data[output_buf->len] = '\0';
724 /* If no error, transfer result to output_buf */
727 /* strip any trailing newline */
/* The test inspects only the final byte of the collected output */
728 if (cmd_output.len > 0 &&
729 cmd_output.data[cmd_output.len - 1] == '\n')
731 appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
/* Release the temporary buffer that held the command output */
734 termPQExpBuffer(&cmd_output);