]> granicus.if.org Git - postgresql/blob - src/bin/psql/psqlscanslash.l
Use %option bison-bridge in psql/pgbench lexers.
[postgresql] / src / bin / psql / psqlscanslash.l
1 %top{
2 /*-------------------------------------------------------------------------
3  *
4  * psqlscanslash.l
5  *        lexical scanner for psql backslash commands
6  *
7  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
8  *
9  * See psqlscan_int.h for additional commentary.
10  *
11  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
12  * Portions Copyright (c) 1994, Regents of the University of California
13  *
14  * IDENTIFICATION
15  *        src/bin/psql/psqlscanslash.l
16  *
17  *-------------------------------------------------------------------------
18  */
19 #include "postgres_fe.h"
20
21 #include "psqlscanslash.h"
22
23 #include "libpq-fe.h"
24 }
25
26 %{
27 #include "psqlscan_int.h"
28
29 /*
30  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
31  * doesn't presently make use of that argument, so just declare it as int.
32  */
33 typedef int YYSTYPE;
34
35 /*
36  * Set the type of yyextra; we use it as a pointer back to the containing
37  * PsqlScanState.
38  */
39 #define YY_EXTRA_TYPE PsqlScanState
40
41 /*
42  * These variables do not need to be saved across calls.  Yeah, it's a bit
43  * of a hack, but putting them into PsqlScanStateData would be klugy too.
44  */
45 static enum slash_option_type option_type;      /* "type" argument of psql_scan_slash_option */
46 static char *option_quote;      /* where the rules record the last quote symbol seen */
47 static int      unquoted_option_chars;  /* count of trailing chars not subject to quoting */
48 static int      backtick_start_offset;  /* output_buf->len when the opening backtick was seen */
49
50
51 /* Return values from yylex() */
52 #define LEXRES_EOL                      0       /* end of input */
53 #define LEXRES_OK                       1       /* OK completion of backslash argument */
54
55
56 static void evaluate_backtick(PsqlScanState state);
57
58 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)   /* uses the cur_state local declared inside yylex() */
59
60 /*
61  * Work around a bug in flex 2.5.35: it emits a couple of functions that
62  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
63  * this would cause warnings.  Providing our own declarations should be
64  * harmless even when the bug gets fixed.
65  */
66 extern int      slash_yyget_column(yyscan_t yyscanner);
67 extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
68
69 %}
70
71 /* Except for the prefix, these options should match psqlscan.l */
72 %option reentrant
73 %option bison-bridge
74 %option 8bit
75 %option never-interactive
76 %option nodefault
77 %option noinput
78 %option nounput
79 %option noyywrap
80 %option warn
81 %option prefix="slash_yy"
82
83 /*
84  * OK, here is a short description of lex/flex rules behavior.
85  * The longest pattern which matches an input string is always chosen.
86  * For equal-length patterns, the first occurring in the rules list is chosen.
87  * INITIAL is the starting state, to which all non-conditional rules apply.
88  * Exclusive states change parsing rules while the state is active.  When in
89  * an exclusive state, only those rules defined for that state apply.
90  */
91
92 /* Exclusive states for lexing backslash commands */
93 %x xslashcmd
94 %x xslashargstart
95 %x xslasharg
96 %x xslashquote
97 %x xslashbackquote
98 %x xslashdquote
99 %x xslashwholeline
100 %x xslashend
101
102 /*
103  * Assorted character class definitions that should match psqlscan.l.
104  */
105 space                   [ \t\n\r\f]
106 quote                   '
107 xeoctesc                [\\][0-7]{1,3}
108 xehexesc                [\\]x[0-9A-Fa-f]{1,2}
109 xqdouble                {quote}{quote}
110 dquote                  \"
111 variable_char   [A-Za-z\200-\377_0-9]
112
113 other                   .
114
115 %%
116
117 %{
118                 /* Declare some local variables inside yylex(), for convenience */
119                 PsqlScanState cur_state = yyextra;      /* the containing scan state, see YY_EXTRA_TYPE */
120                 PQExpBuffer output_buf = cur_state->output_buf; /* target of the rules' appends; set by the entry points below (NULL for psql_scan_slash_command_end) */
121
122                 /*
123                  * Force flex into the state indicated by start_state.  This has a
124                  * couple of purposes: it lets some of the functions below set a new
125                  * starting state without ugly direct access to flex variables, and it
126                  * allows us to transition from one flex lexer to another so that we
127                  * can lex different parts of the source string using separate lexers.
128                  */
129                 BEGIN(cur_state->start_state);
130 %}
131
132         /*
133          * We don't really expect to be invoked in the INITIAL state in this
134          * lexer; but if we are, just spit data to the output_buf until EOF.
135          */
136
137 {other}|\n              { ECHO; }
138
139         /*
140          * Exclusive lexer states to handle backslash command lexing
141          */
142
143 <xslashcmd>{
144         /* command name ends at whitespace or backslash; eat all else */
145
146 {space}|"\\"    {
147                                         yyless(0);      /* push the terminator back; it is not part of the name */
148                                         cur_state->start_state = YY_START;      /* record the state we stopped in */
149                                         return LEXRES_OK;
150                                 }
151
152 {other}                 { ECHO; }
153
154 }
155
156 <xslashargstart>{
157         /*
158          * Discard any whitespace before argument, then go to xslasharg state.
159          * An exception is that "|" is only special at start of argument, so we
160          * check for it here.
161          */
162
163 {space}+                { /* leading whitespace: emit nothing */ }
164
165 "|"                             {
166                                         if (option_type == OT_FILEPIPE)
167                                         {
168                                                 /* treat like whole-string case */
169                                                 ECHO;
170                                                 BEGIN(xslashwholeline);
171                                         }
172                                         else
173                                         {
174                                                 /* vertical bar is not special otherwise */
175                                                 yyless(0);      /* re-scan the "|" as ordinary argument text */
176                                                 BEGIN(xslasharg);
177                                         }
178                                 }
179
180 {other}                 {
181                                         yyless(0);      /* first argument character: re-scan it in xslasharg */
182                                         BEGIN(xslasharg);
183                                 }
184
185 }
186
187 <xslasharg>{
188         /*
189          * Default processing of text in a slash command's argument.
190          *
191          * Note: unquoted_option_chars counts the number of characters at the
192          * end of the argument that were not subject to any form of quoting.
193          * psql_scan_slash_option needs this to strip trailing semicolons safely.
194          */
195
196 {space}|"\\"    {
197                                         /*
198                                          * Unquoted space is end of arg; do not eat.  Likewise
199                                          * backslash is end of command or next command, do not eat
200                                          *
201                                          * XXX this means we can't conveniently accept options
202                                          * that include unquoted backslashes; therefore, option
203                                          * processing that encourages use of backslashes is rather
204                                          * broken.
205                                          */
206                                         yyless(0);
207                                         cur_state->start_state = YY_START;      /* psql_scan_slash_option inspects this as the final state */
208                                         return LEXRES_OK;
209                                 }
210
211 {quote}                 {
212                                         *option_quote = '\'';   /* record the quote symbol for the caller */
213                                         unquoted_option_chars = 0;
214                                         BEGIN(xslashquote);     /* the quote mark itself is not emitted */
215                                 }
216
217 "`"                             {
218                                         backtick_start_offset = output_buf->len;        /* command text will start here in output_buf */
219                                         *option_quote = '`';
220                                         unquoted_option_chars = 0;
221                                         BEGIN(xslashbackquote);
222                                 }
223
224 {dquote}                {
225                                         ECHO;   /* unlike ' and `, the double quote is kept in the output */
226                                         *option_quote = '"';
227                                         unquoted_option_chars = 0;
228                                         BEGIN(xslashdquote);
229                                 }
230
231 :{variable_char}+       {
232                                         /* Possible psql variable substitution */
233                                         if (option_type == OT_NO_EVAL ||
234                                                 cur_state->callbacks->get_variable == NULL)
235                                                 ECHO;
236                                         else
237                                         {
238                                                 char       *varname;
239                                                 char       *value;
240
241                                                 varname = psqlscan_extract_substring(cur_state,
242                                                                                                                          yytext + 1,
243                                                                                                                          yyleng - 1);
244                                                 value = cur_state->callbacks->get_variable(varname,
245                                                                                                                                    false,
246                                                                                                                                    false);
247                                                 free(varname);
248
249                                                 /*
250                                                  * The variable value is just emitted without any
251                                                  * further examination.  This is consistent with the
252                                                  * pre-8.0 code behavior, if not with the way that
253                                                  * variables are handled outside backslash commands.
254                                                  * Note that we needn't guard against recursion here.
255                                                  */
256                                                 if (value)
257                                                 {
258                                                         appendPQExpBufferStr(output_buf, value);
259                                                         free(value);
260                                                 }
261                                                 else
262                                                         ECHO;   /* no value returned: keep the ":name" text as-is */
263
264                                                 *option_quote = ':';    /* ':' is reported as the quote symbol for variable references */
265                                         }
266                                         unquoted_option_chars = 0;
267                                 }
268
269 :'{variable_char}+'     {
270                                         if (option_type == OT_NO_EVAL)  /* :'name' form; left untouched in NO_EVAL mode */
271                                                 ECHO;
272                                         else
273                                         {
274                                                 psqlscan_escape_variable(cur_state, yytext, yyleng, false);     /* NOTE(review): last arg presumably selects literal (false) vs identifier (true) quoting -- confirm in psqlscan.l */
275                                                 *option_quote = ':';
276                                         }
277                                         unquoted_option_chars = 0;
278                                 }
279
280
281 :\"{variable_char}+\"   {
282                                         if (option_type == OT_NO_EVAL)  /* :"name" form; left untouched in NO_EVAL mode */
283                                                 ECHO;
284                                         else
285                                         {
286                                                 psqlscan_escape_variable(cur_state, yytext, yyleng, true);
287                                                 *option_quote = ':';
288                                         }
289                                         unquoted_option_chars = 0;
290                                 }
291
292 :'{variable_char}*      {
293                                         /* Throw back everything but the colon */
294                                         yyless(1);      /* no closing quote was found, so this is not a variable reference */
295                                         unquoted_option_chars++;
296                                         ECHO;   /* after yyless(1) the token is just the colon */
297                                 }
298
299 :\"{variable_char}*     {
300                                         /* Throw back everything but the colon */
301                                         yyless(1);
302                                         unquoted_option_chars++;
303                                         ECHO;
304                                 }
305
306 {other}                 {
307                                         unquoted_option_chars++;        /* ordinary unquoted character */
308                                         ECHO;
309                                 }
310
311 }
312
313 <xslashquote>{
314         /*
315          * single-quoted text: copy literally except for '' and backslash
316          * sequences
317          */
318
319 {quote}                 { BEGIN(xslasharg); /* closing quote: not emitted, resume normal argument text */ }
320
321 {xqdouble}              { appendPQExpBufferChar(output_buf, '\''); /* '' yields a single quote */ }
322
323 "\\n"                   { appendPQExpBufferChar(output_buf, '\n'); }
324 "\\t"                   { appendPQExpBufferChar(output_buf, '\t'); }
325 "\\b"                   { appendPQExpBufferChar(output_buf, '\b'); }
326 "\\r"                   { appendPQExpBufferChar(output_buf, '\r'); }
327 "\\f"                   { appendPQExpBufferChar(output_buf, '\f'); }
328
329 {xeoctesc}              {
330                                         /* octal case */
331                                         appendPQExpBufferChar(output_buf,
332                                                                                   (char) strtol(yytext + 1, NULL, 8));  /* skip the backslash */
333                                 }
334
335 {xehexesc}              {
336                                         /* hex case */
337                                         appendPQExpBufferChar(output_buf,
338                                                                                   (char) strtol(yytext + 2, NULL, 16)); /* skip the backslash and 'x' */
339                                 }
340
341 "\\".                   { psqlscan_emit(cur_state, yytext + 1, 1); /* any other backslashed char: pass on just that char */ }
342
343 {other}|\n              { ECHO; }
344
345 }
346
347 <xslashbackquote>{
348         /*
349          * backticked text: copy everything until next backquote, then evaluate.
350          *
351          * XXX Possible future behavioral change: substitute for :VARIABLE?
352          */
353
354 "`"                             {
355                                         /* In NO_EVAL mode, don't evaluate the command */
356                                         if (option_type != OT_NO_EVAL)
357                                                 evaluate_backtick(cur_state);   /* replaces the collected command text with its output */
358                                         BEGIN(xslasharg);       /* neither backtick is copied to the output */
359                                 }
360
361 {other}|\n              { ECHO; }
362
363 }
364
365 <xslashdquote>{
366         /* double-quoted text: copy verbatim, including the double quotes */
367
368 {dquote}                {
369                                         ECHO;   /* emit the closing quote too */
370                                         BEGIN(xslasharg);       /* back to ordinary argument text */
371                                 }
372
373 {other}|\n              { ECHO; }
374
375 }
376
377 <xslashwholeline>{
378         /* copy everything until end of input line */
379         /* but suppress leading whitespace */
380
381 {space}+                {
382                                         if (output_buf->len > 0)        /* nothing emitted yet means this whitespace is leading */
383                                                 ECHO;
384                                 }
385
386 {other}                 { ECHO; }
387
388 }
389
390 <xslashend>{
391         /* at end of command, eat a double backslash, but not anything else */
392
393 "\\\\"                  {
394                                         cur_state->start_state = YY_START;      /* the two backslashes are consumed and discarded */
395                                         return LEXRES_OK;
396                                 }
397
398 {other}|\n              {
399                                         yyless(0);      /* anything else is left in the input for the next lexer call */
400                                         cur_state->start_state = YY_START;
401                                         return LEXRES_OK;
402                                 }
403
404 }
405
406 <<EOF>>                 {
407                                         if (cur_state->buffer_stack == NULL)    /* no stacked buffer remains, so this is the real end */
408                                         {
409                                                 cur_state->start_state = YY_START;      /* lets the caller see which state we ended in */
410                                                 return LEXRES_EOL;              /* end of input reached */
411                                         }
412
413                                         /*
414                                          * We were expanding a variable, so pop the inclusion
415                                          * stack and keep lexing
416                                          */
417                                         psqlscan_pop_buffer_stack(cur_state);
418                                         psqlscan_select_top_buffer(cur_state);
419                                 }
420
421 %%
422
423 /*
424  * Scan the command name of a psql backslash command.  This should be called
425  * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
426  * has been consumed through the leading backslash.
427  *
428  * The return value is a malloc'd copy of the command name, as parsed off
429  * from the input.
430  */
431 char *
432 psql_scan_slash_command(PsqlScanState state)
433 {
434         PQExpBufferData mybuf;
435
436         /* Must be scanning already */
437         Assert(state->scanbufhandle != NULL);
438
439         /* Build a local buffer that we'll return the data of */
440         initPQExpBuffer(&mybuf);
441
442         /* Set current output target */
443         state->output_buf = &mybuf;
444
445         /* Set input source */
446         if (state->buffer_stack != NULL)
447                 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
448         else
449                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
450
451         /*
452          * Set lexer start state.  Note that this is sufficient to switch
453          * state->scanner over to using the tables in this lexer file.
454          */
455         state->start_state = xslashcmd;
456
457         /* And lex. */
458         yylex(NULL, state->scanner);
459
460         /* There are no possible errors in this lex state... */
461
462         /*
463          * In case the caller returns to using the regular SQL lexer, reselect the
464          * appropriate initial state.
465          */
466         psql_scan_reselect_sql_lexer(state);
467
468         return mybuf.data;
469 }
470
471 /*
472  * Parse off the next argument for a backslash command, and return it as a
473  * malloc'd string.  If there are no more arguments, returns NULL.
474  *
475  * type tells what processing, if any, to perform on the option string;
476  * for example, if it's a SQL identifier, we want to downcase any unquoted
477  * letters.
478  *
479  * if quote is not NULL, *quote is set to 0 if no quoting was found, else
480  * the last quote symbol used in the argument.
481  *
482  * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
483  * be taken as part of the option string will be stripped.
484  *
485  * NOTE: the only possible syntax errors for backslash options are unmatched
486  * quotes, which are detected when we run out of input.  Therefore, on a
487  * syntax error we just throw away the string and return NULL; there is no
488  * need to worry about flushing remaining input.
489  */
490 char *
491 psql_scan_slash_option(PsqlScanState state,
492                                            enum slash_option_type type,
493                                            char *quote,
494                                            bool semicolon)
495 {
496         PQExpBufferData mybuf;
497         int                     lexresult PG_USED_FOR_ASSERTS_ONLY;
498         int                     final_state;
499         char            local_quote;
500
501         /* Must be scanning already */
502         Assert(state->scanbufhandle != NULL);
503
504         if (quote == NULL)
505                 quote = &local_quote;
506         *quote = 0;
507
508         /* Build a local buffer that we'll return the data of */
509         initPQExpBuffer(&mybuf);
510
511         /* Set up static variables that will be used by yylex */
512         option_type = type;
513         option_quote = quote;
514         unquoted_option_chars = 0;
515
516         /* Set current output target */
517         state->output_buf = &mybuf;
518
519         /* Set input source */
520         if (state->buffer_stack != NULL)
521                 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
522         else
523                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
524
525         /* Set lexer start state */
526         if (type == OT_WHOLE_LINE)
527                 state->start_state = xslashwholeline;
528         else
529                 state->start_state = xslashargstart;
530
531         /* And lex. */
532         lexresult = yylex(NULL, state->scanner);
533
534         /* Save final state for a moment... */
535         final_state = state->start_state;
536
537         /*
538          * In case the caller returns to using the regular SQL lexer, reselect the
539          * appropriate initial state.
540          */
541         psql_scan_reselect_sql_lexer(state);
542
543         /*
544          * Check the lex result: we should have gotten back either LEXRES_OK
545          * or LEXRES_EOL (the latter indicating end of string).  If we were inside
546          * a quoted string, as indicated by final_state, EOL is an error.
547          */
548         Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
549
550         switch (final_state)
551         {
552                 case xslashargstart:
553                         /* empty arg */
554                         break;
555                 case xslasharg:
556                         /* Strip any unquoted trailing semi-colons if requested */
557                         if (semicolon)
558                         {
559                                 while (unquoted_option_chars-- > 0 &&
560                                            mybuf.len > 0 &&
561                                            mybuf.data[mybuf.len - 1] == ';')
562                                 {
563                                         mybuf.data[--mybuf.len] = '\0';
564                                 }
565                         }
566
567                         /*
568                          * If SQL identifier processing was requested, then we strip out
569                          * excess double quotes and downcase unquoted letters.
570                          * Doubled double-quotes become output double-quotes, per spec.
571                          *
572                          * Note that a string like FOO"BAR"BAZ will be converted to
573                          * fooBARbaz; this is somewhat inconsistent with the SQL spec,
574                          * which would have us parse it as several identifiers.  But
575                          * for psql's purposes, we want a string like "foo"."bar" to
576                          * be treated as one option, so there's little choice.
577                          */
578                         if (type == OT_SQLID || type == OT_SQLIDHACK)
579                         {
580                                 bool            inquotes = false;
581                                 char       *cp = mybuf.data;
582
583                                 while (*cp)
584                                 {
585                                         if (*cp == '"')
586                                         {
587                                                 if (inquotes && cp[1] == '"')
588                                                 {
589                                                         /* Keep the first quote, remove the second */
590                                                         cp++;
591                                                 }
592                                                 inquotes = !inquotes;
593                                                 /* Collapse out quote at *cp */
594                                                 memmove(cp, cp + 1, strlen(cp));
595                                                 mybuf.len--;
596                                                 /* do not advance cp */
597                                         }
598                                         else
599                                         {
600                                                 if (!inquotes && type == OT_SQLID)
601                                                         *cp = pg_tolower((unsigned char) *cp);
602                                                 cp += PQmblen(cp, state->encoding);
603                                         }
604                                 }
605                         }
606                         break;
607                 case xslashquote:
608                 case xslashbackquote:
609                 case xslashdquote:
610                         /* must have hit EOL inside quotes */
611                         state->callbacks->write_error("unterminated quoted string\n");
612                         termPQExpBuffer(&mybuf);
613                         return NULL;
614                 case xslashwholeline:
615                         /* always okay */
616                         break;
617                 default:
618                         /* can't get here */
619                         fprintf(stderr, "invalid YY_START\n");
620                         exit(1);
621         }
622
623         /*
624          * An unquoted empty argument isn't possible unless we are at end of
625          * command.  Return NULL instead.
626          */
627         if (mybuf.len == 0 && *quote == 0)
628         {
629                 termPQExpBuffer(&mybuf);
630                 return NULL;
631         }
632
633         /* Else return the completed string. */
634         return mybuf.data;
635 }
636
637 /*
638  * Eat up any unused \\ to complete a backslash command.
639  */
640 void
641 psql_scan_slash_command_end(PsqlScanState state)
642 {
643         /* Must be scanning already */
644         Assert(state->scanbufhandle != NULL);
645
646         /* Set current output target */
647         state->output_buf = NULL;       /* we won't output anything */
648
649         /* Set input source */
650         if (state->buffer_stack != NULL)
651                 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
652         else
653                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
654
655         /* Set lexer start state */
656         state->start_state = xslashend;
657
658         /* And lex. */
659         yylex(NULL, state->scanner);
660
661         /* There are no possible errors in this lex state... */
662
663         /*
664          * We expect the caller to return to using the regular SQL lexer, so
665          * reselect the appropriate initial state.
666          */
667         psql_scan_reselect_sql_lexer(state);
668 }
669
670 /*
671  * Evaluate a backticked substring of a slash command's argument.
672  *
673  * The portion of output_buf starting at backtick_start_offset is evaluated
674  * as a shell command and then replaced by the command's output.
675  */
676 static void
677 evaluate_backtick(PsqlScanState state)
678 {
679         PQExpBuffer output_buf = state->output_buf;
680         char       *cmd = output_buf->data + backtick_start_offset;
681         PQExpBufferData cmd_output;
682         FILE       *fd;
683         bool            error = false;
684         char            buf[512];
685         size_t          result;
686
687         initPQExpBuffer(&cmd_output);
688
689         fd = popen(cmd, PG_BINARY_R);
690         if (!fd)
691         {
692                 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
693                 error = true;
694         }
695
696         if (!error)
697         {
698                 do
699                 {
700                         result = fread(buf, 1, sizeof(buf), fd);
701                         if (ferror(fd))
702                         {
703                                 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
704                                 error = true;
705                                 break;
706                         }
707                         appendBinaryPQExpBuffer(&cmd_output, buf, result);
708                 } while (!feof(fd));
709         }
710
711         if (fd && pclose(fd) == -1)
712         {
713                 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
714                 error = true;
715         }
716
717         if (PQExpBufferDataBroken(cmd_output))
718         {
719                 state->callbacks->write_error("%s: out of memory\n", cmd);
720                 error = true;
721         }
722
723         /* Now done with cmd, delete it from output_buf */
724         output_buf->len = backtick_start_offset;
725         output_buf->data[output_buf->len] = '\0';
726
727         /* If no error, transfer result to output_buf */
728         if (!error)
729         {
730                 /* strip any trailing newline */
731                 if (cmd_output.len > 0 &&
732                         cmd_output.data[cmd_output.len - 1] == '\n')
733                         cmd_output.len--;
734                 appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
735         }
736
737         termPQExpBuffer(&cmd_output);
738 }