]> granicus.if.org Git - postgresql/blob - src/bin/psql/psqlscanslash.l
Split psql's lexer into two separate .l files for SQL and backslash cases.
[postgresql] / src / bin / psql / psqlscanslash.l
1 %top{
2 /*-------------------------------------------------------------------------
3  *
4  * psqlscanslash.l
5  *        lexical scanner for psql backslash commands
6  *
7  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
8  *
9  * See psqlscan_int.h for additional commentary.
10  *
11  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
12  * Portions Copyright (c) 1994, Regents of the University of California
13  *
14  * IDENTIFICATION
15  *        src/bin/psql/psqlscanslash.l
16  *
17  *-------------------------------------------------------------------------
18  */
19 #include "postgres_fe.h"
20
21 #include "psqlscanslash.h"
22
23 #include "libpq-fe.h"
24 }
25
26 %{
27 #include "psqlscan_int.h"
28
29 /*
30  * Set the type of yyextra; we use it as a pointer back to the containing
31  * PsqlScanState.
32  */
33 #define YY_EXTRA_TYPE PsqlScanState
34
35 /*
36  * These variables do not need to be saved across calls.  Yeah, it's a bit
37  * of a hack, but putting them into PsqlScanStateData would be klugy too.
38  */
/* Option type of the argument being lexed; set by psql_scan_slash_option() */
39 static enum slash_option_type option_type;
/* Where the rules store the last quote symbol seen in the current argument */
40 static char *option_quote;
/* Count of trailing argument characters that were not quoted in any way */
41 static int      unquoted_option_chars;
/* output_buf->len when an opening backtick was seen (start of shell command) */
42 static int      backtick_start_offset;
43
44
45 /* Return values from yylex() */
46 #define LEXRES_EOL                      0       /* end of input */
47 #define LEXRES_OK                       1       /* OK completion of backslash argument */
48
49
/* Runs the text collected between backticks as a shell command (see below) */
50 static void evaluate_backtick(PsqlScanState state);
51
/*
 * Override flex's ECHO: pass the current token text to psqlscan_emit()
 * (which, judging by its use in the rules, appends it to the output buffer)
 * instead of writing it to yyout.  cur_state is a local of yylex().
 */
52 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
53
54 /*
55  * Work around a bug in flex 2.5.35: it emits a couple of functions that
56  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
57  * this would cause warnings.  Providing our own declarations should be
58  * harmless even when the bug gets fixed.
59  */
60 extern int      slash_yyget_column(yyscan_t yyscanner);
61 extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
62
63 %}
64
/*
 * Scanner options.  "reentrant" keeps all scanner state in a yyscan_t object
 * instead of globals; "nodefault" suppresses flex's default rule, so every
 * start condition below has to match all possible input itself; "prefix"
 * renames the generated yy* symbols to slash_yy*.
 */
65 %option reentrant
66 %option 8bit
67 %option never-interactive
68 %option nodefault
69 %option noinput
70 %option nounput
71 %option noyywrap
72 %option warn
73 %option prefix="slash_yy"
74
75 /*
76  * OK, here is a short description of lex/flex rules behavior.
77  * The longest pattern which matches an input string is always chosen.
78  * For equal-length patterns, the first occurring in the rules list is chosen.
79  * INITIAL is the starting state, to which all non-conditional rules apply.
80  * Exclusive states change parsing rules while the state is active.  When in
81  * an exclusive state, only those rules defined for that state apply.
82  */
83
84 /* Exclusive states for lexing backslash commands */
/*
 *  xslashcmd        reading the command name
 *  xslashargstart   skipping whitespace in front of an argument
 *  xslasharg        inside the unquoted part of an argument
 *  xslashquote      inside a single-quoted part of an argument
 *  xslashbackquote  inside a backticked part of an argument
 *  xslashdquote     inside a double-quoted part of an argument
 *  xslashwholeline  copying the rest of the input as one argument
 *  xslashend        at end of command, looking for a double backslash
 */
85 %x xslashcmd
86 %x xslashargstart
87 %x xslasharg
88 %x xslashquote
89 %x xslashbackquote
90 %x xslashdquote
91 %x xslashwholeline
92 %x xslashend
93
94 /*
95  * Assorted character class definitions that should match psqlscan.l.
96  */
/*
 * xeoctesc is a backslash followed by one to three octal digits; xehexesc is
 * a backslash, "x", and one or two hex digits.  variable_char is any ASCII
 * letter, digit or underscore, or any byte with the high bit set.
 */
97 space                   [ \t\n\r\f]
98 quote                   '
99 xeoctesc                [\\][0-7]{1,3}
100 xehexesc                [\\]x[0-9A-Fa-f]{1,2}
101 xqdouble                {quote}{quote}
102 dquote                  \"
103 variable_char   [A-Za-z\200-\377_0-9]
104
105 other                   .
106
107 %%
108
109 %{
110                 /* Declare some local variables inside yylex(), for convenience */
111                 PsqlScanState cur_state = yyextra;
                /*
                 * output_buf is whatever buffer the calling psql_scan_slash_*
                 * function stored in the scan state before invoking yylex()
                 * (psql_scan_slash_command_end stores NULL there).
                 */
112                 PQExpBuffer output_buf = cur_state->output_buf;
113
114                 /*
115                  * Force flex into the state indicated by start_state.  This has a
116                  * couple of purposes: it lets some of the functions below set a
117                  * new starting state without ugly direct access to flex variables,
118                  * and it allows us to transition from one flex lexer to another
119                  * so that we can lex different parts of the source string using
120                  * separate lexers.
121                  */
122                 BEGIN(cur_state->start_state);
123 %}
124
125         /*
126          * We don't really expect to be invoked in the INITIAL state in this
127          * lexer; but if we are, just spit data to the output_buf until EOF.
128          */
129
        /* "\n" is listed explicitly because "." ({other}) does not match it */
130 {other}|\n              { ECHO; }
131
132         /*
133          * Exclusive lexer states to handle backslash command lexing
134          */
135
136 <xslashcmd>{
137         /* command name ends at whitespace or backslash; eat all else */
138
139 {space}|"\\"    {
                                        /*
                                         * yyless(0) returns the terminating character to the
                                         * input so the next scan will see it.  The state we
                                         * stopped in is recorded in start_state.
                                         */
140                                         yyless(0);
141                                         cur_state->start_state = YY_START;
142                                         return LEXRES_OK;
143                                 }
144
        /* any other character is part of the command name */
145 {other}                 { ECHO; }
146
147 }
148
149 <xslashargstart>{
150         /*
151          * Discard any whitespace before argument, then go to xslasharg state.
152          * An exception is that "|" is only special at start of argument, so we
153          * check for it here.
154          */
155
156 {space}+                { }
157
158 "|"                             {
159                                         if (option_type == OT_FILEPIPE)
160                                         {
161                                                 /* treat like whole-string case */
                                                /* (the "|" itself is kept as part of the argument) */
162                                                 ECHO;
163                                                 BEGIN(xslashwholeline);
164                                         }
165                                         else
166                                         {
167                                                 /* vertical bar is not special otherwise */
                                                /* push it back and let the xslasharg rules handle it */
168                                                 yyless(0);
169                                                 BEGIN(xslasharg);
170                                         }
171                                 }
172
        /*
         * First non-whitespace character of the argument: consume nothing here,
         * just switch to xslasharg and rescan the character there.
         */
173 {other}                 {
174                                         yyless(0);
175                                         BEGIN(xslasharg);
176                                 }
177
178 }
179
180 <xslasharg>{
181         /*
182          * Default processing of text in a slash command's argument.
183          *
184          * Note: unquoted_option_chars counts the number of characters at the
185          * end of the argument that were not subject to any form of quoting.
186          * psql_scan_slash_option needs this to strip trailing semicolons safely.
187          */
        /*
         * Every rule below that handles quoting or variable syntax therefore
         * resets unquoted_option_chars to zero, while rules that pass a plain
         * character through increment it.
         */
188
189 {space}|"\\"    {
190                                         /*
191                                          * Unquoted space is end of arg; do not eat.  Likewise
192                                          * backslash is end of command or next command, do not eat
193                                          *
194                                          * XXX this means we can't conveniently accept options
195                                          * that include unquoted backslashes; therefore, option
196                                          * processing that encourages use of backslashes is rather
197                                          * broken.
198                                          */
199                                         yyless(0);
200                                         cur_state->start_state = YY_START;
201                                         return LEXRES_OK;
202                                 }
203
        /* opening single quote: the quote mark itself is not copied to the output */
204 {quote}                 {
205                                         *option_quote = '\'';
206                                         unquoted_option_chars = 0;
207                                         BEGIN(xslashquote);
208                                 }
209
        /*
         * opening backtick: remember where the command text will start in
         * output_buf, so that evaluate_backtick() can find it later
         */
210 "`"                             {
211                                         backtick_start_offset = output_buf->len;
212                                         *option_quote = '`';
213                                         unquoted_option_chars = 0;
214                                         BEGIN(xslashbackquote);
215                                 }
216
        /* opening double quote: unlike the other quote marks, this one is copied */
217 {dquote}                {
218                                         ECHO;
219                                         *option_quote = '"';
220                                         unquoted_option_chars = 0;
221                                         BEGIN(xslashdquote);
222                                 }
223
224 :{variable_char}+       {
225                                         /* Possible psql variable substitution */
                                        /*
                                         * The reference is copied literally if evaluation is
                                         * disabled or there is no get_variable callback.
                                         */
226                                         if (option_type == OT_NO_EVAL ||
227                                                 cur_state->callbacks->get_variable == NULL)
228                                                 ECHO;
229                                         else
230                                         {
231                                                 char   *varname;
232                                                 char   *value;
233
                                                /* yytext + 1 skips the leading colon */
234                                                 varname = psqlscan_extract_substring(cur_state,
235                                                                                                                          yytext + 1,
236                                                                                                                          yyleng - 1);
237                                                 value = cur_state->callbacks->get_variable(varname,
238                                                                                                                                    false,
239                                                                                                                                    false);
240                                                 free(varname);
241
242                                                 /*
243                                                  * The variable value is just emitted without any
244                                                  * further examination.  This is consistent with the
245                                                  * pre-8.0 code behavior, if not with the way that
246                                                  * variables are handled outside backslash commands.
247                                                  * Note that we needn't guard against recursion here.
248                                                  */
249                                                 if (value)
250                                                 {
251                                                         appendPQExpBufferStr(output_buf, value);
252                                                         free(value);
253                                                 }
254                                                 else
255                                                         ECHO;
256
                                                /* recorded whether or not a value was found */
257                                                 *option_quote = ':';
258                                         }
259                                         unquoted_option_chars = 0;
260                                 }
261
        /*
         * :'name' --- handed, colon and quotes included, to
         * psqlscan_escape_variable() with a final argument of false
         * (presumably requesting SQL-literal quoting; confirm in psqlscan.l)
         */
262 :'{variable_char}+'     {
263                                         if (option_type == OT_NO_EVAL)
264                                                 ECHO;
265                                         else
266                                         {
267                                                 psqlscan_escape_variable(cur_state, yytext, yyleng, false);
268                                                 *option_quote = ':';
269                                         }
270                                         unquoted_option_chars = 0;
271                                 }
272
273
        /*
         * :"name" --- as above, but the final argument is true (presumably
         * requesting SQL-identifier quoting; confirm in psqlscan.l)
         */
274 :\"{variable_char}+\"   {
275                                         if (option_type == OT_NO_EVAL)
276                                                 ECHO;
277                                         else
278                                         {
279                                                 psqlscan_escape_variable(cur_state, yytext, yyleng, true);
280                                                 *option_quote = ':';
281                                         }
282                                         unquoted_option_chars = 0;
283                                 }
284
        /*
         * The next two rules match a :'name or :"name that lacks its closing
         * quote.  Only the colon is kept as an ordinary unquoted character;
         * yyless(1) returns the rest to the input to be rescanned.
         */
285 :'{variable_char}*      {
286                                         /* Throw back everything but the colon */
287                                         yyless(1);
288                                         unquoted_option_chars++;
289                                         ECHO;
290                                 }
291
292 :\"{variable_char}*     {
293                                         /* Throw back everything but the colon */
294                                         yyless(1);
295                                         unquoted_option_chars++;
296                                         ECHO;
297                                 }
298
        /* anything else is an ordinary unquoted character of the argument */
299 {other}                 {
300                                         unquoted_option_chars++;
301                                         ECHO;
302                                 }
303
304 }
305
306 <xslashquote>{
307         /*
308          * single-quoted text: copy literally except for '' and backslash
309          * sequences
310          */
311
        /* closing quote: back to unquoted argument text; the quote is not copied */
312 {quote}                 { BEGIN(xslasharg); }
313
        /* '' stands for one literal single quote */
314 {xqdouble}              { appendPQExpBufferChar(output_buf, '\''); }
315
        /* C-style escapes for control characters */
316 "\\n"                   { appendPQExpBufferChar(output_buf, '\n'); }
317 "\\t"                   { appendPQExpBufferChar(output_buf, '\t'); }
318 "\\b"                   { appendPQExpBufferChar(output_buf, '\b'); }
319 "\\r"                   { appendPQExpBufferChar(output_buf, '\r'); }
320 "\\f"                   { appendPQExpBufferChar(output_buf, '\f'); }
321
322 {xeoctesc}              {
323                                         /* octal case */
                                        /* yytext + 1 skips the backslash */
324                                         appendPQExpBufferChar(output_buf,
325                                                                                   (char) strtol(yytext + 1, NULL, 8));
326                                 }
327
328 {xehexesc}              {
329                                         /* hex case */
                                        /* yytext + 2 skips the backslash and the "x" */
330                                         appendPQExpBufferChar(output_buf,
331                                                                                   (char) strtol(yytext + 2, NULL, 16));
332                                 }
333
        /* backslash before any other character: drop the backslash, keep the character */
334 "\\".                   { psqlscan_emit(cur_state, yytext + 1, 1); }
335
336 {other}|\n              { ECHO; }
337
338 }
339
340 <xslashbackquote>{
341         /*
342          * backticked text: copy everything until next backquote, then evaluate.
343          *
344          * XXX Possible future behavioral change: substitute for :VARIABLE?
345          */
346
347 "`"                             {
348                                         /* In NO_EVAL mode, don't evaluate the command */
                                        /*
                                         * (in that case the command text stays in output_buf
                                         * as collected, without the surrounding backticks)
                                         */
349                                         if (option_type != OT_NO_EVAL)
350                                                 evaluate_backtick(cur_state);
351                                         BEGIN(xslasharg);
352                                 }
353
        /* accumulate the command text in output_buf */
354 {other}|\n              { ECHO; }
355
356 }
357
358 <xslashdquote>{
359         /* double-quoted text: copy verbatim, including the double quotes */
        /*
         * (psql_scan_slash_option() removes the quotes afterwards when SQL
         * identifier processing was requested)
         */
360
361 {dquote}                {
362                                         ECHO;
363                                         BEGIN(xslasharg);
364                                 }
365
366 {other}|\n              { ECHO; }
367
368 }
369
370 <xslashwholeline>{
371         /* copy everything until end of input line */
372         /* but suppress leading whitespace */
373
374 {space}+                {
                                        /*
                                         * Whitespace is copied only once something has
                                         * already been written to the output buffer.
                                         */
375                                         if (output_buf->len > 0)
376                                                 ECHO;
377                                 }
378
        /* newlines are covered by {space} above, so "." suffices here */
379 {other}                 { ECHO; }
380
381 }
382
383 <xslashend>{
384         /* at end of command, eat a double backslash, but not anything else */
385
        /* a double backslash is consumed; nothing is written to any output buffer */
386 "\\\\"                  {
387                                         cur_state->start_state = YY_START;
388                                         return LEXRES_OK;
389                                 }
390
        /* anything else is pushed back untouched for whoever scans next */
391 {other}|\n              {
392                                         yyless(0);
393                                         cur_state->start_state = YY_START;
394                                         return LEXRES_OK;
395                                 }
396
397 }
398
399         /*
400          * psql uses a single <<EOF>> rule, unlike the backend.
401          */
402
403 <<EOF>>                 {
                                        /*
                                         * With no stacked buffer this is the real end of
                                         * input.  Record the state we were in so that the
                                         * caller can tell, for instance, that input ended
                                         * inside a quoted string.
                                         */
404                                         if (cur_state->buffer_stack == NULL)
405                                         {
406                                                 cur_state->start_state = YY_START;
407                                                 return LEXRES_EOL; /* end of input reached */
408                                         }
409
410                                         /*
411                                          * We were expanding a variable, so pop the inclusion
412                                          * stack and keep lexing
413                                          */
414                                         psqlscan_pop_buffer_stack(cur_state);
415                                         psqlscan_select_top_buffer(cur_state);
416                                 }
417
418 %%
419
420 /*
421  * Scan the command name of a psql backslash command.  This should be called
422  * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
423  * has been consumed through the leading backslash.
424  *
425  * The return value is a malloc'd copy of the command name, as parsed off
426  * from the input.
427  */
428 char *
429 psql_scan_slash_command(PsqlScanState state)
430 {
431         PQExpBufferData mybuf;
432
433         /* Must be scanning already */
434         Assert(state->scanbufhandle != NULL);
435
436         /* Build a local buffer that we'll return the data of */
437         initPQExpBuffer(&mybuf);
438
439         /* Set current output target */
440         state->output_buf = &mybuf;
441
442         /* Set input source */
443         if (state->buffer_stack != NULL)
444                 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
445         else
446                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
447
448         /*
449          * Set lexer start state.  Note that this is sufficient to switch
450          * state->scanner over to using the tables in this lexer file.
451          */
452         state->start_state = xslashcmd;
453
454         /* And lex. */
455         yylex(state->scanner);
456
457         /* There are no possible errors in this lex state... */
458
459         /*
460          * In case the caller returns to using the regular SQL lexer, reselect the
461          * appropriate initial state.
462          */
463         psql_scan_reselect_sql_lexer(state);
464
465         return mybuf.data;
466 }
467
468 /*
469  * Parse off the next argument for a backslash command, and return it as a
470  * malloc'd string.  If there are no more arguments, returns NULL.
471  *
472  * type tells what processing, if any, to perform on the option string;
473  * for example, if it's a SQL identifier, we want to downcase any unquoted
474  * letters.
475  *
476  * if quote is not NULL, *quote is set to 0 if no quoting was found, else
477  * the last quote symbol used in the argument.
478  *
479  * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
480  * be taken as part of the option string will be stripped.
481  *
482  * NOTE: the only possible syntax errors for backslash options are unmatched
483  * quotes, which are detected when we run out of input.  Therefore, on a
484  * syntax error we just throw away the string and return NULL; there is no
485  * need to worry about flushing remaining input.
486  */
487 char *
488 psql_scan_slash_option(PsqlScanState state,
489                                            enum slash_option_type type,
490                                            char *quote,
491                                            bool semicolon)
492 {
493         PQExpBufferData mybuf;
494         int                     lexresult PG_USED_FOR_ASSERTS_ONLY;
495         int                     final_state;
496         char            local_quote;
497
498         /* Must be scanning already */
499         Assert(state->scanbufhandle != NULL);
500
501         if (quote == NULL)
502                 quote = &local_quote;
503         *quote = 0;
504
505         /* Build a local buffer that we'll return the data of */
506         initPQExpBuffer(&mybuf);
507
508         /* Set up static variables that will be used by yylex */
509         option_type = type;
510         option_quote = quote;
511         unquoted_option_chars = 0;
512
513         /* Set current output target */
514         state->output_buf = &mybuf;
515
516         /* Set input source */
517         if (state->buffer_stack != NULL)
518                 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
519         else
520                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
521
522         /* Set lexer start state */
523         if (type == OT_WHOLE_LINE)
524                 state->start_state = xslashwholeline;
525         else
526                 state->start_state = xslashargstart;
527
528         /* And lex. */
529         lexresult = yylex(state->scanner);
530
531         /* Save final state for a moment... */
532         final_state = state->start_state;
533
534         /*
535          * In case the caller returns to using the regular SQL lexer, reselect the
536          * appropriate initial state.
537          */
538         psql_scan_reselect_sql_lexer(state);
539
540         /*
541          * Check the lex result: we should have gotten back either LEXRES_OK
542          * or LEXRES_EOL (the latter indicating end of string).  If we were inside
543          * a quoted string, as indicated by final_state, EOL is an error.
544          */
545         Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
546
547         switch (final_state)
548         {
549                 case xslashargstart:
550                         /* empty arg */
551                         break;
552                 case xslasharg:
553                         /* Strip any unquoted trailing semi-colons if requested */
554                         if (semicolon)
555                         {
556                                 while (unquoted_option_chars-- > 0 &&
557                                            mybuf.len > 0 &&
558                                            mybuf.data[mybuf.len - 1] == ';')
559                                 {
560                                         mybuf.data[--mybuf.len] = '\0';
561                                 }
562                         }
563
564                         /*
565                          * If SQL identifier processing was requested, then we strip out
566                          * excess double quotes and downcase unquoted letters.
567                          * Doubled double-quotes become output double-quotes, per spec.
568                          *
569                          * Note that a string like FOO"BAR"BAZ will be converted to
570                          * fooBARbaz; this is somewhat inconsistent with the SQL spec,
571                          * which would have us parse it as several identifiers.  But
572                          * for psql's purposes, we want a string like "foo"."bar" to
573                          * be treated as one option, so there's little choice.
574                          */
575                         if (type == OT_SQLID || type == OT_SQLIDHACK)
576                         {
577                                 bool            inquotes = false;
578                                 char       *cp = mybuf.data;
579
580                                 while (*cp)
581                                 {
582                                         if (*cp == '"')
583                                         {
584                                                 if (inquotes && cp[1] == '"')
585                                                 {
586                                                         /* Keep the first quote, remove the second */
587                                                         cp++;
588                                                 }
589                                                 inquotes = !inquotes;
590                                                 /* Collapse out quote at *cp */
591                                                 memmove(cp, cp + 1, strlen(cp));
592                                                 mybuf.len--;
593                                                 /* do not advance cp */
594                                         }
595                                         else
596                                         {
597                                                 if (!inquotes && type == OT_SQLID)
598                                                         *cp = pg_tolower((unsigned char) *cp);
599                                                 cp += PQmblen(cp, state->encoding);
600                                         }
601                                 }
602                         }
603                         break;
604                 case xslashquote:
605                 case xslashbackquote:
606                 case xslashdquote:
607                         /* must have hit EOL inside quotes */
608                         state->callbacks->write_error("unterminated quoted string\n");
609                         termPQExpBuffer(&mybuf);
610                         return NULL;
611                 case xslashwholeline:
612                         /* always okay */
613                         break;
614                 default:
615                         /* can't get here */
616                         fprintf(stderr, "invalid YY_START\n");
617                         exit(1);
618         }
619
620         /*
621          * An unquoted empty argument isn't possible unless we are at end of
622          * command.  Return NULL instead.
623          */
624         if (mybuf.len == 0 && *quote == 0)
625         {
626                 termPQExpBuffer(&mybuf);
627                 return NULL;
628         }
629
630         /* Else return the completed string. */
631         return mybuf.data;
632 }
633
634 /*
635  * Eat up any unused \\ to complete a backslash command.
636  */
637 void
638 psql_scan_slash_command_end(PsqlScanState state)
639 {
640         /* Must be scanning already */
641         Assert(state->scanbufhandle != NULL);
642
643         /* Set current output target */
644         state->output_buf = NULL;       /* we won't output anything */
645
646         /* Set input source */
647         if (state->buffer_stack != NULL)
648                 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
649         else
650                 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
651
652         /* Set lexer start state */
653         state->start_state = xslashend;
654
655         /* And lex. */
656         yylex(state->scanner);
657
658         /* There are no possible errors in this lex state... */
659
660         /*
661          * We expect the caller to return to using the regular SQL lexer, so
662          * reselect the appropriate initial state.
663          */
664         psql_scan_reselect_sql_lexer(state);
665 }
666
667 /*
668  * Evaluate a backticked substring of a slash command's argument.
669  *
670  * The portion of output_buf starting at backtick_start_offset is evaluated
671  * as a shell command and then replaced by the command's output.
672  */
673 static void
674 evaluate_backtick(PsqlScanState state)
675 {
676         PQExpBuffer output_buf = state->output_buf;
677         char       *cmd = output_buf->data + backtick_start_offset;
678         PQExpBufferData cmd_output;
679         FILE       *fd;
680         bool            error = false;
681         char            buf[512];
682         size_t          result;
683
684         initPQExpBuffer(&cmd_output);
685
686         fd = popen(cmd, PG_BINARY_R);
687         if (!fd)
688         {
689                 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
690                 error = true;
691         }
692
693         if (!error)
694         {
695                 do
696                 {
697                         result = fread(buf, 1, sizeof(buf), fd);
698                         if (ferror(fd))
699                         {
700                                 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
701                                 error = true;
702                                 break;
703                         }
704                         appendBinaryPQExpBuffer(&cmd_output, buf, result);
705                 } while (!feof(fd));
706         }
707
708         if (fd && pclose(fd) == -1)
709         {
710                 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
711                 error = true;
712         }
713
714         if (PQExpBufferDataBroken(cmd_output))
715         {
716                 state->callbacks->write_error("%s: out of memory\n", cmd);
717                 error = true;
718         }
719
720         /* Now done with cmd, delete it from output_buf */
721         output_buf->len = backtick_start_offset;
722         output_buf->data[output_buf->len] = '\0';
723
724         /* If no error, transfer result to output_buf */
725         if (!error)
726         {
727                 /* strip any trailing newline */
728                 if (cmd_output.len > 0 &&
729                         cmd_output.data[cmd_output.len - 1] == '\n')
730                         cmd_output.len--;
731                 appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
732         }
733
734         termPQExpBuffer(&cmd_output);
735 }