]> granicus.if.org Git - postgresql/blob - src/backend/parser/scan.l
Improve comment wording.
[postgresql] / src / backend / parser / scan.l
1 %{
2 /*-------------------------------------------------------------------------
3  *
4  * scan.l
5  *        lexical scanner for PostgreSQL
6  *
7  * NOTE NOTE NOTE:
8  *
9  * The rules in this file must be kept in sync with psql's lexer!!!
10  *
11  * The rules are designed so that the scanner never has to backtrack,
12  * in the sense that there is always a rule that can match the input
13  * consumed so far (the rule action may internally throw back some input
14  * with yyless(), however).  As explained in the flex manual, this makes
15  * for a useful speed increase --- about a third faster than a plain -CF
16  * lexer, in simple testing.  The extra complexity is mostly in the rules
17  * for handling float numbers and continued string literals.  If you change
18  * the lexical rules, verify that you haven't broken the no-backtrack
19  * property by running flex with the "-b" option and checking that the
20  * resulting "lex.backup" file says that no backing up is needed.
21  *
22  *
23  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
24  * Portions Copyright (c) 1994, Regents of the University of California
25  *
26  * IDENTIFICATION
27  *        $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.125 2005/06/15 16:28:06 momjian Exp $
28  *
29  *-------------------------------------------------------------------------
30  */
31 #include "postgres.h"
32
33 #include <ctype.h>
34 #include <unistd.h>
35
36 #include "parser/gramparse.h"
37 #include "parser/keywords.h"
38 /* Not needed now that this file is compiled as part of gram.y */
39 /* #include "parser/parse.h" */
40 #include "parser/scansup.h"
41 #include "mb/pg_wchar.h"
42
43
44 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
45 #undef fprintf
46 #define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))
47
48 extern YYSTYPE yylval;
49
50 static int              xcdepth = 0;    /* depth of nesting in slash-star comments */
51 static char    *dolqstart;      /* current $foo$ quote start string */
52
53 /*
54  * literalbuf is used to accumulate literal values when multiple rules
55  * are needed to parse a single literal.  Call startlit to reset buffer
56  * to empty, addlit to add text.  Note that the buffer is palloc'd and
57  * starts life afresh on every parse cycle.
58  */
59 static char        *literalbuf;         /* expandable buffer */
60 static int              literallen;             /* actual current length */
61 static int              literalalloc;   /* current allocated buffer size */
62
63 #define startlit()  (literalbuf[0] = '\0', literallen = 0)
64 static void addlit(char *ytext, int yleng);
65 static void addlitchar(unsigned char ychar);
66 static char *litbufdup(void);
67
68 /*
69  * When we parse a token that requires multiple lexer rules to process,
70  * we set token_start to point at the true start of the token, for use
71  * by yyerror().  yytext will point at just the text consumed by the last
72  * rule, so it's not very helpful (e.g., it might contain just the last
73  * quote mark of a quoted identifier).  But to avoid cluttering every rule
74  * with setting token_start, we allow token_start = NULL to denote that
75  * it's okay to use yytext.
76  */
77 static char        *token_start;
78
79 /* Handles to the buffer that the lexer uses internally */
80 static YY_BUFFER_STATE scanbufhandle;
81 static char *scanbuf;   /* yyerror() measures the error cursor position from here */
82
83 unsigned char unescape_single_char(unsigned char c);    /* applied by the <xq>{xqescape} rule to the char after a backslash */
84
85 %}
86
87 %option 8bit
88 %option never-interactive
89 %option nodefault
90 %option nounput
91 %option noyywrap
92 %option prefix="base_yy"
93
94 /*
95  * OK, here is a short description of lex/flex rules behavior.
96  * The longest pattern which matches an input string is always chosen.
97  * For equal-length patterns, the first occurring in the rules list is chosen.
98  * INITIAL is the starting state, to which all non-conditional rules apply.
99  * Exclusive states change parsing rules while the state is active.  When in
100  * an exclusive state, only those rules defined for that state apply.
101  *
102  * We use exclusive states for quoted strings, extended comments,
103  * and to eliminate parsing troubles for numeric strings.
104  * Exclusive states:
105  *  <xb> bit string literal
106  *  <xc> extended C-style comments
107  *  <xd> delimited identifiers (double-quoted identifiers)
108  *  <xh> hexadecimal numeric string
109  *  <xq> quoted strings
110  *  <xdolq> $foo$ quoted strings
111  */
112
113 %x xb
114 %x xc
115 %x xd
116 %x xh
117 %x xq
118 %x xdolq
119
120 /*
121  * In order to make the world safe for Windows and Mac clients as well as
122  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
123  * sequence will be seen as two successive newlines, but that doesn't cause
124  * any problems.  Comments that start with -- and extend to the next
125  * newline are treated as equivalent to a single whitespace character.
126  *
127  * NOTE a fine point: if there is no newline following --, we will absorb
128  * everything to the end of the input as a comment.  This is correct.  Older
129  * versions of Postgres failed to recognize -- as a comment if the input
130  * did not end with a newline.
131  *
132  * XXX perhaps \f (formfeed) should be treated as a newline as well?
133  */
134
135 space                   [ \t\n\r\f]
136 horiz_space             [ \t\f]
137 newline                 [\n\r]
138 non_newline             [^\n\r]
139
140 comment                 ("--"{non_newline}*)
141
142 whitespace              ({space}+|{comment})
143
144 /*
145  * SQL requires at least one newline in the whitespace separating
146  * string literals that are to be concatenated.  Silly, but who are we
147  * to argue?  Note that {whitespace_with_newline} should not have * after
148  * it, whereas {whitespace} should generally have a * after it...
149  */
150
151 special_whitespace              ({space}+|{comment}{newline})
152 horiz_whitespace                ({horiz_space}|{comment})
153 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
154
155 /*
156  * To ensure that {quotecontinue} can be scanned without having to back up
157  * if the full pattern isn't matched, we include trailing whitespace in
158  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
159  * except for {quote} followed by whitespace and just one "-" (not two,
160  * which would start a {comment}).  To cover that we have {quotefail}.
161  * The actions for {quotestop} and {quotefail} must throw back characters
162  * beyond the quote proper.
163  */
164 quote                   '
165 quotestop               {quote}{whitespace}*
166 quotecontinue   {quote}{whitespace_with_newline}{quote}
167 quotefail               {quote}{whitespace}*"-"
168
169 /* Bit string
170  * It is tempting to scan the string for only those characters
171  * which are allowed. However, this leads to silently swallowed
172  * characters if illegal characters are included in the string.
173  * For example, if xbinside is [01] then B'ABCD' is interpreted
174  * as a zero-length string, and the ABCD' is lost!
175  * Better to pass the string forward and let the input routines
176  * validate the contents.
177  */
178 xbstart                 [bB]{quote}
179 xbinside                [^']*
180
181 /* Hexadecimal number */
182 xhstart                 [xX]{quote}
183 xhinside                [^']*
184
185 /* National character */
186 xnstart                 [nN]{quote}
187
188 /* Extended quote
189  * xqdouble implements an embedded quote: '' inside a string stands for one '
190  */
191 xqstart                 {quote}
192 xqdouble                {quote}{quote}
193 xqinside                [^\\']+
194 xqescape                [\\][^0-7]
195 xqoctesc                [\\][0-7]{1,3}
196 xqhexesc                [\\]x[0-9A-Fa-f]{1,2}
197
198 /* $foo$ style quotes ("dollar quoting")
199  * The quoted string starts with $foo$ where "foo" is an optional string
200  * in the form of an identifier, except that it may not contain "$", 
201  * and extends to the first occurrence of an identical string.  
202  * There is *no* processing of the quoted text.
203  *
204  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
205  * fails to match its trailing "$".
206  */
207 dolq_start              [A-Za-z\200-\377_]
208 dolq_cont               [A-Za-z\200-\377_0-9]
209 dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
210 dolqfailed              \${dolq_start}{dolq_cont}*
211 dolqinside              [^$]+
212
213 /* Double quote
214  * Allows embedded spaces and other special characters in identifiers.
215  */
216 dquote                  \"
217 xdstart                 {dquote}
218 xdstop                  {dquote}
219 xddouble                {dquote}{dquote}
220 xdinside                [^"]+
221
222 /* C-style comments
223  *
224  * The "extended comment" syntax closely resembles allowable operator syntax.
225  * The tricky part here is to get lex to recognize a string starting with
226  * slash-star as a comment, when interpreting it as an operator would produce
227  * a longer match --- remember lex will prefer a longer match!  Also, if we
228  * have something like plus-slash-star, lex will think this is a 3-character
229  * operator whereas we want to see it as a + operator and a comment start.
230  * The solution is two-fold:
231  * 1. append {op_chars}* to xcstart so that it matches as much text as
232  *    {operator} would. Then the tie-breaker (first matching rule of same
233  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
234  *    in case it contains a star-slash that should terminate the comment.
235  * 2. In the operator rule, check for slash-star within the operator, and
236  *    if found throw it back with yyless().  This handles the plus-slash-star
237  *    problem.
238  * Dash-dash comments have similar interactions with the operator rule.
239  */
240 xcstart                 \/\*{op_chars}*
241 xcstop                  \*+\/
242 xcinside                [^*/]+
243
244 digit                   [0-9]
245 ident_start             [A-Za-z\200-\377_]
246 ident_cont              [A-Za-z\200-\377_0-9\$]
247
248 identifier              {ident_start}{ident_cont}*
249
250 typecast                "::"
251
252 /*
253  * "self" is the set of chars that should be returned as single-character
254  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
255  * which can be one or more characters long (but if a single-char token
256  * appears in the "self" set, it is not to be returned as an Op).  Note
257  * that the sets overlap, but each has some chars that are not in the other.
258  *
259  * If you change either set, adjust the character lists appearing in the
260  * rule for "operator"!
261  */
262 self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
263 op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
264 operator                {op_chars}+
265
266 /* We no longer allow unary minus in numbers.
267  * Instead we pass it separately to the parser, where it gets
268  * coerced via doNegate() -- Leon aug 20 1999
269  *
270  * {realfail1} and {realfail2} are added to prevent the need for scanner
271  * backup when the {real} rule fails to match completely.
272  */
273
274 integer                 {digit}+
275 decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
276 real                    ({integer}|{decimal})[Ee][-+]?{digit}+
277 realfail1               ({integer}|{decimal})[Ee]
278 realfail2               ({integer}|{decimal})[Ee][-+]
279
280 param                   \${integer}
281
282 other                   .
283
284 /*
285  * Dollar quoted strings are totally opaque, and no escaping is done on them.
286  * Other quoted strings must allow some special characters such as single-quote
287  *  and newline.
288  * Embedded single-quotes are implemented both in the SQL standard
289  *  style of two adjacent single quotes "''" and in the Postgres/Java style
290  *  of escaped-quote "\'".
291  * Other embedded escaped characters are matched explicitly and the leading
292  *  backslash is dropped from the string.
293  * Note that xcstart must appear before operator, as explained above!
294  *  Also whitespace (comment) must appear before operator.
295  */
296
297 %%
298
299 %{
300                                         /* code to execute at the start of each call of yylex() */
301                                         token_start = NULL;
302 %}
303
304 {whitespace}    {
305                                         /* ignore: runs of space characters and dash-dash comments produce no token */
306                                 }
307
308 {xcstart}               {
309                                         token_start = yytext;
310                                         xcdepth = 0;    /* outermost comment: no nesting yet */
311                                         BEGIN(xc);
312                                         /* Put back any characters past slash-star; see above */
313                                         yyless(2);
314                                 }
315
316 <xc>{xcstart}   {
317                                         xcdepth++;      /* a comment opener inside a comment adds one nesting level */
318                                         /* Put back any characters past slash-star; see above */
319                                         yyless(2);
320                                 }
321
322 <xc>{xcstop}    {
323                                         if (xcdepth <= 0)
324                                         {
325                                                 BEGIN(INITIAL); /* the outermost comment is closed */
326                                                 /* reset token_start for next token */
327                                                 token_start = NULL;
328                                         }
329                                         else
330                                                 xcdepth--;      /* close one nested level and stay in <xc> */
331                                 }
332
333 <xc>{xcinside}  {
334                                         /* ignore comment text that contains neither star nor slash */
335                                 }
336
337 <xc>{op_chars}  {
338                                         /* ignore a single operator character not consumed by the rules above */
339                                 }
340
341 <xc>\*+                 {
342                                         /* ignore a run of stars that does not terminate the comment */
343                                 }
344
345 <xc><<EOF>>             { yyerror("unterminated /* comment"); }
346
347 {xbstart}               {
348                                         /* Binary bit type.
349                                          * At some point we should simply pass the string
350                                          * forward to the parser and label it there.
351                                          * In the meantime, place a leading "b" on the string
352                                          * to mark it for the input routine as a binary string.
353                                          */
354                                         token_start = yytext;
355                                         BEGIN(xb);
356                                         startlit();
357                                         addlitchar('b');
358                                 }
359 <xb>{quotestop} |
360 <xb>{quotefail} {
361                                         yyless(1);      /* keep just the closing quote; throw back whatever was matched after it */
362                                         BEGIN(INITIAL);
363                                         yylval.str = litbufdup();       /* includes the leading "b" marker */
364                                         return BCONST;
365                                 }
366 <xh>{xhinside}  |
367 <xb>{xbinside}  {
368                                         addlit(yytext, yyleng); /* contents are accumulated without validation */
369                                 }
370 <xh>{quotecontinue}     |
371 <xb>{quotecontinue}     {
372                                         /* ignore: the literal continues in the next quoted segment */
373                                 }
374 <xb><<EOF>>             { yyerror("unterminated bit string literal"); }
375
376 {xhstart}               {
377                                         /* Hexadecimal bit type.
378                                          * At some point we should simply pass the string
379                                          * forward to the parser and label it there.
380                                          * In the meantime, place a leading "x" on the string
381                                          * to mark it for the input routine as a hex string.
382                                          */
383                                         token_start = yytext;
384                                         BEGIN(xh);      /* the <xh> contents and continuation rules are listed with the <xb> rules above */
385                                         startlit();
386                                         addlitchar('x');
387                                 }
388 <xh>{quotestop} |
389 <xh>{quotefail} {
390                                         yyless(1);      /* keep just the closing quote; throw back whatever was matched after it */
391                                         BEGIN(INITIAL);
392                                         yylval.str = litbufdup();       /* includes the leading "x" marker */
393                                         return XCONST;
394                                 }
395 <xh><<EOF>>             { yyerror("unterminated hexadecimal string literal"); }
396
397 {xnstart}               {
398                                         /* National character.
399                                          * We will pass this along as a normal character string,
400                                          * but preceded with an internally-generated "NCHAR".
401                                          */
402                                         const ScanKeyword *keyword;
403
404                                         yyless(1);                              /* eat only 'n' this time */
405                                         /* nchar had better be a keyword! */
406                                         keyword = ScanKeywordLookup("nchar");
407                                         Assert(keyword != NULL);
408                                         yylval.keyword = keyword->name;
409                                         return keyword->value;  /* the quote that was put back is scanned next as an ordinary string start */
410                                 }
411
412 {xqstart}               {
413                                         token_start = yytext;
414                                         BEGIN(xq);
415                                         startlit();
416                                 }
417 <xq>{quotestop} |
418 <xq>{quotefail} {
419                                         yyless(1);      /* keep just the closing quote; throw back whatever was matched after it */
420                                         BEGIN(INITIAL);
421                                         yylval.str = litbufdup();
422                                         return SCONST;
423                                 }
424 <xq>{xqdouble}  {
425                                         addlitchar('\'');       /* two adjacent quotes contribute one quote character */
426                                 }
427 <xq>{xqinside}  {
428                                         addlit(yytext, yyleng);
429                                 }
430 <xq>{xqescape}  {
431                                         addlitchar(unescape_single_char(yytext[1]));    /* yytext[1] is the character after the backslash */
432                                 }
433 <xq>{xqoctesc}  {
434                                         unsigned char c = strtoul(yytext+1, NULL, 8);   /* NOTE(review): a value above 0377 is silently truncated to one byte */
435                                         addlitchar(c);
436                                 }
437 <xq>{xqhexesc}  {
438                                         unsigned char c = strtoul(yytext+2, NULL, 16);  /* yytext+2 skips the backslash and the x */
439                                         addlitchar(c);
440                                 }
441 <xq>{quotecontinue} {
442                                         /* ignore: the literal continues in the next quoted segment */
443                                 }
444 <xq>.                   {
445                                         /* This is only needed for \ just before EOF */
446                                         addlitchar(yytext[0]);
447                                 }
448 <xq><<EOF>>             { yyerror("unterminated quoted string"); }
449
450 {dolqdelim}             {
451                                         token_start = yytext;
452                                         dolqstart = pstrdup(yytext);    /* saved so the closing delimiter can be compared against it */
453                                         BEGIN(xdolq);
454                                         startlit();
455                                 }
456 {dolqfailed}    {
457                                         /* throw back all but the initial "$" */
458                                         yyless(1);
459                                         /* and treat it as {other} */
460                                         return yytext[0];
461                                 }
462 <xdolq>{dolqdelim} {
463                                         if (strcmp(yytext, dolqstart) == 0)     /* only an exact copy of the opening delimiter ends the string */
464                                         {
465                                                 pfree(dolqstart);       /* the saved opener is no longer needed */
466                                                 BEGIN(INITIAL);
467                                                 yylval.str = litbufdup();
468                                                 return SCONST;
469                                         }
470                                         else
471                                         {
472                                                 /*
473                                                  * When we fail to match $...$ to dolqstart, transfer
474                                                  * the $... part to the output, but put back the final
475                                                  * $ for rescanning.  Consider $delim$...$junk$delim$
476                                                  */
477                                                 addlit(yytext, yyleng-1);
478                                                 yyless(yyleng-1);
479                                         }
480                                 }
481 <xdolq>{dolqinside} {
482                                         addlit(yytext, yyleng); /* text without any dollar sign is copied verbatim */
483                                 }
484 <xdolq>{dolqfailed} {
485                                         addlit(yytext, yyleng); /* a dollar sign plus tag with no closing dollar sign is ordinary text */
486                                 }
487 <xdolq>.                {
488                                         /* This is only needed for $ inside the quoted text */
489                                         addlitchar(yytext[0]);
490                                 }
491 <xdolq><<EOF>>  { yyerror("unterminated dollar-quoted string"); }
492
493 {xdstart}               {
494                                         token_start = yytext;
495                                         BEGIN(xd);
496                                         startlit();
497                                 }
498 <xd>{xdstop}    {
499                                         char               *ident;
500
501                                         BEGIN(INITIAL);
502                                         if (literallen == 0)    /* an empty quoted identifier is rejected */
503                                                 yyerror("zero-length delimited identifier");
504                                         ident = litbufdup();
505                                         if (literallen >= NAMEDATALEN)  /* too long for a name: let truncate_identifier() shorten it */
506                                                 truncate_identifier(ident, literallen, true);
507                                         yylval.str = ident;     /* no downcasing here, unlike the {identifier} rule */
508                                         return IDENT;
509                                 }
510 <xd>{xddouble}  {
511                                         addlitchar('"');        /* a doubled double-quote contributes one double-quote */
512                                 }
513 <xd>{xdinside}  {
514                                         addlit(yytext, yyleng);
515                                 }
516 <xd><<EOF>>             { yyerror("unterminated quoted identifier"); }
517
518 {typecast}              {
519                                         return TYPECAST;        /* the two-character :: token */
520                                 }
521
522 {self}                  {
523                                         return yytext[0];       /* the character itself is the token code */
524                                 }
525
526 {operator}              {
527                                         /*
528                                          * Check for embedded slash-star or dash-dash; those
529                                          * are comment starts, so operator must stop there.
530                                          * Note that slash-star or dash-dash at the first
531                                          * character will match a prior rule, not this one.
532                                          */
533                                         int             nchars = yyleng;        /* how many chars of yytext the token keeps */
534                                         char   *slashstar = strstr(yytext, "/*");
535                                         char   *dashdash = strstr(yytext, "--");
536
537                                         if (slashstar && dashdash)
538                                         {
539                                                 /* if both appear, take the first one */
540                                                 if (slashstar > dashdash)
541                                                         slashstar = dashdash;
542                                         }
543                                         else if (!slashstar)
544                                                 slashstar = dashdash;
545                                         if (slashstar)
546                                                 nchars = slashstar - yytext;
547
548                                         /*
549                                          * For SQL compatibility, '+' and '-' cannot be the
550                                          * last char of a multi-char operator unless the operator
551                                          * contains chars that are not in SQL operators.
552                                          * The idea is to lex '=-' as two operators, but not
553                                          * to forbid operator names like '?-' that could not be
554                                          * sequences of SQL operators.
555                                          */
556                                         while (nchars > 1 &&
557                                                    (yytext[nchars-1] == '+' ||
558                                                         yytext[nchars-1] == '-'))
559                                         {
560                                                 int             ic;
561
562                                                 for (ic = nchars-2; ic >= 0; ic--)      /* scan the chars before the trailing sign */
563                                                 {
564                                                         if (strchr("~!@#^&|`?%", yytext[ic]))
565                                                                 break;
566                                                 }
567                                                 if (ic >= 0)
568                                                         break; /* found a char that makes it OK */
569                                                 nchars--; /* else remove the +/-, and check again */
570                                         }
571
572                                         if (nchars < yyleng)
573                                         {
574                                                 /* Strip the unwanted chars from the token */
575                                                 yyless(nchars);
576                                                 /*
577                                                  * If what we have left is only one char, and it's
578                                                  * one of the characters matching "self", then
579                                                  * return it as a character token the same way
580                                                  * that the "self" rule would have.
581                                                  */
582                                                 if (nchars == 1 &&
583                                                         strchr(",()[].;:+-*/%^<>=", yytext[0]))
584                                                         return yytext[0];
585                                         }
586
587                                         /* Convert "!=" operator to "<>" for compatibility */
588                                         if (strcmp(yytext, "!=") == 0)
589                                                 yylval.str = pstrdup("<>");
590                                         else
591                                                 yylval.str = pstrdup(yytext);
592                                         return Op;      /* yylval.str holds a copy of the operator text */
593                                 }
594
595 {param}                 {
596                                         yylval.ival = atol(yytext + 1); /* yytext + 1 skips the dollar sign; NOTE(review): atol() cannot report overflow */
597                                         return PARAM;
598                                 }
599
600 {integer}               {
601                                         long val;
602                                         char* endptr;
603
604                                         errno = 0;      /* so that ERANGE below reflects this strtol() call only */
605                                         val = strtol(yytext, &endptr, 10);
606                                         if (*endptr != '\0' || errno == ERANGE
607 #ifdef HAVE_LONG_INT_64
608                                                 /* if long > 32 bits, check for overflow of int4 */
609                                                 || val != (long) ((int32) val)
610 #endif
611                                                 )
612                                         {
613                                                 /* integer too large, treat it as a float */
614                                                 yylval.str = pstrdup(yytext);
615                                                 return FCONST;
616                                         }
617                                         yylval.ival = val;      /* value passed the range checks above */
618                                         return ICONST;
619                                 }
620 {decimal}               {
621                                         yylval.str = pstrdup(yytext);   /* passed on as text, not converted here */
622                                         return FCONST;
623                                 }
624 {real}                  {
625                                         yylval.str = pstrdup(yytext);   /* passed on as text, not converted here */
626                                         return FCONST;
627                                 }
628 {realfail1}             {
629                                         /*
630                                          * throw back the [Ee], and treat as {decimal}.  Note
631                                          * that it is possible the input is actually {integer},
632                                          * but since this case will almost certainly lead to a
633                                          * syntax error anyway, we don't bother to distinguish.
634                                          */
635                                         yyless(yyleng-1);
636                                         yylval.str = pstrdup(yytext);
637                                         return FCONST;
638                                 }
639 {realfail2}             {
640                                         /* throw back the [Ee][+-], and proceed as above */
641                                         yyless(yyleng-2);
642                                         yylval.str = pstrdup(yytext);
643                                         return FCONST;
644                                 }
645
646
647 {identifier}    {
648                                         const ScanKeyword *keyword;
649                                         char               *ident;
650
651                                         /* Is it a keyword? */
652                                         keyword = ScanKeywordLookup(yytext);
653                                         if (keyword != NULL)
654                                         {
655                                                 yylval.keyword = keyword->name;
656                                                 return keyword->value;  /* keywords return their own token code, not IDENT */
657                                         }
658
659                                         /*
660                                          * No.  Convert the identifier to lower case, and truncate
661                                          * if necessary.
662                                          */
663                                         ident = downcase_truncate_identifier(yytext, yyleng, true);
664                                         yylval.str = ident;
665                                         return IDENT;
666                                 }
667
{other}                 {
                                        /*
                                         * Anything not matched by an earlier rule: the token
                                         * code returned is simply the first character of the
                                         * match.
                                         */
                                        return yytext[0];
                                }
671
672 %%
673
674 void
675 yyerror(const char *message)
676 {
677         const char *loc = token_start ? token_start : yytext;
678         int                     cursorpos;
679
680         /* in multibyte encodings, return index in characters not bytes */
681         cursorpos = pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1;
682
683         if (*loc == YY_END_OF_BUFFER_CHAR)
684         {
685                 ereport(ERROR,
686                                 (errcode(ERRCODE_SYNTAX_ERROR),
687                                  /* translator: %s is typically "syntax error" */
688                                  errmsg("%s at end of input", _(message)),
689                                  errposition(cursorpos)));
690         }
691         else
692         {
693                 ereport(ERROR,
694                                 (errcode(ERRCODE_SYNTAX_ERROR),
695                                  /* translator: first %s is typically "syntax error" */
696                                  errmsg("%s at or near \"%s\"", _(message), loc),
697                                  errposition(cursorpos)));
698         }
699 }
700
701
702 /*
703  * Called before any actual parsing is done
704  */
705 void
706 scanner_init(const char *str)
707 {
708         Size    slen = strlen(str);
709
710         /*
711          * Might be left over after ereport()
712          */
713         if (YY_CURRENT_BUFFER)
714                 yy_delete_buffer(YY_CURRENT_BUFFER);
715
716         /*
717          * Make a scan buffer with special termination needed by flex.
718          */
719         scanbuf = palloc(slen + 2);
720         memcpy(scanbuf, str, slen);
721         scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
722         scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
723
724         /* initialize literal buffer to a reasonable but expansible size */
725         literalalloc = 128;
726         literalbuf = (char *) palloc(literalalloc);
727         startlit();
728
729         BEGIN(INITIAL);
730 }
731
732
/*
 * Called after parsing is done to clean up after scanner_init()
 *
 * Deletes the flex buffer handle and frees the scan buffer.  The literal
 * buffer (literalbuf) is not released here.
 */
void
scanner_finish(void)
{
        /* drop flex's handle on the buffer before freeing the storage itself */
        yy_delete_buffer(scanbufhandle);
        pfree(scanbuf);
}
742
743
744 static void
745 addlit(char *ytext, int yleng)
746 {
747         /* enlarge buffer if needed */
748         if ((literallen+yleng) >= literalalloc)
749         {
750                 do {
751                         literalalloc *= 2;
752                 } while ((literallen+yleng) >= literalalloc);
753                 literalbuf = (char *) repalloc(literalbuf, literalalloc);
754         }
755         /* append new data, add trailing null */
756         memcpy(literalbuf+literallen, ytext, yleng);
757         literallen += yleng;
758         literalbuf[literallen] = '\0';
759 }
760
761
762 static void
763 addlitchar(unsigned char ychar)
764 {
765         /* enlarge buffer if needed */
766         if ((literallen+1) >= literalalloc)
767         {
768                 literalalloc *= 2;
769                 literalbuf = (char *) repalloc(literalbuf, literalalloc);
770         }
771         /* append new data, add trailing null */
772         literalbuf[literallen] = ychar;
773         literallen += 1;
774         literalbuf[literallen] = '\0';
775 }
776
777
778 /*
779  * One might be tempted to write pstrdup(literalbuf) instead of this,
780  * but for long literals this is much faster because the length is
781  * already known.
782  */
783 static char *
784 litbufdup(void)
785 {
786         char *new;
787
788         new = palloc(literallen + 1);
789         memcpy(new, literalbuf, literallen+1);
790         return new;
791 }
792
793
/*
 * Map the letter of a single-character escape to the control character
 * it stands for: b, f, n, r and t become backspace, form feed, newline,
 * carriage return and tab.  Any other character is returned unchanged.
 */
unsigned char
unescape_single_char(unsigned char c)
{
        if (c == 'b')
                return '\b';
        if (c == 'f')
                return '\f';
        if (c == 'n')
                return '\n';
        if (c == 'r')
                return '\r';
        if (c == 't')
                return '\t';

        /* not a recognized escape letter: pass it through as-is */
        return c;
}