granicus.if.org Git - postgresql/blob - src/backend/parser/scan.l

   1 %{
   2 /*-------------------------------------------------------------------------
   3  *
   4  * scan.l
   5  *        lexical scanner for PostgreSQL
   6  *
   7  * NOTE NOTE NOTE:
   8  *
   9  * The rules in this file must be kept in sync with psql's lexer!!!
  10  *
  11  * The rules are designed so that the scanner never has to backtrack,
  12  * in the sense that there is always a rule that can match the input
  13  * consumed so far (the rule action may internally throw back some input
  14  * with yyless(), however).  As explained in the flex manual, this makes
  15  * for a useful speed increase --- about a third faster than a plain -CF
  16  * lexer, in simple testing.  The extra complexity is mostly in the rules
  17  * for handling float numbers and continued string literals.  If you change
  18  * the lexical rules, verify that you haven't broken the no-backtrack
  19  * property by running flex with the "-b" option and checking that the
  20  * resulting "lex.backup" file says that no backing up is needed.
  21  *
  22  *
  23  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  24  * Portions Copyright (c) 1994, Regents of the University of California
  25  *
  26  * IDENTIFICATION
  27  *        $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.125 2005/06/15 16:28:06 momjian Exp $
  28  *
  29  *-------------------------------------------------------------------------
  30  */
  31 #include "postgres.h"
  32
  33 #include <ctype.h>
  34 #include <unistd.h>
  35
  36 #include "parser/gramparse.h"
  37 #include "parser/keywords.h"
  38 /* Not needed now that this file is compiled as part of gram.y */
  39 /* #include "parser/parse.h" */
  40 #include "parser/scansup.h"
  41 #include "mb/pg_wchar.h"
  42
  43
  44 /* Avoid exit() on fatal scanner errors by turning three-argument fprintf() calls into ereport(ERROR) (a bit ugly -- see yy_fatal_error) */
  45 #undef fprintf
  46 #define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))
  47
  48 extern YYSTYPE yylval;     /* semantic value of the current token; filled in by the rule actions */
  49
  50 static int              xcdepth = 0;    /* depth of nesting in slash-star comments; 0 = outermost comment */
  51 static char    *dolqstart;      /* current $foo$ quote start string (pstrdup'd copy, pfree'd at the matching close) */
  52
  53 /*
  54  * literalbuf is used to accumulate literal values when multiple rules
  55  * are needed to parse a single literal.  Call startlit to reset buffer
  56  * to empty, addlit to add text.  Note that the buffer is palloc'd and
  57  * starts life afresh on every parse cycle.
  58  */
  59 static char        *literalbuf;         /* expandable buffer */
  60 static int              literallen;             /* actual current length */
  61 static int              literalalloc;   /* current allocated buffer size */
  62
  63 #define startlit()  (literalbuf[0] = '\0', literallen = 0)   /* empty the buffer; literalbuf must already be allocated */
  64 static void addlit(char *ytext, int yleng);
  65 static void addlitchar(unsigned char ychar);
  66 static char *litbufdup(void);
  67
  68 /*
  69  * When we parse a token that requires multiple lexer rules to process,
  70  * we set token_start to point at the true start of the token, for use
  71  * by yyerror().  yytext will point at just the text consumed by the last
  72  * rule, so it's not very helpful (e.g., it might contain just the last
  73  * quote mark of a quoted identifier).  But to avoid cluttering every rule
  74  * with setting token_start, we allow token_start = NULL to denote that
  75  * it's okay to use yytext.
  76  */
  77 static char        *token_start;
  78
  79 /* Handles to the buffer that the lexer uses internally */
  80 static YY_BUFFER_STATE scanbufhandle;
  81 static char *scanbuf;
  82
  83 unsigned char unescape_single_char(unsigned char c);    /* used by the <xq>{xqescape} rule; NOTE(review): not static -- confirm external linkage is intended */
  84
  85 %}
  84
  85 %}
  86
  87 %option 8bit
  88 %option never-interactive
  89 %option nodefault
  90 %option nounput
  91 %option noyywrap
  92 %option prefix="base_yy"
  93
  94 /*
  95  * OK, here is a short description of lex/flex rules behavior.
  96  * The longest pattern which matches an input string is always chosen.
  97  * For equal-length patterns, the first occurring in the rules list is chosen.
  98  * INITIAL is the starting state, to which all non-conditional rules apply.
  99  * Exclusive states change parsing rules while the state is active.  When in
 100  * an exclusive state, only those rules defined for that state apply.
 101  *
 102  * We use exclusive states for quoted strings, extended comments,
 103  * and to eliminate parsing troubles for numeric strings.
 104  * Exclusive states:
 105  *  <xb> bit string literal
 106  *  <xc> extended C-style comments
 107  *  <xd> delimited identifiers (double-quoted identifiers)
 108  *  <xh> hexadecimal numeric string
 109  *  <xq> quoted strings
 110  *  <xdolq> $foo$ quoted strings
 111  */
 112
 113 %x xb
 114 %x xc
 115 %x xd
 116 %x xh
 117 %x xq
 118 %x xdolq
 119
 120 /*
 121  * In order to make the world safe for Windows and Mac clients as well as
 122  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 123  * sequence will be seen as two successive newlines, but that doesn't cause
 124  * any problems.  Comments that start with -- and extend to the next
 125  * newline are treated as equivalent to a single whitespace character.
 126  *
 127  * NOTE a fine point: if there is no newline following --, we will absorb
 128  * everything to the end of the input as a comment.  This is correct.  Older
 129  * versions of Postgres failed to recognize -- as a comment if the input
 130  * did not end with a newline.
 131  *
 132  * XXX perhaps \f (formfeed) should be treated as a newline as well?
 133  */
 134
 135 space                   [ \t\n\r\f]
 136 horiz_space             [ \t\f]
 137 newline                 [\n\r]
 138 non_newline             [^\n\r]
 139
 140 comment                 ("--"{non_newline}*)
 141
 142 whitespace              ({space}+|{comment})
 143
 144 /*
 145  * SQL requires at least one newline in the whitespace separating
 146  * string literals that are to be concatenated.  Silly, but who are we
 147  * to argue?  Note that {whitespace_with_newline} should not have * after
 148  * it, whereas {whitespace} should generally have a * after it...
 149  */
 150
 151 special_whitespace              ({space}+|{comment}{newline})
 152 horiz_whitespace                ({horiz_space}|{comment})
 153 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
 154
 155 /*
 156  * To ensure that {quotecontinue} can be scanned without having to back up
 157  * if the full pattern isn't matched, we include trailing whitespace in
 158  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
 159  * except for {quote} followed by whitespace and just one "-" (not two,
 160  * which would start a {comment}).  To cover that we have {quotefail}.
 161  * The actions for {quotestop} and {quotefail} must throw back characters
 162  * beyond the quote proper.
 163  */
 164 quote                   '
 165 quotestop               {quote}{whitespace}*
 166 quotecontinue   {quote}{whitespace_with_newline}{quote}
 167 quotefail               {quote}{whitespace}*"-"
 168
 169 /* Bit string
 170  * It is tempting to scan the string for only those characters
 171  * which are allowed. However, this leads to silently swallowed
 172  * characters if illegal characters are included in the string.
 173  * For example, if xbinside is [01] then B'ABCD' is interpreted
 174  * as a zero-length string, and the ABCD' is lost!
 175  * Better to pass the string forward and let the input routines
 176  * validate the contents.
 177  */
 178 xbstart                 [bB]{quote}
 179 xbinside                [^']*
 180
 181 /* Hexadecimal number */
 182 xhstart                 [xX]{quote}
 183 xhinside                [^']*
 184
 185 /* National character */
 186 xnstart                 [nN]{quote}
 187
 188 /* Extended quote
 189  * xqdouble implements embedded quote, ''''
 190  */
 191 xqstart                 {quote}
 192 xqdouble                {quote}{quote}
 193 xqinside                [^\\']+
 194 xqescape                [\\][^0-7]
 195 xqoctesc                [\\][0-7]{1,3}
 196 xqhexesc                [\\]x[0-9A-Fa-f]{1,2}
 197
 198 /* $foo$ style quotes ("dollar quoting")
 199  * The quoted string starts with $foo$ where "foo" is an optional string
 200  * in the form of an identifier, except that it may not contain "$",
 201  * and extends to the first occurrence of an identical string.
 202  * There is *no* processing of the quoted text.
 203  *
 204  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 205  * fails to match its trailing "$".
 206  */
 207 dolq_start              [A-Za-z\200-\377_]
 208 dolq_cont               [A-Za-z\200-\377_0-9]
 209 dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
 210 dolqfailed              \${dolq_start}{dolq_cont}*
 211 dolqinside              [^$]+
 212
 213 /* Double quote
 214  * Allows embedded spaces and other special characters into identifiers.
 215  */
 216 dquote                  \"
 217 xdstart                 {dquote}
 218 xdstop                  {dquote}
 219 xddouble                {dquote}{dquote}
 220 xdinside                [^"]+
 221
 222 /* C-style comments
 223  *
 224  * The "extended comment" syntax closely resembles allowable operator syntax.
 225  * The tricky part here is to get lex to recognize a string starting with
 226  * slash-star as a comment, when interpreting it as an operator would produce
 227  * a longer match --- remember lex will prefer a longer match!  Also, if we
 228  * have something like plus-slash-star, lex will think this is a 3-character
 229  * operator whereas we want to see it as a + operator and a comment start.
 230  * The solution is two-fold:
 231  * 1. append {op_chars}* to xcstart so that it matches as much text as
 232  *    {operator} would. Then the tie-breaker (first matching rule of same
 233  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 234  *    in case it contains a star-slash that should terminate the comment.
 235  * 2. In the operator rule, check for slash-star within the operator, and
 236  *    if found throw it back with yyless().  This handles the plus-slash-star
 237  *    problem.
 238  * Dash-dash comments have similar interactions with the operator rule.
 239  */
 240 xcstart                 \/\*{op_chars}*
 241 xcstop                  \*+\/
 242 xcinside                [^*/]+
 243
 244 digit                   [0-9]
 245 ident_start             [A-Za-z\200-\377_]
 246 ident_cont              [A-Za-z\200-\377_0-9\$]
 247
 248 identifier              {ident_start}{ident_cont}*
 249
 250 typecast                "::"
 251
 252 /*
 253  * "self" is the set of chars that should be returned as single-character
 254  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 255  * which can be one or more characters long (but if a single-char token
 256  * appears in the "self" set, it is not to be returned as an Op).  Note
 257  * that the sets overlap, but each has some chars that are not in the other.
 258  *
 259  * If you change either set, adjust the character lists appearing in the
 260  * rule for "operator"!
 261  */
 262 self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 263 op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 264 operator                {op_chars}+
 265
  266 /* We no longer allow unary minus in numbers.
  267  * Instead we pass it separately to the parser, where it gets
  268  * coerced via doNegate() -- Leon, Aug 20 1999
  269  *
  270  * {realfail1} and {realfail2} are added to prevent the need for scanner
  271  * backup when the {real} rule fails to match completely.
  272  */
 273
 274 integer                 {digit}+
 275 decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
 276 real                    ({integer}|{decimal})[Ee][-+]?{digit}+
 277 realfail1               ({integer}|{decimal})[Ee]
 278 realfail2               ({integer}|{decimal})[Ee][-+]
 279
 280 param                   \${integer}
 281
 282 other                   .
 283
 284 /*
 285  * Dollar quoted strings are totally opaque, and no escaping is done on them.
 286  * Other quoted strings must allow some special characters such as single-quote
 287  *  and newline.
 288  * Embedded single-quotes are implemented both in the SQL standard
 289  *  style of two adjacent single quotes "''" and in the Postgres/Java style
 290  *  of escaped-quote "\'".
 291  * Other embedded escaped characters are matched explicitly and the leading
 292  *  backslash is dropped from the string.
 293  * Note that xcstart must appear before operator, as explained above!
 294  *  Also whitespace (comment) must appear before operator.
 295  */
 296
 297 %%
 298
  299 %{
  300                                         /* code to execute during start of each call of yylex() */
  301                                         token_start = NULL;     /* NULL makes yyerror() report at yytext unless a multi-rule token sets it */
  302 %}
 303
  304 {whitespace}    {
  305                                         /* ignore: runs of blanks/newlines and "--" comments produce no token */
  306                                 }
 307
  308 {xcstart}               {
  309                                         token_start = yytext;   /* so an unterminated-comment error points at the opener */
  310                                         xcdepth = 0;            /* outermost comment; nested openers increment this */
  311                                         BEGIN(xc);
  312                                         /* Put back any characters past slash-star; see above */
  313                                         yyless(2);
  314                                 }
  315
  316 <xc>{xcstart}   {
  317                                         xcdepth++;              /* nested opener: one more closer is now required */
  318                                         /* Put back any characters past slash-star; see above */
  319                                         yyless(2);
  320                                 }
  321
  322 <xc>{xcstop}    {
  323                                         if (xcdepth <= 0)       /* this closer ends the outermost comment */
  324                                         {
  325                                                 BEGIN(INITIAL);
  326                                                 /* reset token_start for next token */
  327                                                 token_start = NULL;
  328                                         }
  329                                         else
  330                                                 xcdepth--;      /* closed one nested level; still inside a comment */
  331                                 }
  332
  333 <xc>{xcinside}  {
  334                                         /* ignore: comment text containing neither star nor slash */
  335                                 }
  336
  337 <xc>{op_chars}  {
  338                                         /* ignore: a single operator character that begins neither a nested opener nor a closer */
  339                                 }
  340
  341 <xc>\*+                 {
  342                                         /* ignore: a run of stars that is not followed by a slash */
  343                                 }
  344
  345 <xc><<EOF>>             { yyerror("unterminated /* comment"); }
 346
  347 {xbstart}               {
  348                                         /* Binary bit type.
  349                                          * At some point we should simply pass the string
  350                                          * forward to the parser and label it there.
  351                                          * In the meantime, place a leading "b" on the string
  352                                          * to mark it for the input routine as a binary string.
  353                                          */
  354                                         token_start = yytext;
  355                                         BEGIN(xb);
  356                                         startlit();
  357                                         addlitchar('b');
  358                                 }
  359 <xb>{quotestop} |
  360 <xb>{quotefail} {
  361                                         yyless(1);              /* consume only the quote; rescan the trailing whitespace or "-" */
  362                                         BEGIN(INITIAL);
  363                                         yylval.str = litbufdup();
  364                                         return BCONST;
  365                                 }
  366 <xh>{xhinside}  |
  367 <xb>{xbinside}  {
  368                                         addlit(yytext, yyleng); /* accumulate the text unvalidated, as explained at the pattern definitions */
  369                                 }
  370 <xh>{quotecontinue}     |
  371 <xb>{quotecontinue}     {
  372                                         /* ignore: the literal continues after a newline, so keep accumulating into the same buffer */
  373                                 }
  374 <xb><<EOF>>             { yyerror("unterminated bit string literal"); }
  375
  376 {xhstart}               {
  377                                         /* Hexadecimal bit type.
  378                                          * At some point we should simply pass the string
  379                                          * forward to the parser and label it there.
  380                                          * In the meantime, place a leading "x" on the string
  381                                          * to mark it for the input routine as a hex string.
  382                                          */
  383                                         token_start = yytext;
  384                                         BEGIN(xh);
  385                                         startlit();
  386                                         addlitchar('x');
  387                                 }
  388 <xh>{quotestop} |
  389 <xh>{quotefail} {
  390                                         yyless(1);              /* consume only the quote; rescan the trailing whitespace or "-" */
  391                                         BEGIN(INITIAL);
  392                                         yylval.str = litbufdup();
  393                                         return XCONST;
  394                                 }
  395 <xh><<EOF>>             { yyerror("unterminated hexadecimal string literal"); }
 396
  397 {xnstart}               {
  398                                         /* National character.
  399                                          * We will pass this along as a normal character string,
  400                                          * but preceded with an internally-generated "NCHAR".
  401                                          */
  402                                         const ScanKeyword *keyword;
  403
  404                                         yyless(1);                              /* eat only 'n' this time */
  405                                         /* nchar had better be a keyword! */
  406                                         keyword = ScanKeywordLookup("nchar");
  407                                         Assert(keyword != NULL);
  408                                         yylval.keyword = keyword->name;
  409                                         return keyword->value;  /* the pushed-back quote is scanned as an ordinary string start on the next call */
  410                                 }
 411
  412 {xqstart}               {
  413                                         token_start = yytext;
  414                                         BEGIN(xq);
  415                                         startlit();
  416                                 }
  417 <xq>{quotestop} |
  418 <xq>{quotefail} {
  419                                         yyless(1);              /* consume only the quote; rescan the trailing whitespace or "-" */
  420                                         BEGIN(INITIAL);
  421                                         yylval.str = litbufdup();
  422                                         return SCONST;
  423                                 }
  424 <xq>{xqdouble}  {
  425                                         addlitchar('\'');       /* two adjacent quotes stand for one literal quote */
  426                                 }
  427 <xq>{xqinside}  {
  428                                         addlit(yytext, yyleng);
  429                                 }
  430 <xq>{xqescape}  {
  431                                         addlitchar(unescape_single_char(yytext[1]));    /* backslash followed by one non-octal-digit character */
  432                                 }
  433 <xq>{xqoctesc}  {
  434                                         unsigned char c = strtoul(yytext+1, NULL, 8);   /* 1-3 octal digits; values above 0377 are reduced to 8 bits */
  435                                         addlitchar(c);
  436                                 }
  437 <xq>{xqhexesc}  {
  438                                         unsigned char c = strtoul(yytext+2, NULL, 16);  /* skip the backslash and the x; 1-2 hex digits follow */
  439                                         addlitchar(c);
  440                                 }
  441 <xq>{quotecontinue} {
  442                                         /* ignore: the literal continues after a newline, so keep accumulating into the same buffer */
  443                                 }
  444 <xq>.                   {
  445                                         /* This is only needed for \ just before EOF */
  446                                         addlitchar(yytext[0]);
  447                                 }
  448 <xq><<EOF>>             { yyerror("unterminated quoted string"); }
 449
  450 {dolqdelim}             {
  451                                         token_start = yytext;
  452                                         dolqstart = pstrdup(yytext);    /* remember the opening delimiter to compare against candidate closers */
  453                                         BEGIN(xdolq);
  454                                         startlit();
  455                                 }
  456 {dolqfailed}    {
  457                                         /* throw back all but the initial "$" */
  458                                         yyless(1);
  459                                         /* and treat it as {other} */
  460                                         return yytext[0];
  461                                 }
  462 <xdolq>{dolqdelim} {
  463                                         if (strcmp(yytext, dolqstart) == 0)     /* identical delimiter: the string ends here */
  464                                         {
  465                                                 pfree(dolqstart);
  466                                                 BEGIN(INITIAL);
  467                                                 yylval.str = litbufdup();
  468                                                 return SCONST;
  469                                         }
  470                                         else
  471                                         {
  472                                                 /*
  473                                                  * When we fail to match $...$ to dolqstart, transfer
  474                                                  * the $... part to the output, but put back the final
  475                                                  * $ for rescanning.  Consider $delim$...$junk$delim$
  476                                                  */
  477                                                 addlit(yytext, yyleng-1);
  478                                                 yyless(yyleng-1);
  479                                         }
  480                                 }
  481 <xdolq>{dolqinside} {
  482                                         addlit(yytext, yyleng);
  483                                 }
  484 <xdolq>{dolqfailed} {
  485                                         addlit(yytext, yyleng); /* "$name" with no closing "$" is ordinary text inside the string */
  486                                 }
  487 <xdolq>.                {
  488                                         /* This is only needed for $ inside the quoted text */
  489                                         addlitchar(yytext[0]);
  490                                 }
  491 <xdolq><<EOF>>  { yyerror("unterminated dollar-quoted string"); }
 492
  493 {xdstart}               {
  494                                         token_start = yytext;
  495                                         BEGIN(xd);
  496                                         startlit();
  497                                 }
  498 <xd>{xdstop}    {
  499                                         char               *ident;
  500
  501                                         BEGIN(INITIAL);
  502                                         if (literallen == 0)    /* "" is not a valid identifier */
  503                                                 yyerror("zero-length delimited identifier");
  504                                         ident = litbufdup();
  505                                         if (literallen >= NAMEDATALEN)  /* too long to fit in NAMEDATALEN */
  506                                                 truncate_identifier(ident, literallen, true);
  507                                         yylval.str = ident;     /* returned as written: no case folding is applied here */
  508                                         return IDENT;
  509                                 }
  510 <xd>{xddouble}  {
  511                                         addlitchar('"');        /* two adjacent double quotes stand for one literal double quote */
  512                                 }
  513 <xd>{xdinside}  {
  514                                         addlit(yytext, yyleng);
  515                                 }
  516 <xd><<EOF>>             { yyerror("unterminated quoted identifier"); }
 517
  518 {typecast}              {
  519                                         return TYPECAST;        /* "::" gets its own token code */
  520                                 }
  521
  522 {self}                  {
  523                                         return yytext[0];       /* single-character token: the character itself is the token code */
  524                                 }
 525
  526 {operator}              {
  527                                         /*
  528                                          * Check for embedded slash-star or dash-dash; those
  529                                          * are comment starts, so operator must stop there.
  530                                          * Note that slash-star or dash-dash at the first
  531                                          * character will match a prior rule, not this one.
  532                                          */
  533                                         int             nchars = yyleng;        /* number of leading chars to keep as the operator */
  534                                         char   *slashstar = strstr(yytext, "/*");
  535                                         char   *dashdash = strstr(yytext, "--");
  536
  537                                         if (slashstar && dashdash)
  538                                         {
  539                                                 /* if both appear, take the first one */
  540                                                 if (slashstar > dashdash)
  541                                                         slashstar = dashdash;
  542                                         }
  543                                         else if (!slashstar)
  544                                                 slashstar = dashdash;
  545                                         if (slashstar)  /* slashstar now marks the earliest comment start of either kind, if any */
  546                                                 nchars = slashstar - yytext;
  547
  548                                         /*
  549                                          * For SQL compatibility, '+' and '-' cannot be the
  550                                          * last char of a multi-char operator unless the operator
  551                                          * contains chars that are not in SQL operators.
  552                                          * The idea is to lex '=-' as two operators, but not
  553                                          * to forbid operator names like '?-' that could not be
  554                                          * sequences of SQL operators.
  555                                          */
  556                                         while (nchars > 1 &&
  557                                                    (yytext[nchars-1] == '+' ||
  558                                                         yytext[nchars-1] == '-'))
  559                                         {
  560                                                 int             ic;
  561
  562                                                 /* scan the chars before the trailing +/- for one that is not an SQL operator char */
  563                                                 for (ic = nchars-2; ic >= 0; ic--)
  564                                                 {
  565                                                         if (strchr("~!@#^&|`?%", yytext[ic]))
  566                                                                 break;
  567                                                 }
  568                                                 if (ic >= 0)
  569                                                         break; /* found a char that makes it OK */
  570                                                 nchars--; /* else remove the +/-, and check again */
  571                                         }
  572
  573                                         if (nchars < yyleng)
  574                                         {
  575                                                 /* Strip the unwanted chars from the token */
  576                                                 yyless(nchars);
  577                                                 /*
  578                                                  * If what we have left is only one char, and it's
  579                                                  * one of the characters matching "self", then
  580                                                  * return it as a character token the same way
  581                                                  * that the "self" rule would have.
  582                                                  */
  583                                                 if (nchars == 1 &&
  584                                                         strchr(",()[].;:+-*/%^<>=", yytext[0]))
  585                                                         return yytext[0];
  586                                         }
  587
  588                                         /* Convert "!=" operator to "<>" for compatibility */
  589                                         if (strcmp(yytext, "!=") == 0)
  590                                                 yylval.str = pstrdup("<>");
  591                                         else
  592                                                 yylval.str = pstrdup(yytext);
  593                                         return Op;
  594                                 }
 594
 595 {param}                 {
 596                                         yylval.ival = atol(yytext + 1);
 597                                         return PARAM;
 598                                 }
 599
  600 {integer}               {
  601                                         long val;
  602                                         char* endptr;
  603
  604                                         errno = 0;      /* clear first so ERANGE below can only come from this strtol() */
  605                                         val = strtol(yytext, &endptr, 10);
  606                                         if (*endptr != '\0' || errno == ERANGE
  607 #ifdef HAVE_LONG_INT_64
  608                                                 /* if long > 32 bits, check for overflow of int4 */
  609                                                 || val != (long) ((int32) val)
  610 #endif
  611                                                 )
  612                                         {
  613                                                 /* integer too large, treat it as a float */
  614                                                 yylval.str = pstrdup(yytext);
  615                                                 return FCONST;
  616                                         }
  617                                         yylval.ival = val;
  618                                         return ICONST;
  619                                 }
  620 {decimal}               {
  621                                         yylval.str = pstrdup(yytext);   /* passed on as text; no conversion is done here */
  622                                         return FCONST;
  623                                 }
  624 {real}                  {
  625                                         yylval.str = pstrdup(yytext);   /* passed on as text; no conversion is done here */
  626                                         return FCONST;
  627                                 }
  628 {realfail1}             {
  629                                         /*
  630                                          * throw back the [Ee], and treat as {decimal}.  Note
  631                                          * that it is possible the input is actually {integer},
  632                                          * but since this case will almost certainly lead to a
  633                                          * syntax error anyway, we don't bother to distinguish.
  634                                          */
  635                                         yyless(yyleng-1);
  636                                         yylval.str = pstrdup(yytext);
  637                                         return FCONST;
  638                                 }
  639 {realfail2}             {
  640                                         /* throw back the [Ee][+-], and proceed as above */
  641                                         yyless(yyleng-2);
  642                                         yylval.str = pstrdup(yytext);
  643                                         return FCONST;
  644                                 }
 645
 646
 647 {identifier}    {
 648                                         const ScanKeyword *keyword;
 649                                         char               *ident;
 650
 651                                         /* Is it a keyword? */
 652                                         keyword = ScanKeywordLookup(yytext);
 653                                         if (keyword != NULL)
 654                                         {
 655                                                 yylval.keyword = keyword->name;
 656                                                 return keyword->value;
 657                                         }
 658
 659                                         /*
 660                                          * No.  Convert the identifier to lower case, and truncate
 661                                          * if necessary.
 662                                          */
 663                                         ident = downcase_truncate_identifier(yytext, yyleng, true);
 664                                         yylval.str = ident;
 665                                         return IDENT;
 666                                 }
 667
{other}                 {
                                        /* the first matched character itself is the token code */
                                        return yytext[0];
                                }
 671
 672 %%
 673
 674 void
 675 yyerror(const char *message)
 676 {
 677         const char *loc = token_start ? token_start : yytext;
 678         int                     cursorpos;
 679
 680         /* in multibyte encodings, return index in characters not bytes */
 681         cursorpos = pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1;
 682
 683         if (*loc == YY_END_OF_BUFFER_CHAR)
 684         {
 685                 ereport(ERROR,
 686                                 (errcode(ERRCODE_SYNTAX_ERROR),
 687                                  /* translator: %s is typically "syntax error" */
 688                                  errmsg("%s at end of input", _(message)),
 689                                  errposition(cursorpos)));
 690         }
 691         else
 692         {
 693                 ereport(ERROR,
 694                                 (errcode(ERRCODE_SYNTAX_ERROR),
 695                                  /* translator: first %s is typically "syntax error" */
 696                                  errmsg("%s at or near \"%s\"", _(message), loc),
 697                                  errposition(cursorpos)));
 698         }
 699 }
 700
 701
 702 /*
 703  * Called before any actual parsing is done
 704  */
 705 void
 706 scanner_init(const char *str)
 707 {
 708         Size    slen = strlen(str);
 709
 710         /*
 711          * Might be left over after ereport()
 712          */
 713         if (YY_CURRENT_BUFFER)
 714                 yy_delete_buffer(YY_CURRENT_BUFFER);
 715
 716         /*
 717          * Make a scan buffer with special termination needed by flex.
 718          */
 719         scanbuf = palloc(slen + 2);
 720         memcpy(scanbuf, str, slen);
 721         scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
 722         scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
 723
 724         /* initialize literal buffer to a reasonable but expansible size */
 725         literalalloc = 128;
 726         literalbuf = (char *) palloc(literalalloc);
 727         startlit();
 728
 729         BEGIN(INITIAL);
 730 }
 731
 732
/*
 * Called after parsing is done to clean up after scanner_init()
 *
 * Deletes the flex buffer identified by scanbufhandle and then frees the
 * scanbuf memory.
 */
void
scanner_finish(void)
{
        /* drop flex's buffer state first ... */
        yy_delete_buffer(scanbufhandle);
        /* ... then release the scan buffer storage */
        pfree(scanbuf);
}
 742
 743
 744 static void
 745 addlit(char *ytext, int yleng)
 746 {
 747         /* enlarge buffer if needed */
 748         if ((literallen+yleng) >= literalalloc)
 749         {
 750                 do {
 751                         literalalloc *= 2;
 752                 } while ((literallen+yleng) >= literalalloc);
 753                 literalbuf = (char *) repalloc(literalbuf, literalalloc);
 754         }
 755         /* append new data, add trailing null */
 756         memcpy(literalbuf+literallen, ytext, yleng);
 757         literallen += yleng;
 758         literalbuf[literallen] = '\0';
 759 }
 760
 761
 762 static void
 763 addlitchar(unsigned char ychar)
 764 {
 765         /* enlarge buffer if needed */
 766         if ((literallen+1) >= literalalloc)
 767         {
 768                 literalalloc *= 2;
 769                 literalbuf = (char *) repalloc(literalbuf, literalalloc);
 770         }
 771         /* append new data, add trailing null */
 772         literalbuf[literallen] = ychar;
 773         literallen += 1;
 774         literalbuf[literallen] = '\0';
 775 }
 776
 777
 778 /*
 779  * One might be tempted to write pstrdup(literalbuf) instead of this,
 780  * but for long literals this is much faster because the length is
 781  * already known.
 782  */
 783 static char *
 784 litbufdup(void)
 785 {
 786         char *new;
 787
 788         new = palloc(literallen + 1);
 789         memcpy(new, literalbuf, literallen+1);
 790         return new;
 791 }
 792
 793
 794 unsigned char
 795 unescape_single_char(unsigned char c)
 796 {
 797         switch (c)
 798         {
 799                 case 'b':
 800                         return '\b';
 801                 case 'f':
 802                         return '\f';
 803                 case 'n':
 804                         return '\n';
 805                 case 'r':
 806                         return '\r';
 807                 case 't':
 808                         return '\t';
 809                 default:
 810                         return c;
 811         }
 812 }