granicus.if.org Git - postgresql/blob - src/backend/parser/parser.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * parser.c
   4  *              Main entry point/driver for PostgreSQL grammar
   5  *
   6  * Note that the grammar is not allowed to perform any table access
   7  * (since we need to be able to do basic parsing even while inside an
   8  * aborted transaction).  Therefore, the data structures returned by
   9  * the grammar are "raw" parsetrees that still need to be analyzed by
  10  * analyze.c and related files.
  11  *
  12  *
  13  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  14  * Portions Copyright (c) 1994, Regents of the University of California
  15  *
  16  * IDENTIFICATION
  17  *        $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.79 2009/07/12 17:12:34 tgl Exp $
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21
  22 #include "postgres.h"
  23
  24 #include "parser/gramparse.h"
  25 #include "parser/parser.h"
  26
  27
  28 List       *parsetree;                  /* result of parsing is left here */
  29
  30 static bool have_lookahead;             /* is lookahead info valid? */
  31 static int      lookahead_token;        /* one-token lookahead */
  32 static YYSTYPE lookahead_yylval;        /* yylval for lookahead token */
  33 static YYLTYPE lookahead_yylloc;        /* yylloc for lookahead token */
  34
  35
  36 /*
  37  * raw_parser
  38  *              Given a query in string form, do lexical and grammatical analysis.
  39  *
  40  * Returns a list of raw (un-analyzed) parse trees.
  41  */
  42 List *
  43 raw_parser(const char *str)
  44 {
  45         int                     yyresult;
  46
  47         parsetree = NIL;                        /* in case grammar forgets to set it */
  48         have_lookahead = false;
  49
  50         scanner_init(str);
  51         parser_init();
  52
  53         yyresult = base_yyparse();
  54
  55         scanner_finish();
  56
  57         if (yyresult)                           /* error */
  58                 return NIL;
  59
  60         return parsetree;
  61 }
  62
  63
  64 /*
  65  * pg_parse_string_token - get the value represented by a string literal
  66  *
  67  * Given the textual form of a SQL string literal, produce the represented
  68  * value as a palloc'd string.  It is caller's responsibility that the
  69  * passed string does represent one single string literal.
  70  *
  71  * We export this function to avoid having plpgsql depend on internal details
  72  * of the core grammar (such as the token code assigned to SCONST).  Note
  73  * that since the scanner isn't presently re-entrant, this cannot be used
  74  * during use of the main parser/scanner.
  75  */
  76 char *
  77 pg_parse_string_token(const char *token)
  78 {
  79         int                     ctoken;
  80
  81         scanner_init(token);
  82
  83         ctoken = base_yylex();
  84
  85         if (ctoken != SCONST)           /* caller error */
  86                 elog(ERROR, "expected string constant, got token code %d", ctoken);
  87
  88         scanner_finish();
  89
  90         return base_yylval.str;
  91 }
  92
  93
  94 /*
  95  * Intermediate filter between parser and base lexer (base_yylex in scan.l).
  96  *
  97  * The filter is needed because in some cases the standard SQL grammar
  98  * requires more than one token lookahead.      We reduce these cases to one-token
  99  * lookahead by combining tokens here, in order to keep the grammar LALR(1).
 100  *
 101  * Using a filter is simpler than trying to recognize multiword tokens
 102  * directly in scan.l, because we'd have to allow for comments between the
 103  * words.  Furthermore it's not clear how to do it without re-introducing
 104  * scanner backtrack, which would cost more performance than this filter
 105  * layer does.
 106  */
 107 int
 108 filtered_base_yylex(void)
 109 {
 110         int                     cur_token;
 111         int                     next_token;
 112         YYSTYPE         cur_yylval;
 113         YYLTYPE         cur_yylloc;
 114
 115         /* Get next token --- we might already have it */
 116         if (have_lookahead)
 117         {
 118                 cur_token = lookahead_token;
 119                 base_yylval = lookahead_yylval;
 120                 base_yylloc = lookahead_yylloc;
 121                 have_lookahead = false;
 122         }
 123         else
 124                 cur_token = base_yylex();
 125
 126         /* Do we need to look ahead for a possible multiword token? */
 127         switch (cur_token)
 128         {
 129                 case NULLS_P:
 130
 131                         /*
 132                          * NULLS FIRST and NULLS LAST must be reduced to one token
 133                          */
 134                         cur_yylval = base_yylval;
 135                         cur_yylloc = base_yylloc;
 136                         next_token = base_yylex();
 137                         switch (next_token)
 138                         {
 139                                 case FIRST_P:
 140                                         cur_token = NULLS_FIRST;
 141                                         break;
 142                                 case LAST_P:
 143                                         cur_token = NULLS_LAST;
 144                                         break;
 145                                 default:
 146                                         /* save the lookahead token for next time */
 147                                         lookahead_token = next_token;
 148                                         lookahead_yylval = base_yylval;
 149                                         lookahead_yylloc = base_yylloc;
 150                                         have_lookahead = true;
 151                                         /* and back up the output info to cur_token */
 152                                         base_yylval = cur_yylval;
 153                                         base_yylloc = cur_yylloc;
 154                                         break;
 155                         }
 156                         break;
 157
 158                 case WITH:
 159
 160                         /*
 161                          * WITH TIME must be reduced to one token
 162                          */
 163                         cur_yylval = base_yylval;
 164                         cur_yylloc = base_yylloc;
 165                         next_token = base_yylex();
 166                         switch (next_token)
 167                         {
 168                                 case TIME:
 169                                         cur_token = WITH_TIME;
 170                                         break;
 171                                 default:
 172                                         /* save the lookahead token for next time */
 173                                         lookahead_token = next_token;
 174                                         lookahead_yylval = base_yylval;
 175                                         lookahead_yylloc = base_yylloc;
 176                                         have_lookahead = true;
 177                                         /* and back up the output info to cur_token */
 178                                         base_yylval = cur_yylval;
 179                                         base_yylloc = cur_yylloc;
 180                                         break;
 181                         }
 182                         break;
 183
 184                 default:
 185                         break;
 186         }
 187
 188         return cur_token;
 189 }