granicus.if.org Git - postgresql/blob - src/backend/nodes/read.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * read.c
   4  *        routines to convert a string (legal ascii representation of node) back
   5  *        to nodes
   6  *
   7  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
   8  * Portions Copyright (c) 1994, Regents of the University of California
   9  *
  10  *
  11  * IDENTIFICATION
  12  *        src/backend/nodes/read.c
  13  *
  14  * HISTORY
  15  *        AUTHOR                        DATE                    MAJOR EVENT
  16  *        Andrew Yu                     Nov 2, 1994             file creation
  17  *
  18  *-------------------------------------------------------------------------
  19  */
  20 #include "postgres.h"
  21
  22 #include <ctype.h>
  23
  24 #include "nodes/pg_list.h"
  25 #include "nodes/readfuncs.h"
  26 #include "nodes/value.h"
  27
  28
/*
 * Static state for pg_strtok: the position in the input string at which the
 * next pg_strtok() call resumes scanning.  stringToNode() points it at the
 * string to read and puts the previous value back when it is done;
 * pg_strtok() moves it past each token it returns.
 */
static char *pg_strtok_ptr = NULL;
  31
  32
  33 /*
  34  * stringToNode -
  35  *        returns a Node with a given legal ASCII representation
  36  */
  37 void *
  38 stringToNode(char *str)
  39 {
  40         char       *save_strtok;
  41         void       *retval;
  42
  43         /*
  44          * We save and restore the pre-existing state of pg_strtok. This makes the
  45          * world safe for re-entrant invocation of stringToNode, without incurring
  46          * a lot of notational overhead by having to pass the next-character
  47          * pointer around through all the readfuncs.c code.
  48          */
  49         save_strtok = pg_strtok_ptr;
  50
  51         pg_strtok_ptr = str;            /* point pg_strtok at the string to read */
  52
  53         retval = nodeRead(NULL, 0); /* do the reading */
  54
  55         pg_strtok_ptr = save_strtok;
  56
  57         return retval;
  58 }
  59
  60 /*****************************************************************************
  61  *
  62  * the lisp token parser
  63  *
  64  *****************************************************************************/
  65
  66 /*
  67  * pg_strtok --- retrieve next "token" from a string.
  68  *
  69  * Works kinda like strtok, except it never modifies the source string.
  70  * (Instead of storing nulls into the string, the length of the token
  71  * is returned to the caller.)
  72  * Also, the rules about what is a token are hard-wired rather than being
  73  * configured by passing a set of terminating characters.
  74  *
  75  * The string is assumed to have been initialized already by stringToNode.
  76  *
  77  * The rules for tokens are:
  78  *      * Whitespace (space, tab, newline) always separates tokens.
  79  *      * The characters '(', ')', '{', '}' form individual tokens even
  80  *        without any whitespace around them.
  81  *      * Otherwise, a token is all the characters up to the next whitespace
  82  *        or occurrence of one of the four special characters.
  83  *      * A backslash '\' can be used to quote whitespace or one of the four
  84  *        special characters, so that it is treated as a plain token character.
  85  *        Backslashes themselves must also be backslashed for consistency.
  86  *        Any other character can be, but need not be, backslashed as well.
  87  *      * If the resulting token is '<>' (with no backslash), it is returned
  88  *        as a non-NULL pointer to the token but with length == 0.      Note that
  89  *        there is no other way to get a zero-length token.
  90  *
  91  * Returns a pointer to the start of the next token, and the length of the
  92  * token (including any embedded backslashes!) in *length.      If there are
  93  * no more tokens, NULL and 0 are returned.
  94  *
  95  * NOTE: this routine doesn't remove backslashes; the caller must do so
  96  * if necessary (see "debackslash").
  97  *
  98  * NOTE: prior to release 7.0, this routine also had a special case to treat
  99  * a token starting with '"' as extending to the next '"'.      This code was
 100  * broken, however, since it would fail to cope with a string containing an
 101  * embedded '"'.  I have therefore removed this special case, and instead
 102  * introduced rules for using backslashes to quote characters.  Higher-level
 103  * code should add backslashes to a string constant to ensure it is treated
 104  * as a single token.
 105  */
 106 char *
 107 pg_strtok(int *length)
 108 {
 109         char       *local_str;          /* working pointer to string */
 110         char       *ret_str;            /* start of token to return */
 111
 112         local_str = pg_strtok_ptr;
 113
 114         while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
 115                 local_str++;
 116
 117         if (*local_str == '\0')
 118         {
 119                 *length = 0;
 120                 pg_strtok_ptr = local_str;
 121                 return NULL;                    /* no more tokens */
 122         }
 123
 124         /*
 125          * Now pointing at start of next token.
 126          */
 127         ret_str = local_str;
 128
 129         if (*local_str == '(' || *local_str == ')' ||
 130                 *local_str == '{' || *local_str == '}')
 131         {
 132                 /* special 1-character token */
 133                 local_str++;
 134         }
 135         else
 136         {
 137                 /* Normal token, possibly containing backslashes */
 138                 while (*local_str != '\0' &&
 139                            *local_str != ' ' && *local_str != '\n' &&
 140                            *local_str != '\t' &&
 141                            *local_str != '(' && *local_str != ')' &&
 142                            *local_str != '{' && *local_str != '}')
 143                 {
 144                         if (*local_str == '\\' && local_str[1] != '\0')
 145                                 local_str += 2;
 146                         else
 147                                 local_str++;
 148                 }
 149         }
 150
 151         *length = local_str - ret_str;
 152
 153         /* Recognize special case for "empty" token */
 154         if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
 155                 *length = 0;
 156
 157         pg_strtok_ptr = local_str;
 158
 159         return ret_str;
 160 }
 161
 162 /*
 163  * debackslash -
 164  *        create a palloc'd string holding the given token.
 165  *        any protective backslashes in the token are removed.
 166  */
 167 char *
 168 debackslash(char *token, int length)
 169 {
 170         char       *result = palloc(length + 1);
 171         char       *ptr = result;
 172
 173         while (length > 0)
 174         {
 175                 if (*token == '\\' && length > 1)
 176                         token++, length--;
 177                 *ptr++ = *token++;
 178                 length--;
 179         }
 180         *ptr = '\0';
 181         return result;
 182 }
 183
/*
 * Token classes private to this file.  nodeTokenType() returns them next to
 * the NodeTag values it recognizes, and nodeRead() switches on the result.
 * NOTE(review): the 1000000 base is presumably chosen to stay clear of every
 * real NodeTag value -- confirm against the NodeTag enum.
 */
#define RIGHT_PAREN (1000000 + 1)
#define LEFT_PAREN      (1000000 + 2)
#define LEFT_BRACE      (1000000 + 3)
#define OTHER_TOKEN (1000000 + 4)
 188
 189 /*
 190  * nodeTokenType -
 191  *        returns the type of the node token contained in token.
 192  *        It returns one of the following valid NodeTags:
 193  *              T_Integer, T_Float, T_String, T_BitString
 194  *        and some of its own:
 195  *              RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
 196  *
 197  *        Assumption: the ascii representation is legal
 198  */
 199 static NodeTag
 200 nodeTokenType(char *token, int length)
 201 {
 202         NodeTag         retval;
 203         char       *numptr;
 204         int                     numlen;
 205
 206         /*
 207          * Check if the token is a number
 208          */
 209         numptr = token;
 210         numlen = length;
 211         if (*numptr == '+' || *numptr == '-')
 212                 numptr++, numlen--;
 213         if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
 214                 (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
 215         {
 216                 /*
 217                  * Yes.  Figure out whether it is integral or float; this requires
 218                  * both a syntax check and a range check. strtol() can do both for us.
 219                  * We know the token will end at a character that strtol will stop at,
 220                  * so we do not need to modify the string.
 221                  */
 222                 long            val;
 223                 char       *endptr;
 224
 225                 errno = 0;
 226                 val = strtol(token, &endptr, 10);
 227                 (void) val;                             /* avoid compiler warning if unused */
 228                 if (endptr != token + length || errno == ERANGE
 229 #ifdef HAVE_LONG_INT_64
 230                 /* if long > 32 bits, check for overflow of int4 */
 231                         || val != (long) ((int32) val)
 232 #endif
 233                         )
 234                         return T_Float;
 235                 return T_Integer;
 236         }
 237
 238         /*
 239          * these three cases do not need length checks, since pg_strtok() will
 240          * always treat them as single-byte tokens
 241          */
 242         else if (*token == '(')
 243                 retval = LEFT_PAREN;
 244         else if (*token == ')')
 245                 retval = RIGHT_PAREN;
 246         else if (*token == '{')
 247                 retval = LEFT_BRACE;
 248         else if (*token == '\"' && length > 1 && token[length - 1] == '\"')
 249                 retval = T_String;
 250         else if (*token == 'b')
 251                 retval = T_BitString;
 252         else
 253                 retval = OTHER_TOKEN;
 254         return retval;
 255 }
 256
 257 /*
 258  * nodeRead -
 259  *        Slightly higher-level reader.
 260  *
 261  * This routine applies some semantic knowledge on top of the purely
 262  * lexical tokenizer pg_strtok().       It can read
 263  *      * Value token nodes (integers, floats, or strings);
 264  *      * General nodes (via parseNodeString() from readfuncs.c);
 265  *      * Lists of the above;
 266  *      * Lists of integers or OIDs.
 267  * The return value is declared void *, not Node *, to avoid having to
 268  * cast it explicitly in callers that assign to fields of different types.
 269  *
 270  * External callers should always pass NULL/0 for the arguments.  Internally
 271  * a non-NULL token may be passed when the upper recursion level has already
 272  * scanned the first token of a node's representation.
 273  *
 274  * We assume pg_strtok is already initialized with a string to read (hence
 275  * this should only be invoked from within a stringToNode operation).
 276  */
 277 void *
 278 nodeRead(char *token, int tok_len)
 279 {
 280         Node       *result;
 281         NodeTag         type;
 282
 283         if (token == NULL)                      /* need to read a token? */
 284         {
 285                 token = pg_strtok(&tok_len);
 286
 287                 if (token == NULL)              /* end of input */
 288                         return NULL;
 289         }
 290
 291         type = nodeTokenType(token, tok_len);
 292
 293         switch ((int) type)
 294         {
 295                 case LEFT_BRACE:
 296                         result = parseNodeString();
 297                         token = pg_strtok(&tok_len);
 298                         if (token == NULL || token[0] != '}')
 299                                 elog(ERROR, "did not find '}' at end of input node");
 300                         break;
 301                 case LEFT_PAREN:
 302                         {
 303                                 List       *l = NIL;
 304
 305                                 /*----------
 306                                  * Could be an integer list:    (i int int ...)
 307                                  * or an OID list:                              (o int int ...)
 308                                  * or a list of nodes/values:   (node node ...)
 309                                  *----------
 310                                  */
 311                                 token = pg_strtok(&tok_len);
 312                                 if (token == NULL)
 313                                         elog(ERROR, "unterminated List structure");
 314                                 if (tok_len == 1 && token[0] == 'i')
 315                                 {
 316                                         /* List of integers */
 317                                         for (;;)
 318                                         {
 319                                                 int                     val;
 320                                                 char       *endptr;
 321
 322                                                 token = pg_strtok(&tok_len);
 323                                                 if (token == NULL)
 324                                                         elog(ERROR, "unterminated List structure");
 325                                                 if (token[0] == ')')
 326                                                         break;
 327                                                 val = (int) strtol(token, &endptr, 10);
 328                                                 if (endptr != token + tok_len)
 329                                                         elog(ERROR, "unrecognized integer: \"%.*s\"",
 330                                                                  tok_len, token);
 331                                                 l = lappend_int(l, val);
 332                                         }
 333                                 }
 334                                 else if (tok_len == 1 && token[0] == 'o')
 335                                 {
 336                                         /* List of OIDs */
 337                                         for (;;)
 338                                         {
 339                                                 Oid                     val;
 340                                                 char       *endptr;
 341
 342                                                 token = pg_strtok(&tok_len);
 343                                                 if (token == NULL)
 344                                                         elog(ERROR, "unterminated List structure");
 345                                                 if (token[0] == ')')
 346                                                         break;
 347                                                 val = (Oid) strtoul(token, &endptr, 10);
 348                                                 if (endptr != token + tok_len)
 349                                                         elog(ERROR, "unrecognized OID: \"%.*s\"",
 350                                                                  tok_len, token);
 351                                                 l = lappend_oid(l, val);
 352                                         }
 353                                 }
 354                                 else
 355                                 {
 356                                         /* List of other node types */
 357                                         for (;;)
 358                                         {
 359                                                 /* We have already scanned next token... */
 360                                                 if (token[0] == ')')
 361                                                         break;
 362                                                 l = lappend(l, nodeRead(token, tok_len));
 363                                                 token = pg_strtok(&tok_len);
 364                                                 if (token == NULL)
 365                                                         elog(ERROR, "unterminated List structure");
 366                                         }
 367                                 }
 368                                 result = (Node *) l;
 369                                 break;
 370                         }
 371                 case RIGHT_PAREN:
 372                         elog(ERROR, "unexpected right parenthesis");
 373                         result = NULL;          /* keep compiler happy */
 374                         break;
 375                 case OTHER_TOKEN:
 376                         if (tok_len == 0)
 377                         {
 378                                 /* must be "<>" --- represents a null pointer */
 379                                 result = NULL;
 380                         }
 381                         else
 382                         {
 383                                 elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
 384                                 result = NULL;  /* keep compiler happy */
 385                         }
 386                         break;
 387                 case T_Integer:
 388
 389                         /*
 390                          * we know that the token terminates on a char atol will stop at
 391                          */
 392                         result = (Node *) makeInteger(atol(token));
 393                         break;
 394                 case T_Float:
 395                         {
 396                                 char       *fval = (char *) palloc(tok_len + 1);
 397
 398                                 memcpy(fval, token, tok_len);
 399                                 fval[tok_len] = '\0';
 400                                 result = (Node *) makeFloat(fval);
 401                         }
 402                         break;
 403                 case T_String:
 404                         /* need to remove leading and trailing quotes, and backslashes */
 405                         result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
 406                         break;
 407                 case T_BitString:
 408                         {
 409                                 char       *val = palloc(tok_len);
 410
 411                                 /* skip leading 'b' */
 412                                 memcpy(val, token + 1, tok_len - 1);
 413                                 val[tok_len - 1] = '\0';
 414                                 result = (Node *) makeBitString(val);
 415                                 break;
 416                         }
 417                 default:
 418                         elog(ERROR, "unrecognized node type: %d", (int) type);
 419                         result = NULL;          /* keep compiler happy */
 420                         break;
 421         }
 422
 423         return (void *) result;
 424 }