granicus.if.org Git - postgresql/blob - src/backend/nodes/read.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * read.c
   4  *        routines to convert a string (legal ascii representation of node) back
   5  *        to nodes
   6  *
   7  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   8  * Portions Copyright (c) 1994, Regents of the University of California
   9  *
  10  *
  11  * IDENTIFICATION
  12  *        $Header: /cvsroot/pgsql/src/backend/nodes/read.c,v 1.21 2000/02/21 18:47:00 tgl Exp $
  13  *
  14  * HISTORY
  15  *        AUTHOR                        DATE                    MAJOR EVENT
  16  *        Andrew Yu                     Nov 2, 1994             file creation
  17  *
  18  *-------------------------------------------------------------------------
  19  */
  20 #include <ctype.h>
  21 #include <errno.h>
  22
  23 #include "postgres.h"
  24
  25 #include "nodes/pg_list.h"
  26 #include "nodes/readfuncs.h"
  27
  28 /*
  29  * stringToNode -
  30  *        returns a Node with a given legal ASCII representation
  31  */
  32 void *
  33 stringToNode(char *str)
  34 {
  35         void       *retval;
  36
  37         lsptok(str, NULL);                      /* set the string used in lsptok */
  38         retval = nodeRead(true);        /* start reading */
  39
  40         return retval;
  41 }
  42
  43 /*****************************************************************************
  44  *
  45  * the lisp token parser
  46  *
  47  *****************************************************************************/
  48
  49 /*
  50  * lsptok --- retrieve next "token" from a string.
  51  *
  52  * Works kinda like strtok, except it never modifies the source string.
  53  * (Instead of storing nulls into the string, the length of the token
  54  * is returned to the caller.)
  55  * Also, the rules about what is a token are hard-wired rather than being
  56  * configured by passing a set of terminating characters.
  57  *
  58  * The string is initially set by passing a non-NULL "string" value,
  59  * and subsequent calls with string==NULL read the previously given value.
  60  * (Pass length==NULL to set the string without reading its first token.)
  61  *
  62  * The rules for tokens are:
  63  *  * Whitespace (space, tab, newline) always separates tokens.
  64  *  * The characters '(', ')', '{', '}' form individual tokens even
  65  *    without any whitespace around them.
  66  *  * Otherwise, a token is all the characters up to the next whitespace
  67  *    or occurrence of one of the four special characters.
  68  *  * A backslash '\' can be used to quote whitespace or one of the four
  69  *    special characters, so that it is treated as a plain token character.
  70  *    Backslashes themselves must also be backslashed for consistency.
  71  *    Any other character can be, but need not be, backslashed as well.
  72  *  * If the resulting token is '<>' (with no backslash), it is returned
  73  *    as a non-NULL pointer to the token but with length == 0.  Note that
  74  *    there is no other way to get a zero-length token.
  75  *
  76  * Returns a pointer to the start of the next token, and the length of the
  77  * token (including any embedded backslashes!) in *length.  If there are
  78  * no more tokens, NULL and 0 are returned.
  79  *
  80  * NOTE: this routine doesn't remove backslashes; the caller must do so
  81  * if necessary (see "debackslash").
  82  *
  83  * NOTE: prior to release 7.0, this routine also had a special case to treat
  84  * a token starting with '"' as extending to the next '"'.  This code was
  85  * broken, however, since it would fail to cope with a string containing an
  86  * embedded '"'.  I have therefore removed this special case, and instead
  87  * introduced rules for using backslashes to quote characters.  Higher-level
  88  * code should add backslashes to a string constant to ensure it is treated
  89  * as a single token.
  90  */
  91 char *
  92 lsptok(char *string, int *length)
  93 {
  94         static char *saved_str = NULL;
  95         char       *local_str;          /* working pointer to string */
  96         char       *ret_str;            /* start of token to return */
  97
  98         if (string != NULL)
  99         {
 100                 saved_str = string;
 101                 if (length == NULL)
 102                         return NULL;
 103         }
 104
 105         local_str = saved_str;
 106
 107         while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
 108                 local_str++;
 109
 110         if (*local_str == '\0')
 111         {
 112                 *length = 0;
 113                 saved_str = local_str;
 114                 return NULL;                    /* no more tokens */
 115         }
 116
 117         /*
 118          * Now pointing at start of next token.
 119          */
 120         ret_str = local_str;
 121
 122         if (*local_str == '(' || *local_str == ')' ||
 123                 *local_str == '{' || *local_str == '}')
 124         {
 125                 /* special 1-character token */
 126                 local_str++;
 127         }
 128         else
 129         {
 130                 /* Normal token, possibly containing backslashes */
 131                 while (*local_str != '\0' &&
 132                            *local_str != ' ' && *local_str != '\n' &&
 133                            *local_str != '\t' &&
 134                            *local_str != '(' && *local_str != ')' &&
 135                            *local_str != '{' && *local_str != '}')
 136                 {
 137                         if (*local_str == '\\' && local_str[1] != '\0')
 138                                 local_str += 2;
 139                         else
 140                                 local_str++;
 141                 }
 142         }
 143
 144         *length = local_str - ret_str;
 145
 146         /* Recognize special case for "empty" token */
 147         if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
 148                 *length = 0;
 149
 150         saved_str = local_str;
 151
 152         return ret_str;
 153 }
 154
 155 /*
 156  * debackslash -
 157  *        create a palloc'd string holding the given token.
 158  *        any protective backslashes in the token are removed.
 159  */
 160 char *
 161 debackslash(char *token, int length)
 162 {
 163         char   *result = palloc(length+1);
 164         char   *ptr = result;
 165
 166         while (length > 0)
 167         {
 168                 if (*token == '\\' && length > 1)
 169                         token++, length--;
 170                 *ptr++ = *token++;
 171                 length--;
 172         }
 173         *ptr = '\0';
 174         return result;
 175 }
 176
 177 #define RIGHT_PAREN (1000000 + 1)
 178 #define LEFT_PAREN      (1000000 + 2)
 179 #define PLAN_SYM        (1000000 + 3)
 180 #define AT_SYMBOL       (1000000 + 4)
 181 #define ATOM_TOKEN      (1000000 + 5)
 182
 183 /*
 184  * nodeTokenType -
 185  *        returns the type of the node token contained in token.
 186  *        It returns one of the following valid NodeTags:
 187  *              T_Integer, T_Float, T_String
 188  *        and some of its own:
 189  *              RIGHT_PAREN, LEFT_PAREN, PLAN_SYM, AT_SYMBOL, ATOM_TOKEN
 190  *
 191  *        Assumption: the ascii representation is legal
 192  */
 193 static NodeTag
 194 nodeTokenType(char *token, int length)
 195 {
 196         NodeTag         retval;
 197         char       *numptr;
 198         int                     numlen;
 199         char       *endptr;
 200
 201         /*
 202          * Check if the token is a number
 203          */
 204         numptr = token;
 205         numlen = length;
 206         if (*numptr == '+' || *numptr == '-')
 207                 numptr++, numlen--;
 208         if ((numlen > 0 && isdigit(*numptr)) ||
 209                 (numlen > 1 && *numptr == '.' && isdigit(numptr[1])))
 210         {
 211                 /*
 212                  * Yes.  Figure out whether it is integral or float;
 213                  * this requires both a syntax check and a range check.
 214                  * strtol() can do both for us.
 215                  * We know the token will end at a character that strtol will
 216                  * stop at, so we do not need to modify the string.
 217                  */
 218                 errno = 0;
 219                 (void) strtol(token, &endptr, 10);
 220                 if (endptr != token+length || errno == ERANGE)
 221                         return T_Float;
 222                 return T_Integer;
 223         }
 224         /*
 225          * these three cases do not need length checks, since lsptok()
 226          * will always treat them as single-byte tokens
 227          */
 228         else if (*token == '(')
 229                 retval = LEFT_PAREN;
 230         else if (*token == ')')
 231                 retval = RIGHT_PAREN;
 232         else if (*token == '{')
 233                 retval = PLAN_SYM;
 234         else if (*token == '@' && length == 1)
 235                 retval = AT_SYMBOL;
 236         else if (*token == '\"' && length > 1 && token[length-1] == '\"')
 237                 retval = T_String;
 238         else
 239                 retval = ATOM_TOKEN;
 240         return retval;
 241 }
 242
 243 /*
 244  * nodeRead -
 245  *        Slightly higher-level reader.
 246  *
 247  * This routine applies some semantic knowledge on top of the purely
 248  * lexical tokenizer lsptok().  It can read
 249  *      * Value token nodes (integers, floats, or strings);
 250  *  * Plan nodes (via parsePlanString() from readfuncs.c);
 251  *  * Lists of the above.
 252  *
 253  * Secrets:  He assumes that lsptok already has the string (see above).
 254  * Any callers should set read_car_only to true.
 255  */
 256 void *
 257 nodeRead(bool read_car_only)
 258 {
 259         char       *token;
 260         int                     tok_len;
 261         NodeTag         type;
 262         Node       *this_value,
 263                            *return_value;
 264         bool            make_dotted_pair_cell = false;
 265
 266         token = lsptok(NULL, &tok_len);
 267
 268         if (token == NULL)
 269                 return NULL;
 270
 271         type = nodeTokenType(token, tok_len);
 272
 273         switch (type)
 274         {
 275                 case PLAN_SYM:
 276                         this_value = parsePlanString();
 277                         token = lsptok(NULL, &tok_len);
 278                         if (token[0] != '}')
 279                                 elog(ERROR, "nodeRead: did not find '}' at end of plan node");
 280                         if (!read_car_only)
 281                                 make_dotted_pair_cell = true;
 282                         else
 283                                 make_dotted_pair_cell = false;
 284                         break;
 285                 case LEFT_PAREN:
 286                         if (!read_car_only)
 287                         {
 288                                 List       *l = makeNode(List);
 289
 290                                 lfirst(l) = nodeRead(false);
 291                                 lnext(l) = nodeRead(false);
 292                                 this_value = (Node *) l;
 293                         }
 294                         else
 295                                 this_value = nodeRead(false);
 296                         break;
 297                 case RIGHT_PAREN:
 298                         this_value = NULL;
 299                         break;
 300                 case AT_SYMBOL:
 301                         this_value = NULL;
 302                         break;
 303                 case ATOM_TOKEN:
 304                         if (tok_len == 0)
 305                         {
 306                                 /* must be "<>" */
 307                                 this_value = NULL;
 308                                 /*
 309                                  * It might be NULL but it is an atom!
 310                                  */
 311                                 if (read_car_only)
 312                                         make_dotted_pair_cell = false;
 313                                 else
 314                                         make_dotted_pair_cell = true;
 315                         }
 316                         else
 317                         {
 318                                 /* !attention! result is not a Node.  Use with caution. */
 319                                 this_value = (Node *) debackslash(token, tok_len);
 320                                 make_dotted_pair_cell = true;
 321                         }
 322                         break;
 323                 case T_Integer:
 324                         /* we know that the token terminates on a char atol will stop at */
 325                         this_value = (Node *) makeInteger(atol(token));
 326                         make_dotted_pair_cell = true;
 327                         break;
 328                 case T_Float:
 329                         {
 330                                 char   *fval = (char *) palloc(tok_len + 1);
 331
 332                                 memcpy(fval, token, tok_len);
 333                                 fval[tok_len] = '\0';
 334                                 this_value = (Node *) makeFloat(fval);
 335                                 make_dotted_pair_cell = true;
 336                         }
 337                         break;
 338                 case T_String:
 339                         /* need to remove leading and trailing quotes, and backslashes */
 340                         this_value = (Node *) makeString(debackslash(token+1, tok_len-2));
 341                         make_dotted_pair_cell = true;
 342                         break;
 343                 default:
 344                         elog(ERROR, "nodeRead: Bad type %d", type);
 345                         this_value = NULL;      /* keep compiler happy */
 346                         break;
 347         }
 348         if (make_dotted_pair_cell)
 349         {
 350                 List       *l = makeNode(List);
 351
 352                 lfirst(l) = this_value;
 353
 354                 if (!read_car_only)
 355                         lnext(l) = nodeRead(false);
 356                 else
 357                         lnext(l) = NULL;
 358                 return_value = (Node *) l;
 359         }
 360         else
 361                 return_value = this_value;
 362         return return_value;
 363 }