Minor code review for json.c.

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 12 Jun 2012 20:23:45 +0000 (16:23 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 12 Jun 2012 20:23:45 +0000 (16:23 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 12 Jun 2012 20:23:45 +0000 (16:23 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 12 Jun 2012 20:23:45 +0000 (16:23 -0400)
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c

index 3d69e50e8e070408f05c598ccbbda525ea28776d..e79c2946d0c650b854233ec96051462cb1c6a21f 100644 (file)
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -25,9 +25,9 @@
  #include "utils/json.h"
  #include "utils/typcache.h"
  
-typedef enum
+typedef enum                                   /* types of JSON values */
  {
-       JSON_VALUE_INVALID,
+       JSON_VALUE_INVALID,                     /* non-value tokens are reported as this */
         JSON_VALUE_STRING,
         JSON_VALUE_NUMBER,
         JSON_VALUE_OBJECT,
@@ -37,17 +37,17 @@ typedef enum
         JSON_VALUE_NULL
  } JsonValueType;
  
-typedef struct
+typedef struct                                 /* state of JSON lexer */
  {
-       char       *input;
-       char       *token_start;
-       char       *token_terminator;
-       JsonValueType token_type;
-       int                     line_number;
-       char       *line_start;
+       char       *input;                      /* whole string being parsed */
+       char       *token_start;        /* start of current token within input */
+       char       *token_terminator; /* end of previous or current token */
+       JsonValueType token_type;       /* type of current token, once it's known */
+       int                     line_number;    /* current line number (counting from 1) */
+       char       *line_start;         /* start of current line within input (BROKEN!!) */
  } JsonLexContext;
  
-typedef enum
+typedef enum                                   /* states of JSON parser */
  {
         JSON_PARSE_VALUE,                       /* expecting a value */
         JSON_PARSE_ARRAY_START,         /* saw '[', expecting value or ']' */
@@ -58,17 +58,18 @@ typedef enum
         JSON_PARSE_OBJECT_COMMA         /* saw object ',', expecting next label */
  } JsonParseState;
  
-typedef struct JsonParseStack
+typedef struct JsonParseStack  /* the parser state has to be stackable */
  {
         JsonParseState state;
+       /* currently only need the state enum, but maybe someday more stuff */
  } JsonParseStack;
  
-typedef enum
+typedef enum                                   /* required operations on state stack */
  {
-       JSON_STACKOP_NONE,
-       JSON_STACKOP_PUSH,
-       JSON_STACKOP_PUSH_WITH_PUSHBACK,
-       JSON_STACKOP_POP
+       JSON_STACKOP_NONE,                      /* no-op */
+       JSON_STACKOP_PUSH,                      /* push new JSON_PARSE_VALUE stack item */
+       JSON_STACKOP_PUSH_WITH_PUSHBACK, /* push, then rescan current token */
+       JSON_STACKOP_POP                        /* pop, or expect end of input if no stack */
  } JsonStackOp;
  
  static void json_validate_cstring(char *input);
@@ -78,17 +79,28 @@ static void json_lex_number(JsonLexContext *lex, char *s);
  static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex);
  static void report_invalid_token(JsonLexContext *lex);
  static char *extract_mb_char(char *s);
-static void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds);
+static void composite_to_json(Datum composite, StringInfo result,
+                                                         bool use_line_feeds);
  static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
                                   Datum *vals, bool *nulls, int *valcount,
                                   TYPCATEGORY tcategory, Oid typoutputfunc,
                                   bool use_line_feeds);
-static void array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds);
+static void array_to_json_internal(Datum array, StringInfo result,
+                                                                  bool use_line_feeds);
  
  /* fake type category for JSON so we can distinguish it in datum_to_json */
  #define TYPCATEGORY_JSON 'j'
  /* letters appearing in numeric output that aren't valid in a JSON number */
  #define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
+/* chars to consider as part of an alphanumeric token */
+#define JSON_ALPHANUMERIC_CHAR(c)  \
+       (((c) >= 'a' && (c) <= 'z') || \
+        ((c) >= 'A' && (c) <= 'Z') || \
+        ((c) >= '0' && (c) <= '9') || \
+        (c) == '_' || \
+        IS_HIGHBIT_SET(c))
+
+
  /*
   * Input.
   */
@@ -99,6 +111,7 @@ json_in(PG_FUNCTION_ARGS)
  
         json_validate_cstring(text);
  
+       /* Internal representation is the same as text, for now */
         PG_RETURN_TEXT_P(cstring_to_text(text));
  }
  
@@ -108,6 +121,7 @@ json_in(PG_FUNCTION_ARGS)
  Datum
  json_out(PG_FUNCTION_ARGS)
  {
+       /* we needn't detoast because text_to_cstring will handle that */
         Datum           txt = PG_GETARG_DATUM(0);
  
         PG_RETURN_CSTRING(TextDatumGetCString(txt));
@@ -119,8 +133,8 @@ json_out(PG_FUNCTION_ARGS)
  Datum
  json_send(PG_FUNCTION_ARGS)
  {
-       StringInfoData buf;
         text       *t = PG_GETARG_TEXT_PP(0);
+       StringInfoData buf;
  
         pq_begintypsend(&buf);
         pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
@@ -176,7 +190,7 @@ json_validate_cstring(char *input)
  
         /* Set up parse stack. */
         stacksize = 32;
-       stacktop = palloc(sizeof(JsonParseStack) * stacksize);
+       stacktop = (JsonParseStack *) palloc(sizeof(JsonParseStack) * stacksize);
         stack = stacktop;
         stack->state = JSON_PARSE_VALUE;
  
@@ -212,8 +226,8 @@ redo:
                                         stack->state = JSON_PARSE_ARRAY_NEXT;
                                 else if (lex.token_start[0] == ']')
                                         op = JSON_STACKOP_POP;
-                               else if (lex.token_start[0] == '['
-                                                || lex.token_start[0] == '{')
+                               else if (lex.token_start[0] == '[' ||
+                                                lex.token_start[0] == '{')
                                 {
                                         stack->state = JSON_PARSE_ARRAY_NEXT;
                                         op = JSON_STACKOP_PUSH_WITH_PUSHBACK;
@@ -234,15 +248,15 @@ redo:
                         case JSON_PARSE_OBJECT_START:
                                 if (lex.token_type == JSON_VALUE_STRING)
                                         stack->state = JSON_PARSE_OBJECT_LABEL;
-                               else if (lex.token_type == JSON_VALUE_INVALID
-                                                && lex.token_start[0] == '}')
+                               else if (lex.token_type == JSON_VALUE_INVALID &&
+                                                lex.token_start[0] == '}')
                                         op = JSON_STACKOP_POP;
                                 else
                                         report_parse_error(stack, &lex);
                                 break;
                         case JSON_PARSE_OBJECT_LABEL:
-                               if (lex.token_type == JSON_VALUE_INVALID
-                                       && lex.token_start[0] == ':')
+                               if (lex.token_type == JSON_VALUE_INVALID &&
+                                       lex.token_start[0] == ':')
                                 {
                                         stack->state = JSON_PARSE_OBJECT_NEXT;
                                         op = JSON_STACKOP_PUSH;
@@ -271,19 +285,21 @@ redo:
                                          (int) stack->state);
                 }
  
-               /* Push or pop the stack, if needed. */
+               /* Push or pop the state stack, if needed. */
                 switch (op)
                 {
                         case JSON_STACKOP_PUSH:
                         case JSON_STACKOP_PUSH_WITH_PUSHBACK:
-                               ++stack;
+                               stack++;
                                 if (stack >= &stacktop[stacksize])
                                 {
+                                       /* Need to enlarge the stack. */
                                         int                     stackoffset = stack - stacktop;
  
-                                       stacksize = stacksize + 32;
-                                       stacktop = repalloc(stacktop,
-                                                                               sizeof(JsonParseStack) * stacksize);
+                                       stacksize += 32;
+                                       stacktop = (JsonParseStack *)
+                                               repalloc(stacktop,
+                                                                sizeof(JsonParseStack) * stacksize);
                                         stack = stacktop + stackoffset;
                                 }
                                 stack->state = JSON_PARSE_VALUE;
@@ -299,7 +315,7 @@ redo:
                                                 report_parse_error(NULL, &lex);
                                         return;
                                 }
-                               --stack;
+                               stack--;
                                 break;
                         case JSON_STACKOP_NONE:
                                 /* nothing to do */
@@ -321,15 +337,15 @@ json_lex(JsonLexContext *lex)
         while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
         {
                 if (*s == '\n')
-                       ++lex->line_number;
-               ++s;
+                       lex->line_number++;
+               s++;
         }
         lex->token_start = s;
  
         /* Determine token type. */
-       if (strchr("{}[],:", s[0]))
+       if (strchr("{}[],:", s[0]) != NULL)
         {
-               /* strchr() doesn't return false on a NUL input. */
+               /* strchr() is willing to match a zero byte, so test for that. */
                 if (s[0] == '\0')
                 {
                         /* End of string. */
@@ -373,17 +389,16 @@ json_lex(JsonLexContext *lex)
                  * whole word as an unexpected token, rather than just some
                  * unintuitive prefix thereof.
                  */
-               for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')
-                        || (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p);
-                        ++p)
-                       ;
+               for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
+                       /* skip */ ;
  
-               /*
-                * We got some sort of unexpected punctuation or an otherwise
-                * unexpected character, so just complain about that one character.
-                */
                 if (p == s)
                 {
+                       /*
+                        * We got some sort of unexpected punctuation or an otherwise
+                        * unexpected character, so just complain about that one
+                        * character.
+                        */
                         lex->token_terminator = s + 1;
                         report_invalid_token(lex);
                 }
@@ -415,9 +430,9 @@ json_lex(JsonLexContext *lex)
  static void
  json_lex_string(JsonLexContext *lex)
  {
-       char       *s = lex->token_start + 1;
+       char       *s;
  
-       for (s = lex->token_start + 1; *s != '"'; ++s)
+       for (s = lex->token_start + 1; *s != '"'; s++)
         {
                 /* Per RFC4627, these characters MUST be escaped. */
                 if ((unsigned char) *s < 32)
@@ -437,7 +452,7 @@ json_lex_string(JsonLexContext *lex)
                 else if (*s == '\\')
                 {
                         /* OK, we have an escape character. */
-                       ++s;
+                       s++;
                         if (*s == '\0')
                         {
                                 lex->token_terminator = s;
@@ -448,7 +463,7 @@ json_lex_string(JsonLexContext *lex)
                                 int                     i;
                                 int                     ch = 0;
  
-                               for (i = 1; i <= 4; ++i)
+                               for (i = 1; i <= 4; i++)
                                 {
                                         if (s[i] == '\0')
                                         {
@@ -474,9 +489,9 @@ json_lex_string(JsonLexContext *lex)
                                 /* Account for the four additional bytes we just parsed. */
                                 s += 4;
                         }
-                       else if (!strchr("\"\\/bfnrt", *s))
+                       else if (strchr("\"\\/bfnrt", *s) == NULL)
                         {
-                               /* Error out. */
+                               /* Not a valid string escape, so error out. */
                                 ereport(ERROR,
                                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                                  errmsg("invalid input syntax for type json"),
@@ -527,12 +542,12 @@ json_lex_number(JsonLexContext *lex, char *s)
  
         /* Part (2): parse main digit string. */
         if (*s == '0')
-               ++s;
+               s++;
         else if (*s >= '1' && *s <= '9')
         {
                 do
                 {
-                       ++s;
+                       s++;
                 } while (*s >= '0' && *s <= '9');
         }
         else
@@ -541,14 +556,14 @@ json_lex_number(JsonLexContext *lex, char *s)
         /* Part (3): parse optional decimal portion. */
         if (*s == '.')
         {
-               ++s;
+               s++;
                 if (*s < '0' || *s > '9')
                         error = true;
                 else
                 {
                         do
                         {
-                               ++s;
+                               s++;
                         } while (*s >= '0' && *s <= '9');
                 }
         }
@@ -556,26 +571,29 @@ json_lex_number(JsonLexContext *lex, char *s)
         /* Part (4): parse optional exponent. */
         if (*s == 'e' || *s == 'E')
         {
-               ++s;
+               s++;
                 if (*s == '+' || *s == '-')
-                       ++s;
+                       s++;
                 if (*s < '0' || *s > '9')
                         error = true;
                 else
                 {
                         do
                         {
-                               ++s;
+                               s++;
                         } while (*s >= '0' && *s <= '9');
                 }
         }
  
-       /* Check for trailing garbage. */
-       for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')
-                || (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p); ++p)
-               ;
+       /*
+        * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
+        * here should be considered part of the token for error-reporting
+        * purposes.
+        */
+       for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
+               error = true;
         lex->token_terminator = p;
-       if (p > s || error)
+       if (error)
                 report_invalid_token(lex);
  }
  
@@ -597,7 +615,7 @@ report_parse_error(JsonParseStack *stack, JsonLexContext *lex)
                                                 lex->input),
                                  errdetail("The input string ended unexpectedly.")));
  
-       /* Work out the offending token. */
+       /* Separate out the offending token. */
         toklen = lex->token_terminator - lex->token_start;
         token = palloc(toklen + 1);
         memcpy(token, lex->token_start, toklen);
@@ -680,14 +698,15 @@ extract_mb_char(char *s)
  }
  
  /*
- * Turn a scalar Datum into JSON. Hand off a non-scalar datum to
- * composite_to_json or array_to_json_internal as appropriate.
+ * Turn a scalar Datum into JSON, appending the string to "result".
+ *
+ * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
+ * as appropriate.
   */
-static inline void
-datum_to_json(Datum val, bool is_null, StringInfo result, TYPCATEGORY tcategory,
-                         Oid typoutputfunc)
+static void
+datum_to_json(Datum val, bool is_null, StringInfo result,
+                         TYPCATEGORY tcategory, Oid typoutputfunc)
  {
-
         char       *outputstr;
  
         if (is_null)
@@ -735,6 +754,7 @@ datum_to_json(Datum val, bool is_null, StringInfo result, TYPCATEGORY tcategory,
                         outputstr = OidOutputFunctionCall(typoutputfunc, val);
                         escape_json(result, outputstr);
                         pfree(outputstr);
+                       break;
         }
  }
  
@@ -748,9 +768,8 @@ array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
                                   bool *nulls, int *valcount, TYPCATEGORY tcategory,
                                   Oid typoutputfunc, bool use_line_feeds)
  {
-
         int                     i;
-       char       *sep;
+       const char *sep;
  
         Assert(dim < ndims);
  
@@ -797,7 +816,6 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
         int                     count = 0;
         Datum      *elements;
         bool       *nulls;
-
         int16           typlen;
         bool            typbyval;
         char            typalign,
@@ -852,7 +870,7 @@ composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
                            *tuple;
         int                     i;
         bool            needsep = false;
-       char       *sep;
+       const char *sep;
  
         sep = use_line_feeds ? ",\n " : ",";
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 12 Jun 2012 20:23:45 +0000 (16:23 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 12 Jun 2012 20:23:45 +0000 (16:23 -0400)