Merge the is_ws_char() and is_hex_char() changes to json_tokener from branch 'ramirop...

author Eric Haszlakiewicz <erh+git@nimenees.com>
Sun, 2 Aug 2020 02:54:36 +0000 (02:54 +0000)
committer Eric Haszlakiewicz <erh+git@nimenees.com>
Sun, 2 Aug 2020 02:55:45 +0000 (02:55 +0000)
diff --cc json_tokener.c

index 6527270dd475e058a99f029d52789cec501206ba,6fc4937f97b54478b81a2984bffeea6b1512694b..ea61ce0a9aae24cd7356b4f03db10e2346805761
--- 1/json_tokener.c
--- 2/json_tokener.c
+++ b/json_tokener.c
@@@ -40,19 -40,49 +40,42 @@@
   #ifdef HAVE_XLOCALE_H
   #include <xlocale.h>
   #endif
+ +#ifdef HAVE_STRINGS_H
+ +#include <strings.h>
+ +#endif /* HAVE_STRINGS_H */
   
- -#define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
+ +#define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x)&7) + 9)
   
   #if !HAVE_STRNCASECMP && defined(_MSC_VER)
- -  /* MSC has the version as _strnicmp */
- -# define strncasecmp _strnicmp
+ +/* MSC has the version as _strnicmp */
+ +#define strncasecmp _strnicmp
   #elif !HAVE_STRNCASECMP
- -# error You do not have strncasecmp on your system.
+ +#error You do not have strncasecmp on your system.
   #endif /* HAVE_STRNCASECMP */
   
- -static int is_number_char(char c)
- -{
- -      return (c >= '0' && c <= '9')
- -          || c == '.'
- -          || c == '+'
- -          || c == '-'
- -          || c == 'e'
- -          || c == 'E';
- -}
- -
+ /* The following helper functions are used to speed up parsing. They
+  * are faster than their ctype counterparts because they assume that
+  * the input is in ASCII and that the locale is set to "C". The
+  * compiler will also inline these functions, providing an additional
+  * speedup by saving on function calls.
+  */
+ static int is_ws_char(char c)
+ {
+       return c == ' '
+           || c == '\t'
+           || c == '\n'
+           || c == '\v'
+           || c == '\f'
+           || c == '\r';
+ }
+ 
+ static int is_hex_char(char c)
+ {
+       return (c >= '0' && c <= '9')
+           || (c >= 'A' && c <= 'F')
+           || (c >= 'a' && c <= 'f');
+ }
+ 
   /* Use C99 NAN by default; if not available, nan("") should work too. */
   #ifndef NAN
   #define NAN nan("")
@@@ -257,994 -268,760 +280,994 @@@ struct json_object *json_tokener_parse_
   
   /* End optimization macro defs */
   
- -
- -struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
- -                                        const char *str, int len)
+ +struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len)
   {
- -  struct json_object *obj = NULL;
- -  char c = '\1';
- -#ifdef HAVE_USELOCALE
- -  locale_t oldlocale = uselocale(NULL);
- -  locale_t newloc;
- -#elif defined(HAVE_SETLOCALE)
- -  char *oldlocale = NULL;
- -#endif
- -
- -  tok->char_offset = 0;
- -  tok->err = json_tokener_success;
- -
- -  /* this interface is presently not 64-bit clean due to the int len argument
- -     and the internal printbuf interface that takes 32-bit int len arguments
- -     so the function limits the maximum string size to INT32_MAX (2GB).
- -     If the function is called with len == -1 then strlen is called to check
- -     the string length is less than INT32_MAX (2GB) */
- -  if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX)) {
- -    tok->err = json_tokener_error_size;
- -    return NULL;
- -  }
+ +      struct json_object *obj = NULL;
+ +      char c = '\1';
+ +      unsigned int nBytes = 0;
+ +      unsigned int *nBytesp = &nBytes;
   
   #ifdef HAVE_USELOCALE
- -  {
- -    locale_t duploc = duplocale(oldlocale);
- -    newloc = newlocale(LC_NUMERIC, "C", duploc);
- -    // XXX at least Debian 8.4 has a bug in newlocale where it doesn't
- -    //  change the decimal separator unless you set LC_TIME!
- -    if (newloc)
- -    {
- -      duploc = newloc; // original duploc has been freed by newlocale()
- -      newloc = newlocale(LC_TIME, "C", duploc);
- -    }
- -    if (newloc == NULL)
- -    {
- -      freelocale(duploc);
- -      return NULL;
- -    }
- -    uselocale(newloc);
- -  }
+ +      locale_t oldlocale = uselocale(NULL);
+ +      locale_t newloc;
   #elif defined(HAVE_SETLOCALE)
- -  {
- -    char *tmplocale;
- -    tmplocale = setlocale(LC_NUMERIC, NULL);
- -    if (tmplocale) oldlocale = strdup(tmplocale);
- -    setlocale(LC_NUMERIC, "C");
- -  }
+ +      char *oldlocale = NULL;
   #endif
   
- -  while (PEEK_CHAR(c, tok)) {
- -
- -  redo_char:
- -    switch(state) {
- -
- -    case json_tokener_state_eatws:
- -      /* Advance until we change state */
- -      while (is_ws_char(c)) {
- -      if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
- -        goto out;
- -      }
- -      if(c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) {
- -      printbuf_reset(tok->pb);
- -      printbuf_memappend_fast(tok->pb, &c, 1);
- -      state = json_tokener_state_comment_start;
- -      } else {
- -      state = saved_state;
- -      goto redo_char;
- -      }
- -      break;
- -
- -    case json_tokener_state_start:
- -      switch(c) {
- -      case '{':
- -      state = json_tokener_state_eatws;
- -      saved_state = json_tokener_state_object_field_start;
- -      current = json_object_new_object();
- -      if(current == NULL)
- -              goto out;
- -      break;
- -      case '[':
- -      state = json_tokener_state_eatws;
- -      saved_state = json_tokener_state_array;
- -      current = json_object_new_array();
- -      if(current == NULL)
- -              goto out;
- -      break;
- -      case 'I':
- -      case 'i':
- -      state = json_tokener_state_inf;
- -      printbuf_reset(tok->pb);
- -      tok->st_pos = 0;
- -      goto redo_char;
- -      case 'N':
- -      case 'n':
- -      state = json_tokener_state_null; // or NaN
- -      printbuf_reset(tok->pb);
- -      tok->st_pos = 0;
- -      goto redo_char;
- -      case '\'':
- -        if (tok->flags & JSON_TOKENER_STRICT) {
- -            /* in STRICT mode only double-quote are allowed */
- -            tok->err = json_tokener_error_parse_unexpected;
- -            goto out;
- -        }
- -      /* FALLTHRU */
- -      case '"':
- -      state = json_tokener_state_string;
- -      printbuf_reset(tok->pb);
- -      tok->quote_char = c;
- -      break;
- -      case 'T':
- -      case 't':
- -      case 'F':
- -      case 'f':
- -      state = json_tokener_state_boolean;
- -      printbuf_reset(tok->pb);
- -      tok->st_pos = 0;
- -      goto redo_char;
- -      case '0':
- -      case '1':
- -      case '2':
- -      case '3':
- -      case '4':
- -      case '5':
- -      case '6':
- -      case '7':
- -      case '8':
- -      case '9':
- -      case '-':
- -      state = json_tokener_state_number;
- -      printbuf_reset(tok->pb);
- -      tok->is_double = 0;
- -      goto redo_char;
- -      default:
- -      tok->err = json_tokener_error_parse_unexpected;
- -      goto out;
- -      }
- -      break;
- -
- -    case json_tokener_state_finish:
- -      if(tok->depth == 0) goto out;
- -      obj = json_object_get(current);
- -      json_tokener_reset_level(tok, tok->depth);
- -      tok->depth--;
- -      goto redo_char;
- -
- -    case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
- -      {
- -      /* If we were guaranteed to have len set, then we could (usually) handle
- -       * the entire "Infinity" check in a single strncmp (strncasecmp), but
- -       * since len might be -1 (i.e. "read until \0"), we need to check it
- -       * a character at a time.
- -       * Trying to handle it both ways would make this code considerably more
- -       * complicated with likely little performance benefit.
+ +      tok->char_offset = 0;
+ +      tok->err = json_tokener_success;
+ +
+ +      /* this interface is presently not 64-bit clean due to the int len argument
+ +       * and the internal printbuf interface that takes 32-bit int len arguments
+ +       * so the function limits the maximum string size to INT32_MAX (2GB).
+ +       * If the function is called with len == -1 then strlen is called to check
+ +       * the string length is less than INT32_MAX (2GB)
          */
- -      int is_negative = 0;
- -      const char *_json_inf_str = json_inf_str;
- -      if (!(tok->flags & JSON_TOKENER_STRICT))
- -              _json_inf_str = json_inf_str_lower;
+ +      if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX))
+ +      {
+ +              tok->err = json_tokener_error_size;
+ +              return NULL;
+ +      }
   
- -      /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
- -      while (tok->st_pos < (int)json_inf_str_len)
+ +#ifdef HAVE_USELOCALE
         {
- -              char inf_char = *str;
- -              if (!(tok->flags & JSON_TOKENER_STRICT))
- -                      inf_char = tolower((int)*str);
- -              if (inf_char != _json_inf_str[tok->st_pos])
- -              {
- -                      tok->err = json_tokener_error_parse_unexpected;
- -                      goto out;
- -              }
- -              tok->st_pos++;
- -              (void)ADVANCE_CHAR(str, tok);
- -              if (!PEEK_CHAR(c, tok))
+ +              locale_t duploc = duplocale(oldlocale);
+ +              newloc = newlocale(LC_NUMERIC_MASK, "C", duploc);
+ +              if (newloc == NULL)
                 {
- -                      /* out of input chars, for now at least */
- -                      goto out;
+ +                      freelocale(duploc);
+ +                      return NULL;
                 }
+ +              uselocale(newloc);
         }
- -      /* We checked the full length of "Infinity", so create the object.
- -       * When handling -Infinity, the number parsing code will have dropped
- -       * the "-" into tok->pb for us, so check it now.
- -       */
- -      if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
+ +#elif defined(HAVE_SETLOCALE)
         {
- -              is_negative = 1;
+ +              char *tmplocale;
+ +              tmplocale = setlocale(LC_NUMERIC, NULL);
+ +              if (tmplocale)
+ +                      oldlocale = strdup(tmplocale);
+ +              setlocale(LC_NUMERIC, "C");
         }
- -      current = json_object_new_double(is_negative
- -                                       ? -INFINITY : INFINITY);
- -      if (current == NULL)
- -              goto out;
- -      saved_state = json_tokener_state_finish;
- -      state = json_tokener_state_eatws;
- -      goto redo_char;
- -       
- -      }
- -      break;
- -    case json_tokener_state_null: /* aka starts with 'n' */
- -      {
- -      int size;
- -      int size_nan;
- -      printbuf_memappend_fast(tok->pb, &c, 1);
- -      size = json_min(tok->st_pos+1, json_null_str_len);
- -      size_nan = json_min(tok->st_pos+1, json_nan_str_len);
- -      if((!(tok->flags & JSON_TOKENER_STRICT) &&
- -        strncasecmp(json_null_str, tok->pb->buf, size) == 0)
- -        || (strncmp(json_null_str, tok->pb->buf, size) == 0)
- -        ) {
- -        if (tok->st_pos == json_null_str_len) {
- -          current = NULL;
- -          saved_state = json_tokener_state_finish;
- -          state = json_tokener_state_eatws;
- -          goto redo_char;
- -        }
- -      }
- -      else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
- -                strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
- -               (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0)
- -              )
+ +#endif
+ +
+ +      while (PEEK_CHAR(c, tok)) // Note: c might be '\0' !
         {
- -              if (tok->st_pos == json_nan_str_len)
+ +
+ +      redo_char:
+ +              switch (state)
                 {
- -                      current = json_object_new_double(NAN);
+ +
+ +              case json_tokener_state_eatws:
+ +                      /* Advance until we change state */
-                       while (isspace((unsigned char)c))
++                      while (is_ws_char(c))
+ +                      {
+ +                              if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
+ +                                      goto out;
+ +                      }
+ +                      if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT))
+ +                      {
+ +                              printbuf_reset(tok->pb);
+ +                              printbuf_memappend_fast(tok->pb, &c, 1);
+ +                              state = json_tokener_state_comment_start;
+ +                      }
+ +                      else
+ +                      {
+ +                              state = saved_state;
+ +                              goto redo_char;
+ +                      }
+ +                      break;
+ +
+ +              case json_tokener_state_start:
+ +                      switch (c)
+ +                      {
+ +                      case '{':
+ +                              state = json_tokener_state_eatws;
+ +                              saved_state = json_tokener_state_object_field_start;
+ +                              current = json_object_new_object();
+ +                              if (current == NULL)
+ +                                      goto out;
+ +                              break;
+ +                      case '[':
+ +                              state = json_tokener_state_eatws;
+ +                              saved_state = json_tokener_state_array;
+ +                              current = json_object_new_array();
+ +                              if (current == NULL)
+ +                                      goto out;
+ +                              break;
+ +                      case 'I':
+ +                      case 'i':
+ +                              state = json_tokener_state_inf;
+ +                              printbuf_reset(tok->pb);
+ +                              tok->st_pos = 0;
+ +                              goto redo_char;
+ +                      case 'N':
+ +                      case 'n':
+ +                              state = json_tokener_state_null; // or NaN
+ +                              printbuf_reset(tok->pb);
+ +                              tok->st_pos = 0;
+ +                              goto redo_char;
+ +                      case '\'':
+ +                              if (tok->flags & JSON_TOKENER_STRICT)
+ +                              {
+ +                                      /* in STRICT mode only double-quote are allowed */
+ +                                      tok->err = json_tokener_error_parse_unexpected;
+ +                                      goto out;
+ +                              }
+ +                              /* FALLTHRU */
+ +                      case '"':
+ +                              state = json_tokener_state_string;
+ +                              printbuf_reset(tok->pb);
+ +                              tok->quote_char = c;
+ +                              break;
+ +                      case 'T':
+ +                      case 't':
+ +                      case 'F':
+ +                      case 'f':
+ +                              state = json_tokener_state_boolean;
+ +                              printbuf_reset(tok->pb);
+ +                              tok->st_pos = 0;
+ +                              goto redo_char;
+ +                      case '0':
+ +                      case '1':
+ +                      case '2':
+ +                      case '3':
+ +                      case '4':
+ +                      case '5':
+ +                      case '6':
+ +                      case '7':
+ +                      case '8':
+ +                      case '9':
+ +                      case '-':
+ +                              state = json_tokener_state_number;
+ +                              printbuf_reset(tok->pb);
+ +                              tok->is_double = 0;
+ +                              goto redo_char;
+ +                      default: tok->err = json_tokener_error_parse_unexpected; goto out;
+ +                      }
+ +                      break;
+ +
+ +              case json_tokener_state_finish:
+ +                      if (tok->depth == 0)
+ +                              goto out;
+ +                      obj = json_object_get(current);
+ +                      json_tokener_reset_level(tok, tok->depth);
+ +                      tok->depth--;
+ +                      goto redo_char;
+ +
+ +              case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
+ +              {
+ +                      /* If we were guaranteed to have len set, then we could (usually) handle
+ +                       * the entire "Infinity" check in a single strncmp (strncasecmp), but
+ +                       * since len might be -1 (i.e. "read until \0"), we need to check it
+ +                       * a character at a time.
+ +                       * Trying to handle it both ways would make this code considerably more
+ +                       * complicated with likely little performance benefit.
+ +                       */
+ +                      int is_negative = 0;
+ +                      const char *_json_inf_str = json_inf_str;
+ +                      if (!(tok->flags & JSON_TOKENER_STRICT))
+ +                              _json_inf_str = json_inf_str_lower;
+ +
+ +                      /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
+ +                      while (tok->st_pos < (int)json_inf_str_len)
+ +                      {
+ +                              char inf_char = *str;
+ +                              if (!(tok->flags & JSON_TOKENER_STRICT))
+ +                                      inf_char = tolower((unsigned char)*str);
+ +                              if (inf_char != _json_inf_str[tok->st_pos])
+ +                              {
+ +                                      tok->err = json_tokener_error_parse_unexpected;
+ +                                      goto out;
+ +                              }
+ +                              tok->st_pos++;
+ +                              (void)ADVANCE_CHAR(str, tok);
+ +                              if (!PEEK_CHAR(c, tok))
+ +                              {
+ +                                      /* out of input chars, for now at least */
+ +                                      goto out;
+ +                              }
+ +                      }
+ +                      /* We checked the full length of "Infinity", so create the object.
+ +                       * When handling -Infinity, the number parsing code will have dropped
+ +                       * the "-" into tok->pb for us, so check it now.
+ +                       */
+ +                      if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
+ +                      {
+ +                              is_negative = 1;
+ +                      }
+ +                      current = json_object_new_double(is_negative ? -INFINITY : INFINITY);
                         if (current == NULL)
- -                          goto out;
+ +                              goto out;
                         saved_state = json_tokener_state_finish;
                         state = json_tokener_state_eatws;
                         goto redo_char;
                 }
- -      } else {
- -        tok->err = json_tokener_error_parse_null;
- -        goto out;
- -      }
- -      tok->st_pos++;
- -      }
- -      break;
- -
- -    case json_tokener_state_comment_start:
- -      if(c == '*') {
- -      state = json_tokener_state_comment;
- -      } else if(c == '/') {
- -      state = json_tokener_state_comment_eol;
- -      } else {
- -      tok->err = json_tokener_error_parse_comment;
- -      goto out;
- -      }
- -      printbuf_memappend_fast(tok->pb, &c, 1);
- -      break;
- -
- -    case json_tokener_state_comment:
- -              {
- -          /* Advance until we change state */
- -          const char *case_start = str;
- -          while(c != '*') {
- -            if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
- -              printbuf_memappend_fast(tok->pb, case_start, str-case_start);
- -              goto out;
- -            }
- -          }
- -          printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
- -          state = json_tokener_state_comment_end;
- -        }
- -            break;
- -
- -    case json_tokener_state_comment_eol:
- -      {
- -      /* Advance until we change state */
- -      const char *case_start = str;
- -      while(c != '\n') {
- -        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
- -          printbuf_memappend_fast(tok->pb, case_start, str-case_start);
- -          goto out;
- -        }
- -      }
- -      printbuf_memappend_fast(tok->pb, case_start, str-case_start);
- -      MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
- -      state = json_tokener_state_eatws;
- -      }
- -      break;
- -
- -    case json_tokener_state_comment_end:
- -      printbuf_memappend_fast(tok->pb, &c, 1);
- -      if(c == '/') {
- -      MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
- -      state = json_tokener_state_eatws;
- -      } else {
- -      state = json_tokener_state_comment;
- -      }
- -      break;
- -
- -    case json_tokener_state_string:
- -      {
- -      /* Advance until we change state */
- -      const char *case_start = str;
- -      while(1) {
- -        if(c == tok->quote_char) {
- -          printbuf_memappend_fast(tok->pb, case_start, str-case_start);
- -          current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
- -          if(current == NULL)
- -              goto out;
- -          saved_state = json_tokener_state_finish;
- -          state = json_tokener_state_eatws;
- -          break;
- -        } else if(c == '\\') {
- -          printbuf_memappend_fast(tok->pb, case_start, str-case_start);
- -          saved_state = json_tokener_state_string;
- -          state = json_tokener_state_string_escape;
- -          break;
- -        }
- -        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
- -          printbuf_memappend_fast(tok->pb, case_start, str-case_start);
- -          goto out;
- -        }
- -      }
- -      }
- -      break;
- -
- -    case json_tokener_state_string_escape:
- -      switch(c) {
- -      case '"':
- -      case '\\':
- -      case '/':
- -      printbuf_memappend_fast(tok->pb, &c, 1);
- -      state = saved_state;
- -      break;
- -      case 'b':
- -      case 'n':
- -      case 'r':
- -      case 't':
- -      case 'f':
- -      if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
- -      else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
- -      else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
- -      else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
- -      else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
- -      state = saved_state;
- -      break;
- -      case 'u':
- -      tok->ucs_char = 0;
- -      tok->st_pos = 0;
- -      state = json_tokener_state_escape_unicode;
- -      break;
- -      default:
- -      tok->err = json_tokener_error_parse_string;
- -      goto out;
- -      }
- -      break;
- -
- -    case json_tokener_state_escape_unicode:
- -      {
- -          unsigned int got_hi_surrogate = 0;
- -
- -        /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
- -        while(1) {
- -          if (c && is_hex_char(c)) {
- -            tok->ucs_char += ((unsigned int)jt_hexdigit(c) << ((3-tok->st_pos++)*4));
- -            if(tok->st_pos == 4) {
- -              unsigned char unescaped_utf[4];
- -
- -                if (got_hi_surrogate) {
- -                if (IS_LOW_SURROGATE(tok->ucs_char)) {
- -                    /* Recalculate the ucs_char, then fall thru to process normally */
- -                    tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
- -                  } else {
- -                    /* Hi surrogate was not followed by a low surrogate */
- -                    /* Replace the hi and process the rest normally */
- -                  printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
- -                  }
- -                  got_hi_surrogate = 0;
- -                }
- -
- -              if (tok->ucs_char < 0x80) {
- -                unescaped_utf[0] = tok->ucs_char;
- -                printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
- -              } else if (tok->ucs_char < 0x800) {
- -                unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
- -                unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
- -                printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
- -              } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
- -                  /* Got a high surrogate.  Remember it and look for the
- -                   * the beginning of another sequence, which should be the
- -                   * low surrogate.
- -                   */
- -                  got_hi_surrogate = tok->ucs_char;
- -                  /* Not at end, and the next two chars should be "\u" */
- -                  if ((len == -1 || len > (tok->char_offset + 2)) &&
- -                      // str[0] != '0' &&  // implied by is_hex_char, above.
- -                      (str[1] == '\\') &&
- -                      (str[2] == 'u'))
- -                  {
- -                /* Advance through the 16 bit surrogate, and move on to the
- -                 * next sequence. The next step is to process the following
- -                 * characters.
- -                 */
- -                  if( !ADVANCE_CHAR(str, tok) || !ADVANCE_CHAR(str, tok) ) {
- -                    printbuf_memappend_fast(tok->pb,
- -                                          (char*) utf8_replacement_char, 3);
- -                  }
- -                    /* Advance to the first char of the next sequence and
- -                     * continue processing with the next sequence.
- -                     */
- -                  if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
- -                    printbuf_memappend_fast(tok->pb,
- -                                            (char*) utf8_replacement_char, 3);
- -                    goto out;
- -                    }
- -                  tok->ucs_char = 0;
- -                    tok->st_pos = 0;
- -                    continue; /* other json_tokener_state_escape_unicode */
- -                  } else {
- -                    /* Got a high surrogate without another sequence following
- -                     * it.  Put a replacement char in for the hi surrogate
- -                     * and pretend we finished.
- -                     */
- -                  printbuf_memappend_fast(tok->pb,
- -                                          (char*) utf8_replacement_char, 3);
- -                  }
- -              } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
- -                  /* Got a low surrogate not preceded by a high */
- -                printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
- -                } else if (tok->ucs_char < 0x10000) {
- -                unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
- -                unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
- -                unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
- -                printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
- -              } else if (tok->ucs_char < 0x110000) {
- -                unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
- -                unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
- -                unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
- -                unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
- -                printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
- -              } else {
- -                  /* Don't know what we got--insert the replacement char */
- -                printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
- -                }
- -              state = saved_state;
                 break;
- -            }
- -          } else {
- -            tok->err = json_tokener_error_parse_string;
- -            goto out;
- -          }
- -        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
- -            if (got_hi_surrogate) /* Clean up any pending chars */
- -            printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
- -          goto out;
- -        }
- -      }
- -      }
- -      break;
- -
- -    case json_tokener_state_boolean:
- -      {
- -      int size1, size2;
- -      printbuf_memappend_fast(tok->pb, &c, 1);
- -      size1 = json_min(tok->st_pos+1, json_true_str_len);
- -      size2 = json_min(tok->st_pos+1, json_false_str_len);
- -      if((!(tok->flags & JSON_TOKENER_STRICT) &&
- -        strncasecmp(json_true_str, tok->pb->buf, size1) == 0)
- -        || (strncmp(json_true_str, tok->pb->buf, size1) == 0)
- -        ) {
- -        if(tok->st_pos == json_true_str_len) {
- -          current = json_object_new_boolean(1);
- -          if(current == NULL)
- -              goto out;
- -          saved_state = json_tokener_state_finish;
- -          state = json_tokener_state_eatws;
- -          goto redo_char;
- -        }
- -      } else if((!(tok->flags & JSON_TOKENER_STRICT) &&
- -        strncasecmp(json_false_str, tok->pb->buf, size2) == 0)
- -        || (strncmp(json_false_str, tok->pb->buf, size2) == 0)) {
- -        if(tok->st_pos == json_false_str_len) {
- -          current = json_object_new_boolean(0);
- -          if(current == NULL)
- -              goto out;
- -          saved_state = json_tokener_state_finish;
- -          state = json_tokener_state_eatws;
- -          goto redo_char;
- -        }
- -      } else {
- -        tok->err = json_tokener_error_parse_boolean;
- -        goto out;
- -      }
- -      tok->st_pos++;
- -      }
- -      break;
- -
- -    case json_tokener_state_number:
- -      {
- -      /* Advance until we change state */
- -      const char *case_start = str;
- -      int case_len=0;
- -      int is_exponent=0;
- -      int negativesign_next_possible_location=1;
- -      while(c && is_number_char(c)) {
- -        ++case_len;
- -
- -        /* non-digit characters checks */
- -        /* note: since the main loop condition to get here was
- -                 an input starting with 0-9 or '-', we are
- -                 protected from input starting with '.' or
- -                 e/E. */
- -        if (c == '.') {
- -          if (tok->is_double != 0) {
- -            /* '.' can only be found once, and out of the exponent part.
- -               Thus, if the input is already flagged as double, it
- -               is invalid. */
- -            tok->err = json_tokener_error_parse_number;
- -            goto out;
- -          }
- -          tok->is_double = 1;
- -        }
- -        if (c == 'e' || c == 'E') {
- -          if (is_exponent != 0) {
- -            /* only one exponent possible */
- -            tok->err = json_tokener_error_parse_number;
- -            goto out;
- -          }
- -          is_exponent = 1;
- -          tok->is_double = 1;
- -          /* the exponent part can begin with a negative sign */
- -          negativesign_next_possible_location = case_len + 1;
- -        }
- -        if (c == '-' && case_len != negativesign_next_possible_location) {
- -          /* If the negative sign is not where expected (ie
- -             start of input or start of exponent part), the
- -             input is invalid. */
- -          tok->err = json_tokener_error_parse_number;
- -          goto out;
- -        }
- -
- -        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
- -          printbuf_memappend_fast(tok->pb, case_start, case_len);
- -          goto out;
- -        }
- -      }
- -        if (case_len>0)
- -          printbuf_memappend_fast(tok->pb, case_start, case_len);
+ +              case json_tokener_state_null: /* aka starts with 'n' */
+ +              {
+ +                      int size;
+ +                      int size_nan;
+ +                      printbuf_memappend_fast(tok->pb, &c, 1);
+ +                      size = json_min(tok->st_pos + 1, json_null_str_len);
+ +                      size_nan = json_min(tok->st_pos + 1, json_nan_str_len);
+ +                      if ((!(tok->flags & JSON_TOKENER_STRICT) &&
+ +                           strncasecmp(json_null_str, tok->pb->buf, size) == 0) ||
+ +                          (strncmp(json_null_str, tok->pb->buf, size) == 0))
+ +                      {
+ +                              if (tok->st_pos == json_null_str_len)
+ +                              {
+ +                                      current = NULL;
+ +                                      saved_state = json_tokener_state_finish;
+ +                                      state = json_tokener_state_eatws;
+ +                                      goto redo_char;
+ +                              }
+ +                      }
+ +                      else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
+ +                                strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
+ +                               (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0))
+ +                      {
+ +                              if (tok->st_pos == json_nan_str_len)
+ +                              {
+ +                                      current = json_object_new_double(NAN);
+ +                                      if (current == NULL)
+ +                                              goto out;
+ +                                      saved_state = json_tokener_state_finish;
+ +                                      state = json_tokener_state_eatws;
+ +                                      goto redo_char;
+ +                              }
+ +                      }
+ +                      else
+ +                      {
+ +                              tok->err = json_tokener_error_parse_null;
+ +                              goto out;
+ +                      }
+ +                      tok->st_pos++;
+ +              }
+ +              break;
   
- -      // Check for -Infinity
- -      if (tok->pb->buf[0] == '-' && case_len <= 1 &&
- -          (c == 'i' || c == 'I'))
- -      {
- -              state = json_tokener_state_inf;
- -              tok->st_pos = 0;
- -              goto redo_char;
- -      }
- -      }
- -      {
- -      int64_t num64;
- -      double  numd;
- -      if (!tok->is_double && json_parse_sanitized_int64(tok->pb->buf, tok->pb->bpos, &num64) == 0) {
- -              if (num64 && tok->pb->buf[0]=='0' &&
- -                  (tok->flags & JSON_TOKENER_STRICT)) {
- -                      /* in strict mode, number must not start with 0 */
- -                      tok->err = json_tokener_error_parse_number;
- -                      goto out;
+ +              case json_tokener_state_comment_start:
+ +                      if (c == '*')
+ +                      {
+ +                              state = json_tokener_state_comment;
+ +                      }
+ +                      else if (c == '/')
+ +                      {
+ +                              state = json_tokener_state_comment_eol;
+ +                      }
+ +                      else
+ +                      {
+ +                              tok->err = json_tokener_error_parse_comment;
+ +                              goto out;
+ +                      }
+ +                      printbuf_memappend_fast(tok->pb, &c, 1);
+ +                      break;
+ +
+ +              case json_tokener_state_comment:
+ +              {
+ +                      /* Advance until we change state */
+ +                      const char *case_start = str;
+ +                      while (c != '*')
+ +                      {
+ +                              if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ +                              {
+ +                                      printbuf_memappend_fast(tok->pb, case_start,
+ +                                                              str - case_start);
+ +                                      goto out;
+ +                              }
+ +                      }
+ +                      printbuf_memappend_fast(tok->pb, case_start, 1 + str - case_start);
+ +                      state = json_tokener_state_comment_end;
+ +              }
+ +              break;
+ +
+ +              case json_tokener_state_comment_eol:
+ +              {
+ +                      /* Advance until we change state */
+ +                      const char *case_start = str;
+ +                      while (c != '\n')
+ +                      {
+ +                              if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ +                              {
+ +                                      printbuf_memappend_fast(tok->pb, case_start,
+ +                                                              str - case_start);
+ +                                      goto out;
+ +                              }
+ +                      }
+ +                      printbuf_memappend_fast(tok->pb, case_start, str - case_start);
+ +                      MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
+ +                      state = json_tokener_state_eatws;
+ +              }
+ +              break;
+ +
+ +              case json_tokener_state_comment_end:
+ +                      printbuf_memappend_fast(tok->pb, &c, 1);
+ +                      if (c == '/')
+ +                      {
+ +                              MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
+ +                              state = json_tokener_state_eatws;
+ +                      }
+ +                      else
+ +                      {
+ +                              state = json_tokener_state_comment;
+ +                      }
+ +                      break;
+ +
+ +              case json_tokener_state_string:
+ +              {
+ +                      /* Advance until we change state */
+ +                      const char *case_start = str;
+ +                      while (1)
+ +                      {
+ +                              if (c == tok->quote_char)
+ +                              {
+ +                                      printbuf_memappend_fast(tok->pb, case_start,
+ +                                                              str - case_start);
+ +                                      current =
+ +                                          json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
+ +                                      if (current == NULL)
+ +                                              goto out;
+ +                                      saved_state = json_tokener_state_finish;
+ +                                      state = json_tokener_state_eatws;
+ +                                      break;
+ +                              }
+ +                              else if (c == '\\')
+ +                              {
+ +                                      printbuf_memappend_fast(tok->pb, case_start,
+ +                                                              str - case_start);
+ +                                      saved_state = json_tokener_state_string;
+ +                                      state = json_tokener_state_string_escape;
+ +                                      break;
+ +                              }
+ +                              if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ +                              {
+ +                                      printbuf_memappend_fast(tok->pb, case_start,
+ +                                                              str - case_start);
+ +                                      goto out;
+ +                              }
+ +                      }
+ +              }
+ +              break;
+ +
+ +              case json_tokener_state_string_escape:
+ +                      switch (c)
+ +                      {
+ +                      case '"':
+ +                      case '\\':
+ +                      case '/':
+ +                              printbuf_memappend_fast(tok->pb, &c, 1);
+ +                              state = saved_state;
+ +                              break;
+ +                      case 'b':
+ +                      case 'n':
+ +                      case 'r':
+ +                      case 't':
+ +                      case 'f':
+ +                              if (c == 'b')
+ +                                      printbuf_memappend_fast(tok->pb, "\b", 1);
+ +                              else if (c == 'n')
+ +                                      printbuf_memappend_fast(tok->pb, "\n", 1);
+ +                              else if (c == 'r')
+ +                                      printbuf_memappend_fast(tok->pb, "\r", 1);
+ +                              else if (c == 't')
+ +                                      printbuf_memappend_fast(tok->pb, "\t", 1);
+ +                              else if (c == 'f')
+ +                                      printbuf_memappend_fast(tok->pb, "\f", 1);
+ +                              state = saved_state;
+ +                              break;
+ +                      case 'u':
+ +                              tok->ucs_char = 0;
+ +                              tok->st_pos = 0;
+ +                              state = json_tokener_state_escape_unicode;
+ +                              break;
+ +                      default: tok->err = json_tokener_error_parse_string; goto out;
+ +                      }
+ +                      break;
+ +
+ +                      // ===================================================
+ +
+ +              case json_tokener_state_escape_unicode:
+ +              {
+ +                      /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */
+ +                      while (1)
+ +                      {
-                               if (!c || !strchr(json_hex_chars, c))
++                              if (!c || !is_hex_char(c))
+ +                              {
+ +                                      tok->err = json_tokener_error_parse_string;
+ +                                      goto out;
+ +                              }
+ +                              tok->ucs_char |=
+ +                                  ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4));
+ +                              tok->st_pos++;
+ +                              if (tok->st_pos >= 4)
+ +                                      break;
+ +
+ +                              (void)ADVANCE_CHAR(str, tok);
+ +                              if (!PEEK_CHAR(c, tok))
+ +                              {
+ +                                      /*
+ +                                       * We're out of characters in the current call to
+ +                                       * json_tokener_parse(), but a subsequent call might
+ +                                       * provide us with more, so leave our current state
+ +                                       * as-is (including tok->high_surrogate) and return.
+ +                                       */
+ +                                      goto out;
+ +                              }
+ +                      }
+ +                      tok->st_pos = 0;
+ +
+ +                      /* Now, we have a full \uNNNN sequence in tok->ucs_char */
+ +
+ +                      /* If the *previous* sequence was a high surrogate ... */
+ +                      if (tok->high_surrogate)
+ +                      {
+ +                              if (IS_LOW_SURROGATE(tok->ucs_char))
+ +                              {
+ +                                      /* Recalculate the ucs_char, then fall thru to process normally */
+ +                                      tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate,
+ +                                                                            tok->ucs_char);
+ +                              }
+ +                              else
+ +                              {
+ +                                      /* High surrogate was not followed by a low surrogate
+ +                                       * Replace the high and process the rest normally
+ +                                       */
+ +                                      printbuf_memappend_fast(tok->pb,
+ +                                                              (char *)utf8_replacement_char, 3);
+ +                              }
+ +                              tok->high_surrogate = 0;
+ +                      }
+ +
+ +                      if (tok->ucs_char < 0x80)
+ +                      {
+ +                              unsigned char unescaped_utf[1];
+ +                              unescaped_utf[0] = tok->ucs_char;
+ +                              printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 1);
+ +                      }
+ +                      else if (tok->ucs_char < 0x800)
+ +                      {
+ +                              unsigned char unescaped_utf[2];
+ +                              unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
+ +                              unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
+ +                              printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 2);
+ +                      }
+ +                      else if (IS_HIGH_SURROGATE(tok->ucs_char))
+ +                      {
+ +                              /*
+ +                               * The next two characters should be \u, HOWEVER,
+ +                               * we can't simply peek ahead here, because the
+ +                               * characters we need might not be passed to us
+ +                               * until a subsequent call to json_tokener_parse.
+ +                               * Instead, transition through a couple of states.
+ +                               * (now):
+ +                               *   _escape_unicode => _unicode_need_escape
+ +                               * (see a '\\' char):
+ +                               *   _unicode_need_escape => _unicode_need_u
+ +                               * (see a 'u' char):
+ +                               *   _unicode_need_u => _escape_unicode
+ +                               *      ...and we'll end up back around here.
+ +                               */
+ +                              tok->high_surrogate = tok->ucs_char;
+ +                              tok->ucs_char = 0;
+ +                              state = json_tokener_state_escape_unicode_need_escape;
+ +                              break;
+ +                      }
+ +                      else if (IS_LOW_SURROGATE(tok->ucs_char))
+ +                      {
+ +                              /* Got a low surrogate not preceded by a high */
+ +                              printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
+ +                      }
+ +                      else if (tok->ucs_char < 0x10000)
+ +                      {
+ +                              unsigned char unescaped_utf[3];
+ +                              unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
+ +                              unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
+ +                              unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
+ +                              printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 3);
+ +                      }
+ +                      else if (tok->ucs_char < 0x110000)
+ +                      {
+ +                              unsigned char unescaped_utf[4];
+ +                              unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
+ +                              unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
+ +                              unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
+ +                              unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
+ +                              printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 4);
+ +                      }
+ +                      else
+ +                      {
+ +                              /* Don't know what we got--insert the replacement char */
+ +                              printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
+ +                      }
+ +                      state = saved_state; // i.e. _state_string or _state_object_field
                 }
- -              current = json_object_new_int64(num64);
- -              if(current == NULL)
- -                  goto out;
+ +              break;
+ +
+ +              case json_tokener_state_escape_unicode_need_escape:
+ +                      // We get here after processing a high_surrogate
+ +                      // require a '\\' char
+ +                      if (!c || c != '\\')
+ +                      {
+ +                              /* Got a high surrogate without another sequence following
+ +                               * it.  Put a replacement char in for the high surrogate
+ +                               * and pop back up to _state_string or _state_object_field.
+ +                               */
+ +                              printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
+ +                              tok->high_surrogate = 0;
+ +                              tok->ucs_char = 0;
+ +                              tok->st_pos = 0;
+ +                              state = saved_state;
+ +                              goto redo_char;
+ +                      }
+ +                      state = json_tokener_state_escape_unicode_need_u;
+ +                      break;
+ +
+ +              case json_tokener_state_escape_unicode_need_u:
+ +                      /* We already had a \ char, check that it's \u */
+ +                      if (!c || c != 'u')
+ +                      {
+ +                              /* Got a high surrogate with some non-unicode escape
+ +                               * sequence following it.
+ +                               * Put a replacement char in for the high surrogate
+ +                               * and handle the escape sequence normally.
+ +                               */
+ +                              printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
+ +                              tok->high_surrogate = 0;
+ +                              tok->ucs_char = 0;
+ +                              tok->st_pos = 0;
+ +                              state = json_tokener_state_string_escape;
+ +                              goto redo_char;
+ +                      }
+ +                      state = json_tokener_state_escape_unicode;
+ +                      break;
+ +
+ +                      // ===================================================
+ +
+ +              case json_tokener_state_boolean:
+ +              {
+ +                      int size1, size2;
+ +                      printbuf_memappend_fast(tok->pb, &c, 1);
+ +                      size1 = json_min(tok->st_pos + 1, json_true_str_len);
+ +                      size2 = json_min(tok->st_pos + 1, json_false_str_len);
+ +                      if ((!(tok->flags & JSON_TOKENER_STRICT) &&
+ +                           strncasecmp(json_true_str, tok->pb->buf, size1) == 0) ||
+ +                          (strncmp(json_true_str, tok->pb->buf, size1) == 0))
+ +                      {
+ +                              if (tok->st_pos == json_true_str_len)
+ +                              {
+ +                                      current = json_object_new_boolean(1);
+ +                                      if (current == NULL)
+ +                                              goto out;
+ +                                      saved_state = json_tokener_state_finish;
+ +                                      state = json_tokener_state_eatws;
+ +                                      goto redo_char;
+ +                              }
+ +                      }
+ +                      else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
+ +                                strncasecmp(json_false_str, tok->pb->buf, size2) == 0) ||
+ +                               (strncmp(json_false_str, tok->pb->buf, size2) == 0))
+ +                      {
+ +                              if (tok->st_pos == json_false_str_len)
+ +                              {
+ +                                      current = json_object_new_boolean(0);
+ +                                      if (current == NULL)
+ +                                              goto out;
+ +                                      saved_state = json_tokener_state_finish;
+ +                                      state = json_tokener_state_eatws;
+ +                                      goto redo_char;
+ +                              }
+ +                      }
+ +                      else
+ +                      {
+ +                              tok->err = json_tokener_error_parse_boolean;
+ +                              goto out;
+ +                      }
+ +                      tok->st_pos++;
+ +              }
+ +              break;
+ +
+ +              case json_tokener_state_number:
+ +              {
+ +                      /* Advance until we change state */
+ +                      const char *case_start = str;
+ +                      int case_len = 0;
+ +                      int is_exponent = 0;
+ +                      int neg_sign_ok = 1;
+ +                      int pos_sign_ok = 0;
+ +                      if (printbuf_length(tok->pb) > 0)
+ +                      {
+ +                              /* We don't save all state from the previous incremental parse
+ +                                 so we need to re-generate it based on the saved string so far.
+ +                               */
+ +                              char *e_loc = strchr(tok->pb->buf, 'e');
+ +                              if (!e_loc)
+ +                                      e_loc = strchr(tok->pb->buf, 'E');
+ +                              if (e_loc)
+ +                              {
+ +                                      char *last_saved_char =
+ +                                          &tok->pb->buf[printbuf_length(tok->pb) - 1];
+ +                                      is_exponent = 1;
+ +                                      pos_sign_ok = neg_sign_ok = 1;
+ +                                      /* If the "e" isn't at the end, we can't start with a '-' */
+ +                                      if (e_loc != last_saved_char)
+ +                                      {
+ +                                              neg_sign_ok = 0;
+ +                                              pos_sign_ok = 0;
+ +                                      }
+ +                                      // else leave it set to 1, i.e. start of the new input
+ +                              }
+ +                      }
+ +
+ +                      while (c && ((c >= '0' && c <= '9') ||
+ +                                   (!is_exponent && (c == 'e' || c == 'E')) ||
+ +                                   (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') ||
+ +                                   (!tok->is_double && c == '.')))
+ +                      {
+ +                              pos_sign_ok = neg_sign_ok = 0;
+ +                              ++case_len;
+ +
+ +                              /* non-digit characters checks */
+ +                              /* note: since the main loop condition to get here was
+ +                               * an input starting with 0-9 or '-', we are
+ +                               * protected from input starting with '.' or
+ +                               * e/E.
+ +                               */
+ +                              switch (c)
+ +                              {
+ +                              case '.':
+ +                                      tok->is_double = 1;
+ +                                      pos_sign_ok = 1;
+ +                                      neg_sign_ok = 1;
+ +                                      break;
+ +                              case 'e': /* FALLTHRU */
+ +                              case 'E':
+ +                                      is_exponent = 1;
+ +                                      tok->is_double = 1;
+ +                                      /* the exponent part can begin with a negative sign */
+ +                                      pos_sign_ok = neg_sign_ok = 1;
+ +                                      break;
+ +                              default: break;
+ +                              }
+ +
+ +                              if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ +                              {
+ +                                      printbuf_memappend_fast(tok->pb, case_start, case_len);
+ +                                      goto out;
+ +                              }
+ +                      }
+ +                      /*
+ +                              Now we know c isn't a valid number char, but check whether
+ +                              it might have been intended to be, and return a potentially
+ +                              more understandable error right away.
+ +                              However, if we're at the top-level, use the number as-is
+ +                          because c can be part of a new object to parse on the
+ +                              next call to json_tokener_parse().
+ +                       */
+ +                      if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' &&
-                           c != 'I' && c != 'i' && !isspace((unsigned char)c))
++                          c != 'I' && c != 'i' && !is_ws_char(c))
+ +                      {
+ +                              tok->err = json_tokener_error_parse_number;
+ +                              goto out;
+ +                      }
+ +                      if (case_len > 0)
+ +                              printbuf_memappend_fast(tok->pb, case_start, case_len);
+ +
+ +                      // Check for -Infinity
+ +                      if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I'))
+ +                      {
+ +                              state = json_tokener_state_inf;
+ +                              tok->st_pos = 0;
+ +                              goto redo_char;
+ +                      }
+ +                      if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT))
+ +                      {
+ +                              /* Trim some chars off the end, to allow things
+ +                                 like "123e+" to parse ok. */
+ +                              while (printbuf_length(tok->pb) > 1)
+ +                              {
+ +                                      char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1];
+ +                                      if (last_char != 'e' && last_char != 'E' &&
+ +                                          last_char != '-' && last_char != '+')
+ +                                      {
+ +                                              break;
+ +                                      }
+ +                                      tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0';
+ +                                      printbuf_length(tok->pb)--;
+ +                              }
+ +                      }
+ +              }
+ +                      {
+ +                              int64_t num64;
+ +                              uint64_t numuint64;
+ +                              double numd;
+ +                              if (!tok->is_double && tok->pb->buf[0] == '-' &&
+ +                                  json_parse_int64(tok->pb->buf, &num64) == 0)
+ +                              {
+ +                                      current = json_object_new_int64(num64);
+ +                                      if (current == NULL)
+ +                                              goto out;
+ +                              }
+ +                              else if (!tok->is_double && tok->pb->buf[0] != '-' &&
+ +                                       json_parse_uint64(tok->pb->buf, &numuint64) == 0)
+ +                              {
+ +                                      if (numuint64 && tok->pb->buf[0] == '0' &&
+ +                                          (tok->flags & JSON_TOKENER_STRICT))
+ +                                      {
+ +                                              tok->err = json_tokener_error_parse_number;
+ +                                              goto out;
+ +                                      }
+ +                                      if (numuint64 <= INT64_MAX)
+ +                                      {
+ +                                              num64 = (uint64_t)numuint64;
+ +                                              current = json_object_new_int64(num64);
+ +                                              if (current == NULL)
+ +                                                      goto out;
+ +                                      }
+ +                                      else
+ +                                      {
+ +                                              current = json_object_new_uint64(numuint64);
+ +                                              if (current == NULL)
+ +                                                      goto out;
+ +                                      }
+ +                              }
+ +                              else if (tok->is_double &&
+ +                                       json_tokener_parse_double(
+ +                                           tok->pb->buf, printbuf_length(tok->pb), &numd) == 0)
+ +                              {
+ +                                      current = json_object_new_double_s(numd, tok->pb->buf);
+ +                                      if (current == NULL)
+ +                                              goto out;
+ +                              }
+ +                              else
+ +                              {
+ +                                      tok->err = json_tokener_error_parse_number;
+ +                                      goto out;
+ +                              }
+ +                              saved_state = json_tokener_state_finish;
+ +                              state = json_tokener_state_eatws;
+ +                              goto redo_char;
+ +                      }
+ +                      break;
+ +
+ +              case json_tokener_state_array_after_sep:
+ +              case json_tokener_state_array:
+ +                      if (c == ']')
+ +                      {
+ +                              // Minimize memory usage; assume parsed objs are unlikely to be changed
+ +                              json_object_array_shrink(current, 0);
+ +
+ +                              if (state == json_tokener_state_array_after_sep &&
+ +                                  (tok->flags & JSON_TOKENER_STRICT))
+ +                              {
+ +                                      tok->err = json_tokener_error_parse_unexpected;
+ +                                      goto out;
+ +                              }
+ +                              saved_state = json_tokener_state_finish;
+ +                              state = json_tokener_state_eatws;
+ +                      }
+ +                      else
+ +                      {
+ +                              if (tok->depth >= tok->max_depth - 1)
+ +                              {
+ +                                      tok->err = json_tokener_error_depth;
+ +                                      goto out;
+ +                              }
+ +                              state = json_tokener_state_array_add;
+ +                              tok->depth++;
+ +                              json_tokener_reset_level(tok, tok->depth);
+ +                              goto redo_char;
+ +                      }
+ +                      break;
+ +
+ +              case json_tokener_state_array_add:
+ +                      if (json_object_array_add(current, obj) != 0)
+ +                              goto out;
+ +                      saved_state = json_tokener_state_array_sep;
+ +                      state = json_tokener_state_eatws;
+ +                      goto redo_char;
+ +
+ +              case json_tokener_state_array_sep:
+ +                      if (c == ']')
+ +                      {
+ +                              // Minimize memory usage; assume parsed objs are unlikely to be changed
+ +                              json_object_array_shrink(current, 0);
+ +
+ +                              saved_state = json_tokener_state_finish;
+ +                              state = json_tokener_state_eatws;
+ +                      }
+ +                      else if (c == ',')
+ +                      {
+ +                              saved_state = json_tokener_state_array_after_sep;
+ +                              state = json_tokener_state_eatws;
+ +                      }
+ +                      else
+ +                      {
+ +                              tok->err = json_tokener_error_parse_array;
+ +                              goto out;
+ +                      }
+ +                      break;
+ +
+ +              case json_tokener_state_object_field_start:
+ +              case json_tokener_state_object_field_start_after_sep:
+ +                      if (c == '}')
+ +                      {
+ +                              if (state == json_tokener_state_object_field_start_after_sep &&
+ +                                  (tok->flags & JSON_TOKENER_STRICT))
+ +                              {
+ +                                      tok->err = json_tokener_error_parse_unexpected;
+ +                                      goto out;
+ +                              }
+ +                              saved_state = json_tokener_state_finish;
+ +                              state = json_tokener_state_eatws;
+ +                      }
+ +                      else if (c == '"' || c == '\'')
+ +                      {
+ +                              tok->quote_char = c;
+ +                              printbuf_reset(tok->pb);
+ +                              state = json_tokener_state_object_field;
+ +                      }
+ +                      else
+ +                      {
+ +                              tok->err = json_tokener_error_parse_object_key_name;
+ +                              goto out;
+ +                      }
+ +                      break;
+ +
+ +              case json_tokener_state_object_field:
+ +              {
+ +                      /* Advance until we change state */
+ +                      const char *case_start = str;
+ +                      while (1)
+ +                      {
+ +                              if (c == tok->quote_char)
+ +                              {
+ +                                      printbuf_memappend_fast(tok->pb, case_start,
+ +                                                              str - case_start);
+ +                                      obj_field_name = strdup(tok->pb->buf);
+ +                                      saved_state = json_tokener_state_object_field_end;
+ +                                      state = json_tokener_state_eatws;
+ +                                      break;
+ +                              }
+ +                              else if (c == '\\')
+ +                              {
+ +                                      printbuf_memappend_fast(tok->pb, case_start,
+ +                                                              str - case_start);
+ +                                      saved_state = json_tokener_state_object_field;
+ +                                      state = json_tokener_state_string_escape;
+ +                                      break;
+ +                              }
+ +                              if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ +                              {
+ +                                      printbuf_memappend_fast(tok->pb, case_start,
+ +                                                              str - case_start);
+ +                                      goto out;
+ +                              }
+ +                      }
+ +              }
+ +              break;
+ +
+ +              case json_tokener_state_object_field_end:
+ +                      if (c == ':')
+ +                      {
+ +                              saved_state = json_tokener_state_object_value;
+ +                              state = json_tokener_state_eatws;
+ +                      }
+ +                      else
+ +                      {
+ +                              tok->err = json_tokener_error_parse_object_key_sep;
+ +                              goto out;
+ +                      }
+ +                      break;
+ +
+ +              case json_tokener_state_object_value:
+ +                      if (tok->depth >= tok->max_depth - 1)
+ +                      {
+ +                              tok->err = json_tokener_error_depth;
+ +                              goto out;
+ +                      }
+ +                      state = json_tokener_state_object_value_add;
+ +                      tok->depth++;
+ +                      json_tokener_reset_level(tok, tok->depth);
+ +                      goto redo_char;
+ +
+ +              case json_tokener_state_object_value_add:
+ +                      json_object_object_add(current, obj_field_name, obj);
+ +                      free(obj_field_name);
+ +                      obj_field_name = NULL;
+ +                      saved_state = json_tokener_state_object_sep;
+ +                      state = json_tokener_state_eatws;
+ +                      goto redo_char;
+ +
+ +              case json_tokener_state_object_sep:
+ +                      /* { */
+ +                      if (c == '}')
+ +                      {
+ +                              saved_state = json_tokener_state_finish;
+ +                              state = json_tokener_state_eatws;
+ +                      }
+ +                      else if (c == ',')
+ +                      {
+ +                              saved_state = json_tokener_state_object_field_start_after_sep;
+ +                              state = json_tokener_state_eatws;
+ +                      }
+ +                      else
+ +                      {
+ +                              tok->err = json_tokener_error_parse_object_value_sep;
+ +                              goto out;
+ +                      }
+ +                      break;
+ +              }
+ +              (void)ADVANCE_CHAR(str, tok);
+ +              if (!c) // This is the char *before* advancing
+ +                      break;
+ +      } /* while(PEEK_CHAR) */
+ +
+ +out:
+ +      if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0))
+ +      {
+ +              tok->err = json_tokener_error_parse_utf8_string;
         }
- -      else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0)
+ +      if (c && (state == json_tokener_state_finish) && (tok->depth == 0) &&
+ +          (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) ==
+ +              JSON_TOKENER_STRICT)
         {
- -          current = json_object_new_double_s(numd, tok->pb->buf);
- -        if(current == NULL)
- -              goto out;
- -        } else {
- -          tok->err = json_tokener_error_parse_number;
- -          goto out;
- -        }
- -        saved_state = json_tokener_state_finish;
- -        state = json_tokener_state_eatws;
- -        goto redo_char;
- -      }
- -      break;
- -
- -    case json_tokener_state_array_after_sep:
- -    case json_tokener_state_array:
- -      if(c == ']') {
- -      if (state == json_tokener_state_array_after_sep &&
- -          (tok->flags & JSON_TOKENER_STRICT))
- -        {
- -          tok->err = json_tokener_error_parse_unexpected;
- -          goto out;
- -        }
- -      saved_state = json_tokener_state_finish;
- -      state = json_tokener_state_eatws;
- -      } else {
- -      if(tok->depth >= tok->max_depth-1) {
- -        tok->err = json_tokener_error_depth;
- -        goto out;
+ +              /* unexpected char after JSON data */
+ +              tok->err = json_tokener_error_parse_unexpected;
         }
- -      state = json_tokener_state_array_add;
- -      tok->depth++;
- -      json_tokener_reset_level(tok, tok->depth);
- -      goto redo_char;
- -      }
- -      break;
- -
- -    case json_tokener_state_array_add:
- -      if( json_object_array_add(current, obj) != 0 )
- -        goto out;
- -      saved_state = json_tokener_state_array_sep;
- -      state = json_tokener_state_eatws;
- -      goto redo_char;
- -
- -    case json_tokener_state_array_sep:
- -      if(c == ']') {
- -      saved_state = json_tokener_state_finish;
- -      state = json_tokener_state_eatws;
- -      } else if(c == ',') {
- -      saved_state = json_tokener_state_array_after_sep;
- -      state = json_tokener_state_eatws;
- -      } else {
- -      tok->err = json_tokener_error_parse_array;
- -      goto out;
- -      }
- -      break;
- -
- -    case json_tokener_state_object_field_start:
- -    case json_tokener_state_object_field_start_after_sep:
- -      if(c == '}') {
- -              if (state == json_tokener_state_object_field_start_after_sep &&
- -                  (tok->flags & JSON_TOKENER_STRICT))
- -              {
- -                      tok->err = json_tokener_error_parse_unexpected;
- -                      goto out;
- -              }
- -      saved_state = json_tokener_state_finish;
- -      state = json_tokener_state_eatws;
- -      } else if (c == '"' || c == '\'') {
- -      tok->quote_char = c;
- -      printbuf_reset(tok->pb);
- -      state = json_tokener_state_object_field;
- -      } else {
- -      tok->err = json_tokener_error_parse_object_key_name;
- -      goto out;
- -      }
- -      break;
- -
- -    case json_tokener_state_object_field:
- -      {
- -      /* Advance until we change state */
- -      const char *case_start = str;
- -      while(1) {
- -        if(c == tok->quote_char) {
- -          printbuf_memappend_fast(tok->pb, case_start, str-case_start);
- -          obj_field_name = strdup(tok->pb->buf);
- -          saved_state = json_tokener_state_object_field_end;
- -          state = json_tokener_state_eatws;
- -          break;
- -        } else if(c == '\\') {
- -          printbuf_memappend_fast(tok->pb, case_start, str-case_start);
- -          saved_state = json_tokener_state_object_field;
- -          state = json_tokener_state_string_escape;
- -          break;
- -        }
- -        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
- -          printbuf_memappend_fast(tok->pb, case_start, str-case_start);
- -          goto out;
- -        }
+ +      if (!c)
+ +      {
+ +              /* We hit an eof char (0) */
+ +              if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish)
+ +                      tok->err = json_tokener_error_parse_eof;
         }
- -      }
- -      break;
- -
- -    case json_tokener_state_object_field_end:
- -      if(c == ':') {
- -      saved_state = json_tokener_state_object_value;
- -      state = json_tokener_state_eatws;
- -      } else {
- -      tok->err = json_tokener_error_parse_object_key_sep;
- -      goto out;
- -      }
- -      break;
- -
- -    case json_tokener_state_object_value:
- -      if(tok->depth >= tok->max_depth-1) {
- -      tok->err = json_tokener_error_depth;
- -      goto out;
- -      }
- -      state = json_tokener_state_object_value_add;
- -      tok->depth++;
- -      json_tokener_reset_level(tok, tok->depth);
- -      goto redo_char;
- -
- -    case json_tokener_state_object_value_add:
- -      json_object_object_add(current, obj_field_name, obj);
- -      free(obj_field_name);
- -      obj_field_name = NULL;
- -      saved_state = json_tokener_state_object_sep;
- -      state = json_tokener_state_eatws;
- -      goto redo_char;
- -
- -    case json_tokener_state_object_sep:
- -      /* { */
- -      if(c == '}') {
- -      saved_state = json_tokener_state_finish;
- -      state = json_tokener_state_eatws;
- -      } else if(c == ',') {
- -      saved_state = json_tokener_state_object_field_start_after_sep;
- -      state = json_tokener_state_eatws;
- -      } else {
- -      tok->err = json_tokener_error_parse_object_value_sep;
- -      goto out;
- -      }
- -      break;
- -
- -    }
- -    if (!ADVANCE_CHAR(str, tok))
- -      goto out;
- -  } /* while(PEEK_CHAR) */
- -
- - out:
- -  if (c &&
- -     (state == json_tokener_state_finish) &&
- -     (tok->depth == 0) &&
- -     (tok->flags & JSON_TOKENER_STRICT)) {
- -      /* unexpected char after JSON data */
- -      tok->err = json_tokener_error_parse_unexpected;
- -  }
- -  if (!c) { /* We hit an eof char (0) */
- -    if(state != json_tokener_state_finish &&
- -       saved_state != json_tokener_state_finish)
- -      tok->err = json_tokener_error_parse_eof;
- -  }
   
   #ifdef HAVE_USELOCALE
- -  uselocale(oldlocale);
- -  freelocale(newloc); 
+ +      uselocale(oldlocale);
+ +      freelocale(newloc);
   #elif defined(HAVE_SETLOCALE)
- -  setlocale(LC_NUMERIC, oldlocale);
- -  free(oldlocale);
+ +      setlocale(LC_NUMERIC, oldlocale);
+ +      free(oldlocale);
   #endif
   
- -  if (tok->err == json_tokener_success)
- -  {
- -    json_object *ret = json_object_get(current);
- -      int ii;
+ +      if (tok->err == json_tokener_success)
+ +      {
+ +              json_object *ret = json_object_get(current);
+ +              int ii;
   
- -      /* Partially reset, so we parse additional objects on subsequent calls. */
- -    for(ii = tok->depth; ii >= 0; ii--)
- -      json_tokener_reset_level(tok, ii);
- -    return ret;
- -  }
+ +              /* Partially reset, so we parse additional objects on subsequent calls. */
+ +              for (ii = tok->depth; ii >= 0; ii--)
+ +                      json_tokener_reset_level(tok, ii);
+ +              return ret;
+ +      }
   
- -  MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
- -         json_tokener_errors[tok->err], tok->char_offset);
- -  return NULL;
+ +      MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err],
+ +               tok->char_offset);
+ +      return NULL;
+ +}
+ +/*
+ + * Checks one input byte against the structural rules of UTF-8; meant to be
+ + * called once per byte, in order.
+ + *
+ + * c      - the next byte of the input.
+ + * nBytes - in/out state: how many continuation bytes are still owed by the
+ + *          multi-byte sequence currently being read; 0 means the previous
+ + *          character is complete.
+ + *
+ + * Returns 1 if the byte is acceptable at this position, 0 otherwise.
+ + *
+ + * The `out:` handling earlier in this file flags a nonzero nBytes as
+ + * json_tokener_error_parse_utf8_string when JSON_TOKENER_VALIDATE_UTF8 is
+ + * set -- presumably this same counter, i.e. a sequence that was cut short.
+ + *
+ + * NOTE(review): only lead/continuation bit patterns are checked. Overlong
+ + * forms (e.g. lead bytes 0xC0/0xC1), lead bytes 0xF5-0xF7 and surrogate code
+ + * points are not rejected here -- confirm whether stricter checks are wanted.
+ + */
+ +static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
+ +{
+ +      unsigned char chr = c; /* compare as 0..255 whatever the signedness of plain char */
+ +      if (*nBytes == 0) /* not in the middle of a multi-byte sequence */
+ +      {
+ +              if (chr >= 0x80) /* high bit set: must be a lead byte; bytes below 0x80 pass unchanged */
+ +              {
+ +                      if ((chr & 0xe0) == 0xc0) /* 110xxxxx: one continuation byte follows */
+ +                              *nBytes = 1;
+ +                      else if ((chr & 0xf0) == 0xe0) /* 1110xxxx: two continuation bytes follow */
+ +                              *nBytes = 2;
+ +                      else if ((chr & 0xf8) == 0xf0) /* 11110xxx: three continuation bytes follow */
+ +                              *nBytes = 3;
+ +                      else /* stray continuation byte (10xxxxxx) or 11111xxx */
+ +                              return 0;
+ +              }
+ +      }
+ +      else /* inside a sequence: only a continuation byte is acceptable */
+ +      {
+ +              if ((chr & 0xC0) != 0x80) /* anything other than 10xxxxxx */
+ +                      return 0;
+ +              (*nBytes)--; /* one fewer continuation byte owed */
+ +      }
+ +      return 1;
  }
   
   void json_tokener_set_flags(struct json_tokener *tok, int flags)
author	Eric Haszlakiewicz <erh+git@nimenees.com>
	Sun, 2 Aug 2020 02:54:36 +0000 (02:54 +0000)
committer	Eric Haszlakiewicz <erh+git@nimenees.com>
	Sun, 2 Aug 2020 02:55:45 +0000 (02:55 +0000)