update code: add the JSON_TOKENER_VALIDATE_UTF8 flag and limit UTF-8 validation to 1-4 byte sequences

author dota17 <chenguopingdota@163.com>

Fri, 17 Jan 2020 07:33:44 +0000 (15:33 +0800)

committer dota17 <chenguopingdota@163.com>

Mon, 20 Jan 2020 02:41:24 +0000 (10:41 +0800)
author dota17 <chenguopingdota@163.com>
Fri, 17 Jan 2020 07:33:44 +0000 (15:33 +0800)
committer dota17 <chenguopingdota@163.com>
Mon, 20 Jan 2020 02:41:24 +0000 (10:41 +0800)
diff --git a/json_tokener.c b/json_tokener.c

index 2a8451df1643c7977a46436db6ab60de4d70422b..246d5bd7018d4926c2f1a86eedeb5141ca4d0784 100644 (file)
--- a/json_tokener.c
+++ b/json_tokener.c
@@ -223,7 +223,7 @@ struct json_object* json_tokener_parse_verbose(const char *str,
      :                                          \
      (((tok)->err = json_tokener_continue), 0)  \
      ) :                                                \
-   (((tok->flags & JSON_TOKENER_STRICT) &&   \
+   (((tok->flags & JSON_TOKENER_VALIDATE_UTF8) &&   \
      (!json_tokener_validate_utf8(*str, nBytesp)))?  \
      ((tok->err = json_tokener_error_parse_utf8_string), 0)  \
      :            \
@@ -956,7 +956,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
    } /* while(PEEK_CHAR) */
  
   out:
-  if ((tok->flags & JSON_TOKENER_STRICT) && (nBytes != 0))
+  if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0))
    {
      tok->err = json_tokener_error_parse_utf8_string;
    }
@@ -1004,19 +1004,14 @@ json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
    {
      if (chr >= 0x80)
      {
-      if(chr >= 0xFC && chr <= 0xFd)
-        *nBytes = 6;
-      else if (chr >= 0xF8)
-        *nBytes = 5;
-      else if (chr >= 0xF0)
-        *nBytes = 4;
-      else if (chr >= 0xE0)
-        *nBytes = 3;
-      else if (chr >= 0xC0)
+      if ((chr & 0xe0) == 0xc0)
+        *nBytes = 1;
+      else if ((chr & 0xf0) == 0xe0)
          *nBytes = 2;
+      else if ((chr & 0xf8) == 0xf0)
+        *nBytes = 3;
        else
          return 0;
-      (*nBytes)--;
      }
    }
    else
diff --git a/json_tokener.h b/json_tokener.h

index 061f81bca7582148bc765a08e17646d1a7225e1a..274e5487b7a6bd4e92a60b6121dd89a6a18bdbe9 100644 (file)
--- a/json_tokener.h
+++ b/json_tokener.h
@@ -137,6 +137,17 @@ typedef struct json_tokener json_tokener;
   */
  #define JSON_TOKENER_STRICT  0x01
  
+/**
+ * Make json_tokener_parse_ex() validate that its input is valid UTF-8.
+ * json_tokener_validate_utf8() checks each byte as it is read,
+ * before that byte is parsed.
+ *
+ * This flag is not set by default.
+ *
+ * @see json_tokener_set_flags()
+ */
+#define JSON_TOKENER_VALIDATE_UTF8  0x10
+
  /**
   * Given an error previously returned by json_tokener_get_error(),
   * return a human readable description of the error.
diff --git a/tests/test_parse.c b/tests/test_parse.c

index 14d4b11384c2b58e81a5cc96d7293aae2d554f95..bd7768f8de680ca70bdb359fabc7b6f789ce3524 100644 (file)
--- a/tests/test_parse.c
+++ b/tests/test_parse.c
@@ -357,38 +357,36 @@ struct incremental_step {
  
    // utf-8 test
    // acsll encoding
-       { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 3 },
+       { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 5 },
         { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 1 },
    // utf-8 encoding
-       { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 3 },
-       { "\x22\xe4\xb8",-1, -1, json_tokener_error_parse_utf8_string, 2 },
-       { "\x96\xe7\x95\x8c\x22",-1, 0, json_tokener_error_parse_utf8_string, 3 },
+       { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 5 },
+       { "\x22\xe4\xb8",-1, 3, json_tokener_error_parse_utf8_string, 4 },
+       { "\x96\xe7\x95\x8c\x22",-1, 0, json_tokener_error_parse_utf8_string, 5 },
         { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 1 },
-       { "\x22\xcf\x80\xcf\x86\x22",-1, -1, json_tokener_success, 3 },
-       { "\x22\xf0\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 },
-       { "\x22\xf8\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 },
-       { "\x22\xfd\xa5\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 },
+       { "\x22\xcf\x80\xcf\x86\x22",-1, -1, json_tokener_success, 5 },
+       { "\x22\xf0\xa5\x91\x95\x22",-1, -1, json_tokener_success, 5 },
    // wrong utf-8 encoding
-       { "\x22\xe6\x9d\x4e\x22",-1, 3, json_tokener_error_parse_utf8_string, 3 },
+       { "\x22\xe6\x9d\x4e\x22",-1, 3, json_tokener_error_parse_utf8_string, 5 },
         { "\x22\xe6\x9d\x4e\x22",-1, 5, json_tokener_success, 1 },
    // GBK encoding
-       { "\x22\xc0\xee\xc5\xf4\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 },
+       { "\x22\xc0\xee\xc5\xf4\x22",-1, 2, json_tokener_error_parse_utf8_string, 5 },
         { "\x22\xc0\xee\xc5\xf4\x22",-1, 6, json_tokener_success, 1 },
    // char after space
-       { "\x20\x20\x22\xe4\xb8\x96\x22",-1, -1, json_tokener_success, 3 },
-       { "\x20\x20\x81\x22\xe4\xb8\x96\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 },
-       { "\x5b\x20\x81\x31\x5d",-1, 2, json_tokener_error_parse_utf8_string, 3 },
+       { "\x20\x20\x22\xe4\xb8\x96\x22",-1, -1, json_tokener_success, 5 },
+       { "\x20\x20\x81\x22\xe4\xb8\x96\x22",-1, 2, json_tokener_error_parse_utf8_string, 5 },
+       { "\x5b\x20\x81\x31\x5d",-1, 2, json_tokener_error_parse_utf8_string, 5 },
    // char in state inf
         { "\x49\x6e\x66\x69\x6e\x69\x74\x79",9, 8, json_tokener_success, 1 },
-       { "\x49\x6e\x66\x81\x6e\x69\x74\x79",-1, 3, json_tokener_error_parse_utf8_string, 3 },
+       { "\x49\x6e\x66\x81\x6e\x69\x74\x79",-1, 3, json_tokener_error_parse_utf8_string, 5 },
    // char in escape unicode
-       { "\x22\x5c\x75\x64\x38\x35\x35\x5c\x75\x64\x63\x35\x35\x22",15, 14, json_tokener_success, 3 },
-       { "\x22\x5c\x75\x64\x38\x35\x35\xc0\x75\x64\x63\x35\x35\x22",-1, 8, json_tokener_error_parse_utf8_string, 3 },
-       { "\x22\x5c\x75\x64\x30\x30\x33\x31\xc0\x22",-1, 9, json_tokener_error_parse_utf8_string, 3 },
+       { "\x22\x5c\x75\x64\x38\x35\x35\x5c\x75\x64\x63\x35\x35\x22",15, 14, json_tokener_success, 5 },
+       { "\x22\x5c\x75\x64\x38\x35\x35\xc0\x75\x64\x63\x35\x35\x22",-1, 8, json_tokener_error_parse_utf8_string, 5 },
+       { "\x22\x5c\x75\x64\x30\x30\x33\x31\xc0\x22",-1, 9, json_tokener_error_parse_utf8_string, 5 },
    // char in number
-       { "\x31\x31\x81\x31\x31",-1, 2, json_tokener_error_parse_utf8_string, 3 },
+       { "\x31\x31\x81\x31\x31",-1, 2, json_tokener_error_parse_utf8_string, 5 },
    // char in object
-       { "\x7b\x22\x31\x81\x22\x3a\x31\x7d",-1, 3, json_tokener_error_parse_utf8_string, 3 },
+       { "\x7b\x22\x31\x81\x22\x3a\x31\x7d",-1, 3, json_tokener_error_parse_utf8_string, 5 },
  
         { NULL, -1, -1, json_tokener_success, 0 },
  };
@@ -424,9 +422,19 @@ static void test_incremental_parse()
                 size_t expected_char_offset;
  
                 if (step->reset_tokener & 2)
-                       json_tokener_set_flags(tok, JSON_TOKENER_STRICT);
+                       {
+                               if (step->reset_tokener & 4)
+                                       json_tokener_set_flags(tok, 3);
+                               else
+                                       json_tokener_set_flags(tok, JSON_TOKENER_STRICT);
+                       }
                 else
-                       json_tokener_set_flags(tok, 0);
+                       {
+                               if (step->reset_tokener & 4)
+                                       json_tokener_set_flags(tok, JSON_TOKENER_VALIDATE_UTF8);
+                               else
+                                       json_tokener_set_flags(tok, 0);
+                       }
  
                 if (length == -1)
                         length = strlen(step->string_to_parse);
diff --git a/tests/test_parse.expected b/tests/test_parse.expected

index a5c2454a9ea8df3a86342a3a15d718b121c97157..68e55b1b7a1c5c050c64c16718c746ce6b180346 100644 (file)
--- a/tests/test_parse.expected
+++ b/tests/test_parse.expected
@@ -191,8 +191,6 @@ json_tokener_parse_ex(tok, 
  json_tokener_parse_ex(tok, "世界"    ,   8) ... OK: got object of type [string]: "世界"
  json_tokener_parse_ex(tok, "πφ"      ,   6) ... OK: got object of type [string]: "πφ"
  json_tokener_parse_ex(tok, "𥑕"      ,   6) ... OK: got object of type [string]: "𥑕"
-json_tokener_parse_ex(tok, "�����"     ,   7) ... OK: got object of type [string]: "�����"
-json_tokener_parse_ex(tok, "������"    ,   8) ... OK: got object of type [string]: "������"
  json_tokener_parse_ex(tok, "æ\9dN"       ,   5) ... OK: got correct error: invalid utf-8 string
  json_tokener_parse_ex(tok, "æ\9dN"       ,   5) ... OK: got object of type [string]: "æ\9dN"
  json_tokener_parse_ex(tok, "ÀîÅô"      ,   6) ... OK: got correct error: invalid utf-8 string
@@ -207,5 +205,5 @@ json_tokener_parse_ex(tok, "\ud855
  json_tokener_parse_ex(tok, "\ud0031À"  ,  10) ... OK: got correct error: invalid utf-8 string
  json_tokener_parse_ex(tok, 11\8111       ,   5) ... OK: got correct error: invalid utf-8 string
  json_tokener_parse_ex(tok, {"1\81":1}    ,   8) ... OK: got correct error: invalid utf-8 string
-End Incremental Tests OK=129 ERROR=0
+End Incremental Tests OK=127 ERROR=0
  ==================================
author	dota17 <chenguopingdota@163.com>
	Fri, 17 Jan 2020 07:33:44 +0000 (15:33 +0800)
committer	dota17 <chenguopingdota@163.com>
	Mon, 20 Jan 2020 02:41:24 +0000 (10:41 +0800)
json_tokener.c		patch \| blob \| history
json_tokener.h		patch \| blob \| history
tests/test_parse.c		patch \| blob \| history
tests/test_parse.expected		patch \| blob \| history