From 94a140214c9df560aef047de93cf17d5dcf2e698 Mon Sep 17 00:00:00 2001 From: Omar Kilani Date: Tue, 31 Jan 2006 08:59:06 +0000 Subject: [PATCH] - Rewritten for better performance. 3-8x faster encodes, 2-4x faster decodes. - No longer uses json-c, implements its own JSON parser and encoder. - JSON parser based on Douglas Crockford's JSON_checker. --- ext/json/JSON_parser.c | 757 +++++++++++++++++++++++++++++++++++++++ ext/json/JSON_parser.h | 8 + ext/json/config.m4 | 26 +- ext/json/config.w32 | 4 +- ext/json/json.c | 373 +++++++++++-------- ext/json/json.dsp | 90 +---- ext/json/package.xml | 54 ++- ext/json/php_json.h | 6 +- ext/json/utf8_decode.c | 179 +++++++++ ext/json/utf8_decode.h | 18 + ext/json/utf8_to_utf16.c | 56 +++ ext/json/utf8_to_utf16.h | 3 + 12 files changed, 1286 insertions(+), 288 deletions(-) create mode 100644 ext/json/JSON_parser.c create mode 100644 ext/json/JSON_parser.h create mode 100644 ext/json/utf8_decode.c create mode 100644 ext/json/utf8_decode.h create mode 100644 ext/json/utf8_to_utf16.c create mode 100644 ext/json/utf8_to_utf16.h diff --git a/ext/json/JSON_parser.c b/ext/json/JSON_parser.c new file mode 100644 index 0000000000..adb974346e --- /dev/null +++ b/ext/json/JSON_parser.c @@ -0,0 +1,757 @@ +/* JSON_parser.c */ + +/* 2005-12-30 */ + +/* +Copyright (c) 2005 JSON.org + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + + +#include "JSON_parser.h" +#include + +#define true 1 +#define false 0 + +/* + Characters are mapped into these 32 symbol classes. This allows for + significant reductions in the size of the state transition table. +*/ + +/* error */ +#define S_ERR -1 + +/* space */ +#define S_SPA 0 + +/* other whitespace */ +#define S_WSP 1 + +/* { */ +#define S_LBE 2 + +/* } */ +#define S_RBE 3 + +/* [ */ +#define S_LBT 4 + +/* ] */ +#define S_RBT 5 + +/* : */ +#define S_COL 6 + +/* , */ +#define S_COM 7 + +/* " */ +#define S_QUO 8 + +/* \ */ +#define S_BAC 9 + +/* / */ +#define S_SLA 10 + +/* + */ +#define S_PLU 11 + +/* - */ +#define S_MIN 12 + +/* . */ +#define S_DOT 13 + +/* 0 */ +#define S_ZER 14 + +/* 123456789 */ +#define S_DIG 15 + +/* a */ +#define S__A_ 16 + +/* b */ +#define S__B_ 17 + +/* c */ +#define S__C_ 18 + +/* d */ +#define S__D_ 19 + +/* e */ +#define S__E_ 20 + +/* f */ +#define S__F_ 21 + +/* l */ +#define S__L_ 22 + +/* n */ +#define S__N_ 23 + +/* r */ +#define S__R_ 24 + +/* s */ +#define S__S_ 25 + +/* t */ +#define S__T_ 26 + +/* u */ +#define S__U_ 27 + +/* ABCDF */ +#define S_A_F 28 + +/* E */ +#define S_E 29 + +/* everything else */ +#define S_ETC 30 + + +/* + This table maps the 128 ASCII characters into the 32 character classes. + The remaining Unicode characters should be mapped to S_ETC. 
+*/ +static int ascii_class[128] = { + S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, + S_ERR, S_WSP, S_WSP, S_ERR, S_ERR, S_WSP, S_ERR, S_ERR, + S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, + S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, + + S_SPA, S_ETC, S_QUO, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + S_ETC, S_ETC, S_ETC, S_PLU, S_COM, S_MIN, S_DOT, S_SLA, + S_ZER, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, + S_DIG, S_DIG, S_COL, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + + S_ETC, S_A_F, S_A_F, S_A_F, S_A_F, S_E , S_A_F, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + S_ETC, S_ETC, S_ETC, S_LBT, S_BAC, S_RBT, S_ETC, S_ETC, + + S_ETC, S__A_, S__B_, S__C_, S__D_, S__E_, S__F_, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S__L_, S_ETC, S__N_, S_ETC, + S_ETC, S_ETC, S__R_, S__S_, S__T_, S__U_, S_ETC, S_ETC, + S_ETC, S_ETC, S_ETC, S_LBE, S_ETC, S_RBE, S_ETC, S_ETC +}; + + +/* + The state transition table takes the current state and the current symbol, + and returns either a new state or an action. A new state is a number between + 0 and 29. An action is a negative number between -1 and -9. A JSON text is + accepted if the end of the text is in state 9 and mode is MODE_DONE. 
+*/ +static int state_transition_table[30][31] = { +/* 0*/ { 0, 0,-8,-1,-6,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/* 1*/ { 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/* 2*/ { 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1}, +/* 3*/ { 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, +/* 4*/ {-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1}, +/* 5*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1}, +/* 6*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1}, +/* 7*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1}, +/* 8*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1}, +/* 9*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*10*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1}, +/*11*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1}, +/*12*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*13*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*14*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1}, +/*15*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1}, +/*16*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*17*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1}, +/*18*/ 
{-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1}, +/*19*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1}, +/*20*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*21*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*22*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1}, +/*23*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1}, +/*24*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*25*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*26*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*27*/ {27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, +/*28*/ {28,28,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1}, +/*29*/ {29,29,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1} +}; + +#define JSON_PARSER_MAX_DEPTH 20 + + +/* + A stack maintains the states of nested structures. +*/ + +typedef struct json_parser +{ + int the_stack[JSON_PARSER_MAX_DEPTH]; + zval *the_zstack[JSON_PARSER_MAX_DEPTH]; + int the_top; +} json_parser; + + +/* + These modes can be pushed on the PDA stack. +*/ +#define MODE_DONE 1 +#define MODE_KEY 2 +#define MODE_OBJECT 3 +#define MODE_ARRAY 4 + +/* + Push a mode onto the stack. Return false if there is overflow. 
+*/ +static int +push(json_parser *json, zval *z, int mode) +{ + json->the_top += 1; + if (json->the_top >= JSON_PARSER_MAX_DEPTH) { + return false; + } + + json->the_stack[json->the_top] = mode; + return true; +} + + +/* + Pop the stack, assuring that the current mode matches the expectation. + Return false if there is underflow or if the modes mismatch. +*/ +static int +pop(json_parser *json, zval *z, int mode) +{ + if (json->the_top < 0 || json->the_stack[json->the_top] != mode) { + return false; + } + json->the_stack[json->the_top] = 0; + json->the_top -= 1; + + return true; +} + + +static int dehexchar(char c) +{ + if (c >= '0' && c <= '9') + { + return c - '0'; + } + else if (c >= 'A' && c <= 'F') + { + return c - ('A' - 10); + } + else if (c >= 'a' && c <= 'f') + { + return c - ('a' - 10); + } + else + { + return -1; + } +} + + +static void json_create_zval(zval **z, smart_str *buf, int type) +{ + ALLOC_INIT_ZVAL(*z); + + if (type == IS_LONG) + { + ZVAL_LONG(*z, atol(buf->c)); + } + else if (type == IS_DOUBLE) + { + ZVAL_DOUBLE(*z, atof(buf->c)); + } + else if (type == IS_STRING) + { + ZVAL_STRINGL(*z, buf->c, buf->len, 1); + } + else if (type == IS_BOOL) + { + ZVAL_BOOL(*z, (*(buf->c) == 't')); + } + else /* type == IS_NULL) || type unknown */ + { + ZVAL_NULL(*z); + } +} + + +static void utf16_to_utf8(smart_str *buf, unsigned short utf16) +{ + if (utf16 < 0x80) + { + smart_str_appendc(buf, (unsigned char) utf16); + } + else if (utf16 < 0x800) + { + smart_str_appendc(buf, 0xc0 | (utf16 >> 6)); + smart_str_appendc(buf, 0x80 | (utf16 & 0x3f)); + } + else + { + smart_str_appendc(buf, 0xe0 | (utf16 >> 12)); + smart_str_appendc(buf, 0x80 | ((utf16 >> 6) & 0x3f)); + smart_str_appendc(buf, 0x80 | (utf16 & 0x3f)); + } +} + +static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int assoc TSRMLS_DC) +{ + zval *root = json->the_zstack[up]; + zval *child = json->the_zstack[cur]; + int up_mode = json->the_stack[up]; + + if (up_mode == MODE_ARRAY) + 
{ + add_next_index_zval(root, child); + } + else if (up_mode == MODE_OBJECT) + { + if (!assoc) + { + add_property_zval(root, key->c, child); +#if PHP_MAJOR_VERSION >= 5 + ZVAL_DELREF(child); +#endif + } + else + { + add_assoc_zval(root, key->c, child); + } + key->len = 0; + } +} + + +#define FREE_BUFFERS() do { smart_str_free(&buf); smart_str_free(&key); } while (0); +#define SWAP_BUFFERS(from, to) do { \ + char *t1 = from.c; \ + int t2 = from.a; \ + from.c = to.c; \ + from.a = to.a; \ + to.c = t1; \ + to.a = t2; \ + to.len = from.len; \ + from.len = 0; \ + } while(0); +#define JSON_RESET_TYPE() do { type = -1; } while(0); +#define JSON(x) the_json.x + + +/* + The JSON_parser takes a UTF-16 encoded string and determines if it is a + syntactically correct JSON text. Along the way, it creates a PHP variable. + + It is implemented as a Pushdown Automaton; that means it is a finite state + machine with a stack. +*/ +int +JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC) +{ + int b; /* the next character */ + int c; /* the next character class */ + int s; /* the next state */ + json_parser the_json; /* the parser state */ + int the_state = 0; + int the_index; + + smart_str buf = {0}; + smart_str key = {0}; + + int type = -1; + unsigned short utf16; + + JSON(the_top) = -1; + push(&the_json, z, MODE_DONE); + + for (the_index = 0; the_index < length; the_index += 1) { + b = p[the_index]; + if ((b & 127) == b) { + c = ascii_class[b]; + if (c <= S_ERR) { + FREE_BUFFERS(); + return false; + } + } else { + c = S_ETC; + } +/* + Get the next state from the transition table. +*/ + s = state_transition_table[the_state][c]; + if (s < 0) { +/* + Perform one of the predefined actions. 
+*/ + switch (s) { +/* + empty } +*/ + case -9: + if (!pop(&the_json, z, MODE_KEY)) { + FREE_BUFFERS(); + return false; + } + the_state = 9; + break; +/* + { +*/ + case -8: + if (!push(&the_json, z, MODE_KEY)) { + FREE_BUFFERS(); + return false; + } + + the_state = 1; + if (JSON(the_top) > 0) + { + zval *obj; + + if (JSON(the_top) == 1) + { + obj = z; + } + else + { + ALLOC_INIT_ZVAL(obj); + } + + if (!assoc) + { + object_init(obj); + } + else + { + array_init(obj); + } + + JSON(the_zstack)[JSON(the_top)] = obj; + + if (JSON(the_top) > 1) + { + attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC); + } + + JSON_RESET_TYPE(); + } + + break; +/* + } +*/ + case -7: + if (type != -1 && + (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || + JSON(the_stack)[JSON(the_top)] == MODE_ARRAY)) + { + zval *mval; + smart_str_0(&buf); + + json_create_zval(&mval, &buf, type); + + if (!assoc) + { + add_property_zval(JSON(the_zstack)[JSON(the_top)], key.c, mval); +#if PHP_MAJOR_VERSION >= 5 + ZVAL_DELREF(mval); +#endif + } + else + { + add_assoc_zval(JSON(the_zstack)[JSON(the_top)], key.c, mval); + } + key.len = 0; + buf.len = 0; + JSON_RESET_TYPE(); + } + + + if (!pop(&the_json, z, MODE_OBJECT)) { + FREE_BUFFERS(); + return false; + } + the_state = 9; + break; +/* + [ +*/ + case -6: + if (!push(&the_json, z, MODE_ARRAY)) { + FREE_BUFFERS(); + return false; + } + the_state = 2; + + if (JSON(the_top) > 0) + { + zval *arr; + + if (JSON(the_top) == 1) + { + arr = z; + } + else + { + ALLOC_INIT_ZVAL(arr); + } + + array_init(arr); + JSON(the_zstack)[JSON(the_top)] = arr; + + if (JSON(the_top) > 1) + { + attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC); + } + + JSON_RESET_TYPE(); + } + + break; +/* + ] +*/ + case -5: + { + if (type != -1 && + (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || + JSON(the_stack)[JSON(the_top)] == MODE_ARRAY)) + { + zval *mval; + smart_str_0(&buf); + + json_create_zval(&mval, &buf, type); + 
add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval); + buf.len = 0; + JSON_RESET_TYPE(); + } + + if (!pop(&the_json, z, MODE_ARRAY)) { + FREE_BUFFERS(); + return false; + } + the_state = 9; + } + break; +/* + " +*/ + case -4: + switch (JSON(the_stack)[JSON(the_top)]) { + case MODE_KEY: + the_state = 27; + smart_str_0(&buf); + SWAP_BUFFERS(buf, key); + JSON_RESET_TYPE(); + break; + case MODE_ARRAY: + case MODE_OBJECT: + the_state = 9; + break; + default: + FREE_BUFFERS(); + return false; + } + break; +/* + , +*/ + case -3: + { + zval *mval; + + if (type != -1 && + (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || + JSON(the_stack[JSON(the_top)]) == MODE_ARRAY)) + { + smart_str_0(&buf); + json_create_zval(&mval, &buf, type); + } + + switch (JSON(the_stack)[JSON(the_top)]) { + case MODE_OBJECT: + if (pop(&the_json, z, MODE_OBJECT) && push(&the_json, z, MODE_KEY)) { + if (type != -1) + { + if (!assoc) + { + add_property_zval(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), mval); +#if PHP_MAJOR_VERSION >= 5 + ZVAL_DELREF(mval); +#endif + } + else + { + add_assoc_zval(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), mval); + } + key.len = 0; + } + the_state = 29; + } + break; + case MODE_ARRAY: + if (type != -1) + { + add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval); + } + the_state = 28; + break; + default: + FREE_BUFFERS(); + return false; + } + buf.len = 0; + JSON_RESET_TYPE(); + } + break; +/* + : +*/ + case -2: + if (pop(&the_json, z, MODE_KEY) && push(&the_json, z, MODE_OBJECT)) { + the_state = 28; + break; + } +/* + syntax error +*/ + case -1: + { + FREE_BUFFERS(); + return false; + } + } + } else { +/* + Change the state and iterate. 
+*/ + if (type == IS_STRING) + { + if (s == 3 && the_state != 8) + { + if (the_state != 4) + { + utf16_to_utf8(&buf, b); + } + else + { + switch (b) + { + case 'b': + smart_str_appendc(&buf, '\b'); + break; + case 't': + smart_str_appendc(&buf, '\t'); + break; + case 'n': + smart_str_appendc(&buf, '\n'); + break; + case 'f': + smart_str_appendc(&buf, '\f'); + break; + case 'r': + smart_str_appendc(&buf, '\r'); + break; + default: + utf16_to_utf8(&buf, b); + break; + } + } + } + else if (s == 6) + { + utf16 = dehexchar(b) << 12; + } + else if (s == 7) + { + utf16 += dehexchar(b) << 8; + } + else if (s == 8) + { + utf16 += dehexchar(b) << 4; + } + else if (s == 3 && the_state == 8) + { + utf16 += dehexchar(b); + utf16_to_utf8(&buf, utf16); + } + } + else if (type < IS_LONG && (c == S_DIG || c == S_ZER)) + { + type = IS_LONG; + smart_str_appendc(&buf, b); + } + else if (type == IS_LONG && s == 24) + { + type = IS_DOUBLE; + smart_str_appendc(&buf, b); + } + else if (type < IS_DOUBLE && c == S_DOT) + { + type = IS_DOUBLE; + smart_str_appendc(&buf, b); + } + else if (type < IS_STRING && c == S_QUO) + { + type = IS_STRING; + } + else if (type < IS_BOOL && ((the_state == 12 && s == 9) || (the_state == 16 && s == 9))) + { + type = IS_BOOL; + } + else if (type < IS_NULL && the_state == 19 && s == 9) + { + type = IS_NULL; + } + else if (type != IS_STRING && c > S_WSP) + { + utf16_to_utf8(&buf, b); + } + + the_state = s; + } + } + + FREE_BUFFERS(); + + return the_state == 9 && pop(&the_json, z, MODE_DONE); +} + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/json/JSON_parser.h b/ext/json/JSON_parser.h new file mode 100644 index 0000000000..085e776982 --- /dev/null +++ b/ext/json/JSON_parser.h @@ -0,0 +1,8 @@ +/* JSON_checker.h */ + +#include "php.h" +#include "ext/standard/php_smart_str.h" + +static char digits[] = "0123456789abcdef"; + +extern int JSON_parser(zval *z, 
unsigned short p[], int length, int assoc TSRMLS_DC); diff --git a/ext/json/config.m4 b/ext/json/config.m4 index 9e84ca4678..a937b1f91b 100644 --- a/ext/json/config.m4 +++ b/ext/json/config.m4 @@ -58,26 +58,12 @@ EOF done ]) -AC_DEFUN([PHP_JSON_SETUP_JSON_C], [ - dnl json-c is required and can not be disabled - dnl - dnl Bundled json-c - dnl - - PHP_JSON_ADD_BUILD_DIR([json_c]) - PHP_JSON_ADD_INCLUDE([json_c]) - +AC_DEFUN([PHP_JSON_SETUP_JSON_CHECKER], [ PHP_JSON_ADD_SOURCES([ - json_c/ConvertUTF.c - json_c/debug.c - json_c/linkhash.c - json_c/printbuf.c - json_c/arraylist.c - json_c/json_object.c - json_c/json_tokener.c - json_c/ossupport.c + utf8_to_utf16.c + utf8_decode.c + JSON_parser.c ]) - PHP_JSON_ADD_CFLAG([-DHAVE_CONFIG_H]) ]) dnl @@ -90,13 +76,11 @@ PHP_ARG_WITH(json, whether to enable JavaScript Object Serialization support, if test "$PHP_JSON" != "no"; then AC_DEFINE([HAVE_JSON],1,[whether to have JavaScript Object Serialization support]) AC_HEADER_STDC - AC_CHECK_FUNCS([strndup vsnprintf vasprintf strncasecmp]) - AC_CHECK_HEADERS([stdarg.h]) PHP_JSON_ADD_BASE_SOURCES([json.c]) dnl json_c is required - PHP_JSON_SETUP_JSON_C + PHP_JSON_SETUP_JSON_CHECKER PHP_JSON_EXTENSION dnl PHP_INSTALL_HEADERS([ext/json], [json_c]) fi diff --git a/ext/json/config.w32 b/ext/json/config.w32 index d933c76994..fa001d308c 100644 --- a/ext/json/config.w32 +++ b/ext/json/config.w32 @@ -4,9 +4,7 @@ ARG_WITH("json", "JavaScript Object Serialization support", "no"); if (PHP_JSON != "no") { - CHECK_HEADER_ADD_INCLUDE("json.h", "CFLAGS_JSON", configure_module_dirname + "/json_c"); EXTENSION('json', 'json.c', PHP_JSON_SHARED, ""); - ADD_SOURCES(configure_module_dirname + "/json_c", "ConvertUTF.c debug.c linkhash.c \ - printbuf.c arraylist.c json_object.c json_tokener.c ossupport.c", "json"); + ADD_SOURCES(configure_module_dirname, "JSON_parser.c utf8_decode.c utf8_to_utf16.c", "json"); } diff --git a/ext/json/json.c b/ext/json/json.c index a50e72000f..c89147fa5f 100644 --- 
a/ext/json/json.c +++ b/ext/json/json.c @@ -12,7 +12,7 @@ | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ - | Author: Omar Kilani | + | Author: Omar Kilani | +----------------------------------------------------------------------+ */ @@ -25,8 +25,10 @@ #include "php.h" #include "php_ini.h" #include "ext/standard/info.h" +#include "ext/standard/php_smart_str.h" +#include "utf8_to_utf16.h" +#include "JSON_parser.h" #include "php_json.h" -#include "json.h" /* If you declare any globals in php_json.h uncomment this: ZEND_DECLARE_MODULE_GLOBALS(json) @@ -40,9 +42,9 @@ static int le_json; * Every user visible function must have an entry in json_functions[]. */ function_entry json_functions[] = { - PHP_FE(json_encode, NULL) - PHP_FE(json_decode, NULL) - {NULL, NULL, NULL} /* Must be the last line in json_functions[] */ + PHP_FE(json_encode, NULL) + PHP_FE(json_decode, NULL) + {NULL, NULL, NULL} /* Must be the last line in json_functions[] */ }; /* }}} */ @@ -50,19 +52,19 @@ function_entry json_functions[] = { */ zend_module_entry json_module_entry = { #if ZEND_MODULE_API_NO >= 20010901 - STANDARD_MODULE_HEADER, + STANDARD_MODULE_HEADER, #endif - "json", - json_functions, - NULL, - NULL, - NULL, - NULL, - PHP_MINFO(json), + "json", + json_functions, + NULL, + NULL, + NULL, + NULL, + PHP_MINFO(json), #if ZEND_MODULE_API_NO >= 20010901 - PHP_JSON_VERSION, + PHP_JSON_VERSION, #endif - STANDARD_MODULE_PROPERTIES + STANDARD_MODULE_PROPERTIES }; /* }}} */ @@ -74,20 +76,20 @@ ZEND_GET_MODULE(json) */ PHP_MINFO_FUNCTION(json) { - php_info_print_table_start(); - php_info_print_table_row(2, "json support", "enabled"); - php_info_print_table_row(2, "json version", PHP_JSON_VERSION); - php_info_print_table_row(2, "json-c version", JSON_C_VERSION); - php_info_print_table_end(); + php_info_print_table_start(); + php_info_print_table_row(2, 
"json support", "enabled"); + php_info_print_table_row(2, "json version", PHP_JSON_VERSION); + php_info_print_table_end(); } /* }}} */ -static struct json_object *json_encode_r(zval *val TSRMLS_DC); +static void json_encode_r(smart_str *buf, zval *val TSRMLS_DC); +static void json_escape_string(smart_str *buf, char *s, int len TSRMLS_DC); static int json_determine_array_type(zval **val TSRMLS_DC) { int i; HashTable *myht; - + if (Z_TYPE_PP(val) == IS_ARRAY) { myht = HASH_OF(*val); } else { @@ -123,11 +125,10 @@ static int json_determine_array_type(zval **val TSRMLS_DC) { return 0; } -static struct json_object *json_encode_array(zval **val TSRMLS_DC) { +static void json_encode_array(smart_str *buf, zval **val TSRMLS_DC) { int i, r; HashTable *myht; - struct json_object *obj; - + if (Z_TYPE_PP(val) == IS_ARRAY) { myht = HASH_OF(*val); r = json_determine_array_type(val TSRMLS_CC); @@ -136,10 +137,13 @@ static struct json_object *json_encode_array(zval **val TSRMLS_DC) { r = 1; } - if (r == 0 /* all keys numeric */) { - obj = json_object_new_array(); - } else { - obj = json_object_new_object(); + if (r == 0) + { + smart_str_appendc(buf, '['); + } + else + { + smart_str_appendc(buf, '{'); } i = myht ? zend_hash_num_elements(myht) : 0; @@ -149,8 +153,8 @@ static struct json_object *json_encode_array(zval **val TSRMLS_DC) { ulong index; uint key_len; HashPosition pos; - struct json_object *member; - char buffer[11]; + int htlen = i; + int wpos = 0; zend_hash_internal_pointer_reset_ex(myht, &pos); for (;; zend_hash_move_forward_ex(myht, &pos)) { @@ -159,192 +163,257 @@ static struct json_object *json_encode_array(zval **val TSRMLS_DC) { break; if (zend_hash_get_current_data_ex(myht, (void **) &data, &pos) == SUCCESS) { - member = json_encode_r(*data TSRMLS_CC); if (r == 0) { - json_object_array_add(obj, member); + json_encode_r(buf, *data TSRMLS_CC); } else if (r == 1) { if (i == HASH_KEY_IS_STRING) { if (key[0] == '\0') { /* Skip protected and private members. 
*/ - if (member != NULL) - json_object_put(member); continue; } - json_object_object_add(obj, key, member); + json_escape_string(buf, key, key_len - 1 TSRMLS_CC); + smart_str_appendc(buf, ':'); + json_encode_r(buf, *data TSRMLS_CC); } else { - snprintf(buffer, sizeof(buffer), "%ld", index); - buffer[10] = 0; - json_object_object_add(obj, buffer, member); + smart_str_appendc(buf, '"'); + smart_str_append_long(buf, (long) index); + smart_str_appendc(buf, '"'); } } + + if (htlen > 1 && wpos++ < htlen - 1) + { + smart_str_appendc(buf, ','); + } } } } - return obj; + if (r == 0) + { + smart_str_appendc(buf, ']'); + } + else + { + smart_str_appendc(buf, '}'); + } } -static struct json_object *json_encode_r(zval *val TSRMLS_DC) { - struct json_object *jo; +#define REVERSE16(us) (((us & 0xf) << 12) | (((us >> 4) & 0xf) << 8) | (((us >> 8) & 0xf) << 4) | ((us >> 12) & 0xf)) + +static void json_escape_string(smart_str *buf, char *s, int len TSRMLS_DC) +{ + int pos = 0; + unsigned short us; + unsigned short *utf16; + + utf16 = (unsigned short *) emalloc(len * sizeof(unsigned short)); + + len = utf8_to_utf16(utf16, s, len); + if (len <= 0) + { + if (utf16) + { + efree(utf16); + } + + return; + } + + smart_str_appendc(buf, '"'); + + while(pos < len) + { + us = utf16[pos++]; + + switch (us) + { + case '"': + { + smart_str_appendl(buf, "\\\"", 2); + } + break; + case '\\': + { + smart_str_appendl(buf, "\\\\", 2); + } + break; + case '/': + { + smart_str_appendl(buf, "\\/", 2); + } + break; + case '\b': + { + smart_str_appendl(buf, "\\b", 2); + } + break; + case '\f': + { + smart_str_appendl(buf, "\\f", 2); + } + break; + case '\n': + { + smart_str_appendl(buf, "\\n", 2); + } + break; + case '\r': + { + smart_str_appendl(buf, "\\r", 2); + } + break; + case '\t': + { + smart_str_appendl(buf, "\\t", 2); + } + break; + default: + { + if (us < ' ' || (us & 127) == us) + { + smart_str_appendc(buf, (unsigned char) us); + } + else + { + smart_str_appendl(buf, "\\u", 2); + us = 
REVERSE16(us); + + smart_str_appendc(buf, digits[us & ((1 << 4) - 1)]); + us >>= 4; + smart_str_appendc(buf, digits[us & ((1 << 4) - 1)]); + us >>= 4; + smart_str_appendc(buf, digits[us & ((1 << 4) - 1)]); + us >>= 4; + smart_str_appendc(buf, digits[us & ((1 << 4) - 1)]); + } + } + break; + } + } + + smart_str_appendc(buf, '"'); + efree(utf16); +} +static void json_encode_r(smart_str *buf, zval *val TSRMLS_DC) { switch (Z_TYPE_P(val)) { case IS_NULL: - jo = NULL; + smart_str_appendl(buf, "null", 4); break; case IS_BOOL: - jo = json_object_new_boolean(Z_BVAL_P(val)); + if (Z_BVAL_P(val)) + { + smart_str_appendl(buf, "true", 4); + } + else + { + smart_str_appendl(buf, "false", 5); + } break; case IS_LONG: - jo = json_object_new_int(Z_LVAL_P(val)); + smart_str_append_long(buf, Z_LVAL_P(val)); break; case IS_DOUBLE: - jo = json_object_new_double(Z_DVAL_P(val)); + { + char *d = NULL; + int len; + double dbl = Z_DVAL_P(val); + + if (!zend_isinf(dbl) && !zend_isnan(dbl)) + { + len = spprintf(&d, 0, "%.9g", dbl); + if (d) + { + smart_str_appendl(buf, d, len); + efree(d); + } + } + else + { + zend_error(E_WARNING, "[json] (json_encode_r) double %.9g does not conform to the JSON spec, encoded as 0.", dbl); + smart_str_appendc(buf, '0'); + } + } break; case IS_STRING: - jo = json_object_new_string_len(Z_STRVAL_P(val), Z_STRLEN_P(val)); + json_escape_string(buf, Z_STRVAL_P(val), Z_STRLEN_P(val) TSRMLS_CC); break; case IS_ARRAY: - jo = json_encode_array(&val TSRMLS_CC); - break; case IS_OBJECT: - jo = json_encode_array(&val TSRMLS_CC); + json_encode_array(buf, &val TSRMLS_CC); break; default: - zend_error(E_WARNING, "[json] (json_encode) type is unsupported\n"); - jo = NULL; + zend_error(E_WARNING, "[json] (json_encode_r) type is unsupported, encoded as null."); + smart_str_appendl(buf, "null", 4); break; } - return jo; + return; } PHP_FUNCTION(json_encode) { zval *parameter; - struct json_object *jo; - char *s; + smart_str buf = {0}; if (zend_parse_parameters(ZEND_NUM_ARGS() 
TSRMLS_CC, "z", ¶meter) == FAILURE) { return; } - jo = json_encode_r(parameter TSRMLS_CC); - - s = estrdup(json_object_to_json_string(jo)); - - json_object_put(jo); + json_encode_r(&buf, parameter TSRMLS_CC); - RETURN_STRING(s, 0); -} + ZVAL_STRINGL(return_value, buf.c, buf.len, 1); -static zval *json_decode_r(struct json_object *jo, zend_bool assoc TSRMLS_DC) { - zval *return_value; - - MAKE_STD_ZVAL(return_value); - - switch (json_object_get_type(jo)) { - case json_type_boolean: - ZVAL_BOOL(return_value, json_object_get_boolean(jo)); - break; - case json_type_double: - ZVAL_DOUBLE(return_value, json_object_get_double(jo)); - break; - case json_type_int: - ZVAL_LONG(return_value, json_object_get_int(jo)); - break; - case json_type_object: { - zval *mval; - struct json_object_iter iter; - - if (assoc) { - array_init(return_value); - } else { - object_init(return_value); - } - - json_object_object_foreachC(jo, iter) { - if (iter.val) { - mval = json_decode_r(iter.val, assoc TSRMLS_CC); - } else { - MAKE_STD_ZVAL(mval); - ZVAL_NULL(mval); - } - - if (assoc) { - add_assoc_zval(return_value, iter.key, mval); - } else { - add_property_zval(return_value, iter.key, mval); -#if PHP_MAJOR_VERSION >= 5 - ZVAL_DELREF(mval); -#endif - } - } - } - break; - case json_type_array: { - zval *mval; - struct json_object *val; - int i = 0, l; - - array_init(return_value); - l = json_object_array_length(jo); - for (i = 0; i < l; i++) { - val = json_object_array_get_idx(jo, i); - if (val) { - mval = json_decode_r(val, assoc TSRMLS_CC); - } else { - MAKE_STD_ZVAL(mval); - ZVAL_NULL(mval); - } - add_index_zval(return_value, i, mval); - } - } - break; - case json_type_string: { - char *s = json_object_get_string(jo); - ZVAL_STRING(return_value, s, 1); - break; - } - - default: - ZVAL_NULL(return_value); - break; - } - - return return_value; + smart_str_free(&buf); } PHP_FUNCTION(json_decode) { char *parameter; - int parameter_len; + int parameter_len, utf16_len; zend_bool assoc = 0; /* 
return JS objects as PHP objects by default */ - struct json_object *jo; zval *z; - + unsigned short *utf16; + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b", ¶meter, ¶meter_len, &assoc) == FAILURE) { return; } - jo = json_tokener_parse(parameter); - if (!jo) { - RETURN_NULL(); + if (!parameter_len) + { + RETURN_NULL(); } - z = json_decode_r(jo, assoc TSRMLS_CC); - if (!z) { - json_object_put(jo); + utf16 = (unsigned short *) emalloc((parameter_len+1) * sizeof(unsigned short)); + + utf16_len = utf8_to_utf16(utf16, parameter, parameter_len); + if (utf16_len <= 0) + { + if (utf16) + { + efree(utf16); + } + RETURN_NULL(); } - - json_object_put(jo); - - *return_value = *z; - FREE_ZVAL(z); + ALLOC_INIT_ZVAL(z); + if (JSON_parser(z, utf16, utf16_len, assoc TSRMLS_CC)) + { + *return_value = *z; - return; + FREE_ZVAL(z); + efree(utf16); + } + else + { + zval_dtor(z); + FREE_ZVAL(z); + efree(utf16); + RETURN_NULL(); + } } /* diff --git a/ext/json/json.dsp b/ext/json/json.dsp index e465b44644..e5bb3767bf 100644 --- a/ext/json/json.dsp +++ b/ext/json/json.dsp @@ -94,106 +94,42 @@ LINK32=link.exe SOURCE=".\json.c" # End Source File -# End Group -# Begin Group "Header Files" - -# PROP Default_Filter "h;hpp;hxx;hm;inl" -# Begin Source File - -SOURCE=.\php_json.h -# End Source File -# End Group -# Begin Group "Resource Files" - -# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" -# End Group -# Begin Group "json_c" - -# PROP Default_Filter "" # Begin Source File -SOURCE=.\json_c\arraylist.c +SOURCE=.\JSON_parser.c # End Source File # Begin Source File -SOURCE=.\json_c\arraylist.h +SOURCE=.\JSON_parser.h # End Source File # Begin Source File -SOURCE=.\json_c\bits.h +SOURCE=.\utf8_decode.c # End Source File # Begin Source File -SOURCE=.\json_c\ConvertUTF.c +SOURCE=.\utf8_decode.h # End Source File # Begin Source File -SOURCE=.\json_c\ConvertUTF.h +SOURCE=.\utf8_to_utf16.c # End Source File # Begin Source File -SOURCE=.\json_c\debug.c 
+SOURCE=.\utf8_to_utf16.h # End Source File -# Begin Source File - -SOURCE=.\json_c\debug.h -# End Source File -# Begin Source File - -SOURCE=.\json_c\json.h -# End Source File -# Begin Source File - -SOURCE=.\json_c\json_object.c -# End Source File -# Begin Source File - -SOURCE=.\json_c\json_object.h -# End Source File -# Begin Source File - -SOURCE=.\json_c\json_object_private.h -# End Source File -# Begin Source File - -SOURCE=.\json_c\json_tokener.c -# End Source File -# Begin Source File - -SOURCE=.\json_c\json_tokener.h -# End Source File -# Begin Source File - -SOURCE=.\json_c\json_util.c -# End Source File -# Begin Source File - -SOURCE=.\json_c\json_util.h -# End Source File -# Begin Source File - -SOURCE=.\json_c\linkhash.c -# End Source File -# Begin Source File - -SOURCE=.\json_c\linkhash.h -# End Source File -# Begin Source File - -SOURCE=.\json_c\ossupport.c -# End Source File -# Begin Source File +# End Group +# Begin Group "Header Files" -SOURCE=.\json_c\ossupport.h -# End Source File +# PROP Default_Filter "h;hpp;hxx;hm;inl" # Begin Source File -SOURCE=.\json_c\printbuf.c +SOURCE=.\php_json.h # End Source File -# Begin Source File +# End Group +# Begin Group "Resource Files" -SOURCE=.\json_c\printbuf.h -# End Source File +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" # End Group # End Target # End Project diff --git a/ext/json/package.xml b/ext/json/package.xml index 9778040eac..7b6df1de1c 100644 --- a/ext/json/package.xml +++ b/ext/json/package.xml @@ -15,48 +15,30 @@ Support for JSON (JavaScript Object Notation) serialization. - LGPL + PHP 3.01 - stable - 1.1.1 - 2006-01-12 + beta + 1.2.0 + 2006-01-31 - Cleanup and TSRM performance fixes by rasmus. + Complete rewrite using JSON_checker as the base for the parser. Implements the JSON specification. 3-8x faster on encodes and 1.2x-4x faster on decodes. - - + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + @@ -139,7 +121,15 @@ Port to Win32. 
- + + stable + 1.1.1 + 2006-01-12 + + Cleanup and TSRM performance fixes by rasmus. + + +