]> granicus.if.org Git - php/commitdiff
is_numeric_string() optimization
author Ilia Alshanetsky <iliaa@php.net>
Tue, 26 Dec 2006 16:44:20 +0000 (16:44 +0000)
committer Ilia Alshanetsky <iliaa@php.net>
Tue, 26 Dec 2006 16:44:20 +0000 (16:44 +0000)
# Original Patch by Matt Wilmas

Zend/bench.php
Zend/zend.c
Zend/zend_exceptions.c
Zend/zend_language_scanner.l
Zend/zend_operators.c
Zend/zend_operators.h
Zend/zend_strtod.c
Zend/zend_strtod.h

index f22704a7a773e648e17699c7c5f4a8e75d06488f..44000d991df5e7554bee931d4a3c17b8e0f5aab3 100644 (file)
@@ -1,4 +1,5 @@
 <?php
+date_default_timezone_set("UTC");
 
 function simple() {
   $a = 0;
index 719237173690f16fbeccb0db0395dbf16980f3fd..6801cc4db06c0168c8f9cb8ec654fabd5f10aea4 100644 (file)
@@ -214,7 +214,7 @@ ZEND_API void zend_make_printable_zval(zval *expr, zval *expr_copy, int *use_cop
                        }
                        break;
                case IS_RESOURCE:
-                       expr_copy->value.str.val = (char *) emalloc(sizeof("Resource id #")-1 + MAX_LENGTH_OF_LONG);
+                       expr_copy->value.str.val = (char *) emalloc(sizeof("Resource id #") + MAX_LENGTH_OF_LONG);
                        expr_copy->value.str.len = sprintf(expr_copy->value.str.val, "Resource id #%ld", expr->value.lval);
                        break;
                case IS_ARRAY:
index 6a7cf43bd55da4ce6b4e129386dab0edaad2188d..8f116065173eaad035e61b91e87ee7290f73d027 100644 (file)
@@ -404,7 +404,7 @@ static int _build_trace_string(zval **frame, int num_args, va_list args, zend_ha
                } else {
                        line = 0;
                }
-               s_tmp = emalloc(Z_STRLEN_PP(file) + MAX_LENGTH_OF_LONG + 2 + 1);
+               s_tmp = emalloc(Z_STRLEN_PP(file) + MAX_LENGTH_OF_LONG + 4 + 1);
                sprintf(s_tmp, "%s(%ld): ", Z_STRVAL_PP(file), line);
                TRACE_APPEND_STRL(s_tmp, strlen(s_tmp));
                efree(s_tmp);
index cceffd1969aaf26741b69a168d95cdf2543e8a4a..6c22224246ffe2475f87a5fbcebcbcdcb99001ef 100644 (file)
@@ -1236,38 +1236,44 @@ NEWLINE ("\r"|"\n"|"\r\n")
 
 
 <ST_IN_SCRIPTING>{LNUM} {
-       errno = 0;
-       zendlval->value.lval = strtol(yytext, NULL, 0);
-       if (errno == ERANGE) { /* overflow */
-               zendlval->value.dval = zend_strtod(yytext, NULL);
-               zendlval->type = IS_DOUBLE;
-               return T_DNUMBER;
+       if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
+               zendlval->value.lval = strtol(yytext, NULL, 0);
        } else {
-               zendlval->type = IS_LONG;
-               return T_LNUMBER;
-       }
-}
-
-<ST_IN_SCRIPTING>{HNUM} {
-       errno = 0;
-       zendlval->value.lval = strtoul(yytext, NULL, 16);
-       if (errno == ERANGE) { /* overflow */
-               /* not trying strtod - it returns trash on 0x-es */
-               zendlval->value.lval = LONG_MAX; /* maximal long */
-               zend_error(E_NOTICE,"Hex number is too big: %s", yytext);
-       } else {
-               if (zendlval->value.lval < 0) {
-                       /* maintain consistency with the old way */
-                       zendlval->value.dval = (unsigned long) zendlval->value.lval;
+               errno = 0;
+               zendlval->value.lval = strtol(yytext, NULL, 0);
+               if (errno == ERANGE) { /* Overflow */
+                       zendlval->value.dval = zend_strtod(yytext, NULL);
                        zendlval->type = IS_DOUBLE;
                        return T_DNUMBER;
                }
-               zendlval->type = IS_LONG;
        }
+
        zendlval->type = IS_LONG;
        return T_LNUMBER;
 }
 
+<ST_IN_SCRIPTING>{HNUM} {
+       /* Hex literal in scripting state: produce T_LNUMBER when the digits fit
+        * in a long, otherwise T_DNUMBER. Start by skipping the "0x" prefix */
+       yytext += 2;
+       yyleng -= 2;
+
+       /* Skip any leading 0s so yyleng counts significant hex digits only */
+       while (*yytext == '0') {
+               yytext++;
+               yyleng--;
+       }
+       /* Fewer than SIZEOF_LONG * 2 digits, or exactly that many with a first digit <= '7' (top bit clear), fits in a signed long -- assuming SIZEOF_LONG is the size of long in bytes */
+       if (yyleng < SIZEOF_LONG * 2 || (yyleng == SIZEOF_LONG * 2 && *yytext <= '7')) {
+               zendlval->value.lval = strtol(yytext, NULL, 16);
+               zendlval->type = IS_LONG;
+               return T_LNUMBER;
+       } else { /* too many digits for a long: convert to double instead */
+               zendlval->value.dval = zend_hex_strtod(yytext, NULL);
+               zendlval->type = IS_DOUBLE;
+               return T_DNUMBER;
+       }
+}
+
 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LNUM}|{HNUM} { /* treat numbers (almost) as strings inside encapsulated strings */
        zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
        zendlval->value.str.len = yyleng;
index 286f8ff515dcb22849885d2e6593241d6773c700..a233ede195611720b0496636058fcc9d3e8f41e4 100644 (file)
@@ -123,14 +123,9 @@ ZEND_API void convert_scalar_to_number(zval *op TSRMLS_DC)
                                char *strval;
 
                                strval = op->value.str.val;
-                               switch ((op->type=is_numeric_string(strval, op->value.str.len, &op->value.lval, &op->value.dval, 1))) {
-                                       case IS_DOUBLE:
-                                       case IS_LONG:
-                                               break;
-                                       default:
-                                               op->value.lval = strtol(op->value.str.val, NULL, 10);
-                                               op->type = IS_LONG;
-                                               break;
+                               if ((op->type=is_numeric_string(strval, op->value.str.len, &op->value.lval, &op->value.dval, 1)) == 0) {
+                                       op->value.lval = 0;
+                                       op->type = IS_LONG;
                                }
                                STR_FREE(strval);
                                break;
@@ -161,14 +156,9 @@ ZEND_API void convert_scalar_to_number(zval *op TSRMLS_DC)
                switch ((op)->type) {                                                                           \
                        case IS_STRING:                                                                                 \
                                {                                                                                                       \
-                                       switch (((holder).type=is_numeric_string((op)->value.str.val, (op)->value.str.len, &(holder).value.lval, &(holder).value.dval, 1))) {   \
-                                               case IS_DOUBLE:                                                                                                                 \
-                                               case IS_LONG:                                                                                                                   \
-                                                       break;                                                                                                                          \
-                                               default:                                                                                                                                \
-                                                       (holder).value.lval = strtol((op)->value.str.val, NULL, 10);            \
-                                                       (holder).type = IS_LONG;                                                \
-                                                       break;                                                                                  \
+                                       if (((holder).type=is_numeric_string((op)->value.str.val, (op)->value.str.len, &(holder).value.lval, &(holder).value.dval, 1)) == 0) {  \
+                                               (holder).value.lval = 0;                                                \
+                                               (holder).type = IS_LONG;                                                \
                                        }                                                                                                               \
                                        (op) = &(holder);                                                                               \
                                        break;                                                                                                  \
@@ -560,7 +550,7 @@ ZEND_API void _convert_to_string(zval *op ZEND_FILE_LINE_DC)
                        TSRMLS_FETCH();
 
                        zend_list_delete(op->value.lval);
-                       op->value.str.val = (char *) emalloc(sizeof("Resource id #")-1 + MAX_LENGTH_OF_LONG);
+                       op->value.str.val = (char *) emalloc(sizeof("Resource id #") + MAX_LENGTH_OF_LONG);
                        op->value.str.len = sprintf(op->value.str.val, "Resource id #%ld", tmp);
                        break;
                }
@@ -1227,10 +1217,14 @@ ZEND_API int concat_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
 ZEND_API int string_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
 {
        zval op1_copy, op2_copy;
-       int use_copy1, use_copy2;
+       int use_copy1 = 0, use_copy2 = 0;
 
-       zend_make_printable_zval(op1, &op1_copy, &use_copy1);
-       zend_make_printable_zval(op2, &op2_copy, &use_copy2);
+       if (op1->type != IS_STRING) {
+               zend_make_printable_zval(op1, &op1_copy, &use_copy1);
+       }
+       if (op2->type != IS_STRING) {
+               zend_make_printable_zval(op2, &op2_copy, &use_copy2);
+       }
 
        if (use_copy1) {
                op1 = &op1_copy;
@@ -1255,10 +1249,14 @@ ZEND_API int string_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_D
 ZEND_API int string_locale_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
 {
        zval op1_copy, op2_copy;
-       int use_copy1, use_copy2;
+       int use_copy1 = 0, use_copy2 = 0;
 
-       zend_make_printable_zval(op1, &op1_copy, &use_copy1);
-       zend_make_printable_zval(op2, &op2_copy, &use_copy2);
+       if (op1->type != IS_STRING) {
+               zend_make_printable_zval(op1, &op1_copy, &use_copy1);
+       }
+       if (op2->type != IS_STRING) {
+               zend_make_printable_zval(op2, &op2_copy, &use_copy2);
+       }
 
        if (use_copy1) {
                op1 = &op1_copy;
index 36b815bfd6f4935a5b62146f5eb76ab3ddbf5113..1a6f38a8b1e9c772997becb7fd90f4231c1eb308 100644 (file)
 #include "ext/bcmath/libbcmath/src/bcmath.h"
 #endif
 
+#if SIZEOF_LONG == 4
+#define MAX_LENGTH_OF_LONG 11
+static const char long_min_digits[] = "2147483648";
+#elif SIZEOF_LONG == 8
 #define MAX_LENGTH_OF_LONG 20
+static const char long_min_digits[] = "9223372036854775808";
+#else
+#error "Unknown SIZEOF_LONG"
+#endif
+
 #define MAX_LENGTH_OF_DOUBLE 32
 
 BEGIN_EXTERN_C()
@@ -66,82 +75,143 @@ ZEND_API zend_bool instanceof_function_ex(zend_class_entry *instance_ce, zend_cl
 ZEND_API zend_bool instanceof_function(zend_class_entry *instance_ce, zend_class_entry *ce TSRMLS_DC);
 END_EXTERN_C()
 
+#define ZEND_IS_DIGIT(c) ((c) >= '0' && (c) <= '9') /* true for decimal digits '0'-'9'; evaluates (c) twice */
+#define ZEND_IS_XDIGIT(c) (((c) >= 'A' && (c) <= 'F') || ((c) >= 'a'  && (c) <= 'f')) /* true for the hex letters A-F / a-f only, NOT for '0'-'9': combine with ZEND_IS_DIGIT for a full hex-digit test; evaluates (c) up to four times */
+
 /**
- * Checks whether the string "str" with the length "length" is a numeric string.
+ * Checks whether the string "str" with length "length" is numeric. The value
+ * of allow_errors determines whether it's required to be entirely numeric (0)
+ * or whether a numeric prefix suffices: 1 accepts trailing data silently, -1
+ * accepts it but raises an E_NOTICE. Leading whitespace is allowed.
  *
- * The function returns 0 if the string did not contain a string; IS_LONG if
- * the string contained a number that fits in the integer range and IS_DOUBLE
- * in case it did not. The long value is returned into the pointer *lval if
- * that pointer was not NULL or into the pointer *dval if that pointer was not
- * NULL.
+ * The function returns 0 if the string did not contain a valid number; IS_LONG
+ * if it contained a number that fits within the range of a long; or IS_DOUBLE
+ * if the number was out of long range or contained a decimal point/exponent.
+ * The number's value is returned into the respective pointer, *lval or *dval,
+ * if that pointer is not NULL.
  */
-static inline zend_bool is_numeric_string(char *str, int length, long *lval, double *dval, int allow_errors)
+
+static inline zend_uchar is_numeric_string(const char *str, int length, long *lval, double *dval, int allow_errors)
 {
-       long local_lval;
+       const char *ptr;
+       int base = 10, digits = 0, dp_or_e = 0;
        double local_dval;
-       char *end_ptr_long, *end_ptr_double;
-       int conv_base=10;
+       zend_uchar type;
 
        if (!length) {
                return 0;
        }
 
-       /* handle hex numbers */
-       if (length>=2 && str[0]=='0' && (str[1]=='x' || str[1]=='X')) {
-               conv_base=16;
-       }
-       errno=0;
-       local_lval = strtol(str, &end_ptr_long, conv_base);
-       if (errno!=ERANGE) {
-               if (end_ptr_long == str+length) { /* integer string */
-                       if (lval) {
-                               *lval = local_lval;
-                       }
-                       return IS_LONG;
-               } else if (end_ptr_long == str && *end_ptr_long != '\0' && *str != '.' && *str != '-') { /* ignore partial string matches */
-                       return 0;
-               }
-       } else {
-               end_ptr_long=NULL;
+       /* Skip any whitespace
+        * This is much faster than the isspace() function */
+       while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f') {
+               str++;
+               length--;
        }
+       ptr = str;
 
-       if (conv_base==16) { /* hex string, under UNIX strtod() messes it up */
-               return 0;
+       if (*ptr == '-' || *ptr == '+') {
+               ptr++;
        }
 
-       errno=0;
-       local_dval = zend_strtod(str, &end_ptr_double);
-       if (errno != ERANGE) {
-               if (end_ptr_double == str+length) { /* floating point string */
-                       if (!zend_finite(local_dval)) {
-                               /* "inf","nan" and maybe other weird ones */
-                               return 0;
+       if (ZEND_IS_DIGIT(*ptr)) {
+               /* Handle hex numbers
+                * str is used instead of ptr to disallow signs and keep old behavior */
+               if (length > 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) {
+                       base = 16;
+                       ptr += 2;
+               }
+
+               /* Skip any leading 0s */
+               while (*ptr == '0') {
+                       ptr++;
+               }
+
+               /* Count the number of digits. If a decimal point/exponent is found,
+                * it's a double. Otherwise, if there's a dval or no need to check for
+                * a full match, stop when there are too many digits for a long */
+               for (type = IS_LONG; !(digits >= MAX_LENGTH_OF_LONG && (dval || allow_errors == 1)); digits++, ptr++) {
+check_digits:
+                       if (ZEND_IS_DIGIT(*ptr) || (base == 16 && ZEND_IS_XDIGIT(*ptr))) {
+                               continue;
+                       } else if (base == 10) {
+                               if (*ptr == '.' && dp_or_e < 1) {
+                                       goto process_double;
+                               } else if ((*ptr == 'e' || *ptr == 'E') && dp_or_e < 2) {
+                                       const char *e = ptr + 1;
+
+                                       if (*e == '-' || *e == '+') {
+                                               ptr = e++;
+                                       }
+                                       if (ZEND_IS_DIGIT(*e)) {
+                                               goto process_double;
+                                       }
+                               }
                        }
 
+                       break;
+               }
+
+               if (base == 10) {
+                       if (digits >= MAX_LENGTH_OF_LONG) {
+                               dp_or_e = -1;
+                               goto process_double;
+                       }
+               } else if (!(digits < SIZEOF_LONG * 2 || (digits == SIZEOF_LONG * 2 && ptr[-digits] <= '7'))) {
                        if (dval) {
-                               *dval = local_dval;
+                               local_dval = zend_hex_strtod(str, (char **)&ptr);
                        }
-                       return IS_DOUBLE;
+                       type = IS_DOUBLE;
+               }
+       } else if (*ptr == '.' && ZEND_IS_DIGIT(ptr[1])) {
+process_double:
+               type = IS_DOUBLE;
+
+               /* If there's a dval, do the conversion; else continue checking
+                * the digits if we need to check for a full match */
+               if (dval) {
+                       local_dval = zend_strtod(str, (char **)&ptr);
+               } else if (allow_errors != 1 && dp_or_e != -1) {
+                       dp_or_e = (*ptr++ == '.') ? 1 : 2;
+                       goto check_digits;
                }
        } else {
-               end_ptr_double=NULL;
-       }
-
-       if (!allow_errors) {
                return 0;
        }
-       if (allow_errors == -1) {
-               zend_error(E_NOTICE, "A non well formed numeric value encountered");
+
+       if (ptr != str + length) {
+               if (!allow_errors) {
+                       return 0;
+               }
+               if (allow_errors == -1) {
+                       zend_error(E_NOTICE, "A non well formed numeric value encountered");
+               }
        }
 
-       if (end_ptr_double>end_ptr_long && dval) {
-               *dval = local_dval;
-               return IS_DOUBLE;
-       } else if (end_ptr_long && lval) {
-               *lval = local_lval;
+       if (type == IS_LONG) {
+               if (digits == MAX_LENGTH_OF_LONG - 1) {
+                       int cmp = strcmp(&ptr[-digits], long_min_digits);
+
+                       if (!(cmp < 0 || (cmp == 0 && *str == '-'))) {
+                               if (dval) {
+                                       *dval = zend_strtod(str, NULL);
+                               }
+
+                               return IS_DOUBLE;
+                       }
+               }
+
+               if (lval) {
+                       *lval = strtol(str, NULL, base);
+               }
+
                return IS_LONG;
+       } else {
+               if (dval) {
+                       *dval = local_dval;
+               }
+
+               return IS_DOUBLE;
        }
-       return 0;
 }
 
 static inline char *
index 4fed8c3552f3d9a62554b8a69a8cbbbf83e55efb..d388697f5b8e2175fe74e36d60fe4acdc152ff2d 100644 (file)
@@ -2557,6 +2557,39 @@ ret:
        return result;
 }
 
+ZEND_API double zend_hex_strtod(const char *str, char **endptr)
+{      /* Converts the leading run of hexadecimal digits in "str" to a double.
+        * An optional "0x"/"0X" prefix is skipped first. No sign, fraction or
+        * exponent is recognised and no overflow check is made.
+        *
+        * str    - NUL-terminated input string
+        * endptr - if not NULL, receives a pointer to the first character that
+        *          was not consumed, or "str" itself when no hex digit was read
+        *
+        * Returns the accumulated value (0 when no digit was read).
+        */
+       const char *s = str;
+       char c;
+       int any = 0; /* set to 1 once at least one hex digit has been consumed */
+       double value = 0;
+       /* Skip an optional "0x" / "0X" prefix */
+       if (*s == '0' && (s[1] == 'x' || s[1] == 'X')) {
+               s += 2;
+       }
+       /* NOTE(review): assignment used as the loop condition; writing it as ((c = *s++)) would make the intent explicit and silence -Wparentheses */
+       while (c = *s++) {
+               if (c >= '0' && c <= '9') {
+                       c -= '0';
+               } else if (c >= 'A' && c <= 'F') {
+                       c -= 'A' - 10;
+               } else if (c >= 'a' && c <= 'f') {
+                       c -= 'a' - 10;
+               } else {
+                       break; /* first non-hex character ends the number */
+               }
+               /* c now holds the digit's numeric value (0-15) */
+               any = 1;
+               value = value * 16 + c;
+       }
+       /* s was post-incremented past the character that stopped the loop, so s - 1 is the first unconsumed character */
+       if (endptr != NULL) {
+               *endptr = (char *)(any ? s - 1 : str);
+       }
+
+       return value;
+}
+
 /*
  * Local variables:
  * tab-width: 4
index fc42991ba89a664c4b088703b027550d01c281af..f2c15d27dab222be2a2618b32ea6582b36467d02 100644 (file)
@@ -28,6 +28,7 @@ BEGIN_EXTERN_C()
 ZEND_API void zend_freedtoa(char *s);
 ZEND_API char * zend_dtoa(double _d, int mode, int ndigits, int *decpt, int *sign, char **rve);
 ZEND_API double zend_strtod(const char *s00, char **se);
+ZEND_API double zend_hex_strtod(const char *str, char **endptr);
 ZEND_API int zend_startup_strtod(void);
 ZEND_API int zend_shutdown_strtod(void);
 END_EXTERN_C()