]> granicus.if.org Git - postgresql/commitdiff
Hand code string to integer conversion for performance.
authorAndres Freund <andres@anarazel.de>
Sun, 22 Jul 2018 21:58:01 +0000 (14:58 -0700)
committerAndres Freund <andres@anarazel.de>
Sun, 22 Jul 2018 21:58:23 +0000 (14:58 -0700)
As benchmarks show, using libc's string-to-integer conversion is
pretty slow. At least part of the reason for that is that strtol[l]
have to be more generic than what is largely required inside pg.

This patch considerably speeds up int2/int4 input (int8 was already
using hand-rolled code).

Most of the existing pg_atoi callers have been converted. But as one
requires pg_atoi's custom delimiter functionality, and as it seems
likely that there are external pg_atoi users, it seems sensible to just
keep pg_atoi around.

Author: Andres Freund
Reviewed-By: Robert Haas
Discussion: https://postgr.es/m/20171208214437.qgn6zdltyq5hmjpk@alap3.anarazel.de

13 files changed:
contrib/spi/refint.c
doc/src/sgml/sources.sgml
src/backend/libpq/pqmq.c
src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
src/backend/tsearch/wparser_def.c
src/backend/utils/adt/arrayutils.c
src/backend/utils/adt/int.c
src/backend/utils/adt/int8.c
src/backend/utils/adt/numutils.c
src/backend/utils/adt/varlena.c
src/include/utils/builtins.h
src/test/regress/expected/int2.out
src/test/regress/expected/select_parallel.out

index b065ffa400d4caf1c60ae1270495d7585a175a20..f90f2bce0ea673997c20c7b424c9f66dfcc3249c 100644 (file)
@@ -306,7 +306,7 @@ check_foreign_key(PG_FUNCTION_ARGS)
                /* internal error */
                elog(ERROR, "check_foreign_key: too short %d (< 5) list of arguments", nargs);
 
-       nrefs = pg_atoi(args[0], sizeof(int), 0);
+       nrefs = pg_strtoint32(args[0]);
        if (nrefs < 1)
                /* internal error */
                elog(ERROR, "check_foreign_key: %d (< 1) number of references specified", nrefs);
index 8870ee938aa5ed681d007406f5e08c127dc26784..b08919dc70f34625c26b267a1db2155c03e4a442 100644 (file)
@@ -709,7 +709,7 @@ BETTER: could not open file %s (I/O failure)
     not helpful information.  If the error text doesn't make as much sense
     without the function name, reword it.
 <programlisting>
-BAD:    pg_atoi: error in "z": cannot parse "z"
+BAD:    pg_strtoint32: error in "z": cannot parse "z"
 BETTER: invalid input syntax for integer: "z"
 </programlisting>
    </para>
index 201075dd4773f52933c5825f2bbb6e1b92cae74a..4fbc6b5115dda3bf153afa94c6581bbf7fc27c4b 100644 (file)
@@ -286,10 +286,10 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata)
                                edata->hint = pstrdup(value);
                                break;
                        case PG_DIAG_STATEMENT_POSITION:
-                               edata->cursorpos = pg_atoi(value, sizeof(int), '\0');
+                               edata->cursorpos = pg_strtoint32(value);
                                break;
                        case PG_DIAG_INTERNAL_POSITION:
-                               edata->internalpos = pg_atoi(value, sizeof(int), '\0');
+                               edata->internalpos = pg_strtoint32(value);
                                break;
                        case PG_DIAG_INTERNAL_QUERY:
                                edata->internalquery = pstrdup(value);
@@ -316,7 +316,7 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata)
                                edata->filename = pstrdup(value);
                                break;
                        case PG_DIAG_SOURCE_LINE:
-                               edata->lineno = pg_atoi(value, sizeof(int), '\0');
+                               edata->lineno = pg_strtoint32(value);
                                break;
                        case PG_DIAG_SOURCE_FUNCTION:
                                edata->funcname = pstrdup(value);
index bd489061602746f4226ea9bff472f4755261a506..1e1695ef4f40ad644e4871a72d629b984b15e4af 100644 (file)
@@ -345,7 +345,7 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli,
                                                   ntuples, nfields, 3, 1)));
        }
        primary_sysid = pstrdup(PQgetvalue(res, 0, 0));
-       *primary_tli = pg_atoi(PQgetvalue(res, 0, 1), 4, 0);
+       *primary_tli = pg_strtoint32(PQgetvalue(res, 0, 1));
        PQclear(res);
 
        *server_version = PQserverVersion(conn->streamConn);
@@ -480,7 +480,7 @@ libpqrcv_endstreaming(WalReceiverConn *conn, TimeLineID *next_tli)
                if (PQnfields(res) < 2 || PQntuples(res) != 1)
                        ereport(ERROR,
                                        (errmsg("unexpected result set after end-of-streaming")));
-               *next_tli = pg_atoi(PQgetvalue(res, 0, 0), sizeof(uint32), 0);
+               *next_tli = pg_strtoint32(PQgetvalue(res, 0, 0));
                PQclear(res);
 
                /* the result set should be followed by CommandComplete */
index f0c3441990583ffc7ee94a4047b9a234ec1c6cd6..d7cd2e58398023a3d2bac574c2fb281e8fa598d7 100644 (file)
@@ -2460,13 +2460,13 @@ prsd_headline(PG_FUNCTION_ARGS)
                char       *val = defGetString(defel);
 
                if (pg_strcasecmp(defel->defname, "MaxWords") == 0)
-                       max_words = pg_atoi(val, sizeof(int32), 0);
+                       max_words = pg_strtoint32(val);
                else if (pg_strcasecmp(defel->defname, "MinWords") == 0)
-                       min_words = pg_atoi(val, sizeof(int32), 0);
+                       min_words = pg_strtoint32(val);
                else if (pg_strcasecmp(defel->defname, "ShortWord") == 0)
-                       shortword = pg_atoi(val, sizeof(int32), 0);
+                       shortword = pg_strtoint32(val);
                else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0)
-                       max_fragments = pg_atoi(val, sizeof(int32), 0);
+                       max_fragments = pg_strtoint32(val);
                else if (pg_strcasecmp(defel->defname, "StartSel") == 0)
                        prs->startsel = pstrdup(val);
                else if (pg_strcasecmp(defel->defname, "StopSel") == 0)
index c0d719e98cc06a568374f5f6ab051cb91e6882c4..5b98efe76bcd174c4f9f022cf61c4c224efd5cbc 100644 (file)
@@ -226,8 +226,7 @@ ArrayGetIntegerTypmods(ArrayType *arr, int *n)
        result = (int32 *) palloc(*n * sizeof(int32));
 
        for (i = 0; i < *n; i++)
-               result[i] = pg_atoi(DatumGetCString(elem_values[i]),
-                                                       sizeof(int32), '\0');
+               result[i] = pg_strtoint32(DatumGetCString(elem_values[i]));
 
        pfree(elem_values);
 
index 02783d8d6fe59cc7c7e63f5c1a87a02f93728877..8149dc1369b918335dbd8dc85c31d83613e7228b 100644 (file)
@@ -60,7 +60,7 @@ int2in(PG_FUNCTION_ARGS)
 {
        char       *num = PG_GETARG_CSTRING(0);
 
-       PG_RETURN_INT16(pg_atoi(num, sizeof(int16), '\0'));
+       PG_RETURN_INT16(pg_strtoint16(num));
 }
 
 /*
@@ -265,7 +265,7 @@ int4in(PG_FUNCTION_ARGS)
 {
        char       *num = PG_GETARG_CSTRING(0);
 
-       PG_RETURN_INT32(pg_atoi(num, sizeof(int32), '\0'));
+       PG_RETURN_INT32(pg_strtoint32(num));
 }
 
 /*
index 49f32a8b3dd94d77292cd6a5a08183f0b4bf2fdf..3c595e800a44d21c566ee83d7910e4375caaaf34 100644 (file)
@@ -101,6 +101,7 @@ scanint8(const char *str, bool errorOK, int64 *result)
 
        if (!neg)
        {
+               /* could fail if input is most negative number */
                if (unlikely(tmp == PG_INT64_MIN))
                        goto out_of_range;
                tmp = -tmp;
index fb46f692e3a0b726e0c239058fe84f29ca50b633..49055d519cbac7284172bf0490d45f5856fd0da5 100644 (file)
@@ -18,6 +18,7 @@
 #include <limits.h>
 #include <ctype.h>
 
+#include "common/int.h"
 #include "utils/builtins.h"
 
 /*
@@ -108,6 +109,154 @@ pg_atoi(const char *s, int size, int c)
        return (int32) l;
 }
 
+/*
+ * Convert input string to a signed 16 bit integer.
+ *
+ * Accepted input is: optional leading whitespace, an optional single '-' or
+ * '+' sign, one or more decimal digits, and optional trailing whitespace.
+ * Whitespace and digits are classified with isspace() and isdigit().  An
+ * empty or all-whitespace string, whitespace between the sign and the first
+ * digit, or any other trailing character is rejected.
+ *
+ * Returns the converted value.  Will throw ereport() upon bad input format
+ * (ERRCODE_INVALID_TEXT_REPRESENTATION) or overflow
+ * (ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE); both messages quote the complete
+ * input string "s".
+ *
+ * NB: Accumulate input as a negative number, to deal with two's complement
+ * representation of the most negative number, which can't be represented as a
+ * positive number.
+ */
+int16
+pg_strtoint16(const char *s)
+{
+       const char *ptr = s;
+       int16           tmp = 0;
+       bool            neg = false;
+
+       /* skip leading spaces */
+       while (likely(*ptr) && isspace((unsigned char) *ptr))
+               ptr++;
+
+       /* handle sign: at most one '-' or '+' is consumed */
+       if (*ptr == '-')
+       {
+               ptr++;
+               neg = true;
+       }
+       else if (*ptr == '+')
+               ptr++;
+
+       /*
+        * require at least one digit; this also rejects an empty string, a
+        * lone sign, and whitespace following the sign
+        */
+       if (unlikely(!isdigit((unsigned char) *ptr)))
+               goto invalid_syntax;
+
+       /*
+        * process digits: multiply the running value by 10 and subtract the
+        * new digit, so tmp only ever moves from 0 towards more negative
+        * values.  A true result from either pg_*_s16_overflow call is
+        * treated as "out of range".
+        */
+       while (*ptr && isdigit((unsigned char) *ptr))
+       {
+               int8            digit = (*ptr++ - '0');
+
+               if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
+                       unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                       goto out_of_range;
+       }
+
+       /* allow trailing whitespace, but not other trailing chars */
+       while (*ptr != '\0' && isspace((unsigned char) *ptr))
+               ptr++;
+
+       if (unlikely(*ptr != '\0'))
+               goto invalid_syntax;
+
+       if (!neg)
+       {
+               /*
+                * Input was non-negative, so flip the negatively accumulated
+                * value.  That negation cannot be represented if the
+                * accumulated value is the most negative number, so report
+                * such input as out of range instead.
+                */
+               if (unlikely(tmp == PG_INT16_MIN))
+                       goto out_of_range;
+               tmp = -tmp;
+       }
+
+       return tmp;
+
+out_of_range:
+       ereport(ERROR,
+                       (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                        errmsg("value \"%s\" is out of range for type %s",
+                                       s, "smallint")));
+
+invalid_syntax:
+       ereport(ERROR,
+                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                        errmsg("invalid input syntax for type %s: \"%s\"",
+                                       "smallint", s)));
+}
+
+/*
+ * Convert input string to a signed 32 bit integer.
+ *
+ * Accepted input is: optional leading whitespace, an optional single '-' or
+ * '+' sign, one or more decimal digits, and optional trailing whitespace.
+ * Whitespace and digits are classified with isspace() and isdigit().  An
+ * empty or all-whitespace string, whitespace between the sign and the first
+ * digit, or any other trailing character is rejected.
+ *
+ * Returns the converted value.  Will throw ereport() upon bad input format
+ * (ERRCODE_INVALID_TEXT_REPRESENTATION) or overflow
+ * (ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE); both messages quote the complete
+ * input string "s".
+ *
+ * NB: Accumulate input as a negative number, to deal with two's complement
+ * representation of the most negative number, which can't be represented as a
+ * positive number.
+ */
+int32
+pg_strtoint32(const char *s)
+{
+       const char *ptr = s;
+       int32           tmp = 0;
+       bool            neg = false;
+
+       /* skip leading spaces */
+       while (likely(*ptr) && isspace((unsigned char) *ptr))
+               ptr++;
+
+       /* handle sign: at most one '-' or '+' is consumed */
+       if (*ptr == '-')
+       {
+               ptr++;
+               neg = true;
+       }
+       else if (*ptr == '+')
+               ptr++;
+
+       /*
+        * require at least one digit; this also rejects an empty string, a
+        * lone sign, and whitespace following the sign
+        */
+       if (unlikely(!isdigit((unsigned char) *ptr)))
+               goto invalid_syntax;
+
+       /*
+        * process digits: multiply the running value by 10 and subtract the
+        * new digit, so tmp only ever moves from 0 towards more negative
+        * values.  A true result from either pg_*_s32_overflow call is
+        * treated as "out of range".
+        */
+       while (*ptr && isdigit((unsigned char) *ptr))
+       {
+               int8            digit = (*ptr++ - '0');
+
+               if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
+                       unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                       goto out_of_range;
+       }
+
+       /* allow trailing whitespace, but not other trailing chars */
+       while (*ptr != '\0' && isspace((unsigned char) *ptr))
+               ptr++;
+
+       if (unlikely(*ptr != '\0'))
+               goto invalid_syntax;
+
+       if (!neg)
+       {
+               /*
+                * Input was non-negative, so flip the negatively accumulated
+                * value.  That negation cannot be represented if the
+                * accumulated value is the most negative number, so report
+                * such input as out of range instead.
+                */
+               if (unlikely(tmp == PG_INT32_MIN))
+                       goto out_of_range;
+               tmp = -tmp;
+       }
+
+       return tmp;
+
+out_of_range:
+       ereport(ERROR,
+                       (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                        errmsg("value \"%s\" is out of range for type %s",
+                                       s, "integer")));
+
+invalid_syntax:
+       ereport(ERROR,
+                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                        errmsg("invalid input syntax for type %s: \"%s\"",
+                                       "integer", s)));
+}
+
 /*
  * pg_itoa: converts a signed 16-bit integer to its string representation
  *
index e8500b274dc70fd40cde0303c4294e8688a93229..31eaa92c3b7abb6c944f990c9ab75a450735c326 100644 (file)
@@ -5155,8 +5155,8 @@ text_format(PG_FUNCTION_ARGS)
 
                                str = OutputFunctionCall(&typoutputinfo_width, value);
 
-                               /* pg_atoi will complain about bad data or overflow */
-                               width = pg_atoi(str, sizeof(int), '\0');
+                               /* pg_strtoint32 will complain about bad data or overflow */
+                               width = pg_strtoint32(str);
 
                                pfree(str);
                        }
index d0416e90fcce7f47a8962f6c173c44f315402480..88a42b345c181a5a1238f89dbb2349eb308049a3 100644 (file)
@@ -43,6 +43,8 @@ extern int    namestrcmp(Name name, const char *str);
 
 /* numutils.c */
 extern int32 pg_atoi(const char *s, int size, int c);
+extern int16 pg_strtoint16(const char *s);
+extern int32 pg_strtoint32(const char *s);
 extern void pg_itoa(int16 i, char *a);
 extern void pg_ltoa(int32 l, char *a);
 extern void pg_lltoa(int64 ll, char *a);
index a4ec2738cdc7f967f4a3c58498c5c43c080852ef..8c255b9e4dd499043f1b4518e45279557e33f0d2 100644 (file)
@@ -6,7 +6,7 @@ INSERT INTO INT2_TBL(f1) VALUES ('0   ');
 INSERT INTO INT2_TBL(f1) VALUES ('  1234 ');
 INSERT INTO INT2_TBL(f1) VALUES ('    -1234');
 INSERT INTO INT2_TBL(f1) VALUES ('34.5');
-ERROR:  invalid input syntax for type integer: "34.5"
+ERROR:  invalid input syntax for type smallint: "34.5"
 LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('34.5');
                                          ^
 -- largest and smallest values
@@ -18,27 +18,27 @@ ERROR:  value "100000" is out of range for type smallint
 LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('100000');
                                          ^
 INSERT INTO INT2_TBL(f1) VALUES ('asdf');
-ERROR:  invalid input syntax for type integer: "asdf"
+ERROR:  invalid input syntax for type smallint: "asdf"
 LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('asdf');
                                          ^
 INSERT INTO INT2_TBL(f1) VALUES ('    ');
-ERROR:  invalid input syntax for type integer: "    "
+ERROR:  invalid input syntax for type smallint: "    "
 LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('    ');
                                          ^
 INSERT INTO INT2_TBL(f1) VALUES ('- 1234');
-ERROR:  invalid input syntax for type integer: "- 1234"
+ERROR:  invalid input syntax for type smallint: "- 1234"
 LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('- 1234');
                                          ^
 INSERT INTO INT2_TBL(f1) VALUES ('4 444');
-ERROR:  invalid input syntax for type integer: "4 444"
+ERROR:  invalid input syntax for type smallint: "4 444"
 LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('4 444');
                                          ^
 INSERT INTO INT2_TBL(f1) VALUES ('123 dt');
-ERROR:  invalid input syntax for type integer: "123 dt"
+ERROR:  invalid input syntax for type smallint: "123 dt"
 LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('123 dt');
                                          ^
 INSERT INTO INT2_TBL(f1) VALUES ('');
-ERROR:  invalid input syntax for type integer: ""
+ERROR:  invalid input syntax for type smallint: ""
 LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('');
                                          ^
 SELECT '' AS five, * FROM INT2_TBL;
index 2f028d10525ee80f7ba81b2725f061d577e6fae5..f1b8cd43376710ee584c445cebecf0ba8168c3d8 100644 (file)
@@ -975,7 +975,7 @@ ROLLBACK TO SAVEPOINT settings;
 SAVEPOINT settings;
 SET LOCAL force_parallel_mode = 1;
 select stringu1::int2 from tenk1 where unique1 = 1;
-ERROR:  invalid input syntax for type integer: "BAAAAA"
+ERROR:  invalid input syntax for type smallint: "BAAAAA"
 CONTEXT:  parallel worker
 ROLLBACK TO SAVEPOINT settings;
 -- test interaction with set-returning functions