From 86eaf208ea048936df6be77276a246d3f92e9620 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sun, 22 Jul 2018 14:58:01 -0700 Subject: [PATCH] Hand code string to integer conversion for performance. As benchmarks show, using libc's string-to-integer conversion is pretty slow. At least part of the reason for that is that strtol[l] have to be more generic than what largely is required inside pg. This patch considerably speeds up int2/int4 input (int8 was already using hand-rolled code). Most of the existing pg_atoi callers have been converted. But as one requires pg_atoi's custom delimiter functionality, and as it seems likely that there are external pg_atoi users, it seems sensible to just keep pg_atoi around. Author: Andres Freund Reviewed-By: Robert Haas Discussion: https://postgr.es/m/20171208214437.qgn6zdltyq5hmjpk@alap3.anarazel.de --- contrib/spi/refint.c | 2 +- doc/src/sgml/sources.sgml | 2 +- src/backend/libpq/pqmq.c | 6 +- .../libpqwalreceiver/libpqwalreceiver.c | 4 +- src/backend/tsearch/wparser_def.c | 8 +- src/backend/utils/adt/arrayutils.c | 3 +- src/backend/utils/adt/int.c | 4 +- src/backend/utils/adt/int8.c | 1 + src/backend/utils/adt/numutils.c | 149 ++++++++++++++++++ src/backend/utils/adt/varlena.c | 4 +- src/include/utils/builtins.h | 2 + src/test/regress/expected/int2.out | 14 +- src/test/regress/expected/select_parallel.out | 2 +- 13 files changed, 176 insertions(+), 25 deletions(-) diff --git a/contrib/spi/refint.c b/contrib/spi/refint.c index b065ffa400..f90f2bce0e 100644 --- a/contrib/spi/refint.c +++ b/contrib/spi/refint.c @@ -306,7 +306,7 @@ check_foreign_key(PG_FUNCTION_ARGS) /* internal error */ elog(ERROR, "check_foreign_key: too short %d (< 5) list of arguments", nargs); - nrefs = pg_atoi(args[0], sizeof(int), 0); + nrefs = pg_strtoint32(args[0]); if (nrefs < 1) /* internal error */ elog(ERROR, "check_foreign_key: %d (< 1) number of references specified", nrefs); diff --git a/doc/src/sgml/sources.sgml 
b/doc/src/sgml/sources.sgml index 8870ee938a..b08919dc70 100644 --- a/doc/src/sgml/sources.sgml +++ b/doc/src/sgml/sources.sgml @@ -709,7 +709,7 @@ BETTER: could not open file %s (I/O failure) not helpful information. If the error text doesn't make as much sense without the function name, reword it. -BAD: pg_atoi: error in "z": cannot parse "z" +BAD: pg_strtoint32: error in "z": cannot parse "z" BETTER: invalid input syntax for integer: "z" diff --git a/src/backend/libpq/pqmq.c b/src/backend/libpq/pqmq.c index 201075dd47..4fbc6b5115 100644 --- a/src/backend/libpq/pqmq.c +++ b/src/backend/libpq/pqmq.c @@ -286,10 +286,10 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata) edata->hint = pstrdup(value); break; case PG_DIAG_STATEMENT_POSITION: - edata->cursorpos = pg_atoi(value, sizeof(int), '\0'); + edata->cursorpos = pg_strtoint32(value); break; case PG_DIAG_INTERNAL_POSITION: - edata->internalpos = pg_atoi(value, sizeof(int), '\0'); + edata->internalpos = pg_strtoint32(value); break; case PG_DIAG_INTERNAL_QUERY: edata->internalquery = pstrdup(value); @@ -316,7 +316,7 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata) edata->filename = pstrdup(value); break; case PG_DIAG_SOURCE_LINE: - edata->lineno = pg_atoi(value, sizeof(int), '\0'); + edata->lineno = pg_strtoint32(value); break; case PG_DIAG_SOURCE_FUNCTION: edata->funcname = pstrdup(value); diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index bd48906160..1e1695ef4f 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -345,7 +345,7 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli, ntuples, nfields, 3, 1))); } primary_sysid = pstrdup(PQgetvalue(res, 0, 0)); - *primary_tli = pg_atoi(PQgetvalue(res, 0, 1), 4, 0); + *primary_tli = pg_strtoint32(PQgetvalue(res, 0, 1)); PQclear(res); *server_version = 
PQserverVersion(conn->streamConn); @@ -480,7 +480,7 @@ libpqrcv_endstreaming(WalReceiverConn *conn, TimeLineID *next_tli) if (PQnfields(res) < 2 || PQntuples(res) != 1) ereport(ERROR, (errmsg("unexpected result set after end-of-streaming"))); - *next_tli = pg_atoi(PQgetvalue(res, 0, 0), sizeof(uint32), 0); + *next_tli = pg_strtoint32(PQgetvalue(res, 0, 0)); PQclear(res); /* the result set should be followed by CommandComplete */ diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index f0c3441990..d7cd2e5839 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -2460,13 +2460,13 @@ prsd_headline(PG_FUNCTION_ARGS) char *val = defGetString(defel); if (pg_strcasecmp(defel->defname, "MaxWords") == 0) - max_words = pg_atoi(val, sizeof(int32), 0); + max_words = pg_strtoint32(val); else if (pg_strcasecmp(defel->defname, "MinWords") == 0) - min_words = pg_atoi(val, sizeof(int32), 0); + min_words = pg_strtoint32(val); else if (pg_strcasecmp(defel->defname, "ShortWord") == 0) - shortword = pg_atoi(val, sizeof(int32), 0); + shortword = pg_strtoint32(val); else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0) - max_fragments = pg_atoi(val, sizeof(int32), 0); + max_fragments = pg_strtoint32(val); else if (pg_strcasecmp(defel->defname, "StartSel") == 0) prs->startsel = pstrdup(val); else if (pg_strcasecmp(defel->defname, "StopSel") == 0) diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c index c0d719e98c..5b98efe76b 100644 --- a/src/backend/utils/adt/arrayutils.c +++ b/src/backend/utils/adt/arrayutils.c @@ -226,8 +226,7 @@ ArrayGetIntegerTypmods(ArrayType *arr, int *n) result = (int32 *) palloc(*n * sizeof(int32)); for (i = 0; i < *n; i++) - result[i] = pg_atoi(DatumGetCString(elem_values[i]), - sizeof(int32), '\0'); + result[i] = pg_strtoint32(DatumGetCString(elem_values[i])); pfree(elem_values); diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index 
02783d8d6f..8149dc1369 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -60,7 +60,7 @@ int2in(PG_FUNCTION_ARGS) { char *num = PG_GETARG_CSTRING(0); - PG_RETURN_INT16(pg_atoi(num, sizeof(int16), '\0')); + PG_RETURN_INT16(pg_strtoint16(num)); } /* @@ -265,7 +265,7 @@ int4in(PG_FUNCTION_ARGS) { char *num = PG_GETARG_CSTRING(0); - PG_RETURN_INT32(pg_atoi(num, sizeof(int32), '\0')); + PG_RETURN_INT32(pg_strtoint32(num)); } /* diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 49f32a8b3d..3c595e800a 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -101,6 +101,7 @@ scanint8(const char *str, bool errorOK, int64 *result) if (!neg) { + /* could fail if input is most negative number */ if (unlikely(tmp == PG_INT64_MIN)) goto out_of_range; tmp = -tmp; diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index fb46f692e3..49055d519c 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -18,6 +18,7 @@ #include #include +#include "common/int.h" #include "utils/builtins.h" /* @@ -108,6 +109,154 @@ pg_atoi(const char *s, int size, int c) return (int32) l; } +/* + * Convert input string to a signed 16 bit integer. + * + * Allows any number of leading or trailing whitespace characters. Will throw + * ereport() upon bad input format or overflow. + * + * NB: Accumulate input as a negative number, to deal with two's complement + * representation of the most negative number, which can't be represented as a + * positive number. 
+ */ +int16 +pg_strtoint16(const char *s) +{ + const char *ptr = s; + int16 tmp = 0; + bool neg = false; + + /* skip leading spaces */ + while (likely(*ptr) && isspace((unsigned char) *ptr)) + ptr++; + + /* handle sign */ + if (*ptr == '-') + { + ptr++; + neg = true; + } + else if (*ptr == '+') + ptr++; + + /* require at least one digit */ + if (unlikely(!isdigit((unsigned char) *ptr))) + goto invalid_syntax; + + /* process digits */ + while (*ptr && isdigit((unsigned char) *ptr)) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) || + unlikely(pg_sub_s16_overflow(tmp, digit, &tmp))) + goto out_of_range; + } + + /* allow trailing whitespace, but not other trailing chars */ + while (*ptr != '\0' && isspace((unsigned char) *ptr)) + ptr++; + + if (unlikely(*ptr != '\0')) + goto invalid_syntax; + + if (!neg) + { + /* could fail if input is most negative number */ + if (unlikely(tmp == PG_INT16_MIN)) + goto out_of_range; + tmp = -tmp; + } + + return tmp; + +out_of_range: + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + s, "smallint"))); + +invalid_syntax: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "smallint", s))); +} + +/* + * Convert input string to a signed 32 bit integer. + * + * Allows any number of leading or trailing whitespace characters. Will throw + * ereport() upon bad input format or overflow. + * + * NB: Accumulate input as a negative number, to deal with two's complement + * representation of the most negative number, which can't be represented as a + * positive number. 
+ */ +int32 +pg_strtoint32(const char *s) +{ + const char *ptr = s; + int32 tmp = 0; + bool neg = false; + + /* skip leading spaces */ + while (likely(*ptr) && isspace((unsigned char) *ptr)) + ptr++; + + /* handle sign */ + if (*ptr == '-') + { + ptr++; + neg = true; + } + else if (*ptr == '+') + ptr++; + + /* require at least one digit */ + if (unlikely(!isdigit((unsigned char) *ptr))) + goto invalid_syntax; + + /* process digits */ + while (*ptr && isdigit((unsigned char) *ptr)) + { + int8 digit = (*ptr++ - '0'); + + if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) || + unlikely(pg_sub_s32_overflow(tmp, digit, &tmp))) + goto out_of_range; + } + + /* allow trailing whitespace, but not other trailing chars */ + while (*ptr != '\0' && isspace((unsigned char) *ptr)) + ptr++; + + if (unlikely(*ptr != '\0')) + goto invalid_syntax; + + if (!neg) + { + /* could fail if input is most negative number */ + if (unlikely(tmp == PG_INT32_MIN)) + goto out_of_range; + tmp = -tmp; + } + + return tmp; + +out_of_range: + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value \"%s\" is out of range for type %s", + s, "integer"))); + +invalid_syntax: + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "integer", s))); +} + /* * pg_itoa: converts a signed 16-bit integer to its string representation * diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index e8500b274d..31eaa92c3b 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -5155,8 +5155,8 @@ text_format(PG_FUNCTION_ARGS) str = OutputFunctionCall(&typoutputinfo_width, value); - /* pg_atoi will complain about bad data or overflow */ - width = pg_atoi(str, sizeof(int), '\0'); + /* pg_strtoint32 will complain about bad data or overflow */ + width = pg_strtoint32(str); pfree(str); } diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 
d0416e90fc..88a42b345c 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -43,6 +43,8 @@ extern int namestrcmp(Name name, const char *str); /* numutils.c */ extern int32 pg_atoi(const char *s, int size, int c); +extern int16 pg_strtoint16(const char *s); +extern int32 pg_strtoint32(const char *s); extern void pg_itoa(int16 i, char *a); extern void pg_ltoa(int32 l, char *a); extern void pg_lltoa(int64 ll, char *a); diff --git a/src/test/regress/expected/int2.out b/src/test/regress/expected/int2.out index a4ec2738cd..8c255b9e4d 100644 --- a/src/test/regress/expected/int2.out +++ b/src/test/regress/expected/int2.out @@ -6,7 +6,7 @@ INSERT INTO INT2_TBL(f1) VALUES ('0 '); INSERT INTO INT2_TBL(f1) VALUES (' 1234 '); INSERT INTO INT2_TBL(f1) VALUES (' -1234'); INSERT INTO INT2_TBL(f1) VALUES ('34.5'); -ERROR: invalid input syntax for type integer: "34.5" +ERROR: invalid input syntax for type smallint: "34.5" LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('34.5'); ^ -- largest and smallest values @@ -18,27 +18,27 @@ ERROR: value "100000" is out of range for type smallint LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('100000'); ^ INSERT INTO INT2_TBL(f1) VALUES ('asdf'); -ERROR: invalid input syntax for type integer: "asdf" +ERROR: invalid input syntax for type smallint: "asdf" LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('asdf'); ^ INSERT INTO INT2_TBL(f1) VALUES (' '); -ERROR: invalid input syntax for type integer: " " +ERROR: invalid input syntax for type smallint: " " LINE 1: INSERT INTO INT2_TBL(f1) VALUES (' '); ^ INSERT INTO INT2_TBL(f1) VALUES ('- 1234'); -ERROR: invalid input syntax for type integer: "- 1234" +ERROR: invalid input syntax for type smallint: "- 1234" LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('- 1234'); ^ INSERT INTO INT2_TBL(f1) VALUES ('4 444'); -ERROR: invalid input syntax for type integer: "4 444" +ERROR: invalid input syntax for type smallint: "4 444" LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('4 444'); ^ INSERT INTO INT2_TBL(f1) 
VALUES ('123 dt'); -ERROR: invalid input syntax for type integer: "123 dt" +ERROR: invalid input syntax for type smallint: "123 dt" LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('123 dt'); ^ INSERT INTO INT2_TBL(f1) VALUES (''); -ERROR: invalid input syntax for type integer: "" +ERROR: invalid input syntax for type smallint: "" LINE 1: INSERT INTO INT2_TBL(f1) VALUES (''); ^ SELECT '' AS five, * FROM INT2_TBL; diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 2f028d1052..f1b8cd4337 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -975,7 +975,7 @@ ROLLBACK TO SAVEPOINT settings; SAVEPOINT settings; SET LOCAL force_parallel_mode = 1; select stringu1::int2 from tenk1 where unique1 = 1; -ERROR: invalid input syntax for type integer: "BAAAAA" +ERROR: invalid input syntax for type smallint: "BAAAAA" CONTEXT: parallel worker ROLLBACK TO SAVEPOINT settings; -- test interaction with set-returning functions -- 2.40.0