From a6f22e83562d8b78293229587cd3d9430d16d466 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 27 Mar 2017 20:14:36 -0400 Subject: [PATCH] Show ignored constants as "$N" rather than "?" in pg_stat_statements. The trouble with the original choice here is that "?" is a valid (and indeed used) operator name, so that you could end up with ambiguous statement texts like "SELECT ? ? ?". With this patch, you instead see "SELECT $1 ? $2", which seems significantly more readable. The numbers used for this purpose begin after the last actual $N parameter in the particular query. The conflict with external parameters has its own potential for confusion of course, but it was agreed to be an improvement over the previous behavior. Lukas Fittl Discussion: https://postgr.es/m/CAP53PkxeaCuwYmF-A4J5z2-qk5fYFo5_NH3gpXGJJBxv1DMwEw@mail.gmail.com --- .../expected/pg_stat_statements.out | 113 ++++++++++++------ .../pg_stat_statements/pg_stat_statements.c | 40 +++++-- .../sql/pg_stat_statements.sql | 16 ++- doc/src/sgml/pgstatstatements.sgml | 29 +++-- 4 files changed, 142 insertions(+), 56 deletions(-) diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index fd53f15d8b..5318c3550c 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -68,6 +68,13 @@ SELECT 1 AS i UNION SELECT 2 ORDER BY i; 2 (2 rows) +-- ? operator +select '{"a":1, "b":2}'::jsonb ? 'b'; + ?column? +---------- + t +(1 row) + -- cte WITH t(f) AS ( VALUES (1.0), (2.0) @@ -79,24 +86,35 @@ WITH t(f) AS ( 2.0 (2 rows) +-- prepared statement with parameter +PREPARE pgss_test (int) AS SELECT $1, 'test' LIMIT 1; +EXECUTE pgss_test(1); + ?column? | ?column? +----------+---------- + 1 | test +(1 row) + +DEALLOCATE pgss_test; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls | rows ------------------------------------------+-------+------ - SELECT ? +| 4 | 4 - +| | - AS "text" | | - SELECT ? + ? | 2 | 2 - SELECT ? + ? + ? AS "add" | 3 | 3 - SELECT ? AS "float" | 1 | 1 - SELECT ? AS "int" | 2 | 2 - SELECT ? AS i UNION SELECT ? ORDER BY i | 1 | 2 - SELECT ? || ? | 1 | 1 - SELECT pg_stat_statements_reset() | 1 | 1 - WITH t(f) AS ( +| 1 | 2 - VALUES (?), (?) +| | - ) +| | - SELECT f FROM t ORDER BY f | | -(9 rows) + query | calls | rows +---------------------------------------------------+-------+------ + PREPARE pgss_test (int) AS SELECT $1, $2 LIMIT $3 | 1 | 1 + SELECT $1 +| 4 | 4 + +| | + AS "text" | | + SELECT $1 + $2 | 2 | 2 + SELECT $1 + $2 + $3 AS "add" | 3 | 3 + SELECT $1 AS "float" | 1 | 1 + SELECT $1 AS "int" | 2 | 2 + SELECT $1 AS i UNION SELECT $2 ORDER BY i | 1 | 2 + SELECT $1 || $2 | 1 | 1 + SELECT pg_stat_statements_reset() | 1 | 1 + WITH t(f) AS ( +| 1 | 2 + VALUES ($1), ($2) +| | + ) +| | + SELECT f FROM t ORDER BY f | | + select $1::jsonb ? $2 | 1 | 1 +(11 rows) -- -- CRUD: INSERT SELECT UPDATE DELETE on test table @@ -108,7 +126,7 @@ SELECT pg_stat_statements_reset(); (1 row) -- utility "create table" should not be shown -CREATE TABLE test (a int, b char(20)); +CREATE TEMP TABLE test (a int, b char(20)); INSERT INTO test VALUES(generate_series(1, 10), 'aaa'); UPDATE test SET b = 'bbb' WHERE a > 7; DELETE FROM test WHERE a > 9; @@ -125,6 +143,8 @@ BEGIN \; UPDATE test SET b = '555' WHERE a = 5 \; UPDATE test SET b = '666' WHERE a = 6 \; COMMIT ; +-- many INSERT values +INSERT INTO test (a, b) VALUES (1, 'a'), (2, 'b'), (3, 'c'); -- SELECT with constants SELECT * FROM test WHERE a > 5 ORDER BY a ; a | b @@ -147,8 +167,11 @@ SELECT * SELECT * FROM test ORDER BY a; a | b ---+---------------------- + 1 | a 1 | 111 + 2 | b 2 | 222 + 3 | c 3 | 333 4 | 444 5 | 555 @@ -156,19 +179,35 @@ SELECT * FROM test ORDER BY a; 7 | aaa 8 | bbb 9 | bbb -(9 rows) +(12 rows) + +-- SELECT with IN clause +SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5); + a | b +---+---------------------- + 1 | 111 + 2 | 222 + 3 | 333 + 4 | 444 + 5 | 555 + 1 | a + 2 | b + 3 | c +(8 rows) SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls | rows ----------------------------------------------------+-------+------ - DELETE FROM test WHERE a > ? | 1 | 1 - INSERT INTO test VALUES(generate_series(?, ?), ?) | 1 | 10 - SELECT * FROM test ORDER BY a | 1 | 9 - SELECT * FROM test WHERE a > ? ORDER BY a | 2 | 4 - SELECT pg_stat_statements_reset() | 1 | 1 - UPDATE test SET b = ? WHERE a = ? | 6 | 6 - UPDATE test SET b = ? WHERE a > ? | 1 | 3 -(7 rows) + query | calls | rows +-------------------------------------------------------------+-------+------ + DELETE FROM test WHERE a > $1 | 1 | 1 + INSERT INTO test (a, b) VALUES ($1, $2), ($3, $4), ($5, $6) | 1 | 3 + INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10 + SELECT * FROM test ORDER BY a | 1 | 12 + SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4 + SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8 + SELECT pg_stat_statements_reset() | 1 | 1 + UPDATE test SET b = $1 WHERE a = $2 | 6 | 6 + UPDATE test SET b = $1 WHERE a > $2 | 1 | 3 +(9 rows) -- -- pg_stat_statements.track = none @@ -251,9 +290,9 @@ SELECT PLUS_ONE(10); SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls | rows -----------------------------------+-------+------ - SELECT ?::TEXT | 1 | 1 - SELECT PLUS_ONE(?) | 2 | 2 - SELECT PLUS_TWO(?) | 2 | 2 + SELECT $1::TEXT | 1 | 1 + SELECT PLUS_ONE($1) | 2 | 2 + SELECT PLUS_TWO($1) | 2 | 2 SELECT pg_stat_statements_reset() | 1 | 1 (4 rows) @@ -308,10 +347,10 @@ SELECT PLUS_ONE(1); SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls | rows -----------------------------------+-------+------ - SELECT (i + ? + ?)::INTEGER | 2 | 2 - SELECT (i + ?)::INTEGER LIMIT ? | 2 | 2 - SELECT PLUS_ONE(?) | 2 | 2 - SELECT PLUS_TWO(?) | 2 | 2 + SELECT (i + $2 + $3)::INTEGER | 2 | 2 + SELECT (i + $2)::INTEGER LIMIT $3 | 2 | 2 + SELECT PLUS_ONE($1) | 2 | 2 + SELECT PLUS_TWO($1) | 2 | 2 SELECT pg_stat_statements_reset() | 1 | 1 (5 rows) @@ -352,7 +391,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; DROP FUNCTION PLUS_TWO(INTEGER) | 1 | 0 DROP TABLE IF EXISTS test | 3 | 0 DROP TABLE test | 1 | 0 - SELECT ? | 1 | 1 + SELECT $1 | 1 | 1 SELECT pg_stat_statements_reset() | 1 | 1 (8 rows) diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 42f43233f8..cd4c16e9d2 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -27,10 +27,10 @@ * to blame query costs on the proper queryId. * * To facilitate presenting entries to users, we create "representative" query - * strings in which constants are replaced with '?' characters, to make it - * clearer what a normalized entry can represent. To save on shared memory, - * and to avoid having to truncate oversized query strings, we store these - * strings in a temporary external query-texts file. Offsets into this + * strings in which constants are replaced with parameter symbols ($n), to + * make it clearer what a normalized entry can represent. To save on shared + * memory, and to avoid having to truncate oversized query strings, we store + * these strings in a temporary external query-texts file. Offsets into this * file are kept in shared memory. * * Note about locking issues: to create or delete an entry in the shared @@ -219,6 +219,9 @@ typedef struct pgssJumbleState /* Current number of valid entries in clocations array */ int clocations_count; + + /* highest Param id we've seen, in order to start normalization correctly */ + int highest_extern_param_id; } pgssJumbleState; /*---- Local variables ----*/ @@ -803,6 +806,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query) jstate.clocations = (pgssLocationLen *) palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); jstate.clocations_count = 0; + jstate.highest_extern_param_id = 0; /* Compute query ID and mark the Query node with it */ JumbleQuery(&jstate, query); @@ -2482,6 +2486,10 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) APP_JUMB(p->paramkind); APP_JUMB(p->paramid); APP_JUMB(p->paramtype); + /* Also, track the highest external Param id */ + if (p->paramkind == PARAM_EXTERN && + p->paramid > jstate->highest_extern_param_id) + jstate->highest_extern_param_id = p->paramid; } break; case T_Aggref: @@ -2874,7 +2882,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) break; case T_TableFunc: { - TableFunc *tablefunc = (TableFunc *) node; + TableFunc *tablefunc = (TableFunc *) node; JumbleExpr(jstate, tablefunc->docexpr); JumbleExpr(jstate, tablefunc->rowexpr); @@ -2938,7 +2946,8 @@ RecordConstLocation(pgssJumbleState *jstate, int location) * of interest, so it's worth doing.) * * *query_len_p contains the input string length, and is updated with - * the result string length (which cannot be longer) on exit. + * the result string length on exit. The resulting string might be longer + * or shorter depending on what happens with replacement of constants. * * Returns a palloc'd string. */ @@ -2949,6 +2958,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query, char *norm_query; int query_len = *query_len_p; int i, + norm_query_buflen, /* Space allowed for norm_query */ len_to_wrt, /* Length (in bytes) to write */ quer_loc = 0, /* Source query byte location */ n_quer_loc = 0, /* Normalized query byte location */ @@ -2961,8 +2971,17 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query, */ fill_in_constant_lengths(jstate, query, query_loc); + /* + * Allow for $n symbols to be longer than the constants they replace. + * Constants must take at least one byte in text form, while a $n symbol + * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We + * could refine that limit based on the max value of n for the current + * query, but it hardly seems worth any extra effort to do so. + */ + norm_query_buflen = query_len + jstate->clocations_count * 10; + /* Allocate result buffer */ - norm_query = palloc(query_len + 1); + norm_query = palloc(norm_query_buflen + 1); for (i = 0; i < jstate->clocations_count; i++) { @@ -2986,8 +3005,9 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query, memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); n_quer_loc += len_to_wrt; - /* And insert a '?' in place of the constant token */ - norm_query[n_quer_loc++] = '?'; + /* And insert a param symbol in place of the constant token */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", + i + 1 + jstate->highest_extern_param_id); quer_loc = off + tok_len; last_off = off; @@ -3004,7 +3024,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query, memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); n_quer_loc += len_to_wrt; - Assert(n_quer_loc <= query_len); + Assert(n_quer_loc <= norm_query_buflen); norm_query[n_quer_loc] = '\0'; *query_len_p = n_quer_loc; diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index 385c8a8d99..a8361fd1bf 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -37,12 +37,20 @@ SELECT :add + 1 + 1 AS "add" \gset -- set operator SELECT 1 AS i UNION SELECT 2 ORDER BY i; +-- ? operator +select '{"a":1, "b":2}'::jsonb ? 'b'; + -- cte WITH t(f) AS ( VALUES (1.0), (2.0) ) SELECT f FROM t ORDER BY f; +-- prepared statement with parameter +PREPARE pgss_test (int) AS SELECT $1, 'test' LIMIT 1; +EXECUTE pgss_test(1); +DEALLOCATE pgss_test; + SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; -- @@ -51,7 +59,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset(); -- utility "create table" should not be shown -CREATE TABLE test (a int, b char(20)); +CREATE TEMP TABLE test (a int, b char(20)); INSERT INTO test VALUES(generate_series(1, 10), 'aaa'); UPDATE test SET b = 'bbb' WHERE a > 7; @@ -74,6 +82,9 @@ UPDATE test SET b = '555' WHERE a = 5 \; UPDATE test SET b = '666' WHERE a = 6 \; COMMIT ; +-- many INSERT values +INSERT INTO test (a, b) VALUES (1, 'a'), (2, 'b'), (3, 'c'); + -- SELECT with constants SELECT * FROM test WHERE a > 5 ORDER BY a ; @@ -85,6 +96,9 @@ SELECT * -- SELECT without constants SELECT * FROM test ORDER BY a; +-- SELECT with IN clause +SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5); + SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; -- diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index d4f09fc2a3..082994cae0 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -244,11 +244,12 @@ - When a constant's value has been ignored for purposes of matching the - query to other queries, the constant is replaced by ? - in the pg_stat_statements display. The rest of the query - text is that of the first query that had the particular - queryid hash value associated with the + When a constant's value has been ignored for purposes of matching the query + to other queries, the constant is replaced by a parameter symbol, such + as $1, in the pg_stat_statements + display. + The rest of the query text is that of the first query that had the + particular queryid hash value associated with the pg_stat_statements entry. @@ -301,6 +302,18 @@ replicas. If in doubt, direct testing is recommended. + + The parameter symbols used to replace constants in + representative query texts start from the next number after the + highest $n parameter in the original query + text, or $1 if there was none. It's worth noting that in + some cases there may be hidden parameter symbols that affect this + numbering. For example, PL/pgSQL uses hidden parameter + symbols to insert values of function local variables into queries, so that + a PL/pgSQL statement like SELECT i + 1 INTO j + would have representative text like SELECT i + $2. + + The representative query texts are kept in an external disk file, and do not consume shared memory. Therefore, even very lengthy query texts can @@ -481,13 +494,13 @@ bench=# SELECT query, calls, total_time, rows, 100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent FROM pg_stat_statements ORDER BY total_time DESC LIMIT 5; -[ RECORD 1 ]--------------------------------------------------------------------- -query | UPDATE pgbench_branches SET bbalance = bbalance + ? WHERE bid = ?; +query | UPDATE pgbench_branches SET bbalance = bbalance + $1 WHERE bid = $2; calls | 3000 total_time | 9609.00100000002 rows | 2836 hit_percent | 99.9778970000200936 -[ RECORD 2 ]--------------------------------------------------------------------- -query | UPDATE pgbench_tellers SET tbalance = tbalance + ? WHERE tid = ?; +query | UPDATE pgbench_tellers SET tbalance = tbalance + $1 WHERE tid = $2; calls | 3000 total_time | 8015.156 rows | 2990 @@ -499,7 +512,7 @@ total_time | 310.624 rows | 100000 hit_percent | 0.30395136778115501520 -[ RECORD 4 ]--------------------------------------------------------------------- -query | UPDATE pgbench_accounts SET abalance = abalance + ? WHERE aid = ?; +query | UPDATE pgbench_accounts SET abalance = abalance + $1 WHERE aid = $2; calls | 3000 total_time | 271.741999999997 rows | 3000 -- 2.40.0