]> granicus.if.org Git - postgresql/commitdiff
Add contrib/pg_stat_statements for server-wide tracking of statement execution
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 4 Jan 2009 22:19:59 +0000 (22:19 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 4 Jan 2009 22:19:59 +0000 (22:19 +0000)
statistics.

Takahiro Itagaki

contrib/Makefile
contrib/README
contrib/pg_stat_statements/Makefile [new file with mode: 0644]
contrib/pg_stat_statements/pg_stat_statements.c [new file with mode: 0644]
contrib/pg_stat_statements/pg_stat_statements.sql.in [new file with mode: 0644]
contrib/pg_stat_statements/uninstall_pg_stat_statements.sql [new file with mode: 0644]
doc/src/sgml/contrib.sgml
doc/src/sgml/filelist.sgml
doc/src/sgml/pgstatstatements.sgml [new file with mode: 0644]
src/backend/postmaster/postmaster.c
src/include/pgstat.h

index bbd43e1f661eb3bbf987088b68fa476a1cc27c6b..4b1d2ae95d39c6951e0fbcb5e764121ba4db1904 100644 (file)
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/Makefile,v 1.85 2008/11/19 02:59:28 tgl Exp $
+# $PostgreSQL: pgsql/contrib/Makefile,v 1.86 2009/01/04 22:19:59 tgl Exp $
 
 subdir = contrib
 top_builddir = ..
@@ -27,6 +27,7 @@ WANTED_DIRS = \
                pg_buffercache  \
                pg_freespacemap \
                pg_standby      \
+               pg_stat_statements \
                pg_trgm         \
                pgbench         \
                pgcrypto        \
index 060853fa216272770d291a994dd1ece966982b88..7d258d4b2fc43a945b3fab3e335393195d71732a 100644 (file)
@@ -112,6 +112,10 @@ pg_standby -
        Sample archive_command for warm standby operation
        by Simon Riggs <simon@2ndquadrant.com>
 
+pg_stat_statements -
+       Track statement execution times across a whole database cluster
+       by Takahiro Itagaki <itagaki.takahiro@oss.ntt.co.jp>
+
 pg_trgm -
        Functions for determining the similarity of text based on trigram
        matching.
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
new file mode 100644 (file)
index 0000000..ce335a6
--- /dev/null
@@ -0,0 +1,17 @@
+# $PostgreSQL: pgsql/contrib/pg_stat_statements/Makefile,v 1.1 2009/01/04 22:19:59 tgl Exp $
+
+MODULE_big = pg_stat_statements
+DATA_built = pg_stat_statements.sql
+DATA = uninstall_pg_stat_statements.sql
+OBJS = pg_stat_statements.o
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_stat_statements
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
new file mode 100644 (file)
index 0000000..df14d05
--- /dev/null
@@ -0,0 +1,904 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_stat_statements.c
+ *             Track statement execution times across a whole database cluster.
+ *
+ * Note about locking issues: to create or delete an entry in the shared
+ * hashtable, one must hold pgss->lock exclusively.  Modifying any field
+ * in an entry except the counters requires the same.  To look up an entry,
+ * one must hold the lock shared.  To read or update the counters within
+ * an entry, one must hold the lock shared or exclusive (so the entry doesn't
+ * disappear!) and also take the entry's mutex spinlock.
+ *
+ *
+ * Copyright (c) 2008-2009, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       $PostgreSQL: pgsql/contrib/pg_stat_statements/pg_stat_statements.c,v 1.1 2009/01/04 22:19:59 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <unistd.h>
+
+#include "access/hash.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "executor/instrument.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/spin.h"
+#include "utils/builtins.h"
+#include "utils/hsearch.h"
+#include "utils/guc.h"
+
+
+PG_MODULE_MAGIC;
+
+/* Location of stats file */
+#define PGSS_DUMP_FILE "global/pg_stat_statements.stat"
+
+/* This constant defines the magic number in the stats file header */
+static const uint32 PGSS_FILE_HEADER = 0x20081202;
+
+/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
+#define USAGE_EXEC(duration)   (1.0)
+#define USAGE_INIT                             (1.0)   /* including initial planning */
+#define USAGE_DECREASE_FACTOR  (0.99)  /* decreased every entry_dealloc */
+#define USAGE_DEALLOC_PERCENT  5               /* free this % of entries at once */
+
+/*
+ * Hashtable key that defines the identity of a hashtable entry.  The
+ * hash comparators do not assume that the query string is null-terminated;
+ * this lets us search for an mbcliplen'd string without copying it first.
+ *
+ * Presently, the query encoding is fully determined by the source database
+ * and so we don't really need it to be in the key.  But that might not always
+ * be true. Anyway it's notationally convenient to pass it as part of the key.
+ */
+typedef struct pgssHashKey
+{
+       Oid                     userid;                 /* user OID */
+       Oid                     dbid;                   /* database OID */
+       int                     encoding;               /* query encoding */
+       /*
+        * Callers of entry_alloc() clip the length to less than
+        * pgss->query_size (entry_alloc() asserts this).
+        */
+       int                     query_len;              /* # of valid bytes in query string */
+       /*
+        * Not owned by the key.  For a search key this points at the caller's
+        * query text; once an entry has been created, entry_alloc() repoints the
+        * stored key at the entry's own query[] buffer.
+        */
+       const char *query_ptr;          /* query string proper */
+} pgssHashKey;
+
+/*
+ * The actual stats counters kept within pgssEntry.
+ */
+typedef struct Counters
+{
+       int64           calls;                  /* # of times executed */
+       /* accumulated from the Instrumentation "total" field in pgss_store() */
+       double          total_time;             /* total execution time in seconds */
+       int64           rows;                   /* total # of retrieved or affected rows */
+       /*
+        * Starts at USAGE_INIT, grows by USAGE_EXEC() on every execution and is
+        * multiplied by USAGE_DECREASE_FACTOR on every entry_dealloc() pass;
+        * entry_dealloc() evicts the entries with the smallest usage first.
+        */
+       double          usage;                  /* usage factor */
+} Counters;
+
+/*
+ * Statistics per statement
+ *
+ * NB: see the file read/write code before changing field order here.
+ */
+typedef struct pgssEntry
+{
+       /*
+        * The dump file stores the first offsetof(pgssEntry, mutex) bytes of each
+        * entry (that is, key and counters) followed by the query text, so key
+        * and counters must stay ahead of mutex.
+        */
+       pgssHashKey     key;                    /* hash key of entry - MUST BE FIRST */
+       Counters        counters;               /* the statistics for this query */
+       slock_t         mutex;                  /* protects the counters only */
+       /* entry_alloc() stores the text here and null-terminates it */
+       char            query[1];               /* VARIABLE LENGTH ARRAY - MUST BE LAST */
+       /* Note: the allocated length of query[] is actually pgss->query_size */
+} pgssEntry;
+
+/*
+ * Global shared state
+ */
+typedef struct pgssSharedState
+{
+       /* obtained with LWLockAssign() when the struct is first created */
+       LWLockId        lock;                   /* protects hashtable search/modification */
+       /*
+        * Captured from pgstat_track_activity_query_size when the struct is first
+        * created.  Stored query text is clipped to at most query_size - 1 bytes,
+        * leaving room for the terminating null.
+        */
+       int                     query_size;             /* max query length in bytes */
+} pgssSharedState;
+
+/*---- Local variables ----*/
+
+/* Current nesting depth of ExecutorRun calls */
+static int                                             nested_level = 0;
+/* Saved hook values in case of unload */
+static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
+static ExecutorStart_hook_type prev_ExecutorStart = NULL;
+static ExecutorRun_hook_type   prev_ExecutorRun = NULL;
+static ExecutorEnd_hook_type   prev_ExecutorEnd = NULL;
+/* Links to shared memory state */
+static pgssSharedState            *pgss = NULL;
+static HTAB                                       *pgss_hash = NULL;
+
+/*---- GUC variables ----*/
+
+/*
+ * Tracking levels selectable through the pg_stat_statements.track setting
+ * (see track_options); pgss_enabled() tests pgss_track against these.
+ *
+ * No comma follows the last enumerator: a trailing comma in an enumerator
+ * list is rejected by strict C89 compilers.
+ */
+typedef enum
+{
+       PGSS_TRACK_NONE,                        /* track no statements */
+       PGSS_TRACK_TOP,                         /* only top level statements */
+       PGSS_TRACK_ALL                          /* all statements, including nested ones */
+} PGSSTrackLevel;
+
+/*
+ * Setting names accepted for pg_stat_statements.track, each mapped to its
+ * PGSSTrackLevel value.  Passed to DefineCustomEnumVariable() in _PG_init();
+ * the entry with a NULL name ends the list.
+ * NOTE(review): the third member is presumably config_enum_entry's "hidden"
+ * flag -- confirm against utils/guc.h.
+ */
+static const struct config_enum_entry track_options[] = {
+       {"none", PGSS_TRACK_NONE, false},
+       {"top", PGSS_TRACK_TOP, false},
+       {"all", PGSS_TRACK_ALL, false},
+       {NULL, 0, false}
+};
+
+static int     pgss_max;                       /* max # statements to track */
+static int     pgss_track;                     /* tracking level */
+static bool pgss_save;                 /* whether to save stats across shutdown */
+
+
+/*
+ * True when the statement now being executed should be tracked: only at
+ * nesting depth zero under "top", and at any depth under "all".
+ */
+#define pgss_enabled() \
+       ((pgss_track == PGSS_TRACK_TOP && nested_level == 0) || \
+        pgss_track == PGSS_TRACK_ALL)
+
+/*---- Function declarations ----*/
+
+void   _PG_init(void);
+void   _PG_fini(void);
+
+Datum  pg_stat_statements_reset(PG_FUNCTION_ARGS);
+Datum  pg_stat_statements(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
+PG_FUNCTION_INFO_V1(pg_stat_statements);
+
+static void pgss_shmem_startup(void);
+static void pgss_shmem_shutdown(int code, Datum arg);
+static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
+static void pgss_ExecutorRun(QueryDesc *queryDesc,
+                                                        ScanDirection direction,
+                                                        long count);
+static void pgss_ExecutorEnd(QueryDesc *queryDesc);
+static uint32 pgss_hash_fn(const void *key, Size keysize);
+static int pgss_match_fn(const void *key1, const void *key2, Size keysize);
+static void pgss_store(const char *query,
+                                          const Instrumentation *instr, uint32 rows);
+static Size    pgss_memsize(void);
+static pgssEntry *entry_alloc(pgssHashKey *key);
+static void entry_dealloc(void);
+static void entry_reset(void);
+
+
+/*
+ * Module load callback.
+ *
+ * Registers the module's GUC variables, asks for shared memory and an
+ * LWLock, and chains the module into the shmem-startup and executor hooks.
+ * All of that happens only while shared_preload_libraries is being
+ * processed; otherwise the function returns without touching anything.
+ */
+void
+_PG_init(void)
+{
+       /*
+        * The shared memory area can only be created when we are loaded via
+        * shared_preload_libraries.  In any other case, quietly do nothing
+        * rather than raise an error: that way the SQL-level functions can
+        * still be created while the module is inactive.  Those functions must
+        * guard against being called in that state.
+        */
+       if (!process_shared_preload_libraries_in_progress)
+               return;
+
+       /* Define (or redefine) the custom GUC variables. */
+       DefineCustomIntVariable("pg_stat_statements.max",
+         "Sets the maximum number of statements tracked by pg_stat_statements.",
+                                                       NULL,
+                                                       &pgss_max,
+                                                       1000, 100, INT_MAX,
+                                                       PGC_POSTMASTER,
+                                                       0,
+                                                       NULL, NULL);
+
+       DefineCustomEnumVariable("pg_stat_statements.track",
+                        "Selects which statements are tracked by pg_stat_statements.",
+                                                        NULL,
+                                                        &pgss_track,
+                                                        PGSS_TRACK_TOP,
+                                                        track_options,
+                                                        PGC_SUSET,
+                                                        0,
+                                                        NULL, NULL);
+
+       DefineCustomBoolVariable("pg_stat_statements.save",
+                        "Save pg_stat_statements statistics across server shutdowns.",
+                                                        NULL,
+                                                        &pgss_save,
+                                                        true,
+                                                        PGC_SIGHUP,
+                                                        0,
+                                                        NULL, NULL);
+
+       /*
+        * Ask for the extra shared resources we need.  (Outside the postmaster
+        * these requests are no-ops.)  The actual allocation or attachment is
+        * done later, in pgss_shmem_startup().  Note pgss_memsize() depends on
+        * pgss_max, so this must follow the GUC definitions above.
+        */
+       RequestAddinLWLocks(1);
+       RequestAddinShmemSpace(pgss_memsize());
+
+       /* Executor hooks: remember the previous value, then install ours. */
+       prev_ExecutorStart = ExecutorStart_hook;
+       prev_ExecutorRun = ExecutorRun_hook;
+       prev_ExecutorEnd = ExecutorEnd_hook;
+       ExecutorStart_hook = pgss_ExecutorStart;
+       ExecutorRun_hook = pgss_ExecutorRun;
+       ExecutorEnd_hook = pgss_ExecutorEnd;
+
+       /* Shared-memory startup hook, same pattern. */
+       prev_shmem_startup_hook = shmem_startup_hook;
+       shmem_startup_hook = pgss_shmem_startup;
+}
+
+/*
+ * Module unload callback: put back every hook value that _PG_init() saved.
+ */
+void
+_PG_fini(void)
+{
+       /* Restore the shared-memory startup hook ... */
+       shmem_startup_hook = prev_shmem_startup_hook;
+
+       /* ... and the three executor hooks. */
+       ExecutorEnd_hook = prev_ExecutorEnd;
+       ExecutorRun_hook = prev_ExecutorRun;
+       ExecutorStart_hook = prev_ExecutorStart;
+}
+
+/*
+ * shmem_startup hook: allocate or attach to shared memory,
+ * then load any pre-existing statistics from file.
+ *
+ * Any previously installed shmem_startup hook is called first.  The shared
+ * state struct and the hashtable are created (or attached to) while holding
+ * AddinShmemInitLock.  When not running under a postmaster, this also
+ * registers pgss_shmem_shutdown() so that statistics get dumped at exit.
+ *
+ * If pg_stat_statements.save is on and the dump file exists, its entries are
+ * loaded into the hashtable.  A read failure or failed sanity check is
+ * reported at LOG level and the file is unlinked; entries loaded before the
+ * failure stay in the hashtable.
+ */
+static void
+pgss_shmem_startup(void)
+{
+       bool            found;
+       HASHCTL         info;
+       FILE       *file;
+       uint32          header;
+       int32           num;
+       int32           i;
+       int                     query_size;
+       int                     buffer_size;
+       char       *buffer = NULL;
+
+       if (prev_shmem_startup_hook)
+               prev_shmem_startup_hook();
+
+       /* reset in case this is a restart within the postmaster */
+       pgss = NULL;
+       pgss_hash = NULL;
+
+       /*
+        * Create or attach to the shared memory state, including hash table
+        */
+       LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+
+       pgss = ShmemInitStruct("pg_stat_statements",
+                                                  sizeof(pgssSharedState),
+                                                  &found);
+       if (!pgss)
+               elog(ERROR, "out of shared memory");
+
+       if (!found)
+       {
+               /* First time through ... */
+               pgss->lock = LWLockAssign();
+               pgss->query_size = pgstat_track_activity_query_size;
+       }
+
+       /* Be sure everyone agrees on the hash table entry size */
+       query_size = pgss->query_size;
+
+       memset(&info, 0, sizeof(info));
+       info.keysize = sizeof(pgssHashKey);
+       info.entrysize = offsetof(pgssEntry, query) + query_size;
+       info.hash = pgss_hash_fn;
+       info.match = pgss_match_fn;
+       pgss_hash = ShmemInitHash("pg_stat_statements hash",
+                                                         pgss_max, pgss_max,
+                                                         &info,
+                                                         HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
+       if (!pgss_hash)
+               elog(ERROR, "out of shared memory");
+
+       LWLockRelease(AddinShmemInitLock);
+
+       /*
+        * If we're in the postmaster (or a standalone backend...), set up a
+        * shmem exit hook to dump the statistics to disk.
+        */
+       if (!IsUnderPostmaster)
+               on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
+
+       /*
+        * Attempt to load old statistics from the dump file.
+        *
+        * Note: we don't bother with locks here, because there should be no
+        * other processes running when this is called.
+        */
+       if (!pgss_save)
+               return;
+
+       file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
+       if (file == NULL)
+       {
+               if (errno == ENOENT)
+                       return;                         /* ignore not-found error */
+               goto error;
+       }
+
+       buffer_size = query_size;
+       buffer = (char *) palloc(buffer_size);
+
+       /* File layout: magic number, entry count, then the entries */
+       if (fread(&header, sizeof(uint32), 1, file) != 1 ||
+               header != PGSS_FILE_HEADER ||
+               fread(&num, sizeof(int32), 1, file) != 1)
+               goto error;
+
+       for (i = 0; i < num; i++)
+       {
+               pgssEntry       temp;
+               pgssEntry   *entry;
+
+               /* Each entry is stored as its key + counters, then the text */
+               if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
+                       goto error;
+
+               /* Sanity-check the encoding before trusting it */
+               if (!PG_VALID_BE_ENCODING(temp.key.encoding))
+                       goto error;
+
+               /*
+                * Reject a negative length.  It would slip past the buffer-size
+                * check below and then be converted to an enormous size_t by
+                * fread(), letting a corrupt file overrun the buffer.
+                */
+               if (temp.key.query_len < 0)
+                       goto error;
+
+               /* Previous incarnation might have had a larger query_size */
+               if (temp.key.query_len >= buffer_size)
+               {
+                       buffer = (char *) repalloc(buffer, temp.key.query_len + 1);
+                       buffer_size = temp.key.query_len + 1;
+               }
+
+               if (fread(buffer, 1, temp.key.query_len, file) != temp.key.query_len)
+                       goto error;
+               buffer[temp.key.query_len] = '\0';
+
+               /* Clip to available length if needed */
+               if (temp.key.query_len >= query_size)
+                       temp.key.query_len = pg_encoding_mbcliplen(temp.key.encoding,
+                                                                                                          buffer,
+                                                                                                          temp.key.query_len,
+                                                                                                          query_size - 1);
+               /* the pointer read from the file is meaningless; use our buffer */
+               temp.key.query_ptr = buffer;
+
+               /* make the hashtable entry (discards old entries if too many) */
+               entry = entry_alloc(&temp.key);
+
+               /* copy in the actual stats */
+               entry->counters = temp.counters;
+       }
+
+       pfree(buffer);
+       FreeFile(file);
+       return;
+
+error:
+       ereport(LOG,
+                       (errcode_for_file_access(),
+                        errmsg("could not read pg_stat_statement file \"%s\": %m",
+                                       PGSS_DUMP_FILE)));
+       if (buffer)
+               pfree(buffer);
+       if (file)
+               FreeFile(file);
+       /* If possible, throw away the bogus file; ignore any error */
+       unlink(PGSS_DUMP_FILE);
+}
+
+/*
+ * shmem_shutdown hook: write the collected statistics to the dump file.
+ *
+ * The file holds the magic number, the entry count, and then for every
+ * entry its key and counters followed by the query text.  On any write
+ * failure the problem is logged and the (partial) file is removed.
+ *
+ * No lock is taken: no other process should be running when this is called.
+ */
+static void
+pgss_shmem_shutdown(int code, Datum arg)
+{
+       FILE                       *fp;
+       HASH_SEQ_STATUS         scan;
+       int32                           nentries;
+       pgssEntry                  *ent;
+
+       /*
+        * Skip the dump after a crash (nonzero exit code), if shared memory was
+        * never set up (shouldn't happen), or if saving is turned off.
+        */
+       if (code || !pgss || !pgss_hash || !pgss_save)
+               return;
+
+       fp = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_W);
+       if (fp == NULL)
+               goto error;
+
+       /* File header: magic number, then number of entries */
+       nentries = hash_get_num_entries(pgss_hash);
+       if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, fp) != 1 ||
+               fwrite(&nentries, sizeof(int32), 1, fp) != 1)
+               goto error;
+
+       hash_seq_init(&scan, pgss_hash);
+       for (;;)
+       {
+               int             qlen;
+
+               ent = hash_seq_search(&scan);
+               if (ent == NULL)
+                       break;
+
+               qlen = ent->key.query_len;
+
+               /* fixed part (everything ahead of the mutex) ... */
+               if (fwrite(ent, offsetof(pgssEntry, mutex), 1, fp) != 1)
+                       goto error;
+               /* ... then the query text, without its terminator */
+               if (fwrite(ent->query, 1, qlen, fp) != qlen)
+                       goto error;
+       }
+
+       if (FreeFile(fp) != 0)
+       {
+               /* already closed; keep the error path from closing it again */
+               fp = NULL;
+               goto error;
+       }
+
+       return;
+
+error:
+       ereport(LOG,
+                       (errcode_for_file_access(),
+                        errmsg("could not write pg_stat_statement file \"%s\": %m",
+                                       PGSS_DUMP_FILE)));
+       if (fp)
+               FreeFile(fp);
+       unlink(PGSS_DUMP_FILE);
+}
+
+/*
+ * ExecutorStart hook: run the previous (or standard) ExecutorStart, then
+ * make sure a total-time Instrumentation struct exists for any statement
+ * that is going to be tracked.
+ */
+static void
+pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
+{
+       MemoryContext saved_cxt;
+
+       if (prev_ExecutorStart == NULL)
+               standard_ExecutorStart(queryDesc, eflags);
+       else
+               prev_ExecutorStart(queryDesc, eflags);
+
+       /* Nothing more to do if not tracking, or if timing is already set up */
+       if (!pgss_enabled() || queryDesc->totaltime != NULL)
+               return;
+
+       /*
+        * Allocate the instrumentation in the per-query context, so that it is
+        * released along with everything else at ExecutorEnd.
+        */
+       saved_cxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
+       queryDesc->totaltime = InstrAlloc(1);
+       MemoryContextSwitchTo(saved_cxt);
+}
+
+/*
+ * ExecutorRun hook: keep track of the ExecutorRun nesting depth.
+ *
+ * nested_level is raised around the previous (or standard) ExecutorRun call
+ * and lowered again whether that call returns normally or exits through an
+ * error, so pgss_enabled() can tell top-level statements from nested ones.
+ */
+static void
+pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
+{
+       ++nested_level;
+
+       PG_TRY();
+       {
+               if (prev_ExecutorRun == NULL)
+                       standard_ExecutorRun(queryDesc, direction, count);
+               else
+                       prev_ExecutorRun(queryDesc, direction, count);
+       }
+       PG_CATCH();
+       {
+               /* error exit: undo our increment before passing the error on */
+               --nested_level;
+               PG_RE_THROW();
+       }
+       PG_END_TRY();
+
+       /* normal exit */
+       --nested_level;
+}
+
+/*
+ * ExecutorEnd hook: record the statement's statistics if it was being
+ * timed and tracking is enabled, then run the previous (or standard)
+ * ExecutorEnd.
+ */
+static void
+pgss_ExecutorEnd(QueryDesc *queryDesc)
+{
+       bool            record = (queryDesc->totaltime != NULL) && pgss_enabled();
+
+       if (record)
+       {
+               /*
+                * Finish accumulating the timing data first.  (It does no harm
+                * if several hook levels each do this.)
+                */
+               InstrEndLoop(queryDesc->totaltime);
+
+               pgss_store(queryDesc->sourceText,
+                                  queryDesc->totaltime,
+                                  queryDesc->estate->es_processed);
+       }
+
+       if (prev_ExecutorEnd == NULL)
+               standard_ExecutorEnd(queryDesc);
+       else
+               prev_ExecutorEnd(queryDesc);
+}
+
+/*
+ * Hash function for pgssHashKey: combines hashes of the user OID, the
+ * database OID and the first query_len bytes of the query text.  The
+ * encoding field is deliberately left out of the hash.
+ */
+static uint32
+pgss_hash_fn(const void *key, Size keysize)
+{
+       const pgssHashKey *hk = (const pgssHashKey *) key;
+       uint32          result;
+
+       result = DatumGetUInt32(hash_any((const unsigned char *) hk->query_ptr,
+                                                                        hk->query_len));
+       result ^= hash_uint32((uint32) hk->dbid);
+       result ^= hash_uint32((uint32) hk->userid);
+
+       return result;
+}
+
+/*
+ * Key comparator for the hashtable: returns 0 when the two keys are equal
+ * and 1 otherwise.  Keys are equal when user, database, encoding and length
+ * agree and the first query_len bytes of the query text are identical.
+ */
+static int
+pgss_match_fn(const void *key1, const void *key2, Size keysize)
+{
+       const pgssHashKey *a = (const pgssHashKey *) key1;
+       const pgssHashKey *b = (const pgssHashKey *) key2;
+
+       /* cheap scalar comparisons first */
+       if (a->userid != b->userid)
+               return 1;
+       if (a->dbid != b->dbid)
+               return 1;
+       if (a->encoding != b->encoding)
+               return 1;
+       if (a->query_len != b->query_len)
+               return 1;
+
+       /* lengths agree, so comparing query_len bytes is safe */
+       return (memcmp(a->query_ptr, b->query_ptr, a->query_len) == 0) ? 0 : 1;
+}
+
+/*
+ * Store some statistics for a statement.
+ *
+ * query: null-terminated statement text; it is clipped to fit the shared
+ *             entry size before being used as part of the hash key.
+ * instr: instrumentation for the statement; its total time is added to the
+ *             entry's total_time.
+ * rows: number of rows to add to the entry's row count.
+ *
+ * Does nothing if the shared state has not been set up.  The entry is looked
+ * up under a shared lock; if it does not exist yet, the lock is retaken in
+ * exclusive mode and the entry is created.  The counters themselves are
+ * updated under the entry's spinlock.
+ */
+static void
+pgss_store(const char *query, const Instrumentation *instr, uint32 rows)
+{
+       pgssHashKey     key;
+       double          usage;
+       pgssEntry   *entry;
+
+       Assert(query != NULL);
+
+       /* Safety check... */
+       if (!pgss || !pgss_hash)
+               return;
+
+       /* Set up key for hashtable search */
+       key.userid = GetUserId();
+       key.dbid = MyDatabaseId;
+       key.encoding = GetDatabaseEncoding();
+       key.query_len = strlen(query);
+       if (key.query_len >= pgss->query_size)
+               key.query_len = pg_encoding_mbcliplen(key.encoding,
+                                                                                         query,
+                                                                                         key.query_len,
+                                                                                         pgss->query_size - 1);
+       key.query_ptr = query;
+
+       /*
+        * Pass the real duration.  USAGE_EXEC() currently ignores its argument,
+        * but handing it an identifier that exists keeps this compiling if the
+        * macro ever starts using it.
+        */
+       usage = USAGE_EXEC(instr->total);
+
+       /* Lookup the hash table entry with shared lock. */
+       LWLockAcquire(pgss->lock, LW_SHARED);
+
+       entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
+       if (!entry)
+       {
+               /*
+                * Must acquire exclusive lock to add a new entry.  Someone else may
+                * create it while we wait; entry_alloc() copes with that.
+                */
+               LWLockRelease(pgss->lock);
+               LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
+               entry = entry_alloc(&key);
+       }
+
+       /* Grab the spinlock while updating the counters. */
+       {
+               volatile pgssEntry *e = (volatile pgssEntry *) entry;
+
+               SpinLockAcquire(&e->mutex);
+               e->counters.calls += 1;
+               e->counters.total_time += instr->total;
+               e->counters.rows += rows;
+               e->counters.usage += usage;
+               SpinLockRelease(&e->mutex);
+       }
+
+       /* Releases whichever mode (shared or exclusive) we ended up holding */
+       LWLockRelease(pgss->lock);
+}
+
+/*
+ * SQL-callable: discard every statement entry collected so far.
+ * Raises an error if the module's shared state has not been set up.
+ */
+Datum
+pg_stat_statements_reset(PG_FUNCTION_ARGS)
+{
+       bool            attached = (pgss != NULL && pgss_hash != NULL);
+
+       if (!attached)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
+
+       entry_reset();
+
+       PG_RETURN_VOID();
+}
+
+/* Number of output columns; must match the tuple descriptor built below */
+#define PG_STAT_STATEMENTS_COLS                6
+
+/*
+ * Retrieve statement statistics.
+ *
+ * Set-returning function that hands back its result as a materialized
+ * tuplestore, one row per hashtable entry: userid, dbid, query, calls,
+ * total_time, rows.  The query text is shown only to superusers and to the
+ * user the entry belongs to; everyone else gets a placeholder string.
+ *
+ * Raises an error if the module's shared state has not been set up, or if
+ * the caller cannot accept a materialized set result.
+ */
+Datum
+pg_stat_statements(PG_FUNCTION_ARGS)
+{
+       ReturnSetInfo      *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+       TupleDesc                       tupdesc;
+       Tuplestorestate    *tupstore;
+       MemoryContext           per_query_ctx;
+       MemoryContext           oldcontext;
+       Oid                                     userid = GetUserId();
+       bool                            is_superuser = superuser();
+       HASH_SEQ_STATUS         hash_seq;
+       pgssEntry                  *entry;
+
+       if (!pgss || !pgss_hash)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
+
+       /* check to see if caller supports us returning a tuplestore */
+       if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("set-valued function called in context that cannot accept a set")));
+       if (!(rsinfo->allowedModes & SFRM_Materialize))
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("materialize mode required, but it is not " \
+                                               "allowed in this context")));
+
+       /* the tuple descriptor and tuplestore are built in the per-query context */
+       per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+       oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+       tupdesc = CreateTemplateTupleDesc(PG_STAT_STATEMENTS_COLS, false);
+       TupleDescInitEntry(tupdesc, (AttrNumber) 1, "userid",
+                                          OIDOID, -1, 0);
+       TupleDescInitEntry(tupdesc, (AttrNumber) 2, "dbid",
+                                          OIDOID, -1, 0);
+       TupleDescInitEntry(tupdesc, (AttrNumber) 3, "query",
+                                          TEXTOID, -1, 0);
+       TupleDescInitEntry(tupdesc, (AttrNumber) 4, "calls",
+                                          INT8OID, -1, 0);
+       TupleDescInitEntry(tupdesc, (AttrNumber) 5, "total_time",
+                                          FLOAT8OID, -1, 0);
+       TupleDescInitEntry(tupdesc, (AttrNumber) 6, "rows",
+                                          INT8OID, -1, 0);
+
+       tupstore = tuplestore_begin_heap(true, false, work_mem);
+       rsinfo->returnMode = SFRM_Materialize;
+       rsinfo->setResult = tupstore;
+       rsinfo->setDesc = tupdesc;
+
+       /* shared lock keeps entries from disappearing while we scan */
+       LWLockAcquire(pgss->lock, LW_SHARED);
+
+       hash_seq_init(&hash_seq, pgss_hash);
+       while ((entry = hash_seq_search(&hash_seq)) != NULL)
+       {
+               Datum           values[PG_STAT_STATEMENTS_COLS];
+               bool            nulls[PG_STAT_STATEMENTS_COLS];
+               int                     i = 0;
+               Counters        tmp;
+
+               /* generate junk in short-term context */
+               MemoryContextSwitchTo(oldcontext);
+
+               memset(values, 0, sizeof(values));
+               memset(nulls, 0, sizeof(nulls));
+
+               values[i++] = ObjectIdGetDatum(entry->key.userid);
+               values[i++] = ObjectIdGetDatum(entry->key.dbid);
+
+               /* query text is visible only to superusers and the entry's user */
+               if (is_superuser || entry->key.userid == userid)
+               {
+                       char   *qstr;
+
+                       /* convert from the entry's encoding to this database's */
+                       qstr = (char *)
+                               pg_do_encoding_conversion((unsigned char *) entry->query,
+                                                                                 entry->key.query_len,
+                                                                                 entry->key.encoding,
+                                                                                 GetDatabaseEncoding());
+                       values[i++] = CStringGetTextDatum(qstr);
+                       /* free the converted copy, if the conversion made one */
+                       if (qstr != entry->query)
+                               pfree(qstr);
+               }
+               else
+                       values[i++] = CStringGetTextDatum("<insufficient privilege>");
+
+               /* copy counters to a local variable to keep locking time short */
+               {
+                       volatile pgssEntry *e = (volatile pgssEntry *) entry;
+
+                       SpinLockAcquire(&e->mutex);
+                       tmp = e->counters;
+                       SpinLockRelease(&e->mutex);
+               }
+
+               values[i++] = Int64GetDatumFast(tmp.calls);
+               values[i++] = Float8GetDatumFast(tmp.total_time);
+               values[i++] = Int64GetDatumFast(tmp.rows);
+
+               /* every column must have been filled in exactly once */
+               Assert(i == PG_STAT_STATEMENTS_COLS);
+
+               /* switch to appropriate context while storing the tuple */
+               MemoryContextSwitchTo(per_query_ctx);
+               tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+       }
+
+       LWLockRelease(pgss->lock);
+
+       /* clean up and return the tuplestore */
+       tuplestore_donestoring(tupstore);
+
+       MemoryContextSwitchTo(oldcontext);
+
+       /* the rows are delivered through rsinfo->setResult, not the return value */
+       return (Datum) 0;
+}
+
+/*
+ * Estimate the shared memory needed: the shared state struct plus a
+ * hashtable of pgss_max entries, each sized to hold a query text of
+ * pgstat_track_activity_query_size bytes.
+ */
+static Size
+pgss_memsize(void)
+{
+       Size    per_entry = offsetof(pgssEntry, query) + pgstat_track_activity_query_size;
+       Size    state_size = MAXALIGN(sizeof(pgssSharedState));
+
+       return add_size(state_size, hash_estimate_size(pgss_max, per_entry));
+}
+
+/*
+ * Find or create the hashtable entry for the given key.
+ * The caller must hold pgss->lock in exclusive mode.
+ *
+ * Finding an existing entry is not an error even though we hold the lock
+ * exclusively: pgss_store gives up its shared lock before taking the
+ * exclusive one, so another backend may have created the entry in between.
+ *
+ * If the table is full, least-used entries are evicted first.
+ */
+static pgssEntry *
+entry_alloc(pgssHashKey *key)
+{
+       pgssEntry  *result;
+       bool    existed;
+
+       /* The caller is responsible for clipping the query text */
+       Assert(key->query_len < pgss->query_size);
+
+       /* Evict until there is room for one more */
+       while (hash_get_num_entries(pgss_hash) >= pgss_max)
+               entry_dealloc();
+
+       result = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &existed);
+       if (existed)
+               return result;
+
+       /*
+        * Brand-new entry.  Save the query text in the entry itself ...
+        */
+       memcpy(result->query, key->query_ptr, key->query_len);
+       result->query[key->query_len] = '\0';
+
+       /*
+        * ... and point the stored key at that copy; dynahash copied the key
+        * for us, but its query_ptr still refers to the caller's string.
+        */
+       result->key.query_ptr = result->query;
+
+       /* Start from zeroed statistics with the initial usage value */
+       memset(&result->counters, 0, sizeof(Counters));
+       result->counters.usage = USAGE_INIT;
+
+       /* The mutex is re-initialized each time; we assume nobody is using it */
+       SpinLockInit(&result->mutex);
+
+       return result;
+}
+
+/*
+ * qsort comparator over an array of pgssEntry pointers: orders entries by
+ * ascending usage.
+ */
+static int
+entry_cmp(const void *lhs, const void *rhs)
+{
+       const pgssEntry *a = *(const pgssEntry **) lhs;
+       const pgssEntry *b = *(const pgssEntry **) rhs;
+       double  a_usage = a->counters.usage;
+       double  b_usage = b->counters.usage;
+
+       return (a_usage < b_usage) ? -1 : ((a_usage > b_usage) ? +1 : 0);
+}
+
+/*
+ * Evict the least-used entries from the hashtable.
+ * The caller must hold pgss->lock in exclusive mode.
+ *
+ * Every entry's usage is first scaled by USAGE_DECREASE_FACTOR.  The entries
+ * are then sorted by usage and the lowest USAGE_DEALLOC_PERCENT percent
+ * (but at least 10, and never more than exist) are removed.
+ */
+static void
+entry_dealloc(void)
+{
+       HASH_SEQ_STATUS         scan;
+       pgssEntry                 **sorted;
+       pgssEntry                  *cur;
+       int                                     nentries = 0;
+       int                                     ndrop;
+       int                                     v;
+
+       sorted = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
+
+       /* Collect every entry, aging its usage as we go */
+       hash_seq_init(&scan, pgss_hash);
+       while ((cur = hash_seq_search(&scan)) != NULL)
+       {
+               cur->counters.usage *= USAGE_DECREASE_FACTOR;
+               sorted[nentries++] = cur;
+       }
+
+       qsort(sorted, nentries, sizeof(pgssEntry *), entry_cmp);
+
+       /* How many to drop: the configured percentage, clamped to [10, all] */
+       ndrop = nentries * USAGE_DEALLOC_PERCENT / 100;
+       if (ndrop < 10)
+               ndrop = 10;
+       if (ndrop > nentries)
+               ndrop = nentries;
+
+       for (v = 0; v < ndrop; v++)
+               hash_search(pgss_hash, &sorted[v]->key, HASH_REMOVE, NULL);
+
+       pfree(sorted);
+}
+
+/*
+ * Remove every entry from the hashtable.  Takes pgss->lock exclusively for
+ * the duration of the scan.
+ */
+static void
+entry_reset(void)
+{
+       HASH_SEQ_STATUS         scan;
+       pgssEntry                  *victim;
+
+       LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
+
+       for (hash_seq_init(&scan, pgss_hash);
+                (victim = hash_seq_search(&scan)) != NULL;)
+               hash_search(pgss_hash, &victim->key, HASH_REMOVE, NULL);
+
+       LWLockRelease(pgss->lock);
+}
diff --git a/contrib/pg_stat_statements/pg_stat_statements.sql.in b/contrib/pg_stat_statements/pg_stat_statements.sql.in
new file mode 100644 (file)
index 0000000..7655136
--- /dev/null
@@ -0,0 +1,31 @@
+/* $PostgreSQL: pgsql/contrib/pg_stat_statements/pg_stat_statements.sql.in,v 1.1 2009/01/04 22:19:59 tgl Exp $ */
+
+-- Adjust this setting to control where the objects get created.
+SET search_path = public;
+
+-- Register the module's C-language functions.
+CREATE FUNCTION pg_stat_statements_reset()
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C;
+
+CREATE FUNCTION pg_stat_statements(
+    OUT userid oid,
+    OUT dbid oid,
+    OUT query text,
+    OUT calls int8,
+    OUT total_time float8,
+    OUT rows int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C;
+
+-- Wrap the set-returning function in a view so it can be queried like a table.
+CREATE VIEW pg_stat_statements AS
+  SELECT * FROM pg_stat_statements();
+
+GRANT SELECT ON pg_stat_statements TO PUBLIC;
+
+-- Resetting discards all gathered statistics; keep it away from non-superusers.
+REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
diff --git a/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql b/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql
new file mode 100644 (file)
index 0000000..31fd0af
--- /dev/null
@@ -0,0 +1,8 @@
+/* $PostgreSQL: pgsql/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql,v 1.1 2009/01/04 22:19:59 tgl Exp $ */
+
+-- Adjust this setting to control where the objects get dropped.
+SET search_path = public;
+
+DROP VIEW pg_stat_statements;  -- dropped first: the view selects from the function below
+DROP FUNCTION pg_stat_statements();
+DROP FUNCTION pg_stat_statements_reset();
index ecc5a0b23e3c8d77985beac990a9597489c0caaa..89fb5314fe61b3bd11e59dd19a5c80fdef87a3fb 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.10 2008/11/19 02:59:28 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.11 2009/01/04 22:19:59 tgl Exp $ -->
 
 <appendix id="contrib">
  <title>Additional Supplied Modules</title>
@@ -103,6 +103,7 @@ psql -d dbname -f <replaceable>SHAREDIR</>/contrib/<replaceable>module</>.sql
  &pgfreespacemap;
  &pgrowlocks;
  &pgstandby;
+ &pgstatstatements;
  &pgstattuple;
  &pgtrgm;
  &seg;
index ea1c7c274fa7cc44f01fe37fe2222880ab4ff30a..273d5a09799c011c2e036c8a0bc8b2e221ee4f2f 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.58 2008/11/19 02:59:28 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.59 2009/01/04 22:19:59 tgl Exp $ -->
 
 <!entity history    SYSTEM "history.sgml">
 <!entity info       SYSTEM "info.sgml">
 <!entity pgfreespacemap  SYSTEM "pgfreespacemap.sgml">
 <!entity pgrowlocks      SYSTEM "pgrowlocks.sgml">
 <!entity pgstandby       SYSTEM "pgstandby.sgml">
+<!entity pgstatstatements SYSTEM "pgstatstatements.sgml">
 <!entity pgstattuple     SYSTEM "pgstattuple.sgml">
 <!entity pgtrgm          SYSTEM "pgtrgm.sgml">
 <!entity seg             SYSTEM "seg.sgml">
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
new file mode 100644 (file)
index 0000000..93cda9f
--- /dev/null
@@ -0,0 +1,265 @@
+<!-- $PostgreSQL: pgsql/doc/src/sgml/pgstatstatements.sgml,v 1.1 2009/01/04 22:19:59 tgl Exp $ -->
+
+<sect1 id="pgstatstatements">
+ <title>pg_stat_statements</title>
+
+ <indexterm zone="pgstatstatements">
+  <primary>pg_stat_statements</primary>
+ </indexterm>
+
+ <para>
+  The <filename>pg_stat_statements</filename> module provides a means for
+  tracking execution statistics of all SQL statements executed by a server.
+ </para>
+
+ <para>
+  The module must be loaded by adding <literal>pg_stat_statements</> to
+  <xref linkend="guc-shared-preload-libraries"> in
+  <filename>postgresql.conf</>, because it requires additional shared memory.
+  This means that a server restart is needed to add or remove the module.
+ </para>
+
+ <sect2>
+  <title>The <structname>pg_stat_statements</structname> view</title>
+
+  <para>
+   The statistics gathered by the module are made available via a view
+   named <structname>pg_stat_statements</>.  This view contains one row for
+   each distinct query text, database ID, and user ID (up to the maximum
+   number of distinct statements that the module can track).  The columns
+   of the view are:
+  </para>
+
+  <table>
+   <title><structname>pg_stat_statements</> columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry><structfield>userid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
+      <entry>OID of user who executed the statement</entry>
+     </row>
+
+     <row>
+      <entry><structfield>dbid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-database"><structname>pg_database</structname></link>.oid</literal></entry>
+      <entry>OID of database in which the statement was executed</entry>
+     </row>
+
+    <row>
+      <entry><structfield>query</structfield></entry>
+      <entry><type>text</type></entry>
+      <entry></entry>
+      <entry>Text of the statement (up to <xref linkend="guc-track-activity-query-size"> bytes)</entry>
+     </row>
+
+     <row>
+      <entry><structfield>calls</structfield></entry>
+      <entry><type>bigint</type></entry>
+      <entry></entry>
+      <entry>Number of times executed</entry>
+     </row>
+
+     <row>
+      <entry><structfield>total_time</structfield></entry>
+      <entry><type>double precision</type></entry>
+      <entry></entry>
+      <entry>Total time spent in the statement, in seconds</entry>
+     </row>
+
+     <row>
+      <entry><structfield>rows</structfield></entry>
+      <entry><type>bigint</type></entry>
+      <entry></entry>
+      <entry>Total number of rows retrieved or affected by the statement</entry>
+     </row>
+
+    </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   This view, and the function <function>pg_stat_statements_reset</>,
+   are available only in databases they have been specifically installed into
+   by running the <filename>pg_stat_statements.sql</> install script.
+   However, statistics are tracked across all databases of the server
+   whenever the <filename>pg_stat_statements</filename> module is loaded
+   into the server, regardless of whether the view is present.
+  </para>
+
+  <para>
+   For security reasons, non-superusers are not allowed to see the text of
+   queries executed by other users.  They can see the statistics, however,
+   if the view has been installed in their database.
+  </para>
+
+  <para>
+   Note that statements are considered the same if they have the same text,
+   regardless of the values of any out-of-line parameters used in the
+   statement.  Using out-of-line parameters will help to group statements
+   together and may make the statistics more useful.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Functions</title>
+
+  <variablelist>
+   <varlistentry>
+    <term>
+     <function>pg_stat_statements_reset() returns void</function>
+    </term>
+
+    <listitem>
+     <para>
+      <function>pg_stat_statements_reset</function> discards all statistics
+      gathered so far by <filename>pg_stat_statements</>.
+      By default, this function can only be executed by superusers.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+ </sect2>
+
+ <sect2>
+  <title>Configuration parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term>
+     <varname>pg_stat_statements.max</varname> (<type>integer</type>)
+    </term>
+
+    <listitem>
+     <para>
+      <varname>pg_stat_statements.max</varname> is the maximum number of
+      statements tracked by the module (i.e., the maximum number of rows
+      in the <structname>pg_stat_statements</> view).  If more distinct
+      statements than that are observed, information about the least-executed
+      statements is discarded.
+      The default value is 1000.
+      This parameter can only be set at server start.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>
+     <varname>pg_stat_statements.track</varname> (<type>enum</type>)
+    </term>
+
+    <listitem>
+     <para>
+      <varname>pg_stat_statements.track</varname> controls which statements
+      are counted by the module.
+      Specify <literal>top</> to track top-level statements (those issued
+      directly by clients), <literal>all</> to also track nested statements
+      (such as statements invoked within functions), or <literal>none</> to
+      disable.
+      The default value is <literal>top</>.
+      Only superusers can change this setting.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>
+     <varname>pg_stat_statements.save</varname> (<type>boolean</type>)
+    </term>
+
+    <listitem>
+     <para>
+      <varname>pg_stat_statements.save</varname> specifies whether to
+      save statement statistics across server shutdowns.
+      If it is <literal>off</> then statistics are not saved at
+      shutdown nor reloaded at server start.
+      The default value is <literal>on</>.
+      This parameter can only be set in the <filename>postgresql.conf</>
+      file or on the server command line.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+
+  <para>
+   The module requires additional shared memory amounting to about
+   <varname>pg_stat_statements.max</varname> <literal>*</>
+   <xref linkend="guc-track-activity-query-size"> bytes.  Note that this
+   memory is consumed whenever the module is loaded, even if
+   <varname>pg_stat_statements.track</> is set to <literal>none</>.
+  </para>
+
+  <para>
+   In order to set any of these parameters in your
+   <filename>postgresql.conf</> file,
+   you will need to add <literal>pg_stat_statements</> to
+   <xref linkend="guc-custom-variable-classes">.  Typical usage might be:
+  </para>
+
+  <programlisting>
+# postgresql.conf
+shared_preload_libraries = 'pg_stat_statements'
+
+custom_variable_classes = 'pg_stat_statements'
+pg_stat_statements.max = 10000
+pg_stat_statements.track = all
+  </programlisting>
+ </sect2>
+
+ <sect2>
+  <title>Sample output</title>
+
+  <programlisting>
+$ pgbench -i bench
+
+postgres=# SELECT pg_stat_statements_reset();
+
+$ pgbench -c10 -t300 -M prepared bench
+
+postgres=# \x
+postgres=# SELECT * FROM pg_stat_statements ORDER BY total_time DESC LIMIT 3;
+-[ RECORD 1 ]------------------------------------------------------------
+userid     | 10
+dbid       | 63781
+query      | UPDATE branches SET bbalance = bbalance + $1 WHERE bid = $2;
+calls      | 3000
+total_time | 20.716706
+rows       | 3000
+-[ RECORD 2 ]------------------------------------------------------------
+userid     | 10
+dbid       | 63781
+query      | UPDATE tellers SET tbalance = tbalance + $1 WHERE tid = $2;
+calls      | 3000
+total_time | 17.1107649999999
+rows       | 3000
+-[ RECORD 3 ]------------------------------------------------------------
+userid     | 10
+dbid       | 63781
+query      | UPDATE accounts SET abalance = abalance + $1 WHERE aid = $2;
+calls      | 3000
+total_time | 0.645601
+rows       | 3000
+  </programlisting>
+ </sect2>
+
+ <sect2>
+  <title>Author</title>
+
+  <para>
+   Takahiro Itagaki <email>itagaki.takahiro@oss.ntt.co.jp</email>
+  </para>
+ </sect2>
+
+</sect1>
index 22bc5b1f3f1080478876f10c7bbdd50b13c4ee34..0d0d23f53aea6c829c4dcebaa31d2f1eea9bdfe2 100644 (file)
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.569 2009/01/03 17:08:38 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.570 2009/01/04 22:19:59 tgl Exp $
  *
  * NOTES
  *
@@ -2731,7 +2731,7 @@ PostmasterStateMachine(void)
                ereport(LOG,
                                (errmsg("all server processes terminated; reinitializing")));
 
-               shmem_exit(0);
+               shmem_exit(1);
                reset_shared(PostPortNumber);
 
                StartupPID = StartupDataBase();
index 5bb4cc597af4b36abea0cbb92fddb3da389d12ab..169821b79e05201a1821dbbf4fff4be15dbc078f 100644 (file)
@@ -5,7 +5,7 @@
  *
  *     Copyright (c) 2001-2009, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/include/pgstat.h,v 1.81 2009/01/01 17:23:55 momjian Exp $
+ *     $PostgreSQL: pgsql/src/include/pgstat.h,v 1.82 2009/01/04 22:19:59 tgl Exp $
  * ----------
  */
 #ifndef PGSTAT_H
@@ -592,7 +592,7 @@ typedef struct PgStat_FunctionCallUsage
 extern bool pgstat_track_activities;
 extern bool pgstat_track_counts;
 extern int     pgstat_track_functions;
-extern int     pgstat_track_activity_query_size;
+extern PGDLLIMPORT int pgstat_track_activity_query_size;
 extern char *pgstat_stat_tmpname;
 extern char *pgstat_stat_filename;