From 53dbc27c62d8e1b6c5253feba04a5094cb8fe046 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 29 Dec 2010 06:48:53 -0500 Subject: [PATCH] Support unlogged tables. The contents of an unlogged table are not WAL-logged; thus, they are not available on standby servers and are truncated whenever the database system enters recovery. Indexes on unlogged tables are also unlogged. Unlogged GiST indexes are not currently supported. --- doc/src/sgml/catalogs.sgml | 3 +- doc/src/sgml/indexam.sgml | 11 + doc/src/sgml/ref/create_table.sgml | 21 +- doc/src/sgml/ref/create_table_as.sgml | 12 +- doc/src/sgml/ref/pg_dump.sgml | 11 + doc/src/sgml/ref/pg_dumpall.sgml | 11 + doc/src/sgml/storage.sgml | 22 +- src/backend/access/gin/gininsert.c | 42 +++ src/backend/access/gist/gist.c | 13 + src/backend/access/hash/hash.c | 15 +- src/backend/access/hash/hashovfl.c | 9 +- src/backend/access/hash/hashpage.c | 20 +- src/backend/access/nbtree/nbtree.c | 31 ++ src/backend/access/transam/xlog.c | 17 ++ src/backend/catalog/catalog.c | 14 +- src/backend/catalog/heap.c | 19 ++ src/backend/catalog/index.c | 11 + src/backend/catalog/storage.c | 49 ++-- src/backend/commands/tablecmds.c | 19 +- src/backend/parser/gram.y | 11 +- src/backend/storage/buffer/bufmgr.c | 53 ++-- src/backend/storage/file/Makefile | 2 +- src/backend/storage/file/copydir.c | 3 +- src/backend/storage/file/fd.c | 2 +- src/backend/storage/file/reinit.c | 396 ++++++++++++++++++++++++++ src/backend/utils/adt/dbsize.c | 1 + src/backend/utils/cache/relcache.c | 2 + src/bin/pg_dump/pg_dump.c | 71 ++++- src/bin/pg_dump/pg_dump.h | 1 + src/bin/pg_dump/pg_dumpall.c | 7 + src/bin/psql/describe.c | 36 ++- src/include/access/gin.h | 1 + src/include/access/gist_private.h | 1 + src/include/access/hash.h | 9 +- src/include/access/nbtree.h | 1 + src/include/catalog/catalog.h | 2 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_am.h | 18 +- src/include/catalog/pg_class.h | 1 + src/include/catalog/pg_proc.h | 8 + 
src/include/catalog/storage.h | 2 + src/include/parser/kwlist.h | 1 + src/include/pg_config_manual.h | 2 +- src/include/storage/buf_internals.h | 1 + src/include/storage/bufmgr.h | 6 +- src/include/storage/copydir.h | 1 + src/include/storage/reinit.h | 23 ++ src/include/storage/relfilenode.h | 5 +- src/include/utils/rel.h | 1 + 49 files changed, 916 insertions(+), 104 deletions(-) create mode 100644 src/backend/storage/file/reinit.c create mode 100644 src/include/storage/reinit.h diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 9fa20cfeee..0eeb499207 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1644,7 +1644,8 @@ bool - p = permanent table, t = temporary table + p = permanent table, u = unlogged table, + t = temporary table diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index c4eb59f7be..51e70e9200 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -167,6 +167,17 @@ ambuild (Relation heapRelation, +void +ambuildempty (Relation indexRelation); + + Build an empty index, and write it to the initialization fork (INIT_FORKNUM) + of the given relation. This method is called only for unlogged tables; the + empty index written to the initialization fork will be copied over the main + relation fork on each server restart. + + + + bool aminsert (Relation indexRelation, Datum *values, diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index bc5dff0329..efb4b1aca1 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -21,7 +21,7 @@ PostgreSQL documentation -CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } ] TABLE [ IF NOT EXISTS ] table_name ( [ +CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] table_name ( [ { column_name data_type [ column_constraint [ ... ] ] | table_constraint | LIKE parent_table [ like_option ... 
] } @@ -32,7 +32,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } ] TABLE [ IF NOT EXISTS ] tablespace ] -CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } ] TABLE [ IF NOT EXISTS ] table_name +CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] table_name OF type_name [ ( { column_name WITH OPTIONS [ column_constraint [ ... ] ] | table_constraint } @@ -164,6 +164,23 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } ] TABLE [ IF NOT EXISTS ] + + UNLOGGED + + + If specified, the table is created as an unlogged table. Data written + to unlogged tables is not written to the write-ahead log (see ), which makes them considerably faster than ordinary + tables. However, they are not crash-safe: an unlogged table is + automatically truncated after a crash or unclean shutdown. The contents + of an unlogged table are also not replicated to standby servers. + Any indexes created on an unlogged table are automatically unlogged as + well; however, unlogged GiST indexes are + currently not supported and cannot be created on an unlogged table. + + + + IF NOT EXISTS diff --git a/doc/src/sgml/ref/create_table_as.sgml b/doc/src/sgml/ref/create_table_as.sgml index 3a256d1aae..ff71078d1e 100644 --- a/doc/src/sgml/ref/create_table_as.sgml +++ b/doc/src/sgml/ref/create_table_as.sgml @@ -21,7 +21,7 @@ PostgreSQL documentation -CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } ] TABLE table_name +CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE table_name [ (column_name [, ...] ) ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] @@ -81,6 +81,16 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } ] TABLE table_name + + UNLOGGED + + + If specified, the table is created as an unlogged table. + Refer to for details. 
+ + + + table_name diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index fd13c0d9e7..b291a257ea 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -669,6 +669,17 @@ PostgreSQL documentation + + + + + Do not dump the contents of unlogged tables. This option has no + effect on whether or not the table definitions (schema) are dumped; + it only suppresses dumping the table data. + + + + diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 39da0b2949..04e95e876d 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -201,6 +201,17 @@ PostgreSQL documentation + + + + + Do not dump the contents of unlogged tables. This option has no + effect on whether or not the table definitions (schema) are dumped; + it only suppresses dumping the table data. + + + + diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index cda7f6452f..430df4a843 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -147,7 +147,9 @@ the relation. The free space map is stored in a file named with the filenode number plus the suffix _fsm. Tables also have a visibility map, stored in a fork with the suffix _vm, to track which pages are known to have no dead tuples. The visibility map is -described further in . +described further in . Unlogged tables and indexes +have a third fork, known as the initialization fork, which is stored in a fork +with the suffix _init (see ). @@ -485,6 +487,24 @@ a bit is not set, it might or might not be true. + + +The Initialization Fork + + + Initialization Fork + + + +Each unlogged table, and each index on an unlogged table, has an initialization +fork. The initialization fork is an empty table or index of the appropriate +type. 
When an unlogged table must be reset to empty due to a crash, the +initialization fork is copied over the main fork, and any other forks are +erased (they will be recreated automatically as needed). + + + + Database Page Layout diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 8681edefe6..d66c79cb8d 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -19,6 +19,7 @@ #include "catalog/index.h" #include "miscadmin.h" #include "storage/bufmgr.h" +#include "storage/smgr.h" #include "storage/indexfsm.h" #include "utils/memutils.h" @@ -411,6 +412,47 @@ ginbuild(PG_FUNCTION_ARGS) PG_RETURN_POINTER(result); } +/* + * ginbuildempty() -- build an empty gin index in the initialization fork + */ +Datum +ginbuildempty(PG_FUNCTION_ARGS) +{ + Relation index = (Relation) PG_GETARG_POINTER(0); + Buffer RootBuffer, + MetaBuffer; + + /* An empty GIN index has two pages. */ + MetaBuffer = + ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL); + LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE); + RootBuffer = + ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL); + LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE); + + /* Initialize both pages, mark them dirty, unlock and release buffer. */ + START_CRIT_SECTION(); + GinInitMetabuffer(MetaBuffer); + MarkBufferDirty(MetaBuffer); + GinInitBuffer(RootBuffer, GIN_LEAF); + MarkBufferDirty(RootBuffer); + + /* XLOG the new pages */ + log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, + BufferGetBlockNumber(MetaBuffer), + BufferGetPage(MetaBuffer)); + log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, + BufferGetBlockNumber(RootBuffer), + BufferGetPage(RootBuffer)); + END_CRIT_SECTION(); + + /* Unlock and release the buffers. 
*/ + UnlockReleaseBuffer(MetaBuffer); + UnlockReleaseBuffer(RootBuffer); + + PG_RETURN_VOID(); +} + /* * Inserts value during normal insertion */ diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 7cd144e2f0..c26ac74332 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -218,6 +218,19 @@ gistbuildCallback(Relation index, MemoryContextReset(buildstate->tmpCtx); } +/* + * gistbuildempty() -- build an empty gist index in the initialization fork + */ +Datum +gistbuildempty(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unlogged GIST indexes are not supported"))); + + PG_RETURN_VOID(); +} + /* * gistinsert -- wrapper for GiST tuple insertion. * diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index e53ec3d5ea..4df92d44c0 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -69,7 +69,7 @@ hashbuild(PG_FUNCTION_ARGS) estimate_rel_size(heap, NULL, &relpages, &reltuples); /* Initialize the hash index metadata page and initial buckets */ - num_buckets = _hash_metapinit(index, reltuples); + num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM); /* * If we just insert the tuples into the index in scan order, then @@ -113,6 +113,19 @@ hashbuild(PG_FUNCTION_ARGS) PG_RETURN_POINTER(result); } +/* + * hashbuildempty() -- build an empty hash index in the initialization fork + */ +Datum +hashbuildempty(PG_FUNCTION_ARGS) +{ + Relation index = (Relation) PG_GETARG_POINTER(0); + + _hash_metapinit(index, 0, INIT_FORKNUM); + + PG_RETURN_VOID(); +} + /* * Per-tuple callback from IndexBuildHeapScan */ diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index 7c6e902ea9..454ad6c7a8 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -259,7 +259,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf) * convenient to pre-mark them as "in use" too. 
*/ bit = metap->hashm_spares[splitnum]; - _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit)); + _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit), MAIN_FORKNUM); metap->hashm_spares[splitnum]++; } else @@ -280,7 +280,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf) * with metapage write lock held; would be better to use a lock that * doesn't block incoming searches. */ - newbuf = _hash_getnewbuf(rel, blkno); + newbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM); metap->hashm_spares[splitnum]++; @@ -503,7 +503,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, * All bits in the new bitmap page are set to "1", indicating "in use". */ void -_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno) +_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno, + ForkNumber forkNum) { Buffer buf; Page pg; @@ -520,7 +521,7 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno) * page while holding the metapage lock, but this path is taken so seldom * that it's not worth worrying about. */ - buf = _hash_getnewbuf(rel, blkno); + buf = _hash_getnewbuf(rel, blkno, forkNum); pg = BufferGetPage(buf); /* initialize the page's special space */ diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 2ebeda98b5..29f7b25b4e 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -183,9 +183,9 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno) * extend the index at a time. 
*/ Buffer -_hash_getnewbuf(Relation rel, BlockNumber blkno) +_hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum) { - BlockNumber nblocks = RelationGetNumberOfBlocks(rel); + BlockNumber nblocks = RelationGetNumberOfBlocksInFork(rel, forkNum); Buffer buf; if (blkno == P_NEW) @@ -197,13 +197,13 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno) /* smgr insists we use P_NEW to extend the relation */ if (blkno == nblocks) { - buf = ReadBuffer(rel, P_NEW); + buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL); if (BufferGetBlockNumber(buf) != blkno) elog(ERROR, "unexpected hash relation size: %u, should be %u", BufferGetBlockNumber(buf), blkno); } else - buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL); + buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO, NULL); LockBuffer(buf, HASH_WRITE); @@ -324,7 +324,7 @@ _hash_chgbufaccess(Relation rel, * multiple buffer locks is ignored. */ uint32 -_hash_metapinit(Relation rel, double num_tuples) +_hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum) { HashMetaPage metap; HashPageOpaque pageopaque; @@ -340,7 +340,7 @@ _hash_metapinit(Relation rel, double num_tuples) uint32 i; /* safety check */ - if (RelationGetNumberOfBlocks(rel) != 0) + if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0) elog(ERROR, "cannot initialize non-empty hash index \"%s\"", RelationGetRelationName(rel)); @@ -383,7 +383,7 @@ _hash_metapinit(Relation rel, double num_tuples) * calls to occur. This ensures that the smgr level has the right idea of * the physical index length. 
*/ - metabuf = _hash_getnewbuf(rel, HASH_METAPAGE); + metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum); pg = BufferGetPage(metabuf); pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); @@ -451,7 +451,7 @@ _hash_metapinit(Relation rel, double num_tuples) /* Allow interrupts, in case N is huge */ CHECK_FOR_INTERRUPTS(); - buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i)); + buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum); pg = BufferGetPage(buf); pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); pageopaque->hasho_prevblkno = InvalidBlockNumber; @@ -468,7 +468,7 @@ _hash_metapinit(Relation rel, double num_tuples) /* * Initialize first bitmap page */ - _hash_initbitmap(rel, metap, num_buckets + 1); + _hash_initbitmap(rel, metap, num_buckets + 1, forkNum); /* all done */ _hash_wrtbuf(rel, metabuf); @@ -785,7 +785,7 @@ _hash_splitbucket(Relation rel, oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); nblkno = start_nblkno; - nbuf = _hash_getnewbuf(rel, nblkno); + nbuf = _hash_getnewbuf(rel, nblkno, MAIN_FORKNUM); npage = BufferGetPage(nbuf); /* initialize the new bucket's primary page */ diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 655a40090e..a13d629b0e 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -29,6 +29,7 @@ #include "storage/indexfsm.h" #include "storage/ipc.h" #include "storage/lmgr.h" +#include "storage/smgr.h" #include "utils/memutils.h" @@ -204,6 +205,36 @@ btbuildCallback(Relation index, pfree(itup); } +/* + * btbuildempty() -- build an empty btree index in the initialization fork + */ +Datum +btbuildempty(PG_FUNCTION_ARGS) +{ + Relation index = (Relation) PG_GETARG_POINTER(0); + Page metapage; + + /* Construct metapage. */ + metapage = (Page) palloc(BLCKSZ); + _bt_initmetapage(metapage, P_NONE, 0); + + /* Write the page. If archiving/streaming, XLOG it. 
*/ + smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE, + (char *) metapage, true); + if (XLogIsNeeded()) + log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, + BTREE_METAPAGE, metapage); + + /* + * An immediate sync is required even if we xlog'd the page, because the + * write did not go through shared_buffers and therefore a concurrent + * checkpoint may have moved the redo pointer past our xlog record. + */ + smgrimmedsync(index->rd_smgr, INIT_FORKNUM); + + PG_RETURN_VOID(); +} + /* * btinsert() -- insert an index tuple into a btree. * diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index bf62138bf8..1ec6f2f15a 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -49,6 +49,7 @@ #include "storage/latch.h" #include "storage/pmsignal.h" #include "storage/procarray.h" +#include "storage/reinit.h" #include "storage/smgr.h" #include "storage/spin.h" #include "utils/builtins.h" @@ -5960,6 +5961,14 @@ StartupXLOG(void) /* Check that the GUCs used to generate the WAL allow recovery */ CheckRequiredParameterValues(); + /* + * We're in recovery, so unlogged relations may be trashed + * and must be reset. This should be done BEFORE allowing Hot + * Standby connections, so that read-only backends don't try to + * read whatever garbage is left over from before. + */ + ResetUnloggedRelations(UNLOGGED_RELATION_CLEANUP); + /* * Initialize for Hot Standby, if enabled. We won't let backends in * yet, not until we've reached the min recovery point specified in @@ -6413,6 +6422,14 @@ StartupXLOG(void) */ PreallocXlogFiles(EndOfLog); + /* + * Reset initial contents of unlogged relations. This has to be done + * AFTER recovery is complete so that any unlogged relations created + * during recovery also get picked up. + */ + if (InRecovery) + ResetUnloggedRelations(UNLOGGED_RELATION_INIT); + /* * Okay, we're officially UP. 
*/ diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index 88b5c2a215..fc5a8fcd65 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -55,7 +55,8 @@ const char *forkNames[] = { "main", /* MAIN_FORKNUM */ "fsm", /* FSM_FORKNUM */ - "vm" /* VISIBILITYMAP_FORKNUM */ + "vm", /* VISIBILITYMAP_FORKNUM */ + "init" /* INIT_FORKNUM */ }; /* @@ -82,14 +83,14 @@ forkname_to_number(char *forkName) * We use this to figure out whether a filename could be a relation * fork (as opposed to an oddly named stray file that somehow ended * up in the database directory). If the passed string begins with - * a fork name (other than the main fork name), we return its length. - * If not, we return 0. + * a fork name (other than the main fork name), we return its length, + * and set *fork (if not NULL) to the fork number. If not, we return 0. * * Note that the present coding assumes that there are no fork names which * are prefixes of other fork names. */ int -forkname_chars(const char *str) +forkname_chars(const char *str, ForkNumber *fork) { ForkNumber forkNum; @@ -97,7 +98,11 @@ forkname_chars(const char *str) { int len = strlen(forkNames[forkNum]); if (strncmp(forkNames[forkNum], str, len) == 0) + { + if (fork) + *fork = forkNum; return len; + } } return 0; } @@ -537,6 +542,7 @@ GetNewRelFileNode(Oid reltablespace, Relation pg_class, char relpersistence) case RELPERSISTENCE_TEMP: backend = MyBackendId; break; + case RELPERSISTENCE_UNLOGGED: case RELPERSISTENCE_PERMANENT: backend = InvalidBackendId; break; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index bcf6caa2ee..8027d740f6 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1210,6 +1210,25 @@ heap_create_with_catalog(const char *relname, if (oncommit != ONCOMMIT_NOOP) register_on_commit_action(relid, oncommit); + /* + * If this is an unlogged relation, it needs an init fork so that it + * can be correctly reinitialized on 
restart. Since we're going to + * do an immediate sync, we only need to xlog this if archiving or + * streaming is enabled. And the immediate sync is required, because + * otherwise there's no guarantee that this will hit the disk before + * the next checkpoint moves the redo pointer. + */ + if (relpersistence == RELPERSISTENCE_UNLOGGED) + { + Assert(relkind == RELKIND_RELATION || relkind == RELKIND_TOASTVALUE); + + smgrcreate(new_rel_desc->rd_smgr, INIT_FORKNUM, false); + if (XLogIsNeeded()) + log_smgrcreate(&new_rel_desc->rd_smgr->smgr_rnode.node, + INIT_FORKNUM); + smgrimmedsync(new_rel_desc->rd_smgr, INIT_FORKNUM); + } + /* * ok, the relation has been cataloged, so close our relations and return * the OID of the newly created relation. diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 8fbe8ebc91..e50a084f00 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1437,6 +1437,17 @@ index_build(Relation heapRelation, PointerGetDatum(indexInfo))); Assert(PointerIsValid(stats)); + /* + * If this is an unlogged index, we need to write out an init fork for it. + */ + if (heapRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) + { + RegProcedure ambuildempty = indexRelation->rd_am->ambuildempty; + RelationOpenSmgr(indexRelation); + smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false); + OidFunctionCall1(ambuildempty, PointerGetDatum(indexRelation)); + } + /* * If it's for an exclusion constraint, make a second pass over the heap * to verify that the constraint is satisfied. 
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 671aaff133..0bd0451f00 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -74,6 +74,7 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ typedef struct xl_smgr_create { RelFileNode rnode; + ForkNumber forkNum; } xl_smgr_create; typedef struct xl_smgr_truncate @@ -98,9 +99,6 @@ void RelationCreateStorage(RelFileNode rnode, char relpersistence) { PendingRelDelete *pending; - XLogRecPtr lsn; - XLogRecData rdata; - xl_smgr_create xlrec; SMgrRelation srel; BackendId backend; bool needs_wal; @@ -111,6 +109,10 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence) backend = MyBackendId; needs_wal = false; break; + case RELPERSISTENCE_UNLOGGED: + backend = InvalidBackendId; + needs_wal = false; + break; case RELPERSISTENCE_PERMANENT: backend = InvalidBackendId; needs_wal = true; @@ -124,19 +126,7 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence) smgrcreate(srel, MAIN_FORKNUM, false); if (needs_wal) - { - /* - * Make an XLOG entry reporting the file creation. - */ - xlrec.rnode = rnode; - - rdata.data = (char *) &xlrec; - rdata.len = sizeof(xlrec); - rdata.buffer = InvalidBuffer; - rdata.next = NULL; - - lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata); - } + log_smgrcreate(&srel->smgr_rnode.node, MAIN_FORKNUM); /* Add the relation to the list of stuff to delete at abort */ pending = (PendingRelDelete *) @@ -149,6 +139,29 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence) pendingDeletes = pending; } +/* + * Perform XLogInsert of a XLOG_SMGR_CREATE record to WAL. + */ +void +log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum) +{ + xl_smgr_create xlrec; + XLogRecData rdata; + + /* + * Make an XLOG entry reporting the file creation. 
+ */ + xlrec.rnode = *rnode; + xlrec.forkNum = forkNum; + + rdata.data = (char *) &xlrec; + rdata.len = sizeof(xlrec); + rdata.buffer = InvalidBuffer; + rdata.next = NULL; + + XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata); +} + /* * RelationDropStorage * Schedule unlinking of physical storage at transaction commit. @@ -478,7 +491,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) SMgrRelation reln; reln = smgropen(xlrec->rnode, InvalidBackendId); - smgrcreate(reln, MAIN_FORKNUM, true); + smgrcreate(reln, xlrec->forkNum, true); } else if (info == XLOG_SMGR_TRUNCATE) { @@ -523,7 +536,7 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec) if (info == XLOG_SMGR_CREATE) { xl_smgr_create *xlrec = (xl_smgr_create *) rec; - char *path = relpathperm(xlrec->rnode, MAIN_FORKNUM); + char *path = relpathperm(xlrec->rnode, xlrec->forkNum); appendStringInfo(buf, "file create: %s", path); pfree(path); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 6729d8336f..3f6b814f02 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -5128,12 +5128,12 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel, RelationGetRelationName(pkrel)))); /* - * References from permanent tables to temp tables are disallowed because - * the contents of the temp table disappear at the end of each session. - * References from temp tables to permanent tables are also disallowed, - * because other backends might need to run the RI triggers on the perm - * table, but they can't reliably see tuples in the local buffers of other - * backends. + * References from permanent or unlogged tables to temp tables, and from + * permanent tables to unlogged tables, are disallowed because the + * referenced data can vanish out from under us. 
References from temp + * tables to any other table type are also disallowed, because other + * backends might need to run the RI triggers on the perm table, but they + * can't reliably see tuples in the local buffers of other backends. */ switch (rel->rd_rel->relpersistence) { @@ -5143,6 +5143,13 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("constraints on permanent tables may reference only permanent tables"))); break; + case RELPERSISTENCE_UNLOGGED: + if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT + && pkrel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraints on unlogged tables may reference only permanent or unlogged tables"))); + break; case RELPERSISTENCE_TEMP: if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) ereport(ERROR, diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 37840baa0f..26a5e84d44 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -538,8 +538,8 @@ static RangeVar *makeRangeVarFromAnyName(List *names, int position, core_yyscan_ TO TRAILING TRANSACTION TREAT TRIGGER TRIM TRUE_P TRUNCATE TRUSTED TYPE_P - UNBOUNDED UNCOMMITTED UNENCRYPTED UNION UNIQUE UNKNOWN UNLISTEN UNTIL - UPDATE USER USING + UNBOUNDED UNCOMMITTED UNENCRYPTED UNION UNIQUE UNKNOWN UNLISTEN UNLOGGED + UNTIL UPDATE USER USING VACUUM VALID VALIDATOR VALUE_P VALUES VARCHAR VARIADIC VARYING VERBOSE VERSION_P VIEW VOLATILE @@ -2365,6 +2365,7 @@ OptTemp: TEMPORARY { $$ = RELPERSISTENCE_TEMP; } | LOCAL TEMP { $$ = RELPERSISTENCE_TEMP; } | GLOBAL TEMPORARY { $$ = RELPERSISTENCE_TEMP; } | GLOBAL TEMP { $$ = RELPERSISTENCE_TEMP; } + | UNLOGGED { $$ = RELPERSISTENCE_UNLOGGED; } | /*EMPTY*/ { $$ = RELPERSISTENCE_PERMANENT; } ; @@ -7927,6 +7928,11 @@ OptTempTableName: $$ = $4; $$->relpersistence = RELPERSISTENCE_TEMP; } + | UNLOGGED opt_table qualified_name + { + $$ = $3; 
+ $$->relpersistence = RELPERSISTENCE_UNLOGGED; + } | TABLE qualified_name { $$ = $2; @@ -11395,6 +11401,7 @@ unreserved_keyword: | UNENCRYPTED | UNKNOWN | UNLISTEN + | UNLOGGED | UNTIL | UPDATE | VACUUM diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 860e736ff0..34e5453669 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -82,7 +82,7 @@ static bool IsForInput; static volatile BufferDesc *PinCountWaitBuf = NULL; -static Buffer ReadBuffer_common(SMgrRelation reln, +static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit); @@ -97,7 +97,9 @@ static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty, int set_flag_bits); static void shared_buffer_write_error_callback(void *arg); static void local_buffer_write_error_callback(void *arg); -static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, +static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, + char relpersistence, + ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr); @@ -241,8 +243,8 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, * miss. */ pgstat_count_buffer_read(reln); - buf = ReadBuffer_common(reln->rd_smgr, forkNum, blockNum, - mode, strategy, &hit); + buf = ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence, + forkNum, blockNum, mode, strategy, &hit); if (hit) pgstat_count_buffer_hit(reln); return buf; @@ -253,10 +255,10 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, * ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require * a relcache entry for the relation. 
* - * NB: At present, this function may not be used on temporary relations, which + * NB: At present, this function may only be used on permanent relations, which * is OK, because we only use it during XLOG replay. If in the future we - * want to use it on temporary relations, we could pass the backend ID as an - * additional parameter. + * want to use it on temporary or unlogged relations, we could pass additional + * parameters. */ Buffer ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, @@ -267,7 +269,8 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, SMgrRelation smgr = smgropen(rnode, InvalidBackendId); - return ReadBuffer_common(smgr, forkNum, blockNum, mode, strategy, &hit); + return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum, + mode, strategy, &hit); } @@ -277,7 +280,7 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, * *hit is set to true if the request was satisfied from shared buffer cache. */ static Buffer -ReadBuffer_common(SMgrRelation smgr, ForkNumber forkNum, +ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit) { @@ -319,7 +322,8 @@ ReadBuffer_common(SMgrRelation smgr, ForkNumber forkNum, * lookup the buffer. IO_IN_PROGRESS is set if the requested block is * not currently in memory. */ - bufHdr = BufferAlloc(smgr, forkNum, blockNum, strategy, &found); + bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum, + strategy, &found); if (found) pgBufferUsage.shared_blks_hit++; else @@ -500,7 +504,7 @@ ReadBuffer_common(SMgrRelation smgr, ForkNumber forkNum, * No locks are held either at entry or exit. 
*/ static volatile BufferDesc * -BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, +BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr) @@ -797,8 +801,11 @@ BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, * 1 so that the buffer can survive one clock-sweep pass.) */ buf->tag = newTag; - buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR); - buf->flags |= BM_TAG_VALID; + buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT); + if (relpersistence == RELPERSISTENCE_PERMANENT) + buf->flags |= BM_TAG_VALID | BM_PERMANENT; + else + buf->flags |= BM_TAG_VALID; buf->usage_count = 1; UnlockBufHdr(buf); @@ -1155,8 +1162,10 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner) * BufferSync -- Write out all dirty buffers in the pool. * * This is called at checkpoint time to write out all dirty shared buffers. - * The checkpoint request flags should be passed in; currently the only one - * examined is CHECKPOINT_IMMEDIATE, which disables delays between writes. + * The checkpoint request flags should be passed in. If CHECKPOINT_IMMEDIATE + * is set, we disable delays between writes; if CHECKPOINT_IS_SHUTDOWN is + * set, we write even unlogged buffers, which are otherwise skipped. The + * remaining flags currently have no effect here. */ static void BufferSync(int flags) @@ -1165,10 +1174,18 @@ BufferSync(int flags) int num_to_scan; int num_to_write; int num_written; + int mask = BM_DIRTY; /* Make sure we can handle the pin inside SyncOneBuffer */ ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + /* + * Unless this is a shutdown checkpoint, we write only permanent, dirty + * buffers. But at shutdown time, we write all dirty buffers. 
+ */ + if (!(flags & CHECKPOINT_IS_SHUTDOWN)) + mask |= BM_PERMANENT; + /* * Loop over all buffers, and mark the ones that need to be written with * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_write), so that we @@ -1196,7 +1213,7 @@ BufferSync(int flags) */ LockBufHdr(bufHdr); - if (bufHdr->flags & BM_DIRTY) + if ((bufHdr->flags & mask) == mask) { bufHdr->flags |= BM_CHECKPOINT_NEEDED; num_to_write++; @@ -1897,12 +1914,12 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) * Determines the current number of pages in the relation. */ BlockNumber -RelationGetNumberOfBlocks(Relation relation) +RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum) { /* Open it at the smgr level if not already done */ RelationOpenSmgr(relation); - return smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); + return smgrnblocks(relation->rd_smgr, forkNum); } /* --------------------------------------------------------------------- diff --git a/src/backend/storage/file/Makefile b/src/backend/storage/file/Makefile index 3b93aa1b45..d2198f2b93 100644 --- a/src/backend/storage/file/Makefile +++ b/src/backend/storage/file/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/storage/file top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global -OBJS = fd.o buffile.o copydir.o +OBJS = fd.o buffile.o copydir.o reinit.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c index f7dc509b50..587fb9260c 100644 --- a/src/backend/storage/file/copydir.c +++ b/src/backend/storage/file/copydir.c @@ -38,7 +38,6 @@ #endif -static void copy_file(char *fromfile, char *tofile); static void fsync_fname(char *fname, bool isdir); @@ -142,7 +141,7 @@ copydir(char *fromdir, char *todir, bool recurse) /* * copy one file */ -static void +void copy_file(char *fromfile, char *tofile) { char *buffer; diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 4f7dc39d63..a1dc18be44 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -2055,7 +2055,7 @@ looks_like_temp_rel_name(const char *name) /* We might have _forkname or .segment or both. */ if (name[pos] == '_') { - int forkchar = forkname_chars(&name[pos+1]); + int forkchar = forkname_chars(&name[pos+1], NULL); if (forkchar <= 0) return false; pos += forkchar + 1; diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c new file mode 100644 index 0000000000..b75178b804 --- /dev/null +++ b/src/backend/storage/file/reinit.c @@ -0,0 +1,396 @@ +/*------------------------------------------------------------------------- + * + * reinit.c + * Reinitialization of unlogged relations + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/file/reinit.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <unistd.h> + +#include "catalog/catalog.h" +#include "storage/copydir.h" +#include "storage/fd.h" +#include "storage/reinit.h" +#include "utils/hsearch.h" +#include "utils/memutils.h" + +static
void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, + int op); +static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, + int op); +static bool parse_filename_for_nontemp_relation(const char *name, + int *oidchars, ForkNumber *fork); + +typedef struct { + char oid[OIDCHARS+1]; +} unlogged_relation_entry; + +/* + * Reset unlogged relations from before the last restart. + * + * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any + * relation with an "init" fork, except for the "init" fork itself. + * + * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main + * fork. + */ +void +ResetUnloggedRelations(int op) +{ + char temp_path[MAXPGPATH]; + DIR *spc_dir; + struct dirent *spc_de; + MemoryContext tmpctx, oldctx; + + /* Log it. */ + ereport(DEBUG1, + (errmsg("resetting unlogged relations: cleanup %d init %d", + (op & UNLOGGED_RELATION_CLEANUP) != 0, + (op & UNLOGGED_RELATION_INIT) != 0))); + + /* + * Just to be sure we don't leak any memory, let's create a temporary + * memory context for this operation. + */ + tmpctx = AllocSetContextCreate(CurrentMemoryContext, + "ResetUnloggedRelations", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldctx = MemoryContextSwitchTo(tmpctx); + + /* + * First process unlogged files in pg_default ($PGDATA/base) + */ + ResetUnloggedRelationsInTablespaceDir("base", op); + + /* + * Cycle through directories for all non-default tablespaces. + */ + spc_dir = AllocateDir("pg_tblspc"); + + while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL) + { + if (strcmp(spc_de->d_name, ".") == 0 || + strcmp(spc_de->d_name, "..") == 0) + continue; + + snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s", + spc_de->d_name, TABLESPACE_VERSION_DIRECTORY); + ResetUnloggedRelationsInTablespaceDir(temp_path, op); + } + + FreeDir(spc_dir); + + /* + * Restore memory context. 
+ */ + MemoryContextSwitchTo(oldctx); + MemoryContextDelete(tmpctx); +} + +/* Process one tablespace directory for ResetUnloggedRelations */ +static void +ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op) +{ + DIR *ts_dir; + struct dirent *de; + char dbspace_path[MAXPGPATH]; + + ts_dir = AllocateDir(tsdirname); + if (ts_dir == NULL) + { + /* anything except ENOENT is fishy */ + if (errno != ENOENT) + elog(LOG, + "could not open tablespace directory \"%s\": %m", + tsdirname); + return; + } + + while ((de = ReadDir(ts_dir, tsdirname)) != NULL) + { + int i = 0; + + /* + * We're only interested in the per-database directories, which have + * numeric names. Note that this code will also (properly) ignore "." + * and "..". + */ + while (isdigit((unsigned char) de->d_name[i])) + ++i; + if (de->d_name[i] != '\0' || i == 0) + continue; + + snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s", + tsdirname, de->d_name); + ResetUnloggedRelationsInDbspaceDir(dbspace_path, op); + } + + FreeDir(ts_dir); +} + +/* Process one per-dbspace directory for ResetUnloggedRelations */ +static void +ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) +{ + DIR *dbspace_dir; + struct dirent *de; + char rm_path[MAXPGPATH]; + + /* Caller must specify at least one operation. */ + Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0); + + /* + * Cleanup is a two-pass operation. First, we go through and identify all + * the files with init forks. Then, we go through again and nuke + * everything with the same OID except the init fork. + */ + if ((op & UNLOGGED_RELATION_CLEANUP) != 0) + { + HTAB *hash = NULL; + HASHCTL ctl; + + /* Open the directory. 
*/ + dbspace_dir = AllocateDir(dbspacedirname); + if (dbspace_dir == NULL) + { + elog(LOG, + "could not open dbspace directory \"%s\": %m", + dbspacedirname); + return; + } + + /* + * It's possible that someone could create a ton of unlogged relations + * in the same database & tablespace, so we'd better use a hash table + * rather than an array or linked list to keep track of which files + * need to be reset. Otherwise, this cleanup operation would be + * O(n^2). + */ + ctl.keysize = sizeof(unlogged_relation_entry); + ctl.entrysize = sizeof(unlogged_relation_entry); + hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM); + + /* Scan the directory. */ + while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) + { + ForkNumber forkNum; + int oidchars; + unlogged_relation_entry ent; + + /* Skip anything that doesn't look like a relation data file. */ + if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, + &forkNum)) + continue; + + /* Also skip it unless this is the init fork. */ + if (forkNum != INIT_FORKNUM) + continue; + + /* + * Put the OID portion of the name into the hash table, if it isn't + * already. + */ + memset(ent.oid, 0, sizeof(ent.oid)); + memcpy(ent.oid, de->d_name, oidchars); + hash_search(hash, &ent, HASH_ENTER, NULL); + } + + /* Done with the first pass. */ + FreeDir(dbspace_dir); + + /* + * If we didn't find any init forks, there's no point in continuing; + * we can bail out now. + */ + if (hash_get_num_entries(hash) == 0) + { + hash_destroy(hash); + return; + } + + /* + * Now, make a second pass and remove anything that matches. First, + * reopen the directory. + */ + dbspace_dir = AllocateDir(dbspacedirname); + if (dbspace_dir == NULL) + { + elog(LOG, + "could not open dbspace directory \"%s\": %m", + dbspacedirname); + hash_destroy(hash); + return; + } + + /* Scan the directory. 
*/ + while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) + { + ForkNumber forkNum; + int oidchars; + bool found; + unlogged_relation_entry ent; + + /* Skip anything that doesn't look like a relation data file. */ + if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, + &forkNum)) + continue; + + /* We never remove the init fork. */ + if (forkNum == INIT_FORKNUM) + continue; + + /* + * See whether the OID portion of the name shows up in the hash + * table. + */ + memset(ent.oid, 0, sizeof(ent.oid)); + memcpy(ent.oid, de->d_name, oidchars); + hash_search(hash, &ent, HASH_FIND, &found); + + /* If so, nuke it! */ + if (found) + { + snprintf(rm_path, sizeof(rm_path), "%s/%s", + dbspacedirname, de->d_name); + /* + * It's tempting to actually throw an error here, but since + * this code gets run during database startup, that could + * result in the database failing to start. (XXX Should we do + * it anyway?) + */ + if (unlink(rm_path)) + elog(LOG, "could not unlink file \"%s\": %m", rm_path); + else + elog(DEBUG2, "unlinked file \"%s\"", rm_path); + } + } + + /* Cleanup is complete. */ + FreeDir(dbspace_dir); + hash_destroy(hash); + } + + /* + * Initialization happens after cleanup is complete: we copy each init + * fork file to the corresponding main fork file. Note that if we are + * asked to do both cleanup and init, we may never get here: if the cleanup + * code determines that there are no init forks in this dbspace, it will + * return before we get to this point. + */ + if ((op & UNLOGGED_RELATION_INIT) != 0) + { + /* Open the directory. */ + dbspace_dir = AllocateDir(dbspacedirname); + if (dbspace_dir == NULL) + { + /* we just saw this directory, so it really ought to be there */ + elog(LOG, + "could not open dbspace directory \"%s\": %m", + dbspacedirname); + return; + } + + /* Scan the directory. 
*/ + while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) + { + ForkNumber forkNum; + int oidchars; + char oidbuf[OIDCHARS+1]; + char srcpath[MAXPGPATH]; + char dstpath[MAXPGPATH]; + + /* Skip anything that doesn't look like a relation data file. */ + if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, + &forkNum)) + continue; + + /* Also skip it unless this is the init fork. */ + if (forkNum != INIT_FORKNUM) + continue; + + /* Construct source pathname. */ + snprintf(srcpath, sizeof(srcpath), "%s/%s", + dbspacedirname, de->d_name); + + /* Construct destination pathname. */ + memcpy(oidbuf, de->d_name, oidchars); + oidbuf[oidchars] = '\0'; + snprintf(dstpath, sizeof(dstpath), "%s/%s%s", + dbspacedirname, oidbuf, de->d_name + oidchars + 1 + + strlen(forkNames[INIT_FORKNUM])); + + /* OK, we're ready to perform the actual copy. */ + elog(DEBUG2, "copying %s to %s", srcpath, dstpath); + copy_file(srcpath, dstpath); + } + + /* Done copying init forks. */ + FreeDir(dbspace_dir); + } +} + +/* + * Basic parsing of putative relation filenames. + * + * This function returns true if the file appears to be in the correct format + * for a non-temporary relation and false otherwise. + * + * NB: If this function returns true, the caller is entitled to assume that + * *oidchars has been set to a value no more than OIDCHARS, and thus + * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID + * portion of the filename. This is critical to protect against a possible + * buffer overrun. + */ +static bool +parse_filename_for_nontemp_relation(const char *name, int *oidchars, + ForkNumber *fork) +{ + int pos; + + /* Look for a non-empty string of digits (that isn't too long). */ + for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) + ; + if (pos == 0 || pos > OIDCHARS) + return false; + *oidchars = pos; + + /* Check for a fork name.
*/ + if (name[pos] != '_') + *fork = MAIN_FORKNUM; + else + { + int forkchar; + + forkchar = forkname_chars(&name[pos+1], fork); + if (forkchar <= 0) + return false; + pos += forkchar + 1; + } + + /* Check for a segment number. */ + if (name[pos] == '.') + { + int segchar; + for (segchar = 1; isdigit((unsigned char) name[pos+segchar]); ++segchar) + ; + if (segchar <= 1) + return false; + pos += segchar; + } + + /* Now we should be at the end. */ + if (name[pos] != '\0') + return false; + return true; +} diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index e352cdafb3..f33c29e4b2 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -615,6 +615,7 @@ pg_relation_filepath(PG_FUNCTION_ARGS) /* Determine owning backend. */ switch (relform->relpersistence) { + case RELPERSISTENCE_UNLOGGED: case RELPERSISTENCE_PERMANENT: backend = InvalidBackendId; break; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 1509686079..fa9e9ca3a4 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -851,6 +851,7 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; switch (relation->rd_rel->relpersistence) { + case RELPERSISTENCE_UNLOGGED: case RELPERSISTENCE_PERMANENT: relation->rd_backend = InvalidBackendId; break; @@ -2490,6 +2491,7 @@ RelationBuildLocalRelation(const char *relname, rel->rd_rel->relpersistence = relpersistence; switch (relpersistence) { + case RELPERSISTENCE_UNLOGGED: case RELPERSISTENCE_PERMANENT: rel->rd_backend = InvalidBackendId; break; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 66274b442e..afd759142b 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -134,6 +134,7 @@ static int disable_dollar_quoting = 0; static int dump_inserts = 0; static int column_inserts = 0; static int no_security_label = 0; +static int 
no_unlogged_table_data = 0; static void help(const char *progname); @@ -316,6 +317,7 @@ main(int argc, char **argv) {"role", required_argument, NULL, 3}, {"use-set-session-authorization", no_argument, &use_setsessauth, 1}, {"no-security-label", no_argument, &no_security_label, 1}, + {"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1}, {NULL, 0, NULL, 0} }; @@ -466,6 +468,8 @@ main(int argc, char **argv) use_setsessauth = 1; else if (strcmp(optarg, "no-security-label") == 0) no_security_label = 1; + else if (strcmp(optarg, "no-unlogged-table-data") == 0) + no_unlogged_table_data = 1; else { fprintf(stderr, @@ -864,6 +868,7 @@ help(const char *progname) printf(_(" --quote-all-identifiers quote all identifiers, even if not keywords\n")); printf(_(" --role=ROLENAME do SET ROLE before dump\n")); printf(_(" --no-security-label do not dump security label assignments\n")); + printf(_(" --no-unlogged-table-data do not dump unlogged table data\n")); printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); @@ -1471,6 +1476,10 @@ getTableData(TableInfo *tblinfo, int numTables, bool oids) /* Skip SEQUENCEs (handled elsewhere) */ if (tblinfo[i].relkind == RELKIND_SEQUENCE) continue; + /* Skip unlogged tables if so requested */ + if (tblinfo[i].relpersistence == RELPERSISTENCE_UNLOGGED + && no_unlogged_table_data) + continue; if (tblinfo[i].dobj.dump) { @@ -3447,6 +3456,7 @@ getTables(int *numTables) int i_relhasrules; int i_relhasoids; int i_relfrozenxid; + int i_relpersistence; int i_owning_tab; int i_owning_col; int i_reltablespace; @@ -3477,7 +3487,40 @@ getTables(int *numTables) * we cannot correctly identify inherited columns, owned sequences, etc. 
*/ - if (g_fout->remoteVersion >= 90000) + if (g_fout->remoteVersion >= 90100) + { + /* + * Left join to pick up dependency info linking sequences to their + * owning column, if any (note this dependency is AUTO as of 8.2) + */ + appendPQExpBuffer(query, + "SELECT c.tableoid, c.oid, c.relname, " + "c.relacl, c.relkind, c.relnamespace, " + "(%s c.relowner) AS rolname, " + "c.relchecks, c.relhastriggers, " + "c.relhasindex, c.relhasrules, c.relhasoids, " + "c.relfrozenxid, c.relpersistence, " + "CASE WHEN c.reloftype <> 0 THEN c.reloftype::pg_catalog.regtype ELSE NULL END AS reloftype, " + "d.refobjid AS owning_tab, " + "d.refobjsubid AS owning_col, " + "(SELECT spcname FROM pg_tablespace t WHERE t.oid = c.reltablespace) AS reltablespace, " + "array_to_string(c.reloptions, ', ') AS reloptions, " + "array_to_string(array(SELECT 'toast.' || x FROM unnest(tc.reloptions) x), ', ') AS toast_reloptions " + "FROM pg_class c " + "LEFT JOIN pg_depend d ON " + "(c.relkind = '%c' AND " + "d.classid = c.tableoid AND d.objid = c.oid AND " + "d.objsubid = 0 AND " + "d.refclassid = c.tableoid AND d.deptype = 'a') " + "LEFT JOIN pg_class tc ON (c.reltoastrelid = tc.oid) " + "WHERE c.relkind in ('%c', '%c', '%c', '%c') " + "ORDER BY c.oid", + username_subquery, + RELKIND_SEQUENCE, + RELKIND_RELATION, RELKIND_SEQUENCE, + RELKIND_VIEW, RELKIND_COMPOSITE_TYPE); + } + else if (g_fout->remoteVersion >= 90000) { /* * Left join to pick up dependency info linking sequences to their @@ -3489,7 +3532,7 @@ getTables(int *numTables) "(%s c.relowner) AS rolname, " "c.relchecks, c.relhastriggers, " "c.relhasindex, c.relhasrules, c.relhasoids, " - "c.relfrozenxid, " + "c.relfrozenxid, 'p' AS relpersistence, " "CASE WHEN c.reloftype <> 0 THEN c.reloftype::pg_catalog.regtype ELSE NULL END AS reloftype, " "d.refobjid AS owning_tab, " "d.refobjsubid AS owning_col, " @@ -3522,7 +3565,7 @@ getTables(int *numTables) "(%s c.relowner) AS rolname, " "c.relchecks, c.relhastriggers, " "c.relhasindex, 
c.relhasrules, c.relhasoids, " - "c.relfrozenxid, " + "c.relfrozenxid, 'p' AS relpersistence, " "NULL AS reloftype, " "d.refobjid AS owning_tab, " "d.refobjsubid AS owning_col, " @@ -3555,7 +3598,7 @@ getTables(int *numTables) "(%s relowner) AS rolname, " "relchecks, (reltriggers <> 0) AS relhastriggers, " "relhasindex, relhasrules, relhasoids, " - "relfrozenxid, " + "relfrozenxid, 'p' AS relpersistence, " "NULL AS reloftype, " "d.refobjid AS owning_tab, " "d.refobjsubid AS owning_col, " @@ -3587,7 +3630,7 @@ getTables(int *numTables) "(%s relowner) AS rolname, " "relchecks, (reltriggers <> 0) AS relhastriggers, " "relhasindex, relhasrules, relhasoids, " - "0 AS relfrozenxid, " + "0 AS relfrozenxid, 'p' AS relpersistence, " "NULL AS reloftype, " "d.refobjid AS owning_tab, " "d.refobjsubid AS owning_col, " @@ -3619,7 +3662,7 @@ getTables(int *numTables) "(%s relowner) AS rolname, " "relchecks, (reltriggers <> 0) AS relhastriggers, " "relhasindex, relhasrules, relhasoids, " - "0 AS relfrozenxid, " + "0 AS relfrozenxid, 'p' AS relpersistence, " "NULL AS reloftype, " "d.refobjid AS owning_tab, " "d.refobjsubid AS owning_col, " @@ -3647,7 +3690,7 @@ getTables(int *numTables) "(%s relowner) AS rolname, " "relchecks, (reltriggers <> 0) AS relhastriggers, " "relhasindex, relhasrules, relhasoids, " - "0 AS relfrozenxid, " + "0 AS relfrozenxid, 'p' AS relpersistence, " "NULL AS reloftype, " "NULL::oid AS owning_tab, " "NULL::int4 AS owning_col, " @@ -3670,7 +3713,7 @@ getTables(int *numTables) "relchecks, (reltriggers <> 0) AS relhastriggers, " "relhasindex, relhasrules, " "'t'::bool AS relhasoids, " - "0 AS relfrozenxid, " + "0 AS relfrozenxid, 'p' AS relpersistence, " "NULL AS reloftype, " "NULL::oid AS owning_tab, " "NULL::int4 AS owning_col, " @@ -3703,7 +3746,7 @@ getTables(int *numTables) "relchecks, (reltriggers <> 0) AS relhastriggers, " "relhasindex, relhasrules, " "'t'::bool AS relhasoids, " - "0 as relfrozenxid, " + "0 as relfrozenxid, 'p' AS relpersistence, " 
"NULL AS reloftype, " "NULL::oid AS owning_tab, " "NULL::int4 AS owning_col, " @@ -3749,6 +3792,7 @@ getTables(int *numTables) i_relhasrules = PQfnumber(res, "relhasrules"); i_relhasoids = PQfnumber(res, "relhasoids"); i_relfrozenxid = PQfnumber(res, "relfrozenxid"); + i_relpersistence = PQfnumber(res, "relpersistence"); i_owning_tab = PQfnumber(res, "owning_tab"); i_owning_col = PQfnumber(res, "owning_col"); i_reltablespace = PQfnumber(res, "reltablespace"); @@ -3783,6 +3827,7 @@ getTables(int *numTables) tblinfo[i].rolname = strdup(PQgetvalue(res, i, i_rolname)); tblinfo[i].relacl = strdup(PQgetvalue(res, i, i_relacl)); tblinfo[i].relkind = *(PQgetvalue(res, i, i_relkind)); + tblinfo[i].relpersistence = *(PQgetvalue(res, i, i_relpersistence)); tblinfo[i].hasindex = (strcmp(PQgetvalue(res, i, i_relhasindex), "t") == 0); tblinfo[i].hasrules = (strcmp(PQgetvalue(res, i, i_relhasrules), "t") == 0); tblinfo[i].hastriggers = (strcmp(PQgetvalue(res, i, i_relhastriggers), "t") == 0); @@ -11051,8 +11096,12 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) if (binary_upgrade) binary_upgrade_set_relfilenodes(q, tbinfo->dobj.catId.oid, false); - appendPQExpBuffer(q, "CREATE TABLE %s", - fmtId(tbinfo->dobj.name)); + if (tbinfo->relpersistence == RELPERSISTENCE_UNLOGGED) + appendPQExpBuffer(q, "CREATE UNLOGGED TABLE %s", + fmtId(tbinfo->dobj.name)); + else + appendPQExpBuffer(q, "CREATE TABLE %s", + fmtId(tbinfo->dobj.name)); if (tbinfo->reloftype) appendPQExpBuffer(q, " OF %s", tbinfo->reloftype); actual_atts = 0; diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 78855357c8..4313fd866b 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -220,6 +220,7 @@ typedef struct _tableInfo char *rolname; /* name of owner, or empty string */ char *relacl; char relkind; + char relpersistence; /* relation persistence */ char *reltablespace; /* relation tablespace */ char *reloptions; /* options specified by WITH (...) 
*/ char *toast_reloptions; /* ditto, for the TOAST table */ diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index beeba1cb52..ef05a46e7a 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -70,6 +70,7 @@ static int inserts = 0; static int no_tablespaces = 0; static int use_setsessauth = 0; static int no_security_label = 0; +static int no_unlogged_table_data = 0; static int server_version; static FILE *OPF; @@ -135,6 +136,7 @@ main(int argc, char *argv[]) {"role", required_argument, NULL, 3}, {"use-set-session-authorization", no_argument, &use_setsessauth, 1}, {"no-security-label", no_argument, &no_security_label, 1}, + {"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1}, {NULL, 0, NULL, 0} }; @@ -290,6 +292,8 @@ main(int argc, char *argv[]) use_setsessauth = 1; else if (strcmp(optarg, "no-security-label") == 0) no_security_label = 1; + else if (strcmp(optarg, "no-unlogged-table-data") == 0) + no_unlogged_table_data = 1; else { fprintf(stderr, @@ -377,6 +381,8 @@ main(int argc, char *argv[]) appendPQExpBuffer(pgdumpopts, " --use-set-session-authorization"); if (no_security_label) appendPQExpBuffer(pgdumpopts, " --no-security-label"); + if (no_unlogged_table_data) + appendPQExpBuffer(pgdumpopts, " --no-unlogged-table-data"); /* * If there was a database specified on the command line, use that, @@ -574,6 +580,7 @@ help(void) printf(_(" --quote-all-identifiers quote all identifiers, even if not keywords\n")); printf(_(" --role=ROLENAME do SET ROLE before dump\n")); printf(_(" --no-security-label do not dump security label assignments\n")); + printf(_(" --no-unlogged-table-data do not dump unlogged table data\n")); printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index edbe882963..406cc838e9 100644 --- a/src/bin/psql/describe.c +++ 
b/src/bin/psql/describe.c @@ -1118,6 +1118,7 @@ describeOneTableDetails(const char *schemaname, Oid tablespace; char *reloptions; char *reloftype; + char relpersistence; } tableinfo; bool show_modifiers = false; bool retval; @@ -1133,6 +1134,23 @@ describeOneTableDetails(const char *schemaname, /* Get general table info */ - if (pset.sversion >= 90000) + if (pset.sversion >= 90100) + { + printfPQExpBuffer(&buf, + "SELECT c.relchecks, c.relkind, c.relhasindex, c.relhasrules, " + "c.relhastriggers, c.relhasoids, " + "%s, c.reltablespace, " + "CASE WHEN c.reloftype = 0 THEN '' ELSE c.reloftype::pg_catalog.regtype::pg_catalog.text END, " + "c.relpersistence\n" + "FROM pg_catalog.pg_class c\n " + "LEFT JOIN pg_catalog.pg_class tc ON (c.reltoastrelid = tc.oid)\n" + "WHERE c.oid = '%s'\n", + (verbose ? + "pg_catalog.array_to_string(c.reloptions || " + "array(select 'toast.' || x from pg_catalog.unnest(tc.reloptions) x), ', ')\n" + : "''"), + oid); + } + else if (pset.sversion >= 90000) { printfPQExpBuffer(&buf, "SELECT c.relchecks, c.relkind, c.relhasindex, c.relhasrules, " @@ -1218,6 +1236,8 @@ describeOneTableDetails(const char *schemaname, atooid(PQgetvalue(res, 0, 7)) : 0; tableinfo.reloftype = (pset.sversion >= 90000 && strcmp(PQgetvalue(res, 0, 8), "") != 0) ? strdup(PQgetvalue(res, 0, 8)) : 0; + tableinfo.relpersistence = (pset.sversion >= 90100 && strcmp(PQgetvalue(res, 0, 9), "") != 0) ?
+ PQgetvalue(res, 0, 9)[0] : 0; PQclear(res); res = NULL; @@ -1269,8 +1289,12 @@ describeOneTableDetails(const char *schemaname, switch (tableinfo.relkind) { case 'r': - printfPQExpBuffer(&title, _("Table \"%s.%s\""), - schemaname, relationname); + if (tableinfo.relpersistence == 'u') + printfPQExpBuffer(&title, _("Unlogged Table \"%s.%s\""), + schemaname, relationname); + else + printfPQExpBuffer(&title, _("Table \"%s.%s\""), + schemaname, relationname); break; case 'v': printfPQExpBuffer(&title, _("View \"%s.%s\""), @@ -1281,8 +1305,12 @@ describeOneTableDetails(const char *schemaname, schemaname, relationname); break; case 'i': - printfPQExpBuffer(&title, _("Index \"%s.%s\""), - schemaname, relationname); + if (tableinfo.relpersistence == 'u') + printfPQExpBuffer(&title, _("Unlogged Index \"%s.%s\""), + schemaname, relationname); + else + printfPQExpBuffer(&title, _("Index \"%s.%s\""), + schemaname, relationname); break; case 's': /* not used as of 8.2, but keep it for backwards compatibility */ diff --git a/src/include/access/gin.h b/src/include/access/gin.h index e2d7b450c2..b1eef92054 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -389,6 +389,7 @@ extern void ginUpdateStats(Relation index, const GinStatsData *stats); /* gininsert.c */ extern Datum ginbuild(PG_FUNCTION_ARGS); +extern Datum ginbuildempty(PG_FUNCTION_ARGS); extern Datum gininsert(PG_FUNCTION_ARGS); extern void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 1bacb468ee..77d679d489 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -281,6 +281,7 @@ typedef struct /* gist.c */ extern Datum gistbuild(PG_FUNCTION_ARGS); +extern Datum gistbuildempty(PG_FUNCTION_ARGS); extern Datum gistinsert(PG_FUNCTION_ARGS); extern MemoryContext createTempGistContext(void); extern void initGISTstate(GISTSTATE *giststate, Relation 
index); diff --git a/src/include/access/hash.h b/src/include/access/hash.h index d5899f4d57..a48320bbee 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -242,6 +242,7 @@ typedef HashMetaPageData *HashMetaPage; /* public routines */ extern Datum hashbuild(PG_FUNCTION_ARGS); +extern Datum hashbuildempty(PG_FUNCTION_ARGS); extern Datum hashinsert(PG_FUNCTION_ARGS); extern Datum hashbeginscan(PG_FUNCTION_ARGS); extern Datum hashgettuple(PG_FUNCTION_ARGS); @@ -291,7 +292,7 @@ extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf); extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf, BufferAccessStrategy bstrategy); extern void _hash_initbitmap(Relation rel, HashMetaPage metap, - BlockNumber blkno); + BlockNumber blkno, ForkNumber forkNum); extern void _hash_squeezebucket(Relation rel, Bucket bucket, BlockNumber bucket_blkno, BufferAccessStrategy bstrategy); @@ -303,7 +304,8 @@ extern void _hash_droplock(Relation rel, BlockNumber whichlock, int access); extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags); extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno); -extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno); +extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno, + ForkNumber forkNum); extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy); @@ -312,7 +314,8 @@ extern void _hash_dropbuf(Relation rel, Buffer buf); extern void _hash_wrtbuf(Relation rel, Buffer buf); extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access, int to_access); -extern uint32 _hash_metapinit(Relation rel, double num_tuples); +extern uint32 _hash_metapinit(Relation rel, double num_tuples, + ForkNumber forkNum); extern void _hash_pageinit(Page page, Size size); extern void _hash_expandtable(Relation rel, Buffer metabuf); diff --git a/src/include/access/nbtree.h 
b/src/include/access/nbtree.h index 3bbc4d1cda..283612eaed 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -555,6 +555,7 @@ typedef BTScanOpaqueData *BTScanOpaque; * prototypes for functions in nbtree.c (external entry points for btree) */ extern Datum btbuild(PG_FUNCTION_ARGS); +extern Datum btbuildempty(PG_FUNCTION_ARGS); extern Datum btinsert(PG_FUNCTION_ARGS); extern Datum btbeginscan(PG_FUNCTION_ARGS); extern Datum btgettuple(PG_FUNCTION_ARGS); diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index 56dcdd53fe..40cb9ff2e4 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -25,7 +25,7 @@ extern const char *forkNames[]; extern ForkNumber forkname_to_number(char *forkName); -extern int forkname_chars(const char *str); +extern int forkname_chars(const char *str, ForkNumber *); extern char *relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index ca1c3bd683..b44991de80 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201012271 +#define CATALOG_VERSION_NO 201012291 #endif diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 9425329e94..1aa43a9e35 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -60,6 +60,7 @@ CATALOG(pg_am,2601) regproc ammarkpos; /* "mark current scan position" function */ regproc amrestrpos; /* "restore marked scan position" function */ regproc ambuild; /* "build new index" function */ + regproc ambuildempty; /* "build empty index" function */ regproc ambulkdelete; /* bulk-delete function */ regproc amvacuumcleanup; /* post-VACUUM cleanup function */ regproc amcostestimate; /* estimate cost of an indexscan */ @@ -101,26 +102,27 @@ typedef FormData_pg_am *Form_pg_am; #define Anum_pg_am_ammarkpos 21 #define 
Anum_pg_am_amrestrpos 22 #define Anum_pg_am_ambuild 23 -#define Anum_pg_am_ambulkdelete 24 -#define Anum_pg_am_amvacuumcleanup 25 -#define Anum_pg_am_amcostestimate 26 -#define Anum_pg_am_amoptions 27 +#define Anum_pg_am_ambuildempty 24 +#define Anum_pg_am_ambulkdelete 25 +#define Anum_pg_am_amvacuumcleanup 26 +#define Anum_pg_am_amcostestimate 27 +#define Anum_pg_am_amoptions 28 /* ---------------- * initial contents of pg_am * ---------------- */ -DATA(insert OID = 403 ( btree 5 1 t f t t t t t t f t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate btoptions )); +DATA(insert OID = 403 ( btree 5 1 t f t t t t t t f t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbuildempty btbulkdelete btvacuumcleanup btcostestimate btoptions )); DESCR("b-tree index access method"); #define BTREE_AM_OID 403 -DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); +DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); DESCR("hash index access method"); #define HASH_AM_OID 405 -DATA(insert OID = 783 ( gist 0 8 f t f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); +DATA(insert OID = 783 ( gist 0 8 f t f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); 
#define GIST_AM_OID 783 -DATA(insert OID = 2742 ( gin 0 5 f f f f t t f f t f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); +DATA(insert OID = 2742 ( gin 0 5 f f f f t t f f t f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN index access method"); #define GIN_AM_OID 2742 diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 1edbfe378b..39f9743990 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -150,6 +150,7 @@ DESCR(""); #define RELKIND_COMPOSITE_TYPE 'c' /* composite type */ #define RELPERSISTENCE_PERMANENT 'p' +#define RELPERSISTENCE_UNLOGGED 'u' #define RELPERSISTENCE_TEMP 't' #endif /* PG_CLASS_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index bc3fd71876..c6242433b7 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -689,6 +689,8 @@ DATA(insert OID = 337 ( btrestrpos PGNSP PGUID 12 1 0 0 f f f t f v 1 0 227 DESCR("btree(internal)"); DATA(insert OID = 338 ( btbuild PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ btbuild _null_ _null_ _null_ )); DESCR("btree(internal)"); +DATA(insert OID = 328 ( btbuildempty PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ btbuildempty _null_ _null_ _null_ )); +DESCR("btree(internal)"); DATA(insert OID = 332 ( btbulkdelete PGNSP PGUID 12 1 0 0 f f f t f v 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ btbulkdelete _null_ _null_ _null_ )); DESCR("btree(internal)"); DATA(insert OID = 972 ( btvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ btvacuumcleanup _null_ _null_ _null_ )); @@ -808,6 +810,8 @@ DATA(insert OID = 447 ( hashrestrpos PGNSP 
PGUID 12 1 0 0 f f f t f v 1 0 22 DESCR("hash(internal)"); DATA(insert OID = 448 ( hashbuild PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ hashbuild _null_ _null_ _null_ )); DESCR("hash(internal)"); +DATA(insert OID = 327 ( hashbuildempty PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ hashbuildempty _null_ _null_ _null_ )); +DESCR("hash(internal)"); DATA(insert OID = 442 ( hashbulkdelete PGNSP PGUID 12 1 0 0 f f f t f v 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ hashbulkdelete _null_ _null_ _null_ )); DESCR("hash(internal)"); DATA(insert OID = 425 ( hashvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ hashvacuumcleanup _null_ _null_ _null_ )); @@ -1104,6 +1108,8 @@ DATA(insert OID = 781 ( gistrestrpos PGNSP PGUID 12 1 0 0 f f f t f v 1 0 22 DESCR("gist(internal)"); DATA(insert OID = 782 ( gistbuild PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ gistbuild _null_ _null_ _null_ )); DESCR("gist(internal)"); +DATA(insert OID = 326 ( gistbuildempty PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ gistbuildempty _null_ _null_ _null_ )); +DESCR("gist(internal)"); DATA(insert OID = 776 ( gistbulkdelete PGNSP PGUID 12 1 0 0 f f f t f v 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ gistbulkdelete _null_ _null_ _null_ )); DESCR("gist(internal)"); DATA(insert OID = 2561 ( gistvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ gistvacuumcleanup _null_ _null_ _null_ )); @@ -4353,6 +4359,8 @@ DATA(insert OID = 2737 ( ginrestrpos PGNSP PGUID 12 1 0 0 f f f t f v 1 0 22 DESCR("gin(internal)"); DATA(insert OID = 2738 ( ginbuild PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ ginbuild _null_ _null_ _null_ )); DESCR("gin(internal)"); +DATA(insert OID = 325 ( ginbuildempty PGNSP PGUID 12 1 0 0 
f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ ginbuildempty _null_ _null_ _null_ )); +DESCR("gin(internal)"); DATA(insert OID = 2739 ( ginbulkdelete PGNSP PGUID 12 1 0 0 f f f t f v 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ ginbulkdelete _null_ _null_ _null_ )); DESCR("gin(internal)"); DATA(insert OID = 2740 ( ginvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ ginvacuumcleanup _null_ _null_ _null_ )); diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h index f086b1c33f..e2a1fecdf5 100644 --- a/src/include/catalog/storage.h +++ b/src/include/catalog/storage.h @@ -35,6 +35,8 @@ extern void AtSubCommit_smgr(void); extern void AtSubAbort_smgr(void); extern void PostPrepare_smgr(void); +extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum); + extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record); extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec); diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 09d167a323..726daf5c07 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -390,6 +390,7 @@ PG_KEYWORD("union", UNION, RESERVED_KEYWORD) PG_KEYWORD("unique", UNIQUE, RESERVED_KEYWORD) PG_KEYWORD("unknown", UNKNOWN, UNRESERVED_KEYWORD) PG_KEYWORD("unlisten", UNLISTEN, UNRESERVED_KEYWORD) +PG_KEYWORD("unlogged", UNLOGGED, UNRESERVED_KEYWORD) PG_KEYWORD("until", UNTIL, UNRESERVED_KEYWORD) PG_KEYWORD("update", UPDATE, UNRESERVED_KEYWORD) PG_KEYWORD("user", USER, RESERVED_KEYWORD) diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 5f41adfcc2..7cf1a643d3 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -203,7 +203,7 @@ * Enable debugging print statements for WAL-related operations; see * also the wal_debug GUC var. 
*/ -/* #define WAL_DEBUG */ +/* #define WAL_DEBUG */ /* * Enable tracing of resource consumption during sort operations; diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 0c18fb52ee..823ca32766 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -37,6 +37,7 @@ #define BM_JUST_DIRTIED (1 << 5) /* dirtied since write started */ #define BM_PIN_COUNT_WAITER (1 << 6) /* have waiter for sole pin */ #define BM_CHECKPOINT_NEEDED (1 << 7) /* must write for checkpoint */ +#define BM_PERMANENT (1 << 8) /* permanent relation (not unlogged) */ typedef bits16 BufFlags; diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 8c1552190c..58808f0b59 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -177,13 +177,17 @@ extern void AtEOXact_Buffers(bool isCommit); extern void PrintBufferLeakWarning(Buffer buffer); extern void CheckPointBuffers(int flags); extern BlockNumber BufferGetBlockNumber(Buffer buffer); -extern BlockNumber RelationGetNumberOfBlocks(Relation relation); +extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, + ForkNumber forkNum); extern void FlushRelationBuffers(Relation rel); extern void FlushDatabaseBuffers(Oid dbid); extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock); extern void DropDatabaseBuffers(Oid dbid); +#define RelationGetNumberOfBlocks(reln) \ + RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM) + #ifdef NOT_USED extern void PrintPinnedBufs(void); #endif diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h index b24a98c83e..7c577240f8 100644 --- a/src/include/storage/copydir.h +++ b/src/include/storage/copydir.h @@ -14,5 +14,6 @@ #define COPYDIR_H extern void copydir(char *fromdir, char *todir, bool recurse); +extern void copy_file(char *fromfile, char *tofile); #endif /* COPYDIR_H */ diff --git a/src/include/storage/reinit.h
b/src/include/storage/reinit.h new file mode 100644 index 0000000000..9999dff37d --- /dev/null +++ b/src/include/storage/reinit.h @@ -0,0 +1,23 @@ +/*------------------------------------------------------------------------- + * + * reinit.h + * Reinitialization of unlogged relations + * + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/reinit.h + * + *------------------------------------------------------------------------- + */ + +#ifndef REINIT_H +#define REINIT_H + +extern void ResetUnloggedRelations(int op); + +#define UNLOGGED_RELATION_CLEANUP 0x0001 +#define UNLOGGED_RELATION_INIT 0x0002 + +#endif /* REINIT_H */ diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h index 24a72e60ac..f71b2331a5 100644 --- a/src/include/storage/relfilenode.h +++ b/src/include/storage/relfilenode.h @@ -27,7 +27,8 @@ typedef enum ForkNumber InvalidForkNumber = -1, MAIN_FORKNUM = 0, FSM_FORKNUM, - VISIBILITYMAP_FORKNUM + VISIBILITYMAP_FORKNUM, + INIT_FORKNUM /* * NOTE: if you add a new fork, change MAX_FORKNUM below and update the @@ -35,7 +36,7 @@ typedef enum ForkNumber */ } ForkNumber; -#define MAX_FORKNUM VISIBILITYMAP_FORKNUM +#define MAX_FORKNUM INIT_FORKNUM /* * RelFileNode must provide all that we need to know to physically access diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 88a3168d13..d5b5e58de3 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -114,6 +114,7 @@ typedef struct RelationAmInfo FmgrInfo ammarkpos; FmgrInfo amrestrpos; FmgrInfo ambuild; + FmgrInfo ambuildempty; FmgrInfo ambulkdelete; FmgrInfo amvacuumcleanup; FmgrInfo amcostestimate; -- 2.40.0