From e33f205a945e7497b9aecffffb7e3b0a199f8a29 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 29 Sep 2003 23:40:26 +0000 Subject: [PATCH] Adjust btree index build procedure so that the btree metapage looks invalid (has the wrong magic number) until the build is entirely complete. This turns out to cost no additional writes in the normal case, since we were rewriting the metapage at the end of the process anyway. In normal scenarios there's no real gain in security, because a failed index build would roll back the transaction leaving an unused index file, but for rebuilding shared system indexes this seems to add some useful protection. --- doc/src/sgml/ref/reindex.sgml | 9 ++++--- src/backend/access/nbtree/nbtpage.c | 18 +++++++++---- src/backend/access/nbtree/nbtree.c | 5 ++-- src/backend/access/nbtree/nbtsort.c | 20 ++++++++++---- src/backend/access/nbtree/nbtxlog.c | 42 ++++++++++++++++++++++------- src/include/access/nbtree.h | 5 ++-- 6 files changed, 71 insertions(+), 28 deletions(-) diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml index d945112de7..43f0368d64 100644 --- a/doc/src/sgml/ref/reindex.sgml +++ b/doc/src/sgml/ref/reindex.sgml @@ -1,5 +1,5 @@ @@ -180,9 +180,10 @@ REINDEX { DATABASE | TABLE | INDEX } name REINDEX is not crash-safe for shared indexes, which is why this case is disallowed during normal operation. If a failure occurs while reindexing one - of these catalogs in standalone mode, it is important that the failure - be rectified and the REINDEX operation redone - before attempting to restart the regular server. + of these catalogs in standalone mode, it will not be possible to + restart the regular server until the problem is rectified. (The + typical symptom of a partially rebuilt shared index is "index is not + a btree" errors.) 
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 2cb57aadc2..77ca04601d 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.71 2003/09/25 06:57:57 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.72 2003/09/29 23:40:26 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -31,12 +31,16 @@ /* * _bt_metapinit() -- Initialize the metadata page of a new btree. * + * If markvalid is true, the index is immediately marked valid, else it + * will be invalid until _bt_metaproot() is called. + * * Note: there's no real need for any locking here. Since the transaction * creating the index hasn't committed yet, no one else can even see the index - * much less be trying to use it. + * much less be trying to use it. (In a REINDEX-in-place scenario, that's + * not true, but we assume the caller holds sufficient locks on the index.) */ void -_bt_metapinit(Relation rel) +_bt_metapinit(Relation rel, bool markvalid) { Buffer buf; Page pg; @@ -57,7 +61,7 @@ _bt_metapinit(Relation rel) _bt_pageinit(pg, BufferGetPageSize(buf)); metad = BTPageGetMeta(pg); - metad->btm_magic = BTREE_MAGIC; + metad->btm_magic = markvalid ? BTREE_MAGIC : 0; metad->btm_version = BTREE_VERSION; metad->btm_root = P_NONE; metad->btm_level = 0; @@ -85,7 +89,9 @@ _bt_metapinit(Relation rel) rdata[0].len = SizeOfBtreeNewmeta; rdata[0].next = NULL; - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWMETA, rdata); + recptr = XLogInsert(RM_BTREE_ID, + markvalid ? 
XLOG_BTREE_NEWMETA : XLOG_BTREE_INVALIDMETA, + rdata); PageSetLSN(pg, recptr); PageSetSUI(pg, ThisStartUpID); @@ -611,6 +617,8 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level) START_CRIT_SECTION(); metad = BTPageGetMeta(metap); + Assert(metad->btm_magic == BTREE_MAGIC || metad->btm_magic == 0); + metad->btm_magic = BTREE_MAGIC; /* it's valid now for sure */ metad->btm_root = rootbknum; metad->btm_level = level; metad->btm_fastroot = rootbknum; diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index da92254820..3979f79c35 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.105 2003/08/04 02:39:57 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.106 2003/09/29 23:40:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -112,7 +112,8 @@ btbuild(PG_FUNCTION_ARGS) RelationGetRelationName(index)); /* initialize the btree index metadata page */ - _bt_metapinit(index); + /* mark it valid right away only if using slow build */ + _bt_metapinit(index, !buildstate.usefast); if (buildstate.usefast) { diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index d8ea19434f..a56665be5c 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -36,7 +36,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.76 2003/09/25 06:57:57 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.77 2003/09/29 23:40:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -514,6 +514,8 @@ static void 
_bt_uppershutdown(Relation index, BTPageState *state) { BTPageState *s; + BlockNumber rootblkno = P_NONE; + uint32 rootlevel = 0; /* * Each iteration of this loop completes one more level of the tree. @@ -537,7 +539,8 @@ _bt_uppershutdown(Relation index, BTPageState *state) if (s->btps_next == (BTPageState *) NULL) { opaque->btpo_flags |= BTP_ROOT; - _bt_metaproot(index, blkno, s->btps_level); + rootblkno = blkno; + rootlevel = s->btps_level; } else { @@ -556,6 +559,14 @@ _bt_uppershutdown(Relation index, BTPageState *state) _bt_slideleft(index, s->btps_buf, s->btps_page); _bt_blwritepage(index, s->btps_buf); } + + /* + * As the last step in the process, update the metapage to point to + * the new root (unless we had no data at all, in which case it's + * left pointing to "P_NONE"). This changes the index to the "valid" + * state by updating its magic number. + */ + _bt_metaproot(index, rootblkno, rootlevel); } /* @@ -672,7 +683,6 @@ _bt_load(Relation index, BTSpool *btspool, BTSpool *btspool2) } } - /* Close down final pages, if we had any data at all */ - if (state != NULL) - _bt_uppershutdown(index, state); + /* Close down final pages and rewrite the metapage */ + _bt_uppershutdown(index, state); } diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 4fecd3116a..721ef6ba0a 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.6 2003/08/08 21:41:27 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.7 2003/09/29 23:40:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,7 +109,8 @@ _bt_restore_page(Page page, char *from, int len) static void _bt_restore_meta(Relation reln, XLogRecPtr lsn, BlockNumber root, uint32 level, - BlockNumber 
fastroot, uint32 fastlevel) + BlockNumber fastroot, uint32 fastlevel, + bool markvalid) { Buffer metabuf; Page metapg; @@ -124,7 +125,7 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn, _bt_pageinit(metapg, BufferGetPageSize(metabuf)); md = BTPageGetMeta(metapg); - md->btm_magic = BTREE_MAGIC; + md->btm_magic = markvalid ? BTREE_MAGIC : 0; md->btm_version = BTREE_VERSION; md->btm_root = root; md->btm_level = level; @@ -213,7 +214,8 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta, if (ismeta) _bt_restore_meta(reln, lsn, md.root, md.level, - md.fastroot, md.fastlevel); + md.fastroot, md.fastlevel, + true); } /* Forget any split this insertion completes */ @@ -562,7 +564,8 @@ btree_xlog_delete_page(bool redo, bool ismeta, sizeof(xl_btree_metadata)); _bt_restore_meta(reln, lsn, md.root, md.level, - md.fastroot, md.fastlevel); + md.fastroot, md.fastlevel, + true); } } } @@ -607,7 +610,8 @@ btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record) _bt_restore_meta(reln, lsn, xlrec->rootblk, xlrec->level, - xlrec->rootblk, xlrec->level); + xlrec->rootblk, xlrec->level, + true); /* Check to see if this satisfies any incomplete insertions */ if (record->xl_len > SizeOfBtreeNewroot && @@ -621,7 +625,8 @@ btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record) } static void -btree_xlog_newmeta(bool redo, XLogRecPtr lsn, XLogRecord *record) +btree_xlog_newmeta(bool redo, XLogRecPtr lsn, XLogRecord *record, + bool markvalid) { xl_btree_newmeta *xlrec = (xl_btree_newmeta *) XLogRecGetData(record); Relation reln; @@ -635,7 +640,8 @@ btree_xlog_newmeta(bool redo, XLogRecPtr lsn, XLogRecord *record) _bt_restore_meta(reln, lsn, xlrec->meta.root, xlrec->meta.level, - xlrec->meta.fastroot, xlrec->meta.fastlevel); + xlrec->meta.fastroot, xlrec->meta.fastlevel, + markvalid); } static void @@ -707,11 +713,14 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record) btree_xlog_newroot(true, lsn, record); break; case XLOG_BTREE_NEWMETA: - btree_xlog_newmeta(true, lsn, 
record); + btree_xlog_newmeta(true, lsn, record, true); break; case XLOG_BTREE_NEWPAGE: btree_xlog_newpage(true, lsn, record); break; + case XLOG_BTREE_INVALIDMETA: + btree_xlog_newmeta(true, lsn, record, false); + break; default: elog(PANIC, "btree_redo: unknown op code %u", info); } @@ -758,11 +767,14 @@ btree_undo(XLogRecPtr lsn, XLogRecord *record) btree_xlog_newroot(false, lsn, record); break; case XLOG_BTREE_NEWMETA: - btree_xlog_newmeta(false, lsn, record); + btree_xlog_newmeta(false, lsn, record, true); break; case XLOG_BTREE_NEWPAGE: btree_xlog_newpage(false, lsn, record); break; + case XLOG_BTREE_INVALIDMETA: + btree_xlog_newmeta(false, lsn, record, false); + break; default: elog(PANIC, "btree_undo: unknown op code %u", info); } @@ -895,6 +907,16 @@ btree_desc(char *buf, uint8 xl_info, char *rec) xlrec->blkno); break; } + case XLOG_BTREE_INVALIDMETA: + { + xl_btree_newmeta *xlrec = (xl_btree_newmeta *) rec; + + sprintf(buf + strlen(buf), "invalidmeta: node %u/%u; root %u lev %u fast %u lev %u", + xlrec->node.tblNode, xlrec->node.relNode, + xlrec->meta.root, xlrec->meta.level, + xlrec->meta.fastroot, xlrec->meta.fastlevel); + break; + } default: strcat(buf, "UNKNOWN"); break; diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 6d9c0081a9..a852fa2492 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nbtree.h,v 1.70 2003/08/08 21:42:32 momjian Exp $ + * $Id: nbtree.h,v 1.71 2003/09/29 23:40:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -198,6 +198,7 @@ typedef BTItemData *BTItem; #define XLOG_BTREE_NEWROOT 0xA0 /* new root page */ #define XLOG_BTREE_NEWMETA 0xB0 /* update metadata page */ #define XLOG_BTREE_NEWPAGE 0xC0 /* new index page during build */ +#define XLOG_BTREE_INVALIDMETA 0xD0 /* 
new metadata, temp. invalid */ /* * All that we need to find changed index tuple @@ -448,7 +449,7 @@ extern void _bt_insert_parent(Relation rel, Buffer buf, Buffer rbuf, /* * prototypes for functions in nbtpage.c */ -extern void _bt_metapinit(Relation rel); +extern void _bt_metapinit(Relation rel, bool markvalid); extern Buffer _bt_getroot(Relation rel, int access); extern Buffer _bt_gettrueroot(Relation rel); extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access); -- 2.40.0