From f844022a0953ab013055fc68bd3ec34f504a6115 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 15 Jul 2011 10:54:56 +0300 Subject: [PATCH] Fix two ancient bugs in GiST code to re-find a parent after page split: First, when following a right-link, we incorrectly marked the current page as the parent of the right sibling. In reality, the parent of the right page is the same as the parent of the current page (or some page to the right of it, gistFindCorrectParent() will sort that out). Secondly, when we follow a right-link, we must prepend, not append, the right page to our list of pages to visit. That's because we assume that once we hit a leaf page in the list, all the rest are leaf pages too, and give up. To hit these bugs, you need concurrent actions and several unlucky accidents. Another backend must split the root page, while you're in process of splitting a lower-level page. Furthermore, while you scan the internal nodes to re-find the parent, another backend needs to again split some more internal pages. Even then, the bugs don't necessarily manifest as user-visible errors or index corruption. While we're at it, make the error reporting a bit better if gistFindPath() fails to re-find the parent. It used to be an assertion, but an elog() seems more appropriate. Backpatch to all supported branches. --- src/backend/access/gist/gist.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index cdb0343d10..4e732d1d85 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -883,9 +883,12 @@ gistFindPath(Relation r, BlockNumber child) if (GistPageIsLeaf(page)) { - /* we can safety go away, follows only leaf pages */ + /* + * Because we scan the index top-down, all the rest of the pages + * in the queue must be leaf pages as well. + */ UnlockReleaseBuffer(buffer); - return NULL; + break; } top->lsn = PageGetLSN(page); @@ -900,14 +903,25 @@ gistFindPath(Relation r, BlockNumber child) if (top->parent && XLByteLT(top->parent->lsn, GistPageGetOpaque(page)->nsn) && GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ ) { - /* page splited while we thinking of... */ + /* + * Page was split while we looked elsewhere. We didn't see the + * downlink to the right page when we scanned the parent, so + * add it to the queue now. + * + * Put the right page ahead of the queue, so that we visit it + * next. That's important, because if this is the lowest internal + * level, just above leaves, we might already have queued up some + * leaf pages, and we assume that there can't be any non-leaf + * pages behind leaf pages. + */ ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack)); ptr->blkno = GistPageGetOpaque(page)->rightlink; ptr->childoffnum = InvalidOffsetNumber; - ptr->parent = top; - ptr->next = NULL; - tail->next = ptr; - tail = ptr; + ptr->parent = top->parent; + ptr->next = top->next; + top->next = ptr; + if (tail == top) + tail = ptr; } maxoff = PageGetMaxOffsetNumber(page); @@ -963,7 +977,9 @@ gistFindPath(Relation r, BlockNumber child) top = top->next; } - return NULL; + elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u", + RelationGetRelationName(r), child); + return NULL; /* keep compiler quiet */ } /* @@ -1034,7 +1050,6 @@ gistFindCorrectParent(Relation r, GISTInsertStack *child) /* ok, find new path */ ptr = parent = gistFindPath(r, child->blkno); - Assert(ptr != NULL); /* read all buffers as expected by caller */ /* note we don't lock them or gistcheckpage them here! */ -- 2.40.0