granicus.if.org Git - postgresql/commitdiff
Improve GIN indexscan cost estimation.
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 18 Oct 2010 00:52:32 +0000 (20:52 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 18 Oct 2010 00:52:32 +0000 (20:52 -0400)
The better estimate requires more statistics than we previously stored:
in particular, counts of "entry" versus "data" pages within the index,
as well as knowledge of the number of distinct key values.  We collect
this information during initial index build and update it during VACUUM,
storing the info in new fields on the index metapage.  No initdb is
required because these fields will read as zeroes in a pre-existing
index, and the new gincostestimate code is coded to behave (reasonably)
sanely if they are zeroes.

Teodor Sigaev, reviewed by Jan Urbanski, Tom Lane, and Itagaki Takahiro.

src/backend/access/gin/ginbtree.c
src/backend/access/gin/gindatapage.c
src/backend/access/gin/ginentrypage.c
src/backend/access/gin/ginfast.c
src/backend/access/gin/gininsert.c
src/backend/access/gin/ginutil.c
src/backend/access/gin/ginvacuum.c
src/backend/access/gin/ginxlog.c
src/backend/utils/adt/selfuncs.c
src/include/access/gin.h

index 82d7dd18a809b5be901c3272c6c7565f7b2d9ded..812e241f449015bd01733aba1d157163ee027eab 100644 (file)
@@ -268,10 +268,13 @@ findParents(GinBtree btree, GinBtreeStack *stack,
 /*
  * Insert value (stored in GinBtree) to tree described by stack
  *
+ * During an index build, buildStats is non-null and the counters
+ * it contains should be incremented as needed.
+ *
  * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
  */
 void
-ginInsertValue(GinBtree btree, GinBtreeStack *stack)
+ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
 {
        GinBtreeStack *parent = stack;
        BlockNumber rootBlkno = InvalidBuffer;
@@ -330,6 +333,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
 
                        ((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
 
+                       /* During index build, count the newly-split page */
+                       if (buildStats)
+                       {
+                               if (btree->isData)
+                                       buildStats->nDataPages++;
+                               else
+                                       buildStats->nEntryPages++;
+                       }
+
                        parent = stack->parent;
 
                        if (parent == NULL)
@@ -381,6 +393,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
 
                                freeGinBtreeStack(stack);
 
+                               /* During index build, count the newly-added root page */
+                               if (buildStats)
+                               {
+                                       if (btree->isData)
+                                               buildStats->nDataPages++;
+                                       else
+                                               buildStats->nEntryPages++;
+                               }
+
                                return;
                        }
                        else
index c590d56f7cb4c57c5ec816278df575c11a32e449..5e2f6e76d30664c60d03b34e85be09b8cd0dc065 100644 (file)
@@ -592,9 +592,11 @@ void
 prepareDataScan(GinBtree btree, Relation index)
 {
        memset(btree, 0, sizeof(GinBtreeData));
+
        btree->index = index;
-       btree->isMoveRight = dataIsMoveRight;
+
        btree->findChildPage = dataLocateItem;
+       btree->isMoveRight = dataIsMoveRight;
        btree->findItem = dataLocateLeafItem;
        btree->findChildPtr = dataFindChildPtr;
        btree->getLeftMostPage = dataGetLeftMostPage;
@@ -603,6 +605,7 @@ prepareDataScan(GinBtree btree, Relation index)
        btree->splitPage = dataSplitPage;
        btree->fillRoot = dataFillRoot;
 
+       btree->isData = TRUE;
        btree->searchMode = FALSE;
        btree->isDelete = FALSE;
        btree->fullScan = FALSE;
@@ -628,7 +631,9 @@ prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
  * Inserts array of item pointers, may execute several tree scan (very rare)
  */
 void
-insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
+ginInsertItemPointer(GinPostingTreeScan *gdi,
+                                        ItemPointerData *items, uint32 nitem,
+                                        GinStatsData *buildStats)
 {
        BlockNumber rootBlkno = gdi->stack->blkno;
 
@@ -653,7 +658,7 @@ insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
                        freeGinBtreeStack(gdi->stack);
                }
                else
-                       ginInsertValue(&(gdi->btree), gdi->stack);
+                       ginInsertValue(&(gdi->btree), gdi->stack, buildStats);
 
                gdi->stack = NULL;
        }
index d60282f20472c1864a9e47383da752e9e93d1abe..a47e92785c2dc6f9d8cce621ab3951015ce088f6 100644 (file)
@@ -659,8 +659,11 @@ prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum valu
 {
        memset(btree, 0, sizeof(GinBtreeData));
 
-       btree->isMoveRight = entryIsMoveRight;
+       btree->index = index;
+       btree->ginstate = ginstate;
+
        btree->findChildPage = entryLocateEntry;
+       btree->isMoveRight = entryIsMoveRight;
        btree->findItem = entryLocateLeafEntry;
        btree->findChildPtr = entryFindChildPtr;
        btree->getLeftMostPage = entryGetLeftMostPage;
@@ -669,13 +672,12 @@ prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum valu
        btree->splitPage = entrySplitPage;
        btree->fillRoot = entryFillRoot;
 
-       btree->index = index;
-       btree->ginstate = ginstate;
-       btree->entryAttnum = attnum;
-       btree->entryValue = value;
-
-       btree->isDelete = FALSE;
+       btree->isData = FALSE;
        btree->searchMode = FALSE;
        btree->fullScan = FALSE;
        btree->isBuild = FALSE;
+
+       btree->entryAttnum = attnum;
+       btree->entryValue = value;
+       btree->isDelete = FALSE;
 }
index eacac507e438f612c92dc795e2efe82714be2891..0c050c97a0bc8500890eb6df58e59360e7d2ce6c 100644 (file)
@@ -789,7 +789,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
                        ginBeginBAScan(&accum);
                        while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
                        {
-                               ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+                               ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL);
                                if (vac_delay)
                                        vacuum_delay_point();
                        }
@@ -823,7 +823,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
 
                                ginBeginBAScan(&accum);
                                while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
-                                       ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+                                       ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL);
                        }
 
                        /*
index 640d3acde9f5fa87c4ac78ca5434e1b313e25c9d..263e447ca4167393e3e5fed928acac53451c9733 100644 (file)
@@ -27,6 +27,7 @@ typedef struct
 {
        GinState        ginstate;
        double          indtuples;
+       GinStatsData buildStats;
        MemoryContext tmpCtx;
        MemoryContext funcCtx;
        BuildAccumulator accum;
@@ -97,8 +98,10 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
  * GinFormTuple().
  */
 static IndexTuple
-addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
-                 IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild)
+addItemPointersToTuple(Relation index, GinState *ginstate,
+                                          GinBtreeStack *stack, IndexTuple old,
+                                          ItemPointerData *items, uint32 nitem,
+                                          GinStatsData *buildStats)
 {
        Datum           key = gin_index_getattr(ginstate, old);
        OffsetNumber attnum = gintuple_get_attrnum(ginstate, old);
@@ -128,11 +131,15 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
                GinSetPostingTree(res, postingRoot);
 
                gdi = prepareScanPostingTree(index, postingRoot, FALSE);
-               gdi->btree.isBuild = isBuild;
+               gdi->btree.isBuild = (buildStats != NULL);
 
-               insertItemPointer(gdi, items, nitem);
+               ginInsertItemPointer(gdi, items, nitem, buildStats);
 
                pfree(gdi);
+
+               /* During index build, count the newly-added data page */
+               if (buildStats)
+                       buildStats->nDataPages++;
        }
 
        return res;
@@ -140,18 +147,25 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
 
 /*
  * Inserts only one entry to the index, but it can add more than 1 ItemPointer.
+ *
+ * During an index build, buildStats is non-null and the counters
+ * it contains should be incremented as needed.
  */
 void
 ginEntryInsert(Relation index, GinState *ginstate,
                           OffsetNumber attnum, Datum value,
                           ItemPointerData *items, uint32 nitem,
-                          bool isBuild)
+                          GinStatsData *buildStats)
 {
        GinBtreeData btree;
        GinBtreeStack *stack;
        IndexTuple      itup;
        Page            page;
 
+       /* During index build, count the to-be-inserted entry */
+       if (buildStats)
+               buildStats->nEntries++;
+
        prepareEntryScan(&btree, index, attnum, value, ginstate);
 
        stack = ginFindLeafPage(&btree, NULL);
@@ -174,14 +188,15 @@ ginEntryInsert(Relation index, GinState *ginstate,
 
                        /* insert into posting tree */
                        gdi = prepareScanPostingTree(index, rootPostingTree, FALSE);
-                       gdi->btree.isBuild = isBuild;
-                       insertItemPointer(gdi, items, nitem);
+                       gdi->btree.isBuild = (buildStats != NULL);
+                       ginInsertItemPointer(gdi, items, nitem, buildStats);
                        pfree(gdi);
 
                        return;
                }
 
-               itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild);
+               itup = addItemPointersToTuple(index, ginstate, stack, itup,
+                                                                         items, nitem, buildStats);
 
                btree.isDelete = TRUE;
        }
@@ -195,13 +210,14 @@ ginEntryInsert(Relation index, GinState *ginstate,
                        /* Add the rest, making a posting tree if necessary */
                        IndexTuple      previtup = itup;
 
-                       itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild);
+                       itup = addItemPointersToTuple(index, ginstate, stack, previtup,
+                                                                                 items + 1, nitem - 1, buildStats);
                        pfree(previtup);
                }
        }
 
        btree.entry = itup;
-       ginInsertValue(&btree, stack);
+       ginInsertValue(&btree, stack, buildStats);
        pfree(itup);
 }
 
@@ -260,7 +276,8 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
                {
                        /* there could be many entries, so be willing to abort here */
                        CHECK_FOR_INTERRUPTS();
-                       ginEntryInsert(index, &buildstate->ginstate, attnum, entry, list, nlist, TRUE);
+                       ginEntryInsert(index, &buildstate->ginstate, attnum, entry,
+                                                  list, nlist, &buildstate->buildStats);
                }
 
                MemoryContextReset(buildstate->tmpCtx);
@@ -292,6 +309,8 @@ ginbuild(PG_FUNCTION_ARGS)
                         RelationGetRelationName(index));
 
        initGinState(&buildstate.ginstate, index);
+       buildstate.indtuples = 0;
+       memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
 
        /* initialize the meta page */
        MetaBuffer = GinNewBuffer(index);
@@ -331,8 +350,8 @@ ginbuild(PG_FUNCTION_ARGS)
        UnlockReleaseBuffer(RootBuffer);
        END_CRIT_SECTION();
 
-       /* build the index */
-       buildstate.indtuples = 0;
+       /* count the root as first entry page */
+       buildstate.buildStats.nEntryPages++;
 
        /*
         * create a temporary memory context that is reset once for each tuple
@@ -367,12 +386,19 @@ ginbuild(PG_FUNCTION_ARGS)
        {
                /* there could be many entries, so be willing to abort here */
                CHECK_FOR_INTERRUPTS();
-               ginEntryInsert(index, &buildstate.ginstate, attnum, entry, list, nlist, TRUE);
+               ginEntryInsert(index, &buildstate.ginstate, attnum, entry,
+                                          list, nlist, &buildstate.buildStats);
        }
        MemoryContextSwitchTo(oldCtx);
 
        MemoryContextDelete(buildstate.tmpCtx);
 
+       /*
+        * Update metapage stats
+        */
+       buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
+       ginUpdateStats(index, &buildstate.buildStats);
+
        /*
         * Return statistics
         */
@@ -401,7 +427,7 @@ ginHeapTupleInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datu
                return 0;
 
        for (i = 0; i < nentries; i++)
-               ginEntryInsert(index, ginstate, attnum, entries[i], item, 1, FALSE);
+               ginEntryInsert(index, ginstate, attnum, entries[i], item, 1, NULL);
 
        return nentries;
 }
index c128e5b3309d3f7e1262754baf98b82d26987af2..52bca8cee3c5ab1797262e8f77ad2683b9541ee8 100644 (file)
  */
 
 #include "postgres.h"
+
 #include "access/genam.h"
 #include "access/gin.h"
 #include "access/reloptions.h"
 #include "catalog/pg_type.h"
+#include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
 #include "storage/indexfsm.h"
@@ -227,6 +229,10 @@ GinInitMetabuffer(Buffer b)
        metadata->tailFreeSize = 0;
        metadata->nPendingPages = 0;
        metadata->nPendingHeapTuples = 0;
+       metadata->nTotalPages = 0;
+       metadata->nEntryPages = 0;
+       metadata->nDataPages = 0;
+       metadata->nEntries = 0;
 }
 
 int
@@ -354,3 +360,82 @@ ginoptions(PG_FUNCTION_ARGS)
 
        PG_RETURN_BYTEA_P(rdopts);
 }
+
+/*
+ * Fetch index's statistical data into *stats
+ *
+ * Reads the index metapage (block GIN_METAPAGE_BLKNO) while holding a
+ * GIN_SHARE lock on its buffer, and copies nPendingPages, nTotalPages,
+ * nEntryPages, nDataPages and nEntries from the metapage into *stats.
+ * Every field of *stats is overwritten.  The buffer is unlocked and
+ * released before returning.
+ *
+ * Note: in the result, nPendingPages can be trusted to be up-to-date,
+ * but the other fields are as of the last VACUUM (or the initial index
+ * build, which also stores them via ginUpdateStats).
+ */
+void
+ginGetStats(Relation index, GinStatsData *stats)
+{
+       Buffer                  metabuffer;
+       Page                    metapage;
+       GinMetaPageData *metadata;
+
+       metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+       LockBuffer(metabuffer, GIN_SHARE);
+       metapage = BufferGetPage(metabuffer);
+       metadata = GinPageGetMeta(metapage);
+
+       stats->nPendingPages = metadata->nPendingPages;
+       stats->nTotalPages = metadata->nTotalPages;
+       stats->nEntryPages = metadata->nEntryPages;
+       stats->nDataPages = metadata->nDataPages;
+       stats->nEntries = metadata->nEntries;
+
+       UnlockReleaseBuffer(metabuffer);
+}
+
+/*
+ * Write the given statistics to the index's metapage
+ *
+ * Copies nTotalPages, nEntryPages, nDataPages and nEntries from *stats
+ * into the metapage (block GIN_METAPAGE_BLKNO) while holding a
+ * GIN_EXCLUSIVE lock on its buffer, then marks the buffer dirty.  The
+ * modification is performed inside a critical section.
+ *
+ * Unless the index is temporary (rd_istemp), an
+ * XLOG_GIN_UPDATE_META_PAGE record is also emitted; it carries a copy of
+ * the entire updated GinMetaPageData, with ntuples set to 0 and both
+ * newRightlink and prevTail set to InvalidBlockNumber.  The page's LSN
+ * and TLI are then set from the inserted record.
+ *
+ * The buffer is unlocked and released before the critical section ends.
+ *
+ * Note: nPendingPages is *not* copied over
+ */
+void
+ginUpdateStats(Relation index, const GinStatsData *stats)
+{
+       Buffer                  metabuffer;
+       Page                    metapage;
+       GinMetaPageData *metadata;
+
+       metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+       LockBuffer(metabuffer, GIN_EXCLUSIVE);
+       metapage = BufferGetPage(metabuffer);
+       metadata = GinPageGetMeta(metapage);
+
+       START_CRIT_SECTION();
+
+       metadata->nTotalPages = stats->nTotalPages;
+       metadata->nEntryPages = stats->nEntryPages;
+       metadata->nDataPages = stats->nDataPages;
+       metadata->nEntries = stats->nEntries;
+
+       MarkBufferDirty(metabuffer);
+
+       if (!index->rd_istemp)  /* no WAL record is written for temp indexes */
+       {
+               XLogRecPtr                      recptr;
+               ginxlogUpdateMeta       data;
+               XLogRecData                     rdata;
+
+               data.node = index->rd_node;
+               data.ntuples = 0;
+               data.newRightlink = data.prevTail = InvalidBlockNumber;
+               memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
+
+               rdata.buffer = InvalidBuffer;
+               rdata.data = (char *) &data;
+               rdata.len = sizeof(ginxlogUpdateMeta);
+               rdata.next = NULL;
+
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
+               PageSetLSN(metapage, recptr);
+               PageSetTLI(metapage, ThisTimeLineID);
+       }
+
+       UnlockReleaseBuffer(metabuffer);
+
+       END_CRIT_SECTION();
+}
index f074299622084f25b2d53ce18de3fb887c96aec4..c7f9a72d2e5c9da54d5367169206bd7ba2ad89a2 100644 (file)
@@ -707,9 +707,8 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
        BlockNumber npages,
                                blkno;
        BlockNumber totFreePages;
-       BlockNumber lastBlock = GIN_ROOT_BLKNO,
-                               lastFilledBlock = GIN_ROOT_BLKNO;
        GinState        ginstate;
+       GinStatsData idxStat;
 
        /*
         * In an autovacuum analyze, we want to clean up pending insertions.
@@ -736,6 +735,8 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
                ginInsertCleanup(index, &ginstate, true, stats);
        }
 
+       memset(&idxStat, 0, sizeof(idxStat));
+
        /*
         * XXX we always report the heap tuple count as the number of index
         * entries.  This is bogus if the index is partial, but it's real hard to
@@ -757,7 +758,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
 
        totFreePages = 0;
 
-       for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++)
+       for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++)
        {
                Buffer          buffer;
                Page            page;
@@ -771,15 +772,28 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
 
                if (GinPageIsDeleted(page))
                {
+                       Assert(blkno != GIN_ROOT_BLKNO);
                        RecordFreeIndexPage(index, blkno);
                        totFreePages++;
                }
-               else
-                       lastFilledBlock = blkno;
+               else if (GinPageIsData(page))
+               {
+                       idxStat.nDataPages++;
+               }
+               else if (!GinPageIsList(page))
+               {
+                       idxStat.nEntryPages++;
+
+                       if ( GinPageIsLeaf(page) )
+                               idxStat.nEntries += PageGetMaxOffsetNumber(page);
+               }
 
                UnlockReleaseBuffer(buffer);
        }
-       lastBlock = npages - 1;
+
+       /* Update the metapage with accurate page and entry counts */
+       idxStat.nTotalPages = npages;
+       ginUpdateStats(info->index, &idxStat);
 
        /* Finally, vacuum the FSM */
        IndexFreeSpaceMapVacuum(info->index);
index 75997d9534b4e03558a21e9f1a9fd6534fa5cf81..18b5908d05f3e5ced34238a0a3abbfea76c58141 100644 (file)
@@ -839,7 +839,7 @@ ginContinueSplit(ginIncompleteSplit *split)
        stack.parent = NULL;
 
        findParents(&btree, &stack, split->rootBlkno);
-       ginInsertValue(&btree, stack.parent);
+       ginInsertValue(&btree, stack.parent, NULL);
 
        FreeFakeRelcacheEntry(reln);
 
index 34369e5aaebae23820eeb838747fd4248df248ef..ce6d4e2a79b5cb2048e7f7932649ab721af9d029 100644 (file)
@@ -91,6 +91,7 @@
 #include <ctype.h>
 #include <math.h>
 
+#include "access/gin.h"
 #include "access/sysattr.h"
 #include "catalog/index.h"
 #include "catalog/pg_opfamily.h"
@@ -6235,6 +6236,24 @@ gistcostestimate(PG_FUNCTION_ARGS)
        PG_RETURN_VOID();
 }
 
+/*
+ * Find the index column matching "op"; return its index, or -1 if no match.
+ *
+ * Columns 0 .. index->ncolumns - 1 are tested in order with
+ * match_index_to_operand(), and the first column that matches is returned.
+ * gincostestimate uses this to decide which side of an index clause refers
+ * to the indexed column.
+ */
+static int
+find_index_column(Node *op, IndexOptInfo *index)
+{
+       int                     i;
+
+       for (i = 0; i < index->ncolumns; i++)
+       {
+               if (match_index_to_operand(op, i, index))
+                       return i;
+       }
+
+       return -1;
+}
+
+/*
+ * GIN has search behavior completely different from other index types
+ */
 Datum
 gincostestimate(PG_FUNCTION_ARGS)
 {
@@ -6246,10 +6265,329 @@ gincostestimate(PG_FUNCTION_ARGS)
        Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
        Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
        double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+       ListCell           *l;
+       int32              nfullscan = 0;
+       List               *selectivityQuals;
+       double             numPages = index->pages,
+                                  numTuples = index->tuples;
+       double             numEntryPages,
+                                  numDataPages,
+                                  numPendingPages,
+                                  numEntries;
+       double             partialEntriesInQuals = 0.0;
+       double             searchEntriesInQuals = 0.0;
+       double             exactEntriesInQuals = 0.0;
+       double             entryPagesFetched,
+                                  dataPagesFetched,
+                                  dataPagesFetchedBySel;
+       double             qual_op_cost,
+                                  qual_arg_cost,
+                                  spc_random_page_cost,
+                                  num_scans;
+       QualCost           index_qual_cost;
+       Relation           indexRel;
+       GinStatsData   ginStats;
 
-       genericcostestimate(root, index, indexQuals, outer_rel, 0.0,
-                                               indexStartupCost, indexTotalCost,
-                                               indexSelectivity, indexCorrelation);
+       /*
+        * Obtain statistic information from the meta page
+        */
+       indexRel = index_open(index->indexoid, AccessShareLock);
+       ginGetStats(indexRel, &ginStats);
+       index_close(indexRel, AccessShareLock);
+
+       numEntryPages = ginStats.nEntryPages;
+       numDataPages = ginStats.nDataPages;
+       numPendingPages = ginStats.nPendingPages;
+       numEntries = ginStats.nEntries;
+
+       /*
+        * nPendingPages can be trusted, but the other fields are as of the last
+        * VACUUM.  Scale them by the ratio numPages / nTotalPages to account for
+        * growth since then.  If the fields are zero (implying no VACUUM at all,
+        * and an index created pre-9.1), assume all pages are entry pages.
+        */
+       if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0)
+       {
+               numEntryPages = numPages;
+               numDataPages = 0;
+               numEntries = numTuples;         /* bogus, but no other info available */
+       }
+       else
+       {
+               double  scale = numPages / ginStats.nTotalPages;
+
+               numEntryPages = ceil(numEntryPages * scale);
+               numDataPages = ceil(numDataPages * scale);
+               numEntries = ceil(numEntries * scale);
+               /* ensure we didn't round up too much */
+               numEntryPages = Min(numEntryPages, numPages);
+               numDataPages = Min(numDataPages, numPages - numEntryPages);
+       }
+
+       /*
+        * Include predicate in selectivityQuals (should match genericcostestimate)
+        */
+       if (index->indpred != NIL)
+       {
+               List       *predExtraQuals = NIL;
+
+               foreach(l, index->indpred)
+               {
+                       Node       *predQual = (Node *) lfirst(l);
+                       List       *oneQual = list_make1(predQual);
+
+                       if (!predicate_implied_by(oneQual, indexQuals))
+                               predExtraQuals = list_concat(predExtraQuals, oneQual);
+               }
+               /* list_concat avoids modifying the passed-in indexQuals list */
+               selectivityQuals = list_concat(predExtraQuals, indexQuals);
+       }
+       else
+               selectivityQuals = indexQuals;
+
+       /* Estimate the fraction of main-table tuples that will be visited */
+       *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
+                                                                                               index->rel->relid,
+                                                                                               JOIN_INNER,
+                                                                                               NULL);
+
+       /* fetch estimated page cost for tablespace containing index */
+    get_tablespace_page_costs(index->reltablespace,
+                                                         &spc_random_page_cost,
+                                                         NULL);
+
+       /*
+        * Generic assumption about index correlation: there isn't any.
+        */
+       *indexCorrelation = 0.0;
+
+       /*
+        * Examine quals to estimate number of search entries & partial matches
+        */
+       foreach(l, indexQuals)
+       {
+               RestrictInfo    *rinfo = (RestrictInfo *) lfirst(l);
+               Expr                    *clause;
+               Node                    *leftop,
+                                               *rightop,
+                                               *operand;
+               Oid                             extractProcOid;
+               Oid                             clause_op;
+               int                             strategy_op;
+               Oid                             lefttype,
+                                               righttype;
+               int32                   nentries = 0;
+               bool                    *partial_matches = NULL;
+               Pointer                 *extra_data = NULL;
+               int                             indexcol;
+
+               Assert(IsA(rinfo, RestrictInfo));
+               clause = rinfo->clause;
+               Assert(IsA(clause, OpExpr));
+               leftop = get_leftop(clause);
+               rightop = get_rightop(clause);
+               clause_op = ((OpExpr *) clause)->opno;
+
+               if ((indexcol = find_index_column(leftop, index)) >= 0)
+               {
+                       operand = rightop;
+               }
+               else if ((indexcol = find_index_column(rightop, index)) >= 0)
+               {
+                       operand = leftop;
+                       clause_op = get_commutator(clause_op);
+               }
+               else
+               {
+                       elog(ERROR, "Could not match index to operand");
+                       operand = NULL; /* keep compiler quiet */
+               }
+
+               if (IsA(operand, RelabelType))
+                       operand = (Node *) ((RelabelType *) operand)->arg;
+
+               /*
+                * It's impossible to call extractQuery method for unknown operand.
+                * So unless operand is a Const we can't do much; just assume there
+                * will be one ordinary search entry from the operand at runtime.
+                */
+               if (!IsA(operand, Const))
+               {
+                       searchEntriesInQuals++;
+                       continue;
+               }
+
+               /* If Const is null, there can be no matches */
+               if (((Const*) operand)->constisnull)
+               {
+                       *indexStartupCost = 0;
+                       *indexTotalCost = 0;
+                       *indexSelectivity = 0;
+                       PG_RETURN_VOID();
+               }
+
+               /*
+                * Get the operator's strategy number and declared input data types
+                * within the index opfamily.
+                */
+               get_op_opfamily_properties(clause_op, index->opfamily[indexcol],
+                                                                  &strategy_op, &lefttype, &righttype);
+
+               /*
+                * GIN (like GiST) always has lefttype == righttype in pg_amproc
+                * and they are equal to type Oid on which index was created/designed
+                */
+               extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
+                                                                                  lefttype, lefttype,
+                                                                                  GIN_EXTRACTQUERY_PROC);
+
+               if (!OidIsValid(extractProcOid))
+               {
+                       /* probably shouldn't happen, but cope sanely if so */
+                       searchEntriesInQuals++;
+                       continue;
+               }
+
+               OidFunctionCall5(extractProcOid,
+                                                ((Const*)operand)->constvalue,
+                                                PointerGetDatum(&nentries),
+                                                UInt16GetDatum(strategy_op),
+                                                PointerGetDatum(&partial_matches),
+                                                PointerGetDatum(&extra_data));
+
+               if (nentries == 0)
+               {
+                       nfullscan++;
+               }
+               else if (nentries < 0)
+               {
+                       /*
+                        * GIN_EXTRACTQUERY_PROC guarantees that nothing will be found
+                        */
+                       *indexStartupCost = 0;
+                       *indexTotalCost = 0;
+                       *indexSelectivity = 0;
+                       PG_RETURN_VOID();
+               }
+               else
+               {
+                       int             i;
+
+                       for (i=0; i<nentries; i++)
+                       {
+                               /*
+                                * For partial match we haven't any information to estimate
+                                * number of matched entries in index, so, we just estimate it
+                                * as 100
+                                */
+                               if (partial_matches && partial_matches[i])
+                                       partialEntriesInQuals += 100;
+                               else
+                                       exactEntriesInQuals++;
+
+                               searchEntriesInQuals++;
+                       }
+               }
+       }
+
+       if (nfullscan == list_length(indexQuals))
+               searchEntriesInQuals = numEntries;
+
+       /* Will we have more than one iteration of a nestloop scan? */
+       if (outer_rel != NULL && outer_rel->rows > 1)
+               num_scans = outer_rel->rows;
+       else
+               num_scans = 1;
+
+       /*
+        * cost to begin scan, first of all, pay attention to
+        * pending list.
+        */
+       entryPagesFetched = numPendingPages;
+
+       /*
+        * Estimate number of entry pages read.  We need to do
+        * searchEntriesInQuals searches.  Use a power function as it should be,
+        * but tuples on leaf pages usually is much greater.
+        * Here we include all searches in entry tree, including
+        * search of first entry in partial match algorithm
+        */
+       entryPagesFetched += ceil(searchEntriesInQuals * rint(pow(numEntryPages, 0.15)));
+
+       /*
+        * Add an estimate of entry pages read by partial match algorithm.
+        * It's a scan over leaf pages in entry tree.  We haven't any useful stats
+        * here, so estimate it as proportion.
+        */
+       entryPagesFetched += ceil(numEntryPages * partialEntriesInQuals / numEntries);
+
+       /*
+        * Partial match algorithm reads all data pages before
+        * doing actual scan, so it's a startup cost. Again,
+        * we haven't any useful stats here, so, estimate it as
+        * proportion
+        */
+       dataPagesFetched = ceil(numDataPages * partialEntriesInQuals / numEntries);
+
+       /* calculate cache effects */
+       if (num_scans > 1 || searchEntriesInQuals > 1)
+       {
+               entryPagesFetched = index_pages_fetched(entryPagesFetched,
+                                                                                               (BlockNumber) numEntryPages,
+                                                                                               numEntryPages, root);
+               dataPagesFetched = index_pages_fetched(dataPagesFetched,
+                                                                                          (BlockNumber) numDataPages,
+                                                                                          numDataPages, root);
+       }
+
+       /*
+        * Here we use random page cost because logically-close pages could be
+        * far apart on disk.
+        */
+       *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
+
+       /* cost to scan data pages for each exact (non-partial) matched entry */
+       dataPagesFetched = ceil(numDataPages * exactEntriesInQuals / numEntries);
+
+       /*
+        * Estimate number of data pages read, using selectivity estimation and
+        * capacity of data page.
+        */
+       dataPagesFetchedBySel = ceil(*indexSelectivity *
+                                                                (numTuples / (BLCKSZ/SizeOfIptrData)));
+
+       if (dataPagesFetchedBySel > dataPagesFetched)
+       {
+               /*
+                * At least one of entries is very frequent and, unfortunately,
+                * we couldn't get statistic about entries (only tsvector has
+                * such statistics). So, we obviously have too small estimation of
+                * pages fetched from data tree. Re-estimate it from known
+                * capacity of data pages
+                */
+               dataPagesFetched = dataPagesFetchedBySel;
+       }
+
+       if (num_scans > 1)
+               dataPagesFetched = index_pages_fetched(dataPagesFetched,
+                                                                                          (BlockNumber) numDataPages,
+                                                                                          numDataPages, root);
+       *indexTotalCost = *indexStartupCost +
+               dataPagesFetched * spc_random_page_cost;
+
+       /*
+        * Add on index qual eval costs, much as in genericcostestimate
+        */
+       cost_qual_eval(&index_qual_cost, indexQuals, root);
+       qual_op_cost = cpu_operator_cost * list_length(indexQuals);
+       qual_arg_cost = index_qual_cost.startup +
+               index_qual_cost.per_tuple - qual_op_cost;
+       if (qual_arg_cost < 0)      /* just in case... */
+               qual_arg_cost = 0;
+
+       *indexStartupCost += qual_arg_cost;
+       *indexTotalCost += qual_arg_cost;
+       *indexTotalCost += ( numTuples * *indexSelectivity ) * (cpu_index_tuple_cost + qual_op_cost);
 
        PG_RETURN_VOID();
 }
index c67d4182c48f5ca25ec2930fd63f291634dd22c9..e6db073a0558cc347a6cc0da7ed770901c4a6e1f 100644 (file)
@@ -79,6 +79,14 @@ typedef struct GinMetaPageData
         */
        BlockNumber nPendingPages;
        int64           nPendingHeapTuples;
+
+       /*
+        * Statistics for planner use (accurate as of last VACUUM)
+        */
+       BlockNumber     nTotalPages;
+       BlockNumber     nEntryPages;
+       BlockNumber     nDataPages;
+       int64           nEntries;
 } GinMetaPageData;
 
 #define GinPageGetMeta(p) \
@@ -94,6 +102,8 @@ typedef struct GinMetaPageData
 #define GinPageSetNonLeaf(page)    ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
 #define GinPageIsData(page)    ( GinPageGetOpaque(page)->flags & GIN_DATA )
 #define GinPageSetData(page)   ( GinPageGetOpaque(page)->flags |= GIN_DATA )
+#define GinPageIsList(page)    ( GinPageGetOpaque(page)->flags & GIN_LIST )
+#define GinPageSetList(page)   ( GinPageGetOpaque(page)->flags |= GIN_LIST )
 #define GinPageHasFullRow(page)    ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
 #define GinPageSetFullRow(page)   ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
 
@@ -362,13 +372,28 @@ extern Datum *extractEntriesSU(GinState *ginstate, OffsetNumber attnum, Datum va
 extern Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple);
 extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
 
+/*
+ * GinStatsData: index statistics for planner use; see ginGetStats/ginUpdateStats
+ */
+typedef struct GinStatsData
+{
+       BlockNumber nPendingPages;  /* same-named field exists in GinMetaPageData */
+       BlockNumber     nTotalPages;  /* presumably all pages in the index; confirm */
+       BlockNumber     nEntryPages;  /* count of "entry" pages within the index */
+       BlockNumber     nDataPages;  /* count of "data" pages within the index */
+       int64           nEntries;  /* presumably number of distinct key values; confirm */
+} GinStatsData;
+
+extern void ginGetStats(Relation index, GinStatsData *stats);
+extern void ginUpdateStats(Relation index, const GinStatsData *stats);
+
 /* gininsert.c */
 extern Datum ginbuild(PG_FUNCTION_ARGS);
 extern Datum gininsert(PG_FUNCTION_ARGS);
 extern void ginEntryInsert(Relation index, GinState *ginstate,
                           OffsetNumber attnum, Datum value,
                           ItemPointerData *items, uint32 nitem,
-                          bool isBuild);
+                          GinStatsData *buildStats);
 
 /* ginxlog.c */
 extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
@@ -406,6 +431,7 @@ typedef struct GinBtreeData
        Page            (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, XLogRecData **);
        void            (*fillRoot) (GinBtree, Buffer, Buffer, Buffer);
 
+       bool            isData;
        bool            searchMode;
 
        Relation        index;
@@ -432,7 +458,8 @@ typedef struct GinBtreeData
 extern GinBtreeStack *ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno);
 extern GinBtreeStack *ginFindLeafPage(GinBtree btree, GinBtreeStack *stack);
 extern void freeGinBtreeStack(GinBtreeStack *stack);
-extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack);
+extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
+                                                  GinStatsData *buildStats);
 extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno);
 
 /* ginentrypage.c */
@@ -462,8 +489,9 @@ typedef struct
 
 extern GinPostingTreeScan *prepareScanPostingTree(Relation index,
                                           BlockNumber rootBlkno, bool searchMode);
-extern void insertItemPointer(GinPostingTreeScan *gdi,
-                                 ItemPointerData *items, uint32 nitem);
+extern void ginInsertItemPointer(GinPostingTreeScan *gdi,
+                                                                ItemPointerData *items, uint32 nitem,
+                                                                GinStatsData *buildStats);
 extern Buffer scanBeginPostingTree(GinPostingTreeScan *gdi);
 extern void dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
 extern void prepareDataScan(GinBtree btree, Relation index);