granicus.if.org Git - postgresql/commitdiff
Small performance improvement for hash joins and hash aggregation:
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 30 May 2003 20:23:10 +0000 (20:23 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 30 May 2003 20:23:10 +0000 (20:23 +0000)
when the plan is ReScanned, we don't have to rebuild the hash table
if there is no parameter change for its child node.  This idea has
been used for a long time in Sort and Material nodes, but was not in
the hash code till now.

src/backend/executor/nodeAgg.c
src/backend/executor/nodeHashjoin.c

index bbdda3540a7ec4e910d8bf7dc124a8305d9f2971..603df5ed1c44eca93f744c742decc7b672ff277a 100644 (file)
@@ -45,7 +45,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.104 2003/02/09 00:30:39 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.105 2003/05/30 20:23:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1374,6 +1374,31 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
        ExprContext *econtext = node->ss.ps.ps_ExprContext;
        int                     aggno;
 
+       node->agg_done = false;
+
+       if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
+       {
+               /*
+                * In the hashed case, if we haven't yet built the hash table
+                * then we can just return; nothing done yet, so nothing to undo.
+                * If subnode's chgParam is not NULL then it will be re-scanned by
+                * ExecProcNode, else no reason to re-scan it at all.
+                */
+               if (!node->table_filled)
+                       return;
+
+               /*
+                * If we do have the hash table and the subplan does not have any
+                * parameter changes, then we can just rescan the existing hash
+                * table; no need to build it again.
+                */
+               if (((PlanState *) node)->lefttree->chgParam == NULL)
+               {
+                       ResetTupleHashIterator(&node->hashiter);
+                       return;
+               }
+       }
+
        /* Make sure we have closed any open tuplesorts */
        for (aggno = 0; aggno < node->numaggs; aggno++)
        {
@@ -1384,19 +1409,23 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
                peraggstate->sortstate = NULL;
        }
 
-       node->agg_done = false;
+       /* Release first tuple of group, if we have made a copy */
        if (node->grp_firstTuple != NULL)
        {
                heap_freetuple(node->grp_firstTuple);
                node->grp_firstTuple = NULL;
        }
+
+       /* Forget current agg values */
        MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
        MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
 
+       /* Release all temp storage */
        MemoryContextReset(node->aggcontext);
 
        if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
        {
+               /* Rebuild an empty hash table */
                build_hash_table(node);
                node->table_filled = false;
        }
index 000063a8b7f23e932f48528e4ea5e4c85b7a948f..17585b2f0fc4e4269faabe59c74d6368b1a10a80 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.50 2003/05/05 17:57:47 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.51 2003/05/30 20:23:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -56,9 +56,7 @@ ExecHashJoin(HashJoinState *node)
        HashJoinTable hashtable;
        HeapTuple       curtuple;
        TupleTableSlot *outerTupleSlot;
-       TupleTableSlot *innerTupleSlot;
        int                     i;
-       bool            hashPhaseDone;
 
        /*
         * get information from HashJoin node
@@ -69,7 +67,6 @@ ExecHashJoin(HashJoinState *node)
        otherqual = node->js.ps.qual;
        hashNode = (HashState *) innerPlanState(node);
        outerNode = outerPlanState(node);
-       hashPhaseDone = node->hj_hashdone;
        dir = estate->es_direction;
 
        /*
@@ -114,24 +111,20 @@ ExecHashJoin(HashJoinState *node)
        /*
         * if this is the first call, build the hash table for inner relation
         */
-       if (!hashPhaseDone)
-       {                                                       /* if the hash phase not completed */
-               if (hashtable == NULL)
-               {                                               /* if the hash table has not been created */
-
-                       /*
-                        * create the hash table
-                        */
-                       hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
-                       node->hj_HashTable = hashtable;
+       if (!node->hj_hashdone)
+       {
+               /*
+                * create the hash table
+                */
+               Assert(hashtable == NULL);
+               hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
+               node->hj_HashTable = hashtable;
 
-                       /*
-                        * execute the Hash node, to build the hash table
-                        */
-                       hashNode->hashtable = hashtable;
-                       innerTupleSlot = ExecProcNode((PlanState *) hashNode);
-               }
-               node->hj_hashdone = true;
+               /*
+                * execute the Hash node, to build the hash table
+                */
+               hashNode->hashtable = hashtable;
+               (void) ExecProcNode((PlanState *) hashNode);
 
                /*
                 * Open temp files for outer batches, if needed. Note that file
@@ -139,9 +132,9 @@ ExecHashJoin(HashJoinState *node)
                 */
                for (i = 0; i < hashtable->nbatch; i++)
                        hashtable->outerBatchFile[i] = BufFileCreateTemp(false);
+
+               node->hj_hashdone = true;
        }
-       else if (hashtable == NULL)
-               return NULL;
 
        /*
         * Now get an outer tuple and probe into the hash table for matches
@@ -159,11 +152,7 @@ ExecHashJoin(HashJoinState *node)
                                                                                                           node);
                        if (TupIsNull(outerTupleSlot))
                        {
-                               /*
-                                * when the last batch runs out, clean up and exit
-                                */
-                               ExecHashTableDestroy(hashtable);
-                               node->hj_HashTable = NULL;
+                               /* end of join */
                                return NULL;
                        }
 
@@ -410,8 +399,8 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
         */
 
        hjstate->hj_hashdone = false;
-
        hjstate->hj_HashTable = (HashJoinTable) NULL;
+
        hjstate->hj_CurBucketNo = 0;
        hjstate->hj_CurTuple = (HashJoinTuple) NULL;
 
@@ -461,7 +450,7 @@ void
 ExecEndHashJoin(HashJoinState *node)
 {
        /*
-        * free hash table in case we end plan before all tuples are retrieved
+        * Free hash table
         */
        if (node->hj_HashTable)
        {
@@ -682,21 +671,41 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple,
 void
 ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
 {
+       /*
+        * If we haven't yet built the hash table then we can just return;
+        * nothing done yet, so nothing to undo.
+        */
        if (!node->hj_hashdone)
                return;
-
-       node->hj_hashdone = false;
+       Assert(node->hj_HashTable != NULL);
 
        /*
-        * Unfortunately, currently we have to destroy hashtable in all
-        * cases...
+        * In a multi-batch join, we currently have to do rescans the hard way,
+        * primarily because batch temp files may have already been released.
+        * But if it's a single-batch join, and there is no parameter change
+        * for the inner subnode, then we can just re-use the existing hash
+        * table without rebuilding it.
         */
-       if (node->hj_HashTable)
+       if (node->hj_HashTable->nbatch == 0 &&
+               ((PlanState *) node)->righttree->chgParam == NULL)
+       {
+               /* okay to reuse the hash table; needn't rescan inner, either */
+       }
+       else
        {
+               /* must destroy and rebuild hash table */
+               node->hj_hashdone = false;
                ExecHashTableDestroy(node->hj_HashTable);
                node->hj_HashTable = NULL;
+               /*
+                * if chgParam of subnode is not null then plan will be re-scanned
+                * by first ExecProcNode.
+                */
+               if (((PlanState *) node)->righttree->chgParam == NULL)
+                       ExecReScan(((PlanState *) node)->righttree, exprCtxt);
        }
 
+       /* Always reset intra-tuple state */
        node->hj_CurBucketNo = 0;
        node->hj_CurTuple = (HashJoinTuple) NULL;
 
@@ -706,11 +715,9 @@ ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
        node->hj_MatchedOuter = false;
 
        /*
-        * if chgParam of subnodes is not null then plans will be re-scanned
+        * if chgParam of subnode is not null then plan will be re-scanned
         * by first ExecProcNode.
         */
        if (((PlanState *) node)->lefttree->chgParam == NULL)
                ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
-       if (((PlanState *) node)->righttree->chgParam == NULL)
-               ExecReScan(((PlanState *) node)->righttree, exprCtxt);
 }