granicus.if.org Git - postgresql/commitdiff
Teach tuplestore.c to throw away data before the "mark" point when the caller
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 21 May 2007 17:57:35 +0000 (17:57 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 21 May 2007 17:57:35 +0000 (17:57 +0000)
is using mark/restore but not rewind or backward-scan capability.  Insert a
materialize plan node between a mergejoin and its inner child if the inner
child is a sort that is expected to spill to disk.  The materialize shields
the sort from the need to do mark/restore and thereby allows it to perform
its final merge pass on-the-fly; while the materialize itself is normally
cheap since it won't spill to disk unless the number of tuples with equal
key values exceeds work_mem.

Greg Stark, with some kibitzing from Tom Lane.

src/backend/executor/nodeMaterial.c
src/backend/executor/nodeMergejoin.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/createplan.c
src/backend/utils/sort/tuplestore.c
src/include/nodes/execnodes.h
src/include/optimizer/cost.h
src/include/utils/tuplestore.h

index 1cb4e63b97525275b607065dedc0efe0267b6258..e216c1f9e955560f09dd6fbe3f6d30ced28049f5 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.58 2007/01/05 22:19:28 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.59 2007/05/21 17:57:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -56,10 +56,10 @@ ExecMaterial(MaterialState *node)
        /*
         * If first time through, and we need a tuplestore, initialize it.
         */
-       if (tuplestorestate == NULL && node->randomAccess)
+       if (tuplestorestate == NULL && node->eflags != 0)
        {
                tuplestorestate = tuplestore_begin_heap(true, false, work_mem);
-
+               tuplestore_set_eflags(tuplestorestate, node->eflags);
                node->tuplestorestate = (void *) tuplestorestate;
        }
 
@@ -162,14 +162,14 @@ ExecInitMaterial(Material *node, EState *estate, int eflags)
        matstate->ss.ps.state = estate;
 
        /*
-        * We must have random access to the subplan output to do backward scan or
-        * mark/restore.  We also prefer to materialize the subplan output if we
-        * might be called on to rewind and replay it many times. However, if none
-        * of these cases apply, we can skip storing the data.
+        * We must have a tuplestore buffering the subplan output to do backward
+        * scan or mark/restore.  We also prefer to materialize the subplan output
+        * if we might be called on to rewind and replay it many times. However,
+        * if none of these cases apply, we can skip storing the data.
         */
-       matstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
-                                                                               EXEC_FLAG_BACKWARD |
-                                                                               EXEC_FLAG_MARK)) != 0;
+       matstate->eflags = (eflags & (EXEC_FLAG_REWIND |
+                                                                 EXEC_FLAG_BACKWARD |
+                                                                 EXEC_FLAG_MARK));
 
        matstate->eof_underlying = false;
        matstate->tuplestorestate = NULL;
@@ -255,7 +255,7 @@ ExecEndMaterial(MaterialState *node)
 void
 ExecMaterialMarkPos(MaterialState *node)
 {
-       Assert(node->randomAccess);
+       Assert(node->eflags & EXEC_FLAG_MARK);
 
        /*
         * if we haven't materialized yet, just return.
@@ -275,7 +275,7 @@ ExecMaterialMarkPos(MaterialState *node)
 void
 ExecMaterialRestrPos(MaterialState *node)
 {
-       Assert(node->randomAccess);
+       Assert(node->eflags & EXEC_FLAG_MARK);
 
        /*
         * if we haven't materialized yet, just return.
@@ -300,7 +300,7 @@ ExecMaterialReScan(MaterialState *node, ExprContext *exprCtxt)
 {
        ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
 
-       if (node->randomAccess)
+       if (node->eflags != 0)
        {
                /*
                 * If we haven't materialized yet, just return. If outerplan' chgParam
@@ -312,15 +312,21 @@ ExecMaterialReScan(MaterialState *node, ExprContext *exprCtxt)
 
                /*
                 * If subnode is to be rescanned then we forget previous stored
-                * results; we have to re-read the subplan and re-store.
+                * results; we have to re-read the subplan and re-store.  Also,
+                * if we told tuplestore it needn't support rescan, we lose and
+                * must re-read.  (This last should not happen in common cases;
+                * else our caller lied by not passing EXEC_FLAG_REWIND to us.)
                 *
                 * Otherwise we can just rewind and rescan the stored output. The
                 * state of the subnode does not change.
                 */
-               if (((PlanState *) node)->lefttree->chgParam != NULL)
+               if (((PlanState *) node)->lefttree->chgParam != NULL ||
+                       (node->eflags & EXEC_FLAG_REWIND) == 0)
                {
                        tuplestore_end((Tuplestorestate *) node->tuplestorestate);
                        node->tuplestorestate = NULL;
+                       if (((PlanState *) node)->lefttree->chgParam == NULL)
+                               ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
                        node->eof_underlying = false;
                }
                else
index ad4aace6536363a1228c94263781877e070cd325..794871e5ba44b464c3d8a3d0f3463e579fbba6d7 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.87 2007/02/02 00:07:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.88 2007/05/21 17:57:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -706,6 +706,9 @@ ExecMergeJoin(MergeJoinState *node)
                                }
                                else
                                {
+                                       /* Mark before advancing, if wanted */
+                                       if (node->mj_ExtraMarks)
+                                               ExecMarkPos(innerPlan);
                                        /* Stay in same state to fetch next inner tuple */
                                        if (doFillInner)
                                        {
@@ -830,6 +833,9 @@ ExecMergeJoin(MergeJoinState *node)
                                 * now we get the next inner tuple, if any.  If there's none,
                                 * advance to next outer tuple (which may be able to join to
                                 * previously marked tuples).
+                                *
+                                * NB: must NOT do "extraMarks" here, since we may need to
+                                * return to previously marked tuples.
                                 */
                                innerTupleSlot = ExecProcNode(innerPlan);
                                node->mj_InnerTupleSlot = innerTupleSlot;
@@ -1140,6 +1146,9 @@ ExecMergeJoin(MergeJoinState *node)
                                break;
 
                                /*
+                                * SKIPOUTER_ADVANCE: advance over an outer tuple that is
+                                * known not to join to any inner tuple.
+                                *
                                 * Before advancing, we check to see if we must emit an
                                 * outer-join fill tuple for this outer tuple.
                                 */
@@ -1204,6 +1213,9 @@ ExecMergeJoin(MergeJoinState *node)
                                break;
 
                                /*
+                                * SKIPINNER_ADVANCE: advance over an inner tuple that is
+                                * known not to join to any outer tuple.
+                                *
                                 * Before advancing, we check to see if we must emit an
                                 * outer-join fill tuple for this inner tuple.
                                 */
@@ -1225,6 +1237,10 @@ ExecMergeJoin(MergeJoinState *node)
                                                return result;
                                }
 
+                               /* Mark before advancing, if wanted */
+                               if (node->mj_ExtraMarks)
+                                       ExecMarkPos(innerPlan);
+
                                /*
                                 * now we get the next inner tuple, if any
                                 */
@@ -1295,6 +1311,10 @@ ExecMergeJoin(MergeJoinState *node)
                                                return result;
                                }
 
+                               /* Mark before advancing, if wanted */
+                               if (node->mj_ExtraMarks)
+                                       ExecMarkPos(innerPlan);
+
                                /*
                                 * now we get the next inner tuple, if any
                                 */
@@ -1425,6 +1445,22 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
        innerPlanState(mergestate) = ExecInitNode(innerPlan(node), estate,
                                                                                          eflags | EXEC_FLAG_MARK);
 
+       /*
+        * For certain types of inner child nodes, it is advantageous to issue
+        * MARK every time we advance past an inner tuple we will never return
+        * to.  For other types, MARK on a tuple we cannot return to is a waste
+        * of cycles.  Detect which case applies and set mj_ExtraMarks if we
+        * want to issue "unnecessary" MARK calls.
+        *
+        * Currently, only Material wants the extra MARKs, and it will be helpful
+        * only if eflags doesn't specify REWIND.
+        */
+       if (IsA(innerPlan(node), Material) &&
+               (eflags & EXEC_FLAG_REWIND) == 0)
+               mergestate->mj_ExtraMarks = true;
+       else
+               mergestate->mj_ExtraMarks = false;
+
 #define MERGEJOIN_NSLOTS 4
 
        /*
index 82dfc77f065e3c93fe3739cdc597c17194cdcb04..55c7648b9e7865042a179a63f2461458b454b755 100644 (file)
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.182 2007/05/04 01:13:44 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.183 2007/05/21 17:57:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1038,6 +1038,23 @@ cost_sort(Path *path, PlannerInfo *root,
        path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * sort_exceeds_work_mem
+ *       Given a finished Sort plan node, detect whether it is expected to
+ *       spill to disk (ie, will need more than work_mem workspace)
+ *
+ * This assumes there will be no available LIMIT.
+ */
+bool
+sort_exceeds_work_mem(Sort *sort)
+{
+       double          input_bytes = relation_byte_size(sort->plan.plan_rows,
+                                                                                                sort->plan.plan_width);
+       long            work_mem_bytes = work_mem * 1024L;
+
+       return (input_bytes > work_mem_bytes);
+}
+
 /*
  * cost_material
  *       Determines and returns the cost of materializing a relation, including
index be0162406bd18e9ce7a5157948d498dadc9dabfd..23099df6dc32a357e79b5d46ae422c6e1506e631 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.230 2007/05/04 01:13:44 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.231 2007/05/21 17:57:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1600,6 +1600,30 @@ create_mergejoin_plan(PlannerInfo *root,
        else
                innerpathkeys = best_path->jpath.innerjoinpath->pathkeys;
 
+       /*
+        * If inner plan is a sort that is expected to spill to disk, add a
+        * materialize node to shield it from the need to handle mark/restore.
+        * This will allow it to perform the last merge pass on-the-fly, while
+        * in most cases not requiring the materialize to spill to disk.
+        *
+        * XXX really, Sort oughta do this for itself, probably, to avoid the
+        * overhead of a separate plan node.
+        */
+       if (IsA(inner_plan, Sort) &&
+               sort_exceeds_work_mem((Sort *) inner_plan))
+       {
+               Plan       *matplan = (Plan *) make_material(inner_plan);
+
+               /*
+                * We assume the materialize will not spill to disk, and therefore
+                * charge just cpu_tuple_cost per tuple.
+                */
+               copy_plan_costsize(matplan, inner_plan);
+               matplan->total_cost += cpu_tuple_cost * matplan->plan_rows;
+
+               inner_plan = matplan;
+       }
+
        /*
         * Compute the opfamily/strategy/nullsfirst arrays needed by the executor.
         * The information is in the pathkeys for the two inputs, but we need to
index d94cb5ee7746b5a131a39189e0054c862579583f..d25bb122b58f3313e68c6bb8ca2fa57049bcf134 100644 (file)
  * maxKBytes, we dump all the tuples into a temp file and then read from that
  * when needed.
  *
- * When the caller requests random access to the data, we write the temp file
+ * When the caller requests backward-scan capability, we write the temp file
  * in a format that allows either forward or backward scan.  Otherwise, only
- * forward scan is allowed.  But rewind and markpos/restorepos are allowed
- * in any case.
+ * forward scan is allowed.  Rewind and markpos/restorepos are normally allowed
+ * but can be turned off via tuplestore_set_eflags; turning off both backward
+ * scan and rewind enables truncation of the tuplestore at the mark point
+ * (if any) for minimal memory usage.
  *
  * Because we allow reading before writing is complete, there are two
  * interesting positions in the temp file: the current read position and
@@ -36,7 +38,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.30 2007/01/05 22:19:47 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.31 2007/05/21 17:57:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -44,6 +46,7 @@
 #include "postgres.h"
 
 #include "access/heapam.h"
+#include "executor/executor.h"
 #include "storage/buffile.h"
 #include "utils/memutils.h"
 #include "utils/tuplestore.h"
@@ -66,7 +69,7 @@ typedef enum
 struct Tuplestorestate
 {
        TupStoreStatus status;          /* enumerated value as shown above */
-       bool            randomAccess;   /* did caller request random access? */
+       int                     eflags;                 /* capability flags */
        bool            interXact;              /* keep open through transactions? */
        long            availMem;               /* remaining memory available, in bytes */
        BufFile    *myfile;                     /* underlying file, or NULL if none */
@@ -157,11 +160,11 @@ struct Tuplestorestate
  * may or may not match the in-memory representation of the tuple ---
  * any conversion needed is the job of the writetup and readtup routines.
  *
- * If state->randomAccess is true, then the stored representation of the
- * tuple must be followed by another "unsigned int" that is a copy of the
+ * If state->eflags & EXEC_FLAG_BACKWARD, then the stored representation of
+ * the tuple must be followed by another "unsigned int" that is a copy of the
  * length --- so the total tape space used is actually sizeof(unsigned int)
  * more than the stored length value.  This allows read-backwards.     When
- * randomAccess is not true, the write/read routines may omit the extra
+ * EXEC_FLAG_BACKWARD is not set, the write/read routines may omit the extra
  * length word.
  *
  * writetup is expected to write both length words as well as the tuple
@@ -192,11 +195,12 @@ struct Tuplestorestate
  */
 
 
-static Tuplestorestate *tuplestore_begin_common(bool randomAccess,
+static Tuplestorestate *tuplestore_begin_common(int eflags,
                                                bool interXact,
                                                int maxKBytes);
 static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple);
 static void dumptuples(Tuplestorestate *state);
+static void tuplestore_trim(Tuplestorestate *state, int ntuples);
 static unsigned int getlen(Tuplestorestate *state, bool eofOK);
 static void *copytup_heap(Tuplestorestate *state, void *tup);
 static void writetup_heap(Tuplestorestate *state, void *tup);
@@ -209,14 +213,14 @@ static void *readtup_heap(Tuplestorestate *state, unsigned int len);
  * Initialize for a tuple store operation.
  */
 static Tuplestorestate *
-tuplestore_begin_common(bool randomAccess, bool interXact, int maxKBytes)
+tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
 {
        Tuplestorestate *state;
 
        state = (Tuplestorestate *) palloc0(sizeof(Tuplestorestate));
 
        state->status = TSS_INMEM;
-       state->randomAccess = randomAccess;
+       state->eflags = eflags;
        state->interXact = interXact;
        state->availMem = maxKBytes * 1024L;
        state->myfile = NULL;
@@ -255,9 +259,18 @@ tuplestore_begin_common(bool randomAccess, bool interXact, int maxKBytes)
 Tuplestorestate *
 tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
 {
-       Tuplestorestate *state = tuplestore_begin_common(randomAccess,
-                                                                                                        interXact,
-                                                                                                        maxKBytes);
+       Tuplestorestate *state;
+       int             eflags;
+
+       /*
+        * This interpretation of the meaning of randomAccess is compatible
+        * with the pre-8.3 behavior of tuplestores.
+        */
+       eflags = randomAccess ?
+               (EXEC_FLAG_BACKWARD | EXEC_FLAG_REWIND | EXEC_FLAG_MARK) :
+               (EXEC_FLAG_REWIND | EXEC_FLAG_MARK);
+
+       state = tuplestore_begin_common(eflags, interXact, maxKBytes);
 
        state->copytup = copytup_heap;
        state->writetup = writetup_heap;
@@ -266,6 +279,30 @@ tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
        return state;
 }
 
+/*
+ * tuplestore_set_eflags
+ *
+ * Set capability flags at a finer grain than is allowed by
+ * tuplestore_begin_xxx.  This must be called before inserting any data
+ * into the tuplestore.
+ *
+ * eflags is a bitmask following the meanings used for executor node
+ * startup flags (see executor.h).  tuplestore pays attention to these bits:
+ *             EXEC_FLAG_REWIND                need rewind to start
+ *             EXEC_FLAG_BACKWARD              need backward fetch
+ *             EXEC_FLAG_MARK                  need mark/restore
+ * If tuplestore_set_eflags is not called, REWIND and MARK are allowed,
+ * and BACKWARD is set per "randomAccess" in the tuplestore_begin_xxx call.
+ */
+void
+tuplestore_set_eflags(Tuplestorestate *state, int eflags)
+{
+       Assert(state->status == TSS_INMEM);
+       Assert(state->memtupcount == 0);
+
+       state->eflags = eflags;
+}
+
 /*
  * tuplestore_end
  *
@@ -420,6 +457,9 @@ tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
  * Fetch the next tuple in either forward or back direction.
  * Returns NULL if no more tuples.     If should_free is set, the
  * caller must pfree the returned tuple when done with it.
+ *
+ * Backward scan is only allowed if randomAccess was set true or
+ * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags().
  */
 static void *
 tuplestore_gettuple(Tuplestorestate *state, bool forward,
@@ -428,7 +468,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
        unsigned int tuplen;
        void       *tup;
 
-       Assert(forward || state->randomAccess);
+       Assert(forward || (state->eflags & EXEC_FLAG_BACKWARD));
 
        switch (state->status)
        {
@@ -643,6 +683,8 @@ dumptuples(Tuplestorestate *state)
 void
 tuplestore_rescan(Tuplestorestate *state)
 {
+       Assert(state->eflags & EXEC_FLAG_REWIND);
+
        switch (state->status)
        {
                case TSS_INMEM:
@@ -671,10 +713,26 @@ tuplestore_rescan(Tuplestorestate *state)
 void
 tuplestore_markpos(Tuplestorestate *state)
 {
+       Assert(state->eflags & EXEC_FLAG_MARK);
+
        switch (state->status)
        {
                case TSS_INMEM:
                        state->markpos_current = state->current;
+                       /*
+                        * We can truncate the tuplestore if neither backward scan nor
+                        * rewind capability are required by the caller.  There will
+                        * never be a need to back up past the mark point.
+                        *
+                        * Note: you might think we could remove all the tuples before
+                        * "current", since that one is the next to be returned.  However,
+                        * since tuplestore_gettuple returns a direct pointer to our
+                        * internal copy of the tuple, it's likely that the caller has
+                        * still got the tuple just before "current" referenced in a slot.
+                        * Don't free it yet.
+                        */
+                       if (!(state->eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_REWIND)))
+                               tuplestore_trim(state, 1);
                        break;
                case TSS_WRITEFILE:
                        if (state->eof_reached)
@@ -708,6 +766,8 @@ tuplestore_markpos(Tuplestorestate *state)
 void
 tuplestore_restorepos(Tuplestorestate *state)
 {
+       Assert(state->eflags & EXEC_FLAG_MARK);
+
        switch (state->status)
        {
                case TSS_INMEM:
@@ -733,6 +793,55 @@ tuplestore_restorepos(Tuplestorestate *state)
        }
 }
 
+/*
+ * tuplestore_trim     - remove all but ntuples tuples before current
+ */
+static void
+tuplestore_trim(Tuplestorestate *state, int ntuples)
+{
+       int                     nremove;
+       int                     i;
+
+       /*
+        * We don't bother trimming temp files since it usually would mean more
+        * work than just letting them sit in kernel buffers until they age out.
+        */
+       if (state->status != TSS_INMEM)
+               return;
+
+       nremove = state->current - ntuples;
+       if (nremove <= 0)
+               return;                                 /* nothing to do */
+       Assert(nremove <= state->memtupcount);
+
+       /* Release no-longer-needed tuples */
+       for (i = 0; i < nremove; i++)
+       {
+               FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
+               pfree(state->memtuples[i]);
+       }
+
+       /*
+        * Slide the array down and readjust pointers.  This may look pretty
+        * stupid, but we expect that there will usually not be very many
+        * tuple-pointers to move, so this isn't that expensive; and it keeps
+        * a lot of other logic simple.
+        *
+        * In fact, in the current usage for merge joins, it's demonstrable that
+        * there will always be exactly one non-removed tuple; so optimize that
+        * case.
+        */
+       if (nremove + 1 == state->memtupcount)
+               state->memtuples[0] = state->memtuples[nremove];
+       else
+               memmove(state->memtuples, state->memtuples + nremove,
+                               (state->memtupcount - nremove) * sizeof(void *));
+
+       state->memtupcount -= nremove;
+       state->current -= nremove;
+       state->markpos_current -= nremove;
+}
+
 
 /*
  * Tape interface routines
@@ -783,7 +892,7 @@ writetup_heap(Tuplestorestate *state, void *tup)
 
        if (BufFileWrite(state->myfile, (void *) tuple, tuplen) != (size_t) tuplen)
                elog(ERROR, "write failed");
-       if (state->randomAccess)        /* need trailing length word? */
+       if (state->eflags & EXEC_FLAG_BACKWARD) /* need trailing length word? */
                if (BufFileWrite(state->myfile, (void *) &tuplen,
                                                 sizeof(tuplen)) != sizeof(tuplen))
                        elog(ERROR, "write failed");
@@ -804,7 +913,7 @@ readtup_heap(Tuplestorestate *state, unsigned int len)
        if (BufFileRead(state->myfile, (void *) ((char *) tuple + sizeof(int)),
                                        len - sizeof(int)) != (size_t) (len - sizeof(int)))
                elog(ERROR, "unexpected end of data");
-       if (state->randomAccess)        /* need trailing length word? */
+       if (state->eflags & EXEC_FLAG_BACKWARD) /* need trailing length word? */
                if (BufFileRead(state->myfile, (void *) &tuplen,
                                                sizeof(tuplen)) != sizeof(tuplen))
                        elog(ERROR, "unexpected end of data");
index 6f80080a09f2905e8cdd715fb0447f2298c55629..b6b7ddcde6fdb45d30f3c0d198b6867cb8c9e113 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.174 2007/05/17 19:35:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.175 2007/05/21 17:57:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1180,6 +1180,7 @@ typedef struct NestLoopState
  *             NumClauses                 number of mergejoinable join clauses
  *             Clauses                    info for each mergejoinable clause
  *             JoinState                  current "state" of join.  see execdefs.h
+ *             ExtraMarks                 true to issue extra Mark operations on inner scan
  *             FillOuter                  true if should emit unjoined outer tuples anyway
  *             FillInner                  true if should emit unjoined inner tuples anyway
  *             MatchedOuter       true if found a join match for current outer tuple
@@ -1202,6 +1203,7 @@ typedef struct MergeJoinState
        int                     mj_NumClauses;
        MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */
        int                     mj_JoinState;
+       bool            mj_ExtraMarks;
        bool            mj_FillOuter;
        bool            mj_FillInner;
        bool            mj_MatchedOuter;
@@ -1281,7 +1283,7 @@ typedef struct HashJoinState
 typedef struct MaterialState
 {
        ScanState       ss;                             /* its first field is NodeTag */
-       bool            randomAccess;   /* need random access to subplan output? */
+       int                     eflags;                 /* capability flags to pass to tuplestore */
        bool            eof_underlying; /* reached end of underlying plan? */
        void       *tuplestorestate;    /* private state of tuplestore.c */
 } MaterialState;
index 641555b70686e6e4a4049c8eda8379375a539c42..138741e07a8b7dfaeb105f3f5e893fa7e58a0d6a 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.86 2007/05/04 01:13:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.87 2007/05/21 17:57:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,6 +75,7 @@ extern void cost_valuesscan(Path *path, PlannerInfo *root,
 extern void cost_sort(Path *path, PlannerInfo *root,
                  List *pathkeys, Cost input_cost, double tuples, int width,
                  double limit_tuples);
+extern bool sort_exceeds_work_mem(Sort *sort);
 extern void cost_material(Path *path,
                          Cost input_cost, double tuples, int width);
 extern void cost_agg(Path *path, PlannerInfo *root,
index 5916e9544d5991423f4b1e11779f417e7c7b2a06..ef1c072aefe5e4c3cd26f4a437ca03e322567a89 100644 (file)
@@ -22,7 +22,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/tuplestore.h,v 1.20 2007/01/05 22:20:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/tuplestore.h,v 1.21 2007/05/21 17:57:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,6 +46,8 @@ extern Tuplestorestate *tuplestore_begin_heap(bool randomAccess,
                                          bool interXact,
                                          int maxKBytes);
 
+extern void tuplestore_set_eflags(Tuplestorestate *state, int eflags);
+
 extern void tuplestore_puttupleslot(Tuplestorestate *state,
                                                TupleTableSlot *slot);
 extern void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple);
@@ -53,7 +55,6 @@ extern void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple);
 /* tuplestore_donestoring() used to be required, but is no longer used */
 #define tuplestore_donestoring(state)  ((void) 0)
 
-/* backwards scan is only allowed if randomAccess was specified 'true' */
 extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
                                                TupleTableSlot *slot);
 extern bool tuplestore_advance(Tuplestorestate *state, bool forward);