]> granicus.if.org Git - postgresql/commitdiff
Improve some O(N^2) behavior in window function evaluation.
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 13 Apr 2014 17:59:17 +0000 (13:59 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 13 Apr 2014 17:59:17 +0000 (13:59 -0400)
Repositioning the tuplestore seek pointer in window_gettupleslot() turns
out to be a very significant expense when the window frame is sizable and
the frame end can move.  To fix, introduce a tuplestore function for
skipping an arbitrary number of tuples in one call, parallel to the one we
introduced for tuplesort objects in commit 8d65da1f.  This reduces the cost
of window_gettupleslot() to O(1) if the tuplestore has not spilled to disk.
As in the previous commit, I didn't try to do any real optimization of
tuplestore_skiptuples for the case where the tuplestore has spilled to
disk.  There is probably no practical way to get the cost to less than O(N)
anyway, but perhaps someone can think of something later.

Also fix PersistHoldablePortal() to make use of this API now that we have
it.

Based on a suggestion by Dean Rasheed, though this turns out not to look
much like his patch.

src/backend/commands/portalcmds.c
src/backend/executor/nodeWindowAgg.c
src/backend/utils/sort/tuplestore.c
src/include/utils/tuplestore.h

index 1f50993621faa3da8ce46befed7da7746112c7c4..e7c681ab7f428d6dcd994c5d7c7f21fab7708730 100644 (file)
@@ -389,26 +389,22 @@ PersistHoldablePortal(Portal portal)
                FreeQueryDesc(queryDesc);
 
                /*
-                * Set the position in the result set: ideally, this could be
-                * implemented by just skipping straight to the tuple # that we need
-                * to be at, but the tuplestore API doesn't support that. So we start
-                * at the beginning of the tuplestore and iterate through it until we
-                * reach where we need to be.  FIXME someday?  (Fortunately, the
-                * typical case is that we're supposed to be at or near the start of
-                * the result set, so this isn't as bad as it sounds.)
+                * Set the position in the result set.
                 */
                MemoryContextSwitchTo(portal->holdContext);
 
                if (portal->atEnd)
                {
-                       /* we can handle this case even if posOverflow */
-                       while (tuplestore_advance(portal->holdStore, true))
+                       /*
+                        * We can handle this case even if posOverflow: just force the
+                        * tuplestore forward to its end.  The size of the skip request
+                        * here is arbitrary.
+                        */
+                       while (tuplestore_skiptuples(portal->holdStore, 1000000, true))
                                 /* continue */ ;
                }
                else
                {
-                       long            store_pos;
-
                        if (portal->posOverflow)        /* oops, cannot trust portalPos */
                                ereport(ERROR,
                                                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
@@ -416,11 +412,10 @@ PersistHoldablePortal(Portal portal)
 
                        tuplestore_rescan(portal->holdStore);
 
-                       for (store_pos = 0; store_pos < portal->portalPos; store_pos++)
-                       {
-                               if (!tuplestore_advance(portal->holdStore, true))
-                                       elog(ERROR, "unexpected end of tuple stream");
-                       }
+                       if (!tuplestore_skiptuples(portal->holdStore,
+                                                                          portal->portalPos,
+                                                                          true))
+                               elog(ERROR, "unexpected end of tuple stream");
                }
        }
        PG_CATCH();
index 046637fb0924ab8c916da7414883a6416f45b776..2fcc630a92549088a132411916604dab9713435b 100644 (file)
@@ -2371,31 +2371,56 @@ window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot)
        tuplestore_select_read_pointer(winstate->buffer, winobj->readptr);
 
        /*
-        * There's no API to refetch the tuple at the current position. We have to
-        * move one tuple forward, and then one backward.  (We don't do it the
-        * other way because we might try to fetch the row before our mark, which
-        * isn't allowed.)  XXX this case could stand to be optimized.
+        * Advance or rewind until we are within one tuple of the one we want.
         */
-       if (winobj->seekpos == pos)
+       if (winobj->seekpos < pos - 1)
        {
+               if (!tuplestore_skiptuples(winstate->buffer,
+                                                                  pos - 1 - winobj->seekpos,
+                                                                  true))
+                       elog(ERROR, "unexpected end of tuplestore");
+               winobj->seekpos = pos - 1;
+       }
+       else if (winobj->seekpos > pos + 1)
+       {
+               if (!tuplestore_skiptuples(winstate->buffer,
+                                                                  winobj->seekpos - (pos + 1),
+                                                                  false))
+                       elog(ERROR, "unexpected end of tuplestore");
+               winobj->seekpos = pos + 1;
+       }
+       else if (winobj->seekpos == pos)
+       {
+               /*
+                * There's no API to refetch the tuple at the current position.  We
+                * have to move one tuple forward, and then one backward.  (We don't
+                * do it the other way because we might try to fetch the row before
+                * our mark, which isn't allowed.)  XXX this case could stand to be
+                * optimized.
+                */
                tuplestore_advance(winstate->buffer, true);
                winobj->seekpos++;
        }
 
-       while (winobj->seekpos > pos)
+       /*
+        * Now we should be on the tuple immediately before or after the one we
+        * want, so just fetch forwards or backwards as appropriate.
+        */
+       if (winobj->seekpos > pos)
        {
                if (!tuplestore_gettupleslot(winstate->buffer, false, true, slot))
                        elog(ERROR, "unexpected end of tuplestore");
                winobj->seekpos--;
        }
-
-       while (winobj->seekpos < pos)
+       else
        {
                if (!tuplestore_gettupleslot(winstate->buffer, true, true, slot))
                        elog(ERROR, "unexpected end of tuplestore");
                winobj->seekpos++;
        }
 
+       Assert(winobj->seekpos == pos);
+
        MemoryContextSwitchTo(oldcontext);
 
        return true;
@@ -2478,16 +2503,20 @@ WinSetMarkPosition(WindowObject winobj, int64 markpos)
        if (markpos < winobj->markpos)
                elog(ERROR, "cannot move WindowObject's mark position backward");
        tuplestore_select_read_pointer(winstate->buffer, winobj->markptr);
-       while (markpos > winobj->markpos)
+       if (markpos > winobj->markpos)
        {
-               tuplestore_advance(winstate->buffer, true);
-               winobj->markpos++;
+               tuplestore_skiptuples(winstate->buffer,
+                                                         markpos - winobj->markpos,
+                                                         true);
+               winobj->markpos = markpos;
        }
        tuplestore_select_read_pointer(winstate->buffer, winobj->readptr);
-       while (markpos > winobj->seekpos)
+       if (markpos > winobj->seekpos)
        {
-               tuplestore_advance(winstate->buffer, true);
-               winobj->seekpos++;
+               tuplestore_skiptuples(winstate->buffer,
+                                                         markpos - winobj->seekpos,
+                                                         true);
+               winobj->seekpos = markpos;
        }
 }
 
index bcba30e5981d51c6cfa062a9193f9c36a5567c15..a5a56be91b2515bf7db618e157c8527a3bd08f9f 100644 (file)
@@ -57,6 +57,7 @@
 #include "access/htup_details.h"
 #include "commands/tablespace.h"
 #include "executor/executor.h"
+#include "miscadmin.h"
 #include "storage/buffile.h"
 #include "utils/memutils.h"
 #include "utils/resowner.h"
@@ -1065,8 +1066,7 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
  * tuplestore_advance - exported function to adjust position without fetching
  *
  * We could optimize this case to avoid palloc/pfree overhead, but for the
- * moment it doesn't seem worthwhile.  (XXX this probably needs to be
- * reconsidered given the needs of window functions.)
+ * moment it doesn't seem worthwhile.
  */
 bool
 tuplestore_advance(Tuplestorestate *state, bool forward)
@@ -1088,6 +1088,75 @@ tuplestore_advance(Tuplestorestate *state, bool forward)
        }
 }
 
+/*
+ * Advance over N tuples in either forward or back direction,
+ * without returning any data. N<=0 is a no-op.
+ * Returns TRUE if successful, FALSE if ran out of tuples.
+ */
+bool
+tuplestore_skiptuples(Tuplestorestate *state, int64 ntuples, bool forward)
+{
+       TSReadPointer *readptr = &state->readptrs[state->activeptr];
+
+       Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));
+
+       if (ntuples <= 0)
+               return true;
+
+       switch (state->status)
+       {
+               case TSS_INMEM:
+                       if (forward)
+                       {
+                               if (readptr->eof_reached)
+                                       return false;
+                               if (state->memtupcount - readptr->current >= ntuples)
+                               {
+                                       readptr->current += ntuples;
+                                       return true;
+                               }
+                               readptr->current = state->memtupcount;
+                               readptr->eof_reached = true;
+                               return false;
+                       }
+                       else
+                       {
+                               if (readptr->eof_reached)
+                               {
+                                       readptr->current = state->memtupcount;
+                                       readptr->eof_reached = false;
+                                       ntuples--;
+                               }
+                               if (readptr->current - state->memtupdeleted > ntuples)
+                               {
+                                       readptr->current -= ntuples;
+                                       return true;
+                               }
+                               Assert(!state->truncated);
+                               readptr->current = state->memtupdeleted;
+                               return false;
+                       }
+                       break;
+
+               default:
+                       /* We don't currently try hard to optimize other cases */
+                       while (ntuples-- > 0)
+                       {
+                               void       *tuple;
+                               bool            should_free;
+
+                               tuple = tuplestore_gettuple(state, forward, &should_free);
+
+                               if (tuple == NULL)
+                                       return false;
+                               if (should_free)
+                                       pfree(tuple);
+                               CHECK_FOR_INTERRUPTS();
+                       }
+                       return true;
+       }
+}
+
 /*
  * dumptuples - remove tuples from memory and write to tape
  *
index 1e22feecee8d9d0ee2ce8c69625672aeab2c2148..16eca871cd1fe8c49eaa1056ed88c253b64b8b17 100644 (file)
@@ -72,8 +72,12 @@ extern bool tuplestore_in_memory(Tuplestorestate *state);
 
 extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
                                                bool copy, TupleTableSlot *slot);
+
 extern bool tuplestore_advance(Tuplestorestate *state, bool forward);
 
+extern bool tuplestore_skiptuples(Tuplestorestate *state,
+                                         int64 ntuples, bool forward);
+
 extern bool tuplestore_ateof(Tuplestorestate *state);
 
 extern void tuplestore_rescan(Tuplestorestate *state);