Reuse abbreviated keys in ordered [set] aggregates.

author Robert Haas <rhaas@postgresql.org>

Wed, 17 Feb 2016 10:10:00 +0000 (15:40 +0530)

committer Robert Haas <rhaas@postgresql.org>

Wed, 17 Feb 2016 10:10:00 +0000 (15:40 +0530)
author Robert Haas <rhaas@postgresql.org>
Wed, 17 Feb 2016 10:10:00 +0000 (15:40 +0530)
committer Robert Haas <rhaas@postgresql.org>
Wed, 17 Feb 2016 10:10:00 +0000 (15:40 +0530)
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c

index a309c446e37bb558f18ca73e6303cc607c7ddaca..8898b55d360a1de44cab14b69780c39806edc073 100644 (file)
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3063,7 +3063,7 @@ validate_index_heapscan(Relation heapRelation,
                         }
  
                         tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
-                                                                                                 &ts_val, &ts_isnull);
+                                                                                                 &ts_val, &ts_isnull, NULL);
                         Assert(tuplesort_empty || !ts_isnull);
                         if (!tuplesort_empty)
                         {
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index b5aac67489d7127cb0d92222fe8dfe32e35916f7..03aa20f61e0d4ec5b5b99af07ab5f4a5f0fba371 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -575,7 +575,8 @@ fetch_input_tuple(AggState *aggstate)
  
         if (aggstate->sort_in)
         {
-               if (!tuplesort_gettupleslot(aggstate->sort_in, true, aggstate->sort_slot))
+               if (!tuplesort_gettupleslot(aggstate->sort_in, true, aggstate->sort_slot,
+                                                                       NULL))
                         return NULL;
                 slot = aggstate->sort_slot;
         }
@@ -1076,6 +1077,8 @@ process_ordered_aggregate_single(AggState *aggstate,
         MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
         MemoryContext oldContext;
         bool            isDistinct = (pertrans->numDistinctCols > 0);
+       Datum           newAbbrevVal = (Datum) 0;
+       Datum           oldAbbrevVal = (Datum) 0;
         FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
         Datum      *newVal;
         bool       *isNull;
@@ -1095,7 +1098,7 @@ process_ordered_aggregate_single(AggState *aggstate,
          */
  
         while (tuplesort_getdatum(pertrans->sortstates[aggstate->current_set],
-                                                         true, newVal, isNull))
+                                                         true, newVal, isNull, &newAbbrevVal))
         {
                 /*
                  * Clear and select the working context for evaluation of the equality
@@ -1113,6 +1116,7 @@ process_ordered_aggregate_single(AggState *aggstate,
                         haveOldVal &&
                         ((oldIsNull && *isNull) ||
                          (!oldIsNull && !*isNull &&
+                         oldAbbrevVal == newAbbrevVal &&
                           DatumGetBool(FunctionCall2(&pertrans->equalfns[0],
                                                                                  oldVal, *newVal)))))
                 {
@@ -1128,6 +1132,7 @@ process_ordered_aggregate_single(AggState *aggstate,
                                 pfree(DatumGetPointer(oldVal));
                         /* and remember the new one for subsequent equality checks */
                         oldVal = *newVal;
+                       oldAbbrevVal = newAbbrevVal;
                         oldIsNull = *isNull;
                         haveOldVal = true;
                 }
@@ -1165,6 +1170,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
         TupleTableSlot *slot2 = pertrans->uniqslot;
         int                     numTransInputs = pertrans->numTransInputs;
         int                     numDistinctCols = pertrans->numDistinctCols;
+       Datum           newAbbrevVal = (Datum) 0;
+       Datum           oldAbbrevVal = (Datum) 0;
         bool            haveOldValue = false;
         int                     i;
  
@@ -1175,7 +1182,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
                 ExecClearTuple(slot2);
  
         while (tuplesort_gettupleslot(pertrans->sortstates[aggstate->current_set],
-                                                                 true, slot1))
+                                                                 true, slot1, &newAbbrevVal))
         {
                 /*
                  * Extract the first numTransInputs columns as datums to pass to the
@@ -1186,6 +1193,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
  
                 if (numDistinctCols == 0 ||
                         !haveOldValue ||
+                       newAbbrevVal != oldAbbrevVal ||
                         !execTuplesMatch(slot1, slot2,
                                                          numDistinctCols,
                                                          pertrans->sortColIdx,
@@ -1209,6 +1217,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
  
                                 slot2 = slot1;
                                 slot1 = tmpslot;
+                               /* avoid execTuplesMatch() calls by reusing abbreviated keys */
+                               oldAbbrevVal = newAbbrevVal;
                                 haveOldValue = true;
                         }
                 }
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c

index 102dbdfc12426f52dcbf4c6d1d399790d1e11574..a34dcc513582e1dd5fe7f7d32c0cdc6e31117f50 100644 (file)
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -137,7 +137,7 @@ ExecSort(SortState *node)
         slot = node->ss.ps.ps_ResultTupleSlot;
         (void) tuplesort_gettupleslot(tuplesortstate,
                                                                   ScanDirectionIsForward(dir),
-                                                                 slot);
+                                                                 slot, NULL);
         return slot;
  }
  
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c

index 57201fd03d4fc52598ed5936c32a4e33f4a101c4..fe44d5612955b40b1673232e5df6d814829eb37e 100644 (file)
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -453,7 +453,7 @@ percentile_disc_final(PG_FUNCTION_ARGS)
                         elog(ERROR, "missing row in percentile_disc");
         }
  
-       if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull))
+       if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, NULL))
                 elog(ERROR, "missing row in percentile_disc");
  
         /*
@@ -553,7 +553,7 @@ percentile_cont_final_common(FunctionCallInfo fcinfo,
         if (!tuplesort_skiptuples(osastate->sortstate, first_row, true))
                 elog(ERROR, "missing row in percentile_cont");
  
-       if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull))
+       if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull, NULL))
                 elog(ERROR, "missing row in percentile_cont");
         if (isnull)
                 PG_RETURN_NULL();
@@ -564,7 +564,7 @@ percentile_cont_final_common(FunctionCallInfo fcinfo,
         }
         else
         {
-               if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull))
+               if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull, NULL))
                         elog(ERROR, "missing row in percentile_cont");
  
                 if (isnull)
@@ -792,7 +792,7 @@ percentile_disc_multi_final(PG_FUNCTION_ARGS)
                                 if (!tuplesort_skiptuples(osastate->sortstate, target_row - rownum - 1, true))
                                         elog(ERROR, "missing row in percentile_disc");
  
-                               if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull))
+                               if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, NULL))
                                         elog(ERROR, "missing row in percentile_disc");
  
                                 rownum = target_row;
@@ -921,7 +921,8 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo,
                                 if (!tuplesort_skiptuples(osastate->sortstate, first_row - rownum - 1, true))
                                         elog(ERROR, "missing row in percentile_cont");
  
-                               if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull) || isnull)
+                               if (!tuplesort_getdatum(osastate->sortstate, true, &first_val,
+                                                                               &isnull, NULL) || isnull)
                                         elog(ERROR, "missing row in percentile_cont");
  
                                 rownum = first_row;
@@ -941,7 +942,8 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo,
                         /* Fetch second_row if needed */
                         if (second_row > rownum)
                         {
-                               if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull) || isnull)
+                               if (!tuplesort_getdatum(osastate->sortstate, true, &second_val,
+                                                                               &isnull, NULL) || isnull)
                                         elog(ERROR, "missing row in percentile_cont");
                                 rownum++;
                         }
@@ -1016,6 +1018,8 @@ mode_final(PG_FUNCTION_ARGS)
         int64           last_val_freq = 0;
         bool            last_val_is_mode = false;
         FmgrInfo   *equalfn;
+       Datum           abbrev_val = (Datum) 0;
+       Datum           last_abbrev_val = (Datum) 0;
         bool            shouldfree;
  
         Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
@@ -1042,7 +1046,7 @@ mode_final(PG_FUNCTION_ARGS)
         tuplesort_performsort(osastate->sortstate);
  
         /* Scan tuples and count frequencies */
-       while (tuplesort_getdatum(osastate->sortstate, true, &val, &isnull))
+       while (tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, &abbrev_val))
         {
                 /* we don't expect any nulls, but ignore them if found */
                 if (isnull)
@@ -1054,8 +1058,10 @@ mode_final(PG_FUNCTION_ARGS)
                         mode_val = last_val = val;
                         mode_freq = last_val_freq = 1;
                         last_val_is_mode = true;
+                       last_abbrev_val = abbrev_val;
                 }
-               else if (DatumGetBool(FunctionCall2(equalfn, val, last_val)))
+               else if (abbrev_val == last_abbrev_val &&
+                                DatumGetBool(FunctionCall2(equalfn, val, last_val)))
                 {
                         /* value equal to previous value, count it */
                         if (last_val_is_mode)
@@ -1078,6 +1084,8 @@ mode_final(PG_FUNCTION_ARGS)
                         if (shouldfree && !last_val_is_mode)
                                 pfree(DatumGetPointer(last_val));
                         last_val = val;
+                       /* avoid equality function calls by reusing abbreviated keys */
+                       last_abbrev_val = abbrev_val;
                         last_val_freq = 1;
                         last_val_is_mode = false;
                 }
@@ -1181,7 +1189,7 @@ hypothetical_rank_common(FunctionCallInfo fcinfo, int flag,
         tuplesort_performsort(osastate->sortstate);
  
         /* iterate till we find the hypothetical row */
-       while (tuplesort_gettupleslot(osastate->sortstate, true, slot))
+       while (tuplesort_gettupleslot(osastate->sortstate, true, slot, NULL))
         {
                 bool            isnull;
                 Datum           d = slot_getattr(slot, nargs + 1, &isnull);
@@ -1266,6 +1274,8 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
         int64           duplicate_count = 0;
         OSAPerGroupState *osastate;
         int                     numDistinctCols;
+       Datum           abbrevVal = (Datum) 0;
+       Datum           abbrevOld = (Datum) 0;
         AttrNumber *sortColIdx;
         FmgrInfo   *equalfns;
         TupleTableSlot *slot;
@@ -1342,7 +1352,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
         slot2 = extraslot;
  
         /* iterate till we find the hypothetical row */
-       while (tuplesort_gettupleslot(osastate->sortstate, true, slot))
+       while (tuplesort_gettupleslot(osastate->sortstate, true, slot, &abbrevVal))
         {
                 bool            isnull;
                 Datum           d = slot_getattr(slot, nargs + 1, &isnull);
@@ -1353,6 +1363,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
  
                 /* count non-distinct tuples */
                 if (!TupIsNull(slot2) &&
+                       abbrevVal == abbrevOld &&
                         execTuplesMatch(slot, slot2,
                                                         numDistinctCols,
                                                         sortColIdx,
@@ -1363,6 +1374,8 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
                 tmpslot = slot2;
                 slot2 = slot;
                 slot = tmpslot;
+               /* avoid execTuplesMatch() calls by reusing abbreviated keys */
+               abbrevOld = abbrevVal;
  
                 rank++;
  
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index a30e1707a79069f21ebdc439df4cfc9a804d4829..67d86ed83b48cd125d407c59d01a0e0dacb8095a 100644 (file)
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -1280,7 +1280,8 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
                  * converter it won't expect NULL values, and cost model is not
                  * required to account for NULL, so in that case we avoid calling
                  * converter and just set datum1 to zeroed representation (to be
-                * consistent).
+                * consistent, and to support cheap inequality tests for NULL
+                * abbreviated keys).
                  */
                 stup.datum1 = original;
         }
@@ -1349,7 +1350,11 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
  
         if (isNull || state->datumTypeByVal)
         {
-               stup.datum1 = val;
+               /*
+                * Set datum1 to zeroed representation for NULLs (to be consistent, and
+                * to support cheap inequality tests for NULL abbreviated keys).
+                */
+               stup.datum1 = !isNull ? val : (Datum) 0;
                 stup.isnull1 = isNull;
                 stup.tuple = NULL;              /* no separate storage */
         }
@@ -1866,10 +1871,17 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
   * Fetch the next tuple in either forward or back direction.
   * If successful, put tuple in slot and return TRUE; else, clear the slot
   * and return FALSE.
+ *
+ * Caller may optionally be passed back abbreviated value (on TRUE return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required.  Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality.  A
+ * NULL value in leading attribute will set abbreviated value to zeroed
+ * representation, which caller may rely on in abbreviated inequality check.
   */
  bool
  tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
-                                          TupleTableSlot *slot)
+                                          TupleTableSlot *slot, Datum *abbrev)
  {
         MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
         SortTuple       stup;
@@ -1882,6 +1894,10 @@ tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
  
         if (stup.tuple)
         {
+               /* Record abbreviated key for caller */
+               if (state->sortKeys->abbrev_converter && abbrev)
+                       *abbrev = stup.datum1;
+
                 ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, should_free);
                 return true;
         }
@@ -1937,10 +1953,17 @@ tuplesort_getindextuple(Tuplesortstate *state, bool forward,
   *
   * If the Datum is pass-by-ref type, the returned value is freshly palloc'd
   * and is now owned by the caller.
+ *
+ * Caller may optionally be passed back abbreviated value (on TRUE return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required.  Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality.  A
+ * NULL value will have a zeroed abbreviated value representation, which caller
+ * may rely on in abbreviated inequality check.
   */
  bool
  tuplesort_getdatum(Tuplesortstate *state, bool forward,
-                                  Datum *val, bool *isNull)
+                                  Datum *val, bool *isNull, Datum *abbrev)
  {
         MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
         SortTuple       stup;
@@ -1952,6 +1975,10 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward,
                 return false;
         }
  
+       /* Record abbreviated key for caller */
+       if (state->sortKeys->abbrev_converter && abbrev)
+               *abbrev = stup.datum1;
+
         if (stup.isnull1 || state->datumTypeByVal)
         {
                 *val = stup.datum1;
@@ -2232,21 +2259,6 @@ mergeruns(Tuplesortstate *state)
         Assert(state->status == TSS_BUILDRUNS);
         Assert(state->memtupcount == 0);
  
-       /*
-        * If we produced only one initial run (quite likely if the total data
-        * volume is between 1X and 2X workMem), we can just use that tape as the
-        * finished output, rather than doing a useless merge.  (This obvious
-        * optimization is not in Knuth's algorithm.)
-        */
-       if (state->currentRun == 1)
-       {
-               state->result_tape = state->tp_tapenum[state->destTape];
-               /* must freeze and rewind the finished output tape */
-               LogicalTapeFreeze(state->tapeset, state->result_tape);
-               state->status = TSS_SORTEDONTAPE;
-               return;
-       }
-
         if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL)
         {
                 /*
@@ -2263,6 +2275,21 @@ mergeruns(Tuplesortstate *state)
                 state->sortKeys->abbrev_full_comparator = NULL;
         }
  
+       /*
+        * If we produced only one initial run (quite likely if the total data
+        * volume is between 1X and 2X workMem), we can just use that tape as the
+        * finished output, rather than doing a useless merge.  (This obvious
+        * optimization is not in Knuth's algorithm.)
+        */
+       if (state->currentRun == 1)
+       {
+               state->result_tape = state->tp_tapenum[state->destTape];
+               /* must freeze and rewind the finished output tape */
+               LogicalTapeFreeze(state->tapeset, state->result_tape);
+               state->status = TSS_SORTEDONTAPE;
+               return;
+       }
+
         /* End of step D2: rewind all output tapes to prepare for merging */
         for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
                 LogicalTapeRewind(state->tapeset, tapenum, false);
@@ -3164,7 +3191,8 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
                  * converter it won't expect NULL values, and cost model is not
                  * required to account for NULL, so in that case we avoid calling
                  * converter and just set datum1 to zeroed representation (to be
-                * consistent).
+                * consistent, and to support cheap inequality tests for NULL
+                * abbreviated keys).
                  */
                 stup->datum1 = original;
         }
@@ -3406,7 +3434,8 @@ copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup)
                  * converter it won't expect NULL values, and cost model is not
                  * required to account for NULL, so in that case we avoid calling
                  * converter and just set datum1 to zeroed representation (to be
-                * consistent).
+                * consistent, and to support cheap inequality tests for NULL
+                * abbreviated keys).
                  */
                 stup->datum1 = original;
         }
@@ -3710,7 +3739,8 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
                  * converter it won't expect NULL values, and cost model is not
                  * required to account for NULL, so in that case we avoid calling
                  * converter and just set datum1 to zeroed representation (to be
-                * consistent).
+                * consistent, and to support cheap inequality tests for NULL
+                * abbreviated keys).
                  */
                 stup->datum1 = original;
         }
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h

index d31d9945d18dd52440a6213db9f699e2c88074be..5cecd6d1b8657fa1d03ca074b70d78ef0c6768b9 100644 (file)
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -93,13 +93,13 @@ extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
  extern void tuplesort_performsort(Tuplesortstate *state);
  
  extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
-                                          TupleTableSlot *slot);
+                                          TupleTableSlot *slot, Datum *abbrev);
  extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward,
                                            bool *should_free);
  extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward,
                                                 bool *should_free);
  extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
-                                  Datum *val, bool *isNull);
+                                  Datum *val, bool *isNull, Datum *abbrev);
  
  extern bool tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples,
                                          bool forward);
author	Robert Haas <rhaas@postgresql.org>
	Wed, 17 Feb 2016 10:10:00 +0000 (15:40 +0530)
committer	Robert Haas <rhaas@postgresql.org>
	Wed, 17 Feb 2016 10:10:00 +0000 (15:40 +0530)
src/backend/catalog/index.c		patch \| blob \| history
src/backend/executor/nodeAgg.c		patch \| blob \| history
src/backend/executor/nodeSort.c		patch \| blob \| history
src/backend/utils/adt/orderedsetaggs.c		patch \| blob \| history
src/backend/utils/sort/tuplesort.c		patch \| blob \| history
src/include/utils/tuplesort.h		patch \| blob \| history