From dd979f66be20fc54aad06da743f788fbc505bbe1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 27 Jan 2000 18:11:50 +0000 Subject: [PATCH] Redesign DISTINCT ON as discussed in pgsql-sql 1/25/00: syntax is now SELECT DISTINCT ON (expr [, expr ...]) targetlist ... and there is a check to make sure that the user didn't specify an ORDER BY that's incompatible with the DISTINCT operation. Reimplement nodeUnique and nodeGroup to use the proper datatype-specific equality function for each column being compared --- they used to do bitwise comparisons or convert the data to text strings and strcmp(). (To add insult to injury, they'd look up the conversion functions once for each tuple...) Parse/plan representation of DISTINCT is now a list of SortClause nodes. initdb forced by querytree change... --- doc/src/sgml/ref/select.sgml | 29 ++- src/backend/executor/execTuples.c | 4 +- src/backend/executor/nodeGroup.c | 210 ++++++++++------ src/backend/executor/nodeUnique.c | 232 ++++-------------- src/backend/nodes/copyfuncs.c | 13 +- src/backend/nodes/equalfuncs.c | 14 +- src/backend/nodes/freefuncs.c | 9 +- src/backend/nodes/outfuncs.c | 12 +- src/backend/nodes/readfuncs.c | 39 +-- src/backend/optimizer/plan/createplan.c | 33 ++- src/backend/optimizer/plan/planner.c | 24 +- src/backend/optimizer/prep/prepkeyset.c | 4 +- src/backend/optimizer/prep/prepunion.c | 32 ++- src/backend/optimizer/util/tlist.c | 31 ++- src/backend/parser/analyze.c | 34 +-- src/backend/parser/gram.y | 31 +-- src/backend/parser/parse_clause.c | 175 +++++++++---- src/backend/rewrite/rewriteDefine.c | 4 +- src/backend/rewrite/rewriteHandler.c | 20 +- src/backend/rewrite/rewriteManip.c | 21 +- src/include/catalog/catversion.h | 4 +- src/include/executor/nodeGroup.h | 12 +- src/include/nodes/execnodes.h | 16 +- src/include/nodes/parsenodes.h | 20 +- src/include/nodes/plannodes.h | 10 +- src/include/optimizer/planmain.h | 4 +- src/include/optimizer/tlist.h | 5 +- src/include/parser/parse_clause.h | 6 +- 
src/test/regress/expected/errors.out | 8 +- .../regress/expected/select_distinct_on.out | 76 ++++-- src/test/regress/sql/errors.sql | 6 +- src/test/regress/sql/select_distinct_on.sql | 14 +- 32 files changed, 607 insertions(+), 545 deletions(-) diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 070f8b43d0..ed180ac91c 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -1,5 +1,5 @@ @@ -22,7 +22,7 @@ Postgres documentation 1999-07-20 -SELECT [ ALL | DISTINCT [ ON column ] ] +SELECT [ ALL | DISTINCT [ ON ( expression [, ...] ) ] ] expression [ AS name ] [, ...] [ INTO [ TEMPORARY | TEMP ] [ TABLE ] new_table ] [ FROM table [ alias ] [, ...] ] @@ -201,16 +201,29 @@ SELECT [ ALL | DISTINCT [ ON column - DISTINCT will eliminate all duplicate rows from the + DISTINCT will eliminate duplicate rows from the result. - DISTINCT ON column - will eliminate all duplicates in the specified column; this is - similar to using - GROUP BY column. - ALL will return all candidate rows, + ALL (the default) will return all candidate rows, including duplicates. + + DISTINCT ON eliminates rows that match on all the + specified expressions, keeping only the first row of each set of + duplicates. Note that "the first row" of each set is unpredictable + unless ORDER BY is used to ensure that the desired + row appears first. For example, + + SELECT DISTINCT ON (location) location, time, report + FROM weatherReports + ORDER BY location, time DESC; + + retrieves the most recent weather report for each location. But if + we had not used ORDER BY to force descending order of time values + for each location, we'd have gotten a report of unpredictable age + for each location. + + The GROUP BY clause allows a user to divide a table conceptually into groups. 
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index f600a78fd2..1cbae3519a 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execTuples.c,v 1.35 2000/01/26 05:56:22 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execTuples.c,v 1.36 2000/01/27 18:11:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -753,7 +753,7 @@ NodeGetResultTupleSlot(Plan *node) { UniqueState *uniquestate = ((Unique *) node)->uniquestate; - slot = uniquestate->cs_ResultTupleSlot; + slot = uniquestate->cstate.cs_ResultTupleSlot; } break; diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index 017929424b..cad023776d 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -9,12 +9,13 @@ * * DESCRIPTION * The Group node is designed for handling queries with a GROUP BY clause. - * It's outer plan must be a sort node. It assumes that the tuples it gets - * back from the outer plan is sorted in the order specified by the group - * columns. (ie. tuples from the same group are consecutive) + * Its outer plan must deliver tuples that are sorted in the order + * specified by the grouping columns (ie. tuples from the same group are + * consecutive). That way, we just have to compare adjacent tuples to + * locate group boundaries. 
* * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.32 2000/01/26 05:56:22 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.33 2000/01/27 18:11:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,13 +24,14 @@ #include "access/heapam.h" #include "access/printtup.h" +#include "catalog/pg_operator.h" #include "executor/executor.h" #include "executor/nodeGroup.h" +#include "parser/parse_oper.h" +#include "parser/parse_type.h" static TupleTableSlot *ExecGroupEveryTuple(Group *node); static TupleTableSlot *ExecGroupOneTuple(Group *node); -static bool sameGroup(HeapTuple oldslot, HeapTuple newslot, - int numCols, AttrNumber *grpColIdx, TupleDesc tupdesc); /* --------------------------------------- * ExecGroup - @@ -38,11 +40,11 @@ static bool sameGroup(HeapTuple oldslot, HeapTuple newslot, * tuplePerGroup is TRUE, every tuple from the same group will be * returned, followed by a NULL at the end of each group. This is * useful for Agg node which needs to aggregate over tuples of the same - * group. (eg. SELECT salary, count{*} FROM emp GROUP BY salary) + * group. (eg. SELECT salary, count(*) FROM emp GROUP BY salary) * * If tuplePerGroup is FALSE, only one tuple per group is returned. The * tuple returned contains only the group columns. NULL is returned only - * at the end when no more groups is present. This is useful when + * at the end when no more groups are present. This is useful when * the query does not involve aggregates. (eg. 
SELECT salary FROM emp * GROUP BY salary) * ------------------------------------------ @@ -66,6 +68,7 @@ ExecGroupEveryTuple(Group *node) GroupState *grpstate; EState *estate; ExprContext *econtext; + TupleDesc tupdesc; HeapTuple outerTuple = NULL; HeapTuple firsttuple; @@ -87,6 +90,8 @@ ExecGroupEveryTuple(Group *node) econtext = grpstate->csstate.cstate.cs_ExprContext; + tupdesc = ExecGetScanType(&grpstate->csstate); + /* if we haven't returned first tuple of new group yet ... */ if (grpstate->grp_useFirstTuple) { @@ -110,20 +115,25 @@ ExecGroupEveryTuple(Group *node) outerTuple = outerslot->val; firsttuple = grpstate->grp_firstTuple; - /* this should occur on the first call only */ if (firsttuple == NULL) + { + /* this should occur on the first call only */ grpstate->grp_firstTuple = heap_copytuple(outerTuple); + } else { - /* * Compare with first tuple and see if this tuple is of the * same group. */ - if (!sameGroup(firsttuple, outerTuple, - node->numCols, node->grpColIdx, - ExecGetScanType(&grpstate->csstate))) + if (! 
execTuplesMatch(firsttuple, outerTuple, + tupdesc, + node->numCols, node->grpColIdx, + grpstate->eqfunctions)) { + /* + * No; save the tuple to return it next time, and return NULL + */ grpstate->grp_useFirstTuple = TRUE; heap_freetuple(firsttuple); grpstate->grp_firstTuple = heap_copytuple(outerTuple); @@ -164,6 +174,7 @@ ExecGroupOneTuple(Group *node) GroupState *grpstate; EState *estate; ExprContext *econtext; + TupleDesc tupdesc; HeapTuple outerTuple = NULL; HeapTuple firsttuple; @@ -185,10 +196,12 @@ ExecGroupOneTuple(Group *node) econtext = node->grpstate->csstate.cstate.cs_ExprContext; + tupdesc = ExecGetScanType(&grpstate->csstate); + firsttuple = grpstate->grp_firstTuple; - /* this should occur on the first call only */ if (firsttuple == NULL) { + /* this should occur on the first call only */ outerslot = ExecProcNode(outerPlan(node), (Plan *) node); if (TupIsNull(outerslot)) { @@ -213,14 +226,14 @@ ExecGroupOneTuple(Group *node) } outerTuple = outerslot->val; - /* ---------------- - * Compare with first tuple and see if this tuple is of - * the same group. - * ---------------- + /* + * Compare with first tuple and see if this tuple is of the + * same group. */ - if ((!sameGroup(firsttuple, outerTuple, - node->numCols, node->grpColIdx, - ExecGetScanType(&grpstate->csstate)))) + if (! 
execTuplesMatch(firsttuple, outerTuple, + tupdesc, + node->numCols, node->grpColIdx, + grpstate->eqfunctions)) break; } @@ -311,6 +324,14 @@ ExecInitGroup(Group *node, EState *estate, Plan *parent) ExecAssignResultTypeFromTL((Plan *) node, &grpstate->csstate.cstate); ExecAssignProjectionInfo((Plan *) node, &grpstate->csstate.cstate); + /* + * Precompute fmgr lookup data for inner loop + */ + grpstate->eqfunctions = + execTuplesMatchPrepare(ExecGetScanType(&grpstate->csstate), + node->numCols, + node->grpColIdx); + return TRUE; } @@ -347,94 +368,121 @@ ExecEndGroup(Group *node) } } +void +ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent) +{ + GroupState *grpstate = node->grpstate; + + grpstate->grp_useFirstTuple = FALSE; + grpstate->grp_done = FALSE; + if (grpstate->grp_firstTuple != NULL) + { + heap_freetuple(grpstate->grp_firstTuple); + grpstate->grp_firstTuple = NULL; + } + + if (((Plan *) node)->lefttree && + ((Plan *) node)->lefttree->chgParam == NULL) + ExecReScan(((Plan *) node)->lefttree, exprCtxt, (Plan *) node); +} + /***************************************************************************** - * + * Code shared with nodeUnique.c *****************************************************************************/ /* - * code swiped from nodeUnique.c + * execTuplesMatch + * Return true if two tuples match in all the indicated fields. + * This is used to detect group boundaries in nodeGroup, and to + * decide whether two tuples are distinct or not in nodeUnique. 
+ * + * tuple1, tuple2: the tuples to compare + * tupdesc: tuple descriptor applying to both tuples + * numCols: the number of attributes to be examined + * matchColIdx: array of attribute column numbers + * eqFunctions: array of fmgr lookup info for the equality functions to use */ -static bool -sameGroup(HeapTuple oldtuple, - HeapTuple newtuple, - int numCols, - AttrNumber *grpColIdx, - TupleDesc tupdesc) +bool +execTuplesMatch(HeapTuple tuple1, + HeapTuple tuple2, + TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx, + FmgrInfo *eqfunctions) { - bool isNull1, - isNull2; - Datum attr1, - attr2; - char *val1, - *val2; int i; - AttrNumber att; - Oid typoutput, - typelem; - for (i = 0; i < numCols; i++) + /* + * We cannot report a match without checking all the fields, but we + * can report a non-match as soon as we find unequal fields. So, + * start comparing at the last field (least significant sort key). + * That's the most likely to be different... + */ + for (i = numCols; --i >= 0; ) { - att = grpColIdx[i]; - getTypeOutAndElem((Oid) tupdesc->attrs[att - 1]->atttypid, - &typoutput, &typelem); - - attr1 = heap_getattr(oldtuple, + AttrNumber att = matchColIdx[i]; + Datum attr1, + attr2; + bool isNull1, + isNull2; + Datum equal; + + attr1 = heap_getattr(tuple1, att, tupdesc, &isNull1); - attr2 = heap_getattr(newtuple, + attr2 = heap_getattr(tuple2, att, tupdesc, &isNull2); - if (isNull1 == isNull2) - { - if (isNull1) /* both are null, they are equal */ - continue; + if (isNull1 != isNull2) + return FALSE; /* one null and one not; they aren't equal */ - val1 = fmgr(typoutput, attr1, typelem, - tupdesc->attrs[att - 1]->atttypmod); - val2 = fmgr(typoutput, attr2, typelem, - tupdesc->attrs[att - 1]->atttypmod); + if (isNull1) + continue; /* both are null, treat as equal */ - /* - * now, val1 and val2 are ascii representations so we can use - * strcmp for comparison - */ - if (strcmp(val1, val2) != 0) - { - pfree(val1); - pfree(val2); - return FALSE; - } - 
pfree(val1); - pfree(val2); - } - else - { - /* one is null and the other isn't, they aren't equal */ + /* Apply the type-specific equality function */ + + equal = (Datum) (*fmgr_faddr(& eqfunctions[i])) (attr1, attr2); + + if (DatumGetInt32(equal) == 0) return FALSE; - } } return TRUE; } -void -ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent) +/* + * execTuplesMatchPrepare + * Look up the equality functions needed for execTuplesMatch. + * The result is a palloc'd array. + */ +FmgrInfo * +execTuplesMatchPrepare(TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx) { - GroupState *grpstate = node->grpstate; + FmgrInfo *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); + int i; - grpstate->grp_useFirstTuple = FALSE; - grpstate->grp_done = FALSE; - if (grpstate->grp_firstTuple != NULL) + for (i = 0; i < numCols; i++) { - heap_freetuple(grpstate->grp_firstTuple); - grpstate->grp_firstTuple = NULL; + AttrNumber att = matchColIdx[i]; + Oid typid = tupdesc->attrs[att - 1]->atttypid; + Operator eq_operator; + Form_pg_operator pgopform; + + eq_operator = oper("=", typid, typid, true); + if (!HeapTupleIsValid(eq_operator)) + { + elog(ERROR, "Unable to identify an equality operator for type '%s'", + typeidTypeName(typid)); + } + pgopform = (Form_pg_operator) GETSTRUCT(eq_operator); + fmgr_info(pgopform->oprcode, & eqfunctions[i]); } - if (((Plan *) node)->lefttree && - ((Plan *) node)->lefttree->chgParam == NULL) - ExecReScan(((Plan *) node)->lefttree, exprCtxt, (Plan *) node); + return eqfunctions; } diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index 6078e0f68a..f9f1fe81ab 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.26 2000/01/26 05:56:24 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.27 2000/01/27 18:11:27 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -29,79 +29,14 @@ #include "access/heapam.h" #include "access/printtup.h" #include "executor/executor.h" +#include "executor/nodeGroup.h" #include "executor/nodeUnique.h" -/* ---------------------------------------------------------------- - * ExecIdenticalTuples - * - * This is a hack function used by ExecUnique to see if - * two tuples are identical. This should be provided - * by the heap tuple code but isn't. The real problem - * is that we assume we can byte compare tuples to determine - * if they are "equal". In fact, if we have user defined - * types there may be problems because it's possible that - * an ADT may have multiple representations with the - * same ADT value. -cim - * ---------------------------------------------------------------- - */ -static bool /* true if tuples are identical, false - * otherwise */ -ExecIdenticalTuples(TupleTableSlot *t1, TupleTableSlot *t2) -{ - HeapTuple h1; - HeapTuple h2; - char *d1; - char *d2; - int len; - - h1 = t1->val; - h2 = t2->val; - - /* ---------------- - * if tuples aren't the same length then they are - * obviously different (one may have null attributes). - * ---------------- - */ - if (h1->t_len != h2->t_len) - return false; - - /* ---------------- - * if the tuples have different header offsets then - * they are different. This will prevent us from returning - * true when comparing tuples of one attribute where one of - * two we're looking at is null (t_len - t_hoff == 0). - * THE t_len FIELDS CAN BE THE SAME IN THIS CASE!! - * ---------------- - */ - if (h1->t_data->t_hoff != h2->t_data->t_hoff) - return false; - - /* ---------------- - * ok, now get the pointers to the data and the - * size of the attribute portion of the tuple. 
- * ---------------- - */ - d1 = (char *) GETSTRUCT(h1); - d2 = (char *) GETSTRUCT(h2); - len = (int) h1->t_len - (int) h1->t_data->t_hoff; - - /* ---------------- - * byte compare the data areas and return the result. - * ---------------- - */ - if (memcmp(d1, d2, len) != 0) - return false; - - return true; -} - /* ---------------------------------------------------------------- * ExecUnique * * This is a very simple node which filters out duplicate * tuples from a stream of sorted tuples from a subplan. - * - * XXX see comments below regarding freeing tuples. * ---------------------------------------------------------------- */ TupleTableSlot * /* return: a tuple or NULL */ @@ -111,11 +46,7 @@ ExecUnique(Unique *node) TupleTableSlot *resultTupleSlot; TupleTableSlot *slot; Plan *outerPlan; - char *uniqueAttr; - AttrNumber uniqueAttrNum; TupleDesc tupDesc; - Oid typoutput, - typelem; /* ---------------- * get information from the node @@ -123,22 +54,8 @@ ExecUnique(Unique *node) */ uniquestate = node->uniquestate; outerPlan = outerPlan((Plan *) node); - resultTupleSlot = uniquestate->cs_ResultTupleSlot; - uniqueAttr = node->uniqueAttr; - uniqueAttrNum = node->uniqueAttrNum; - - if (uniqueAttr) - { - tupDesc = ExecGetResultType(uniquestate); - getTypeOutAndElem((Oid) tupDesc->attrs[uniqueAttrNum - 1]->atttypid, - &typoutput, &typelem); - } - else - { /* keep compiler quiet */ - tupDesc = NULL; - typoutput = InvalidOid; - typelem = InvalidOid; - } + resultTupleSlot = uniquestate->cstate.cs_ResultTupleSlot; + tupDesc = ExecGetResultType(& uniquestate->cstate); /* ---------------- * now loop, returning only non-duplicate tuples. @@ -157,83 +74,38 @@ ExecUnique(Unique *node) return NULL; /* ---------------- - * we use the result tuple slot to hold our saved tuples. - * if we haven't a saved tuple to compare our new tuple with, - * then we exit the loop. This new tuple as the saved tuple - * the next time we get here. + * Always return the first tuple from the subplan. 
* ---------------- */ - if (TupIsNull(resultTupleSlot)) + if (uniquestate->priorTuple == NULL) break; /* ---------------- - * now test if the new tuple and the previous + * Else test if the new tuple and the previously returned * tuple match. If so then we loop back and fetch * another new tuple from the subplan. * ---------------- */ - - if (uniqueAttr) - { - - /* - * to check equality, we check to see if the typoutput of the - * attributes are equal - */ - bool isNull1, - isNull2; - Datum attr1, - attr2; - char *val1, - *val2; - - attr1 = heap_getattr(slot->val, - uniqueAttrNum, tupDesc, &isNull1); - attr2 = heap_getattr(resultTupleSlot->val, - uniqueAttrNum, tupDesc, &isNull2); - - if (isNull1 == isNull2) - { - if (isNull1) /* both are null, they are equal */ - continue; - val1 = fmgr(typoutput, attr1, typelem, - tupDesc->attrs[uniqueAttrNum - 1]->atttypmod); - val2 = fmgr(typoutput, attr2, typelem, - tupDesc->attrs[uniqueAttrNum - 1]->atttypmod); - - /* - * now, val1 and val2 are ascii representations so we can - * use strcmp for comparison - */ - if (strcmp(val1, val2) == 0) /* they are equal */ - { - pfree(val1); - pfree(val2); - continue; - } - pfree(val1); - pfree(val2); - break; - } - else -/* one is null and the other isn't, they aren't equal */ - break; - - } - else - { - if (!ExecIdenticalTuples(slot, resultTupleSlot)) - break; - } - + if (! execTuplesMatch(slot->val, uniquestate->priorTuple, + tupDesc, + node->numCols, node->uniqColIdx, + uniquestate->eqfunctions)) + break; } /* ---------------- - * we have a new tuple different from the previous saved tuple - * so we save it in the saved tuple slot. We copy the tuple - * so we don't increment the buffer ref count. + * We have a new tuple different from the previous saved tuple (if any). + * Save it and return it. Note that we make two copies of the tuple: + * one to keep for our own future comparisons, and one to return to the + * caller. 
We need to copy the tuple returned by the subplan to avoid + * holding buffer refcounts, and we need our own copy because the caller + * may alter the resultTupleSlot (eg via ExecRemoveJunk). * ---------------- */ + if (uniquestate->priorTuple != NULL) + heap_freetuple(uniquestate->priorTuple); + uniquestate->priorTuple = heap_copytuple(slot->val); + ExecStoreTuple(heap_copytuple(slot->val), resultTupleSlot, InvalidBuffer, @@ -254,7 +126,6 @@ ExecInitUnique(Unique *node, EState *estate, Plan *parent) { UniqueState *uniquestate; Plan *outerPlan; - char *uniqueAttr; /* ---------------- * assign execution state to node @@ -268,10 +139,10 @@ ExecInitUnique(Unique *node, EState *estate, Plan *parent) */ uniquestate = makeNode(UniqueState); node->uniquestate = uniquestate; - uniqueAttr = node->uniqueAttr; + uniquestate->priorTuple = NULL; /* ---------------- - * Miscellanious initialization + * Miscellaneous initialization * * + assign node's base_id * + assign debugging hooks and @@ -280,14 +151,14 @@ ExecInitUnique(Unique *node, EState *estate, Plan *parent) * they never call ExecQual or ExecTargetList. 
* ---------------- */ - ExecAssignNodeBaseInfo(estate, uniquestate, parent); + ExecAssignNodeBaseInfo(estate, & uniquestate->cstate, parent); #define UNIQUE_NSLOTS 1 /* ------------ * Tuple table initialization * ------------ */ - ExecInitResultTupleSlot(estate, uniquestate); + ExecInitResultTupleSlot(estate, & uniquestate->cstate); /* ---------------- * then initialize outer plan @@ -301,31 +172,17 @@ ExecInitUnique(Unique *node, EState *estate, Plan *parent) * projection info for this node appropriately * ---------------- */ - ExecAssignResultTypeFromOuterPlan((Plan *) node, uniquestate); - uniquestate->cs_ProjInfo = NULL; - - if (uniqueAttr) - { - TupleDesc tupDesc; - int i = 0; - - tupDesc = ExecGetResultType(uniquestate); - - /* - * the parser should have ensured that uniqueAttr is a legal - * attribute name - */ - while (strcmp(NameStr(tupDesc->attrs[i]->attname), uniqueAttr) != 0) - i++; - node->uniqueAttrNum = i + 1; /* attribute numbers start from 1 */ - } - else - node->uniqueAttrNum = InvalidAttrNumber; + ExecAssignResultTypeFromOuterPlan((Plan *) node, & uniquestate->cstate); + uniquestate->cstate.cs_ProjInfo = NULL; - /* ---------------- - * all done. 
- * ---------------- + /* + * Precompute fmgr lookup data for inner loop */ + uniquestate->eqfunctions = + execTuplesMatchPrepare(ExecGetResultType(& uniquestate->cstate), + node->numCols, + node->uniqColIdx); + return TRUE; } @@ -347,11 +204,17 @@ ExecCountSlotsUnique(Unique *node) void ExecEndUnique(Unique *node) { - UniqueState *uniquestate; + UniqueState *uniquestate = node->uniquestate; - uniquestate = node->uniquestate; ExecEndNode(outerPlan((Plan *) node), (Plan *) node); - ExecClearTuple(uniquestate->cs_ResultTupleSlot); + + /* clean up tuple table */ + ExecClearTuple(uniquestate->cstate.cs_ResultTupleSlot); + if (uniquestate->priorTuple != NULL) + { + heap_freetuple(uniquestate->priorTuple); + uniquestate->priorTuple = NULL; + } } @@ -360,7 +223,12 @@ ExecReScanUnique(Unique *node, ExprContext *exprCtxt, Plan *parent) { UniqueState *uniquestate = node->uniquestate; - ExecClearTuple(uniquestate->cs_ResultTupleSlot); + ExecClearTuple(uniquestate->cstate.cs_ResultTupleSlot); + if (uniquestate->priorTuple != NULL) + { + heap_freetuple(uniquestate->priorTuple); + uniquestate->priorTuple = NULL; + } /* * if chgParam of subnode is not null then plan will be re-scanned by diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 6c8d92355a..cb447b6371 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.102 2000/01/26 05:56:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.103 2000/01/27 18:11:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -532,11 +532,9 @@ _copyUnique(Unique *from) * copy remainder of node * ---------------- */ - if (from->uniqueAttr) - newnode->uniqueAttr = pstrdup(from->uniqueAttr); - else - newnode->uniqueAttr = NULL; - newnode->uniqueAttrNum = from->uniqueAttrNum; + newnode->numCols = from->numCols; + newnode->uniqColIdx 
= palloc(from->numCols * sizeof(AttrNumber)); + memcpy(newnode->uniqColIdx, from->uniqColIdx, from->numCols * sizeof(AttrNumber)); return newnode; } @@ -1427,8 +1425,7 @@ _copyQuery(Query *from) Node_Copy(from, newnode, qual); Node_Copy(from, newnode, rowMark); - if (from->uniqueFlag) - newnode->uniqueFlag = pstrdup(from->uniqueFlag); + Node_Copy(from, newnode, distinctClause); Node_Copy(from, newnode, sortClause); Node_Copy(from, newnode, groupClause); Node_Copy(from, newnode, havingQual); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 888f21543c..b4351462c0 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.56 2000/01/26 05:56:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.57 2000/01/27 18:11:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -593,16 +593,8 @@ _equalQuery(Query *a, Query *b) return false; if (!equal(a->rowMark, b->rowMark)) return false; - if (a->uniqueFlag && b->uniqueFlag) - { - if (strcmp(a->uniqueFlag, b->uniqueFlag) != 0) - return false; - } - else - { - if (a->uniqueFlag != b->uniqueFlag) - return false; - } + if (!equal(a->distinctClause, b->distinctClause)) + return false; if (!equal(a->sortClause, b->sortClause)) return false; if (!equal(a->groupClause, b->groupClause)) diff --git a/src/backend/nodes/freefuncs.c b/src/backend/nodes/freefuncs.c index cfaa119937..fc5c3506d8 100644 --- a/src/backend/nodes/freefuncs.c +++ b/src/backend/nodes/freefuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.32 2000/01/26 05:56:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.33 2000/01/27 18:11:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -427,8 +427,7 @@ 
_freeUnique(Unique *node) * free remainder of node * ---------------- */ - if (node->uniqueAttr) - pfree(node->uniqueAttr); + pfree(node->uniqColIdx); pfree(node); } @@ -1072,9 +1071,7 @@ _freeQuery(Query *node) freeObject(node->targetList); freeObject(node->qual); freeObject(node->rowMark); - if (node->uniqueFlag) - pfree(node->uniqueFlag); - + freeObject(node->distinctClause); freeObject(node->sortClause); freeObject(node->groupClause); freeObject(node->havingQual); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 7c7b7760a1..4475fa382b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.104 2000/01/26 05:56:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.105 2000/01/27 18:11:28 tgl Exp $ * * NOTES * Every (plan) node in POSTGRES has an associated "out" routine which @@ -256,12 +256,13 @@ _outQuery(StringInfo str, Query *node) _outToken(str, node->into); appendStringInfo(str, - " :isPortal %s :isBinary %s :isTemp %s :unionall %s :unique ", + " :isPortal %s :isBinary %s :isTemp %s :unionall %s :distinctClause ", node->isPortal ? "true" : "false", node->isBinary ? "true" : "false", node->isTemp ? "true" : "false", node->unionall ? 
"true" : "false"); - _outToken(str, node->uniqueFlag); + _outNode(str, node->distinctClause); + appendStringInfo(str, " :sortClause "); _outNode(str, node->sortClause); @@ -584,9 +585,10 @@ _outUnique(StringInfo str, Unique *node) appendStringInfo(str, " UNIQUE "); _outPlanInfo(str, (Plan *) node); - appendStringInfo(str, " :nonameid %u :keycount %d ", + appendStringInfo(str, " :nonameid %u :keycount %d :numCols %d ", node->nonameid, - node->keycount); + node->keycount, + node->numCols); } diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index cf63506a05..9dccbf5017 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.80 2000/01/26 05:56:32 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.81 2000/01/27 18:11:28 tgl Exp $ * * NOTES * Most of the read functions for plan nodes are tested. (In fact, they @@ -111,12 +111,8 @@ _readQuery() token = lsptok(NULL, &length); /* get unionall */ local_node->unionall = (token[0] == 't') ? true : false; - token = lsptok(NULL, &length); /* skip :uniqueFlag */ - token = lsptok(NULL, &length); /* get uniqueFlag */ - if (length == 0) - local_node->uniqueFlag = NULL; - else - local_node->uniqueFlag = debackslash(token, length); + token = lsptok(NULL, &length); /* skip :distinctClause */ + local_node->distinctClause = nodeRead(true); token = lsptok(NULL, &length); /* skip :sortClause */ local_node->sortClause = nodeRead(true); @@ -624,33 +620,6 @@ _readAgg() return local_node; } -/* ---------------- - * _readUnique - * - * For some reason, unique is a subclass of Noname. 
- */ -static Unique * -_readUnique() -{ - Unique *local_node; - char *token; - int length; - - local_node = makeNode(Unique); - - _getPlan((Plan *) local_node); - - token = lsptok(NULL, &length); /* eat :nonameid */ - token = lsptok(NULL, &length); /* get :nonameid */ - local_node->nonameid = atol(token); - - token = lsptok(NULL, &length); /* eat :keycount */ - token = lsptok(NULL, &length); /* get :keycount */ - local_node->keycount = atoi(token); - - return local_node; -} - /* ---------------- * _readHash * @@ -1847,8 +1816,6 @@ parsePlanString(void) return_value = _readSubLink(); else if (length == 3 && strncmp(token, "AGG", length) == 0) return_value = _readAgg(); - else if (length == 6 && strncmp(token, "UNIQUE", length) == 0) - return_value = _readUnique(); else if (length == 4 && strncmp(token, "HASH", length) == 0) return_value = _readHash(); else if (length == 6 && strncmp(token, "RESDOM", length) == 0) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ab69742482..70a7382164 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.81 2000/01/26 05:56:37 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.82 2000/01/27 18:11:30 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1342,16 +1342,19 @@ make_group(List *tlist, } /* - * The uniqueAttr argument must be a null-terminated string, - * either the name of the attribute to select unique on - * or "*" + * distinctList is a list of SortClauses, identifying the targetlist items + * that should be considered by the Unique filter. 
*/ Unique * -make_unique(List *tlist, Plan *lefttree, char *uniqueAttr) +make_unique(List *tlist, Plan *lefttree, List *distinctList) { Unique *node = makeNode(Unique); Plan *plan = &node->plan; + int numCols = length(distinctList); + int keyno = 0; + AttrNumber *uniqColIdx; + List *slitem; copy_plan_costsize(plan, lefttree); plan->state = (EState *) NULL; @@ -1361,10 +1364,22 @@ make_unique(List *tlist, Plan *lefttree, char *uniqueAttr) plan->righttree = NULL; node->nonameid = _NONAME_RELATION_ID_; node->keycount = 0; - if (strcmp(uniqueAttr, "*") == 0) - node->uniqueAttr = NULL; - else - node->uniqueAttr = pstrdup(uniqueAttr); + + /* convert SortClause list into array of attr indexes, as wanted by exec */ + Assert(numCols > 0); + uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + + foreach(slitem, distinctList) + { + SortClause *sortcl = (SortClause *) lfirst(slitem); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, tlist); + + uniqColIdx[keyno++] = tle->resdom->resno; + } + + node->numCols = numCols; + node->uniqColIdx = uniqColIdx; + return node; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0b711f3209..28483fd473 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.73 2000/01/26 05:56:37 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.74 2000/01/27 18:11:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -381,12 +381,12 @@ union_planner(Query *parse) } /* - * Finally, if there is a UNIQUE clause, add the UNIQUE node. + * Finally, if there is a DISTINCT clause, add the UNIQUE node. 
*/ - if (parse->uniqueFlag) + if (parse->distinctClause) { result_plan = (Plan *) make_unique(tlist, result_plan, - parse->uniqueFlag); + parse->distinctClause); } return result_plan; @@ -583,20 +583,8 @@ make_sortplan(List *tlist, List *sortcls, Plan *plannode) foreach(i, sortcls) { SortClause *sortcl = (SortClause *) lfirst(i); - Index refnumber = sortcl->tleSortGroupRef; - TargetEntry *tle = NULL; - Resdom *resdom; - List *l; - - foreach(l, temp_tlist) - { - tle = (TargetEntry *) lfirst(l); - if (tle->resdom->ressortgroupref == refnumber) - break; - } - if (l == NIL) - elog(ERROR, "make_sortplan: ORDER BY expression not found in targetlist"); - resdom = tle->resdom; + TargetEntry *tle = get_sortgroupclause_tle(sortcl, temp_tlist); + Resdom *resdom = tle->resdom; /* * Check for the possibility of duplicate order-by clauses --- the diff --git a/src/backend/optimizer/prep/prepkeyset.c b/src/backend/optimizer/prep/prepkeyset.c index 127505597a..fc192e6f28 100644 --- a/src/backend/optimizer/prep/prepkeyset.c +++ b/src/backend/optimizer/prep/prepkeyset.c @@ -104,9 +104,7 @@ transformKeySetQuery(Query *origNode) unionNode->isPortal = origNode->isPortal; unionNode->isBinary = origNode->isBinary; - if (origNode->uniqueFlag) - unionNode->uniqueFlag = pstrdup(origNode->uniqueFlag); - + Node_Copy(origNode, unionNode, distinctClause); Node_Copy(origNode, unionNode, sortClause); Node_Copy(origNode, unionNode, rtable); Node_Copy(origNode, unionNode, targetList); diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index a27305ef3e..4323b652e8 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.41 2000/01/26 05:56:39 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.42 2000/01/27 18:11:32 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -21,6 +21,7 @@ #include "optimizer/planmain.h" #include "optimizer/planner.h" #include "optimizer/prep.h" +#include "optimizer/tlist.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" #include "utils/lsyscache.h" @@ -131,7 +132,7 @@ plan_union_queries(Query *parse) !last_union_all_flag) { parse->sortClause = NIL; - parse->uniqueFlag = NULL; + parse->distinctClause = NIL; } parse->unionClause = NIL; /* prevent recursion */ @@ -183,17 +184,28 @@ plan_union_queries(Query *parse) if (!last_union_all_flag) { /* Need SELECT DISTINCT behavior to implement UNION. - * Set uniqueFlag properly, put back the held sortClause, - * and add any missing columns to the sort clause. + * Put back the held sortClause, add any missing columns to the + * sort clause, and set distinctClause properly. */ - parse->uniqueFlag = "*"; + List *slitem; + parse->sortClause = addAllTargetsToSortList(hold_sortClause, parse->targetList); + parse->distinctClause = NIL; + foreach(slitem, parse->sortClause) + { + SortClause *scl = (SortClause *) lfirst(slitem); + TargetEntry *tle = get_sortgroupclause_tle(scl, parse->targetList); + + if (! 
tle->resdom->resjunk) + parse->distinctClause = lappend(parse->distinctClause, + copyObject(scl)); + } } else { - /* needed so we don't take the flag from the first query */ - parse->uniqueFlag = NULL; + /* needed so we don't take SELECT DISTINCT from the first query */ + parse->distinctClause = NIL; } /* Make sure we don't try to apply the first query's grouping stuff @@ -314,9 +326,9 @@ plan_inherit_query(Relids relids, * Clear the sorting and grouping qualifications in the subquery, * so that sorting will only be done once after append */ - new_root->uniqueFlag = NULL; - new_root->sortClause = NULL; - new_root->groupClause = NULL; + new_root->distinctClause = NIL; + new_root->sortClause = NIL; + new_root->groupClause = NIL; new_root->havingQual = NULL; new_root->hasAggs = false; /* shouldn't be any left ... */ diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 8edf44190a..b4c745b25f 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.42 2000/01/26 05:56:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.43 2000/01/27 18:11:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -217,15 +217,16 @@ get_expr(TargetEntry *tle) } /* - * get_sortgroupclause_expr + * get_sortgroupclause_tle * Find the targetlist entry matching the given SortClause - * (or GroupClause) by ressortgroupref, and return its expression. + * (or GroupClause) by ressortgroupref, and return it. * * Because GroupClause is typedef'd as SortClause, either kind of * node can be passed without casting. 
*/ -Node * -get_sortgroupclause_expr(SortClause *sortClause, List *targetList) +TargetEntry * +get_sortgroupclause_tle(SortClause *sortClause, + List *targetList) { Index refnumber = sortClause->tleSortGroupRef; List *l; @@ -235,9 +236,25 @@ get_sortgroupclause_expr(SortClause *sortClause, List *targetList) TargetEntry *tle = (TargetEntry *) lfirst(l); if (tle->resdom->ressortgroupref == refnumber) - return tle->expr; + return tle; } - elog(ERROR, "get_sortgroupclause_expr: ORDER/GROUP BY expression not found in targetlist"); + elog(ERROR, "get_sortgroupclause_tle: ORDER/GROUP BY expression not found in targetlist"); return NULL; /* keep compiler quiet */ } + +/* + * get_sortgroupclause_expr + * Find the targetlist entry matching the given SortClause + * (or GroupClause) by ressortgroupref, and return its expression. + * + * Because GroupClause is typedef'd as SortClause, either kind of + * node can be passed without casting. + */ +Node * +get_sortgroupclause_expr(SortClause *sortClause, List *targetList) +{ + TargetEntry *tle = get_sortgroupclause_tle(sortClause, targetList); + + return tle->expr; +} diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 48178bcb21..fd3dda8f17 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: analyze.c,v 1.133 2000/01/26 05:56:41 momjian Exp $ + * $Id: analyze.c,v 1.134 2000/01/27 18:11:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -247,7 +247,7 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt) makeRangeTable(pstate, NULL, NULL); setTargetTable(pstate, stmt->relname); - qry->uniqueFlag = NULL; + qry->distinctClause = NIL; /* fix where clause */ qry->qual = transformWhereClause(pstate, stmt->whereClause, NULL); @@ -296,8 +296,6 @@ transformInsertStmt(ParseState *pstate, 
InsertStmt *stmt) /* set up a range table --- note INSERT target is not in it yet */ makeRangeTable(pstate, stmt->fromClause, &fromQual); - qry->uniqueFlag = stmt->unique; - qry->targetList = transformTargetList(pstate, stmt->targetList); qry->qual = transformWhereClause(pstate, stmt->whereClause, fromQual); @@ -311,13 +309,13 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) stmt->groupClause, qry->targetList); - /* An InsertStmt has no sortClause, but we still call - * transformSortClause because it also handles uniqueFlag. - */ - qry->sortClause = transformSortClause(pstate, - NIL, - qry->targetList, - qry->uniqueFlag); + /* An InsertStmt has no sortClause */ + qry->sortClause = NIL; + + qry->distinctClause = transformDistinctClause(pstate, + stmt->distinctClause, + qry->targetList, + & qry->sortClause); qry->hasSubLinks = pstate->p_hasSubLinks; qry->hasAggs = pstate->p_hasAggs; @@ -1312,8 +1310,6 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) /* set up a range table */ makeRangeTable(pstate, stmt->fromClause, &fromQual); - qry->uniqueFlag = stmt->unique; - qry->into = stmt->into; qry->isTemp = stmt->istemp; qry->isPortal = FALSE; @@ -1333,8 +1329,12 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->sortClause = transformSortClause(pstate, stmt->sortClause, - qry->targetList, - qry->uniqueFlag); + qry->targetList); + + qry->distinctClause = transformDistinctClause(pstate, + stmt->distinctClause, + qry->targetList, + & qry->sortClause); qry->hasSubLinks = pstate->p_hasSubLinks; qry->hasAggs = pstate->p_hasAggs; @@ -1558,9 +1558,9 @@ CheckSelectForUpdate(Query *qry) { if (qry->unionClause != NULL) elog(ERROR, "SELECT FOR UPDATE is not allowed with UNION/INTERSECT/EXCEPT clause"); - if (qry->uniqueFlag != NULL) + if (qry->distinctClause != NIL) elog(ERROR, "SELECT FOR UPDATE is not allowed with DISTINCT clause"); - if (qry->groupClause != NULL) + if (qry->groupClause != NIL) elog(ERROR, "SELECT FOR UPDATE is not allowed with 
GROUP BY clause"); if (qry->hasAggs) elog(ERROR, "SELECT FOR UPDATE is not allowed with AGGREGATE"); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 924d26d26f..228cb73f3a 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.135 2000/01/26 05:56:41 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.136 2000/01/27 18:11:35 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -159,7 +159,7 @@ static Node *doNegate(Node *n); class, index_name, name, func_name, file_name, aggr_argtype %type <str> opt_id, - all_Op, MathOp, opt_name, opt_unique, + all_Op, MathOp, opt_name, OptUseOp, opt_class, SpecialRuleRelation %type <str> opt_level, opt_encoding @@ -168,7 +168,7 @@ static Node *doNegate(Node *n); %type <list> stmtblock, stmtmulti, result, relation_name_list, OptTableElementList, - OptInherit, definition, + OptInherit, definition, opt_distinct, opt_with, func_args, func_args_list, func_as, oper_argtypes, RuleActionList, RuleActionMulti, opt_column_list, columnList, opt_va_list, va_list, @@ -2843,7 +2843,7 @@ insert_rest: VALUES '(' target_list ')' { $$ = makeNode(InsertStmt); $$->cols = NULL; - $$->unique = NULL; + $$->distinctClause = NIL; $$->targetList = $3; $$->fromClause = NIL; $$->whereClause = NULL; @@ -2854,7 +2854,7 @@ insert_rest: VALUES '(' target_list ')' | DEFAULT VALUES { $$ = makeNode(InsertStmt); - $$->unique = NULL; + $$->distinctClause = NIL; $$->targetList = NIL; $$->fromClause = NIL; $$->whereClause = NULL; @@ -2873,7 +2873,7 @@ insert_rest: VALUES '(' target_list ')' elog(ERROR, "INSERT ... 
SELECT can't have ORDER BY"); $$ = makeNode(InsertStmt); $$->cols = NIL; - $$->unique = n->unique; + $$->distinctClause = n->distinctClause; $$->targetList = n->targetList; $$->fromClause = n->fromClause; $$->whereClause = n->whereClause; @@ -2888,7 +2888,7 @@ insert_rest: VALUES '(' target_list ')' { $$ = makeNode(InsertStmt); $$->cols = $2; - $$->unique = NULL; + $$->distinctClause = NIL; $$->targetList = $6; $$->fromClause = NIL; $$->whereClause = NULL; @@ -2904,7 +2904,7 @@ insert_rest: VALUES '(' target_list ')' elog(ERROR, "INSERT ... SELECT can't have ORDER BY"); $$ = makeNode(InsertStmt); $$->cols = $2; - $$->unique = n->unique; + $$->distinctClause = n->distinctClause; $$->targetList = n->targetList; $$->fromClause = n->fromClause; $$->whereClause = n->whereClause; @@ -3189,12 +3189,12 @@ select_clause: '(' select_clause ')' } ; -SubSelect: SELECT opt_unique target_list +SubSelect: SELECT opt_distinct target_list result from_clause where_clause group_clause having_clause { SelectStmt *n = makeNode(SelectStmt); - n->unique = $2; + n->distinctClause = $2; n->unionall = FALSE; n->targetList = $3; /* This is new: Subselects support the INTO clause @@ -3230,10 +3230,13 @@ opt_all: ALL { $$ = TRUE; } | /*EMPTY*/ { $$ = FALSE; } ; -opt_unique: DISTINCT { $$ = "*"; } - | DISTINCT ON ColId { $$ = $3; } - | ALL { $$ = NULL; } - | /*EMPTY*/ { $$ = NULL; } +/* We use (NIL) as a placeholder to indicate that all target expressions + * should be placed in the DISTINCT list during parsetree analysis. 
+ */ +opt_distinct: DISTINCT { $$ = lcons(NIL,NIL); } + | DISTINCT ON '(' expr_list ')' { $$ = $4; } + | ALL { $$ = NIL; } + | /*EMPTY*/ { $$ = NIL; } ; sort_clause: ORDER BY sortby_list { $$ = $3; } diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index ba2b5f8499..b22691fa3c 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.50 2000/01/26 05:56:42 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.51 2000/01/27 18:11:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,12 +28,12 @@ #define ORDER_CLAUSE 0 #define GROUP_CLAUSE 1 +#define DISTINCT_ON_CLAUSE 2 -static char *clauseText[] = {"ORDER", "GROUP"}; +static char *clauseText[] = {"ORDER BY", "GROUP BY", "DISTINCT ON"}; static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node, - List *tlist, int clause, - char *uniqFlag); + List *tlist, int clause); static void parseFromClause(ParseState *pstate, List *frmList, Node **qual); static char *transformTableEntry(ParseState *pstate, RangeVar *r); static List *addTargetToSortList(TargetEntry *tle, List *sortlist, @@ -359,14 +359,13 @@ parseFromClause(ParseState *pstate, List *frmList, Node **qual) * If no matching entry exists, one is created and appended to the target * list as a "resjunk" node. * - * node the ORDER BY or GROUP BY expression to be matched + * node the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched * tlist the existing target list (NB: this cannot be NIL, which is a * good thing since we'd be unable to append to it...) * clause identifies clause type for error messages. 
*/ static TargetEntry * -findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause, - char *uniqueFlag) +findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause) { TargetEntry *target_result = NULL; List *tl; @@ -407,7 +406,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause, if (target_result != NULL) { if (! equal(target_result->expr, tle->expr)) - elog(ERROR, "%s BY '%s' is ambiguous", + elog(ERROR, "%s '%s' is ambiguous", clauseText[clause], name); } else @@ -424,8 +423,8 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause, int targetlist_pos = 0; int target_pos; - if (nodeTag(val) != T_Integer) - elog(ERROR, "Non-integer constant in %s BY", clauseText[clause]); + if (! IsA(val, Integer)) + elog(ERROR, "Non-integer constant in %s", clauseText[clause]); target_pos = intVal(val); foreach(tl, tlist) { @@ -438,7 +437,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause, return tle; /* return the unique match */ } } - elog(ERROR, "%s BY position %d is not in target list", + elog(ERROR, "%s position %d is not in target list", clauseText[clause], target_pos); } @@ -462,13 +461,9 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause, /* * If no matches, construct a new target entry which is appended to - * the end of the target list. This target is set to be resjunk = - * TRUE so that it will not be projected into the final tuple. + * the end of the target list. This target is given resjunk = TRUE + * so that it will not be projected into the final tuple. 
*/ - if(clause == ORDER_CLAUSE && uniqueFlag) { - elog(ERROR, "ORDER BY columns must appear in SELECT DISTINCT target list"); - } - target_result = transformTargetEntry(pstate, node, expr, NULL, true); lappend(tlist, target_result); @@ -492,7 +487,7 @@ transformGroupClause(ParseState *pstate, List *grouplist, List *targetlist) TargetEntry *tle; tle = findTargetlistEntry(pstate, lfirst(gl), - targetlist, GROUP_CLAUSE, NULL); + targetlist, GROUP_CLAUSE); /* avoid making duplicate grouplist entries */ if (! exprIsInSortList(tle->expr, glist, targetlist)) @@ -514,74 +509,149 @@ transformGroupClause(ParseState *pstate, List *grouplist, List *targetlist) /* * transformSortClause - - * transform an Order By clause - * + * transform an ORDER BY clause */ List * transformSortClause(ParseState *pstate, List *orderlist, - List *targetlist, - char *uniqueFlag) + List *targetlist) { List *sortlist = NIL; List *olitem; - /* Transform all the explicit ORDER BY clauses */ - foreach(olitem, orderlist) { SortGroupBy *sortby = lfirst(olitem); TargetEntry *tle; tle = findTargetlistEntry(pstate, sortby->node, - targetlist, ORDER_CLAUSE, uniqueFlag); + targetlist, ORDER_CLAUSE); sortlist = addTargetToSortList(tle, sortlist, targetlist, sortby->useOp); } - /* If we have a DISTINCT clause, add any necessary entries to - * the sortlist to ensure that all the DISTINCT columns will be - * sorted. A subsequent UNIQUE pass will then do the right thing. - */ + return sortlist; +} - if (uniqueFlag) +/* + * transformDistinctClause - + * transform a DISTINCT or DISTINCT ON clause + * + * Since we may need to add items to the query's sortClause list, that list + * is passed by reference. We might also need to add items to the query's + * targetlist, but we assume that cannot be empty initially, so we can + * lappend to it even though the pointer is passed by value. 
+ */ +List * +transformDistinctClause(ParseState *pstate, List *distinctlist, + List *targetlist, List **sortClause) +{ + List *result = NIL; + List *slitem; + List *dlitem; + + /* No work if there was no DISTINCT clause */ + if (distinctlist == NIL) + return NIL; + + if (lfirst(distinctlist) == NIL) { - if (uniqueFlag[0] == '*') + /* We had SELECT DISTINCT */ + + /* + * All non-resjunk elements from target list that are not already + * in the sort list should be added to it. (We don't really care + * what order the DISTINCT fields are checked in, so we can leave + * the user's ORDER BY spec alone, and just add additional sort keys + * to it to ensure that all targetlist items get sorted.) + */ + *sortClause = addAllTargetsToSortList(*sortClause, targetlist); + + /* + * Now, DISTINCT list consists of all non-resjunk sortlist items. + * Actually, all the sortlist items had better be non-resjunk! + * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item + * that does not appear anywhere in the SELECT targetlist, and + * we can't implement that with only one sorting pass... + */ + foreach(slitem, *sortClause) { - /* - * concatenate all elements from target list that are not - * already in the sortby list - */ - sortlist = addAllTargetsToSortList(sortlist, targetlist); + SortClause *scl = (SortClause *) lfirst(slitem); + TargetEntry *tle = get_sortgroupclause_tle(scl, targetlist); + + if (tle->resdom->resjunk) + elog(ERROR, "For SELECT DISTINCT, ORDER BY expressions must appear in target list"); + else + result = lappend(result, copyObject(scl)); } - else + } + else + { + /* We had SELECT DISTINCT ON (expr, ...) */ + + /* + * If the user writes both DISTINCT ON and ORDER BY, then the two + * expression lists must match (until one or the other runs out). + * Otherwise the ORDER BY requires a different sort order than the + * DISTINCT does, and we can't implement that with only one sort pass + * (and if we do two passes, the results will be rather unpredictable). 
+ * However, it's OK to have more DISTINCT ON expressions than ORDER BY + * expressions; we can just add the extra DISTINCT values to the sort + * list, much as we did above for ordinary DISTINCT fields. + * + * Actually, it'd be OK for the common prefixes of the two lists to + * match in any order, but implementing that check seems like more + * trouble than it's worth. + */ + List *nextsortlist = *sortClause; + + foreach(dlitem, distinctlist) { - TargetEntry *tle = NULL; - char *uniqueAttrName = uniqueFlag; - List *i; + TargetEntry *tle; - /* only create sort clause with the specified unique attribute */ - foreach(i, targetlist) + tle = findTargetlistEntry(pstate, lfirst(dlitem), + targetlist, DISTINCT_ON_CLAUSE); + + if (nextsortlist != NIL) { - tle = (TargetEntry *) lfirst(i); - if (strcmp(tle->resdom->resname, uniqueAttrName) == 0) - break; + SortClause *scl = (SortClause *) lfirst(nextsortlist); + + if (tle->resdom->ressortgroupref != scl->tleSortGroupRef) + elog(ERROR, "SELECT DISTINCT ON expressions must match initial ORDER BY expressions"); + result = lappend(result, copyObject(scl)); + nextsortlist = lnext(nextsortlist); } - if (i == NIL) - elog(ERROR, "All fields in the UNIQUE ON clause must appear in the target list"); + else + { + *sortClause = addTargetToSortList(tle, *sortClause, + targetlist, NULL); + /* Probably, the tle should always have been added at the + * end of the sort list ... but search to be safe. 
+ */ + foreach(slitem, *sortClause) + { + SortClause *scl = (SortClause *) lfirst(slitem); - sortlist = addTargetToSortList(tle, sortlist, targetlist, NULL); + if (tle->resdom->ressortgroupref == scl->tleSortGroupRef) + { + result = lappend(result, copyObject(scl)); + break; + } + } + if (slitem == NIL) + elog(ERROR, "transformDistinctClause: failed to add DISTINCT ON clause to target list"); + } } } - return sortlist; + return result; } /* * addAllTargetsToSortList - * Make sure all targets in the targetlist are in the ORDER BY list, - * adding the not-yet-sorted ones to the end of the list. + * Make sure all non-resjunk targets in the targetlist are in the + * ORDER BY list, adding the not-yet-sorted ones to the end of the list. * This is typically used to help implement SELECT DISTINCT. * * Returns the updated ORDER BY list. @@ -595,7 +665,8 @@ addAllTargetsToSortList(List *sortlist, List *targetlist) { TargetEntry *tle = (TargetEntry *) lfirst(i); - sortlist = addTargetToSortList(tle, sortlist, targetlist, NULL); + if (! 
tle->resdom->resjunk) + sortlist = addTargetToSortList(tle, sortlist, targetlist, NULL); } return sortlist; } diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index b0ca5f522e..a3ce8d2e8c 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteDefine.c,v 1.41 2000/01/26 05:56:49 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteDefine.c,v 1.42 2000/01/27 18:11:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -312,7 +312,7 @@ DefineQueryRewrite(RuleStmt *stmt) /* * DISTINCT on view is not supported */ - if (query->uniqueFlag != NULL) + if (query->distinctClause != NIL) elog(ERROR, "DISTINCT not supported in views"); /* diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 358dba0a3e..57169d9336 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteHandler.c,v 1.66 2000/01/26 05:56:49 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteHandler.c,v 1.67 2000/01/27 18:11:37 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -536,8 +536,8 @@ modifyAggrefMakeSublink(Aggref *aggref, Query *parsetree) subquery->isBinary = FALSE; subquery->isTemp = FALSE; subquery->unionall = FALSE; - subquery->uniqueFlag = NULL; - subquery->sortClause = NULL; + subquery->distinctClause = NIL; + subquery->sortClause = NIL; subquery->rtable = lcons(copyObject(rte), NIL); subquery->targetList = lcons(tle, NIL); subquery->qual = modifyAggrefDropQual((Node *) parsetree->qual, @@ -1725,7 +1725,7 @@ check_targetlists_are_compatible(List *prev_target, List *current_target) * The operator tree is attached to 'intersectClause' (see rule * 
'SelectStmt' in gram.y) of the 'parsetree' given as an * argument. First we remember some clauses (the sortClause, the - * unique flag etc.) Then we translate the operator tree to DNF + * distinctClause etc.) Then we translate the operator tree to DNF * (disjunctive normal form) by 'cnfify'. (Note that 'cnfify' produces * CNF but as we exchanged ANDs with ORs in function A_Expr_to_Expr() * earlier we get DNF after exchanging ANDs and ORs again in the @@ -1736,8 +1736,8 @@ check_targetlists_are_compatible(List *prev_target, List *current_target) * union list is handed back but before that the remembered clauses * (sortClause etc) are attached to the new top Node (Note that the * new top Node can differ from the parsetree given as argument because of - * the translation to DNF. That's why we have to remember the sortClause or - * unique flag!) */ + * the translation to DNF. That's why we have to remember the sortClause + * and so on!) */ static Query * Except_Intersect_Rewrite(Query *parsetree) { @@ -1750,12 +1750,12 @@ Except_Intersect_Rewrite(Query *parsetree) *intersect, *intersectClause; List *union_list = NIL, - *sortClause; + *sortClause, + *distinctClause; List *left_expr, *right_expr, *resnames = NIL; char *op, - *uniqueFlag, *into; bool isBinary, isPortal, @@ -1806,7 +1806,7 @@ Except_Intersect_Rewrite(Query *parsetree) * node at the end of the function */ sortClause = parsetree->sortClause; - uniqueFlag = parsetree->uniqueFlag; + distinctClause = parsetree->distinctClause; into = parsetree->into; isBinary = parsetree->isBinary; isPortal = parsetree->isPortal; @@ -2009,7 +2009,7 @@ Except_Intersect_Rewrite(Query *parsetree) result->unionClause = lnext(union_list); /* Attach all the items remembered in the beginning of the function */ result->sortClause = sortClause; - result->uniqueFlag = uniqueFlag; + result->distinctClause = distinctClause; result->into = into; result->isPortal = isPortal; result->isBinary = isBinary; diff --git 
a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 4d2960433e..01f33ecf8f 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -7,13 +7,14 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteManip.c,v 1.43 2000/01/26 05:56:49 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteManip.c,v 1.44 2000/01/27 18:11:37 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "optimizer/clauses.h" +#include "optimizer/tlist.h" #include "parser/parsetree.h" #include "parser/parse_clause.h" #include "rewrite/rewriteManip.h" @@ -286,22 +287,10 @@ AddGroupClause(Query *parsetree, List *group_by, List *tlist) foreach(l, group_by) { GroupClause *groupclause = (GroupClause *) copyObject(lfirst(l)); - Index refnumber = groupclause->tleSortGroupRef; - TargetEntry *tle = NULL; - List *tl; + TargetEntry *tle = get_sortgroupclause_tle(groupclause, tlist); - /* Find and copy the groupclause's TLE in the old tlist */ - foreach(tl, tlist) - { - if (((TargetEntry *) lfirst(tl))->resdom->ressortgroupref == - refnumber) - { - tle = (TargetEntry *) copyObject(lfirst(tl)); - break; - } - } - if (tle == NULL) - elog(ERROR, "AddGroupClause(): GROUP BY entry not found in rules targetlist"); + /* copy the groupclause's TLE from the old tlist */ + tle = (TargetEntry *) copyObject(tle); /* The ressortgroupref number in the old tlist might be already * taken in the new tlist, so force assignment of a new number. 
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index e8e2f971b0..d6d8ff6ffb 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: catversion.h,v 1.12 2000/01/26 05:57:56 momjian Exp $ + * $Id: catversion.h,v 1.13 2000/01/27 18:11:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200001241 +#define CATALOG_VERSION_NO 200001271 #endif diff --git a/src/include/executor/nodeGroup.h b/src/include/executor/nodeGroup.h index afe510d5ea..dd104b0af9 100644 --- a/src/include/executor/nodeGroup.h +++ b/src/include/executor/nodeGroup.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nodeGroup.h,v 1.14 2000/01/26 23:48:05 tgl Exp $ + * $Id: nodeGroup.h,v 1.15 2000/01/27 18:11:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,4 +22,14 @@ extern int ExecCountSlotsGroup(Group *node); extern void ExecEndGroup(Group *node); extern void ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent); +extern bool execTuplesMatch(HeapTuple tuple1, + HeapTuple tuple2, + TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx, + FmgrInfo *eqfunctions); +extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx); + #endif /* NODEGROUP_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index a6a337087a..827bb75a9c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: 
execnodes.h,v 1.39 2000/01/26 05:58:15 momjian Exp $ + * $Id: execnodes.h,v 1.40 2000/01/27 18:11:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -621,6 +621,7 @@ typedef struct AggState typedef struct GroupState { CommonScanState csstate; /* its first field is NodeTag */ + FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */ bool grp_useFirstTuple; /* first tuple not processed yet */ bool grp_done; HeapTuple grp_firstTuple; @@ -663,9 +664,9 @@ typedef struct SortState * Unique nodes are used "on top of" sort nodes to discard * duplicate tuples returned from the sort phase. Basically * all it does is compare the current tuple from the subplan - * with the previously fetched tuple stored in OuterTuple and - * if the two are identical, then we just fetch another tuple - * from the sort and try again. + * with the previously fetched tuple stored in priorTuple. + * If the two are identical in all interesting fields, then + * we just fetch another tuple from the sort and try again. 
* * CommonState information * @@ -677,7 +678,12 @@ typedef struct SortState * ScanAttributes attribute numbers of interest in this tuple * ---------------- */ -typedef CommonState UniqueState; +typedef struct UniqueState +{ + CommonState cstate; /* its first field is NodeTag */ + FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */ + HeapTuple priorTuple; /* most recently returned tuple, or NULL */ +} UniqueState; /* ---------------- diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 1346ac8a04..288e7f96b8 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: parsenodes.h,v 1.96 2000/01/26 05:58:16 momjian Exp $ + * $Id: parsenodes.h,v 1.97 2000/01/27 18:11:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,7 +54,8 @@ typedef struct Query Node *qual; /* qualifications applied to tuples */ List *rowMark; /* list of RowMark entries */ - char *uniqueFlag; /* NULL, '*', or Unique attribute name */ + List *distinctClause; /* a list of SortClause's */ + List *sortClause; /* a list of SortClause's */ List *groupClause; /* a list of GroupClause's */ @@ -733,7 +734,8 @@ typedef struct InsertStmt { NodeTag type; char *relname; /* relation to insert into */ - char *unique; /* NULL, '*', or unique attribute name */ + List *distinctClause; /* NULL, list of DISTINCT ON exprs, or + * lcons(NIL,NIL) for all (SELECT DISTINCT) */ List *cols; /* names of the columns */ List *targetList; /* the target list (of ResTarget) */ List *fromClause; /* the from clause */ @@ -777,7 +779,8 @@ typedef struct UpdateStmt typedef struct SelectStmt { NodeTag type; - char *unique; /* NULL, '*', or unique attribute name */ + List *distinctClause; /* NULL, list of DISTINCT ON exprs, or + * lcons(NIL,NIL) for all (SELECT DISTINCT) */ 
char *into; /* name of table (for select into table) */ List *targetList; /* the target list (of ResTarget) */ List *fromClause; /* the from clause */ @@ -1135,6 +1138,13 @@ typedef struct RangeTblEntry * tleSortGroupRef must match ressortgroupref of exactly one Resdom of the * associated targetlist; that is the expression to be sorted (or grouped) by. * sortop is the OID of the ordering operator. + * + * SortClauses are also used to identify Resdoms that we will do a "Unique" + * filter step on (for SELECT DISTINCT and SELECT DISTINCT ON). The + * distinctClause list is simply a copy of the relevant members of the + * sortClause list. Note that distinctClause can be a subset of sortClause, + * but cannot have members not present in sortClause; and the members that + * do appear must be in the same order as in sortClause. */ typedef struct SortClause { @@ -1148,7 +1158,7 @@ typedef struct SortClause * representation of GROUP BY clauses * * GroupClause is exactly like SortClause except for the nodetag value - * (and it's probably not even really necessary to have two different + * (it's probably not even really necessary to have two different * nodetags...). We have routines that operate interchangeably on both. 
*/ typedef SortClause GroupClause; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 19216d3638..ff83431e58 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: plannodes.h,v 1.36 2000/01/26 05:58:16 momjian Exp $ + * $Id: plannodes.h,v 1.37 2000/01/27 18:11:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -265,7 +265,7 @@ typedef struct Group Plan plan; bool tuplePerGroup; /* what tuples to return (see above) */ int numCols; /* number of group columns */ - AttrNumber *grpColIdx; /* index into the target list */ + AttrNumber *grpColIdx; /* indexes into the target list */ GroupState *grpstate; } Group; @@ -314,10 +314,8 @@ typedef struct Unique Plan plan; /* noname node flattened out */ Oid nonameid; int keycount; - char *uniqueAttr; /* NULL if all attrs, or unique attribute - * name */ - AttrNumber uniqueAttrNum; /* attribute number of attribute to select - * distinct on */ + int numCols; /* number of columns to check for uniqueness */ + AttrNumber *uniqColIdx; /* indexes into the target list */ UniqueState *uniquestate; } Unique; diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 826c5dbe3c..340f54485c 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: planmain.h,v 1.36 2000/01/26 05:58:20 momjian Exp $ + * $Id: planmain.h,v 1.37 2000/01/27 18:11:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,7 +33,7 @@ extern Agg *make_agg(List *tlist, Plan *lefttree); extern Group *make_group(List *tlist, bool tuplePerGroup, int ngrp, AttrNumber *grpColIdx, Plan 
*lefttree); extern Noname *make_noname(List *tlist, List *pathkeys, Plan *subplan); -extern Unique *make_unique(List *tlist, Plan *lefttree, char *uniqueAttr); +extern Unique *make_unique(List *tlist, Plan *lefttree, List *distinctList); extern Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan); /* diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 986c47ee79..dba93c0d17 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: tlist.h,v 1.23 2000/01/26 05:58:21 momjian Exp $ + * $Id: tlist.h,v 1.24 2000/01/27 18:11:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,6 +28,9 @@ extern List *flatten_tlist(List *tlist); extern List *add_to_flat_tlist(List *tlist, List *vars); extern Var *get_expr(TargetEntry *tle); + +extern TargetEntry *get_sortgroupclause_tle(SortClause *sortClause, + List *targetList); extern Node *get_sortgroupclause_expr(SortClause *sortClause, List *targetList); diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index 5d0b2badf1..235a02bc4c 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: parse_clause.h,v 1.14 2000/01/26 05:58:26 momjian Exp $ + * $Id: parse_clause.h,v 1.15 2000/01/27 18:11:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,7 +23,9 @@ extern Node *transformWhereClause(ParseState *pstate, Node *where, extern List *transformGroupClause(ParseState *pstate, List *grouplist, List *targetlist); extern List *transformSortClause(ParseState *pstate, List *orderlist, - List *targetlist, char *uniqueFlag); + 
List *targetlist); +extern List *transformDistinctClause(ParseState *pstate, List *distinctlist, + List *targetlist, List **sortClause); extern List *addAllTargetsToSortList(List *sortlist, List *targetlist); extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); diff --git a/src/test/regress/expected/errors.out b/src/test/regress/expected/errors.out index 6a280f321a..61f1dfd012 100644 --- a/src/test/regress/expected/errors.out +++ b/src/test/regress/expected/errors.out @@ -29,12 +29,12 @@ ERROR: attribute 'nonesuch' not found -- bad attribute name on rhs of operator select * from pg_database where pg_database.datname = nonesuch; ERROR: attribute 'nonesuch' not found --- bad select distinct on syntax, distinct attribute missing -select distinct on foobar from pg_database; +-- bad select distinct on syntax, distinct attribute missing +select distinct on (foobar) from pg_database; ERROR: parser: parse error at or near "from" -- bad select distinct on syntax, distinct attribute not in target list -select distinct on foobar * from pg_database; -ERROR: All fields in the UNIQUE ON clause must appear in the target list +select distinct on (foobar) * from pg_database; +ERROR: attribute 'foobar' not found -- -- DELETE diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out index 067f74db4f..c4b03e95f7 100644 --- a/src/test/regress/expected/select_distinct_on.out +++ b/src/test/regress/expected/select_distinct_on.out @@ -1,18 +1,66 @@ -- -- SELECT_DISTINCT_ON -- -SELECT DISTINCT ON string4 two, string4, ten - FROM tmp - ORDER BY two using <, string4 using <, ten using <; - two | string4 | ten ------+---------+----- - 0 | AAAAxx | 0 - 0 | HHHHxx | 0 - 0 | OOOOxx | 0 - 0 | VVVVxx | 0 - 1 | AAAAxx | 1 - 1 | HHHHxx | 1 - 1 | OOOOxx | 1 - 1 | VVVVxx | 1 -(8 rows) +SELECT DISTINCT ON (string4) string4, two, ten + FROM tmp + ORDER BY string4 using <, two using >, ten using <; + string4 | two | ten 
+---------+-----+----- + AAAAxx | 1 | 1 + HHHHxx | 1 | 1 + OOOOxx | 1 | 1 + VVVVxx | 1 | 1 +(4 rows) + +-- this will fail due to conflict of ordering requirements +SELECT DISTINCT ON (string4, ten) string4, two, ten + FROM tmp + ORDER BY string4 using <, two using <, ten using <; +ERROR: SELECT DISTINCT ON expressions must match initial ORDER BY expressions +SELECT DISTINCT ON (string4, ten) string4, ten, two + FROM tmp + ORDER BY string4 using <, ten using >, two using <; + string4 | ten | two +---------+-----+----- + AAAAxx | 9 | 1 + AAAAxx | 8 | 0 + AAAAxx | 7 | 1 + AAAAxx | 6 | 0 + AAAAxx | 5 | 1 + AAAAxx | 4 | 0 + AAAAxx | 3 | 1 + AAAAxx | 2 | 0 + AAAAxx | 1 | 1 + AAAAxx | 0 | 0 + HHHHxx | 9 | 1 + HHHHxx | 8 | 0 + HHHHxx | 7 | 1 + HHHHxx | 6 | 0 + HHHHxx | 5 | 1 + HHHHxx | 4 | 0 + HHHHxx | 3 | 1 + HHHHxx | 2 | 0 + HHHHxx | 1 | 1 + HHHHxx | 0 | 0 + OOOOxx | 9 | 1 + OOOOxx | 8 | 0 + OOOOxx | 7 | 1 + OOOOxx | 6 | 0 + OOOOxx | 5 | 1 + OOOOxx | 4 | 0 + OOOOxx | 3 | 1 + OOOOxx | 2 | 0 + OOOOxx | 1 | 1 + OOOOxx | 0 | 0 + VVVVxx | 9 | 1 + VVVVxx | 8 | 0 + VVVVxx | 7 | 1 + VVVVxx | 6 | 0 + VVVVxx | 5 | 1 + VVVVxx | 4 | 0 + VVVVxx | 3 | 1 + VVVVxx | 2 | 0 + VVVVxx | 1 | 1 + VVVVxx | 0 | 0 +(40 rows) diff --git a/src/test/regress/sql/errors.sql b/src/test/regress/sql/errors.sql index cd43cb909b..ffcb5fcdb7 100644 --- a/src/test/regress/sql/errors.sql +++ b/src/test/regress/sql/errors.sql @@ -34,12 +34,12 @@ select * from pg_database where nonesuch = pg_database.datname; select * from pg_database where pg_database.datname = nonesuch; --- bad select distinct on syntax, distinct attribute missing -select distinct on foobar from pg_database; +-- bad select distinct on syntax, distinct attribute missing +select distinct on (foobar) from pg_database; -- bad select distinct on syntax, distinct attribute not in target list -select distinct on foobar * from pg_database; +select distinct on (foobar) * from pg_database; -- diff --git a/src/test/regress/sql/select_distinct_on.sql 
b/src/test/regress/sql/select_distinct_on.sql index 87001a0c08..54d98ca697 100644 --- a/src/test/regress/sql/select_distinct_on.sql +++ b/src/test/regress/sql/select_distinct_on.sql @@ -2,7 +2,15 @@ -- SELECT_DISTINCT_ON -- -SELECT DISTINCT ON string4 two, string4, ten - FROM tmp - ORDER BY two using <, string4 using <, ten using <; +SELECT DISTINCT ON (string4) string4, two, ten + FROM tmp + ORDER BY string4 using <, two using >, ten using <; +-- this will fail due to conflict of ordering requirements +SELECT DISTINCT ON (string4, ten) string4, two, ten + FROM tmp + ORDER BY string4 using <, two using <, ten using <; + +SELECT DISTINCT ON (string4, ten) string4, ten, two + FROM tmp + ORDER BY string4 using <, ten using >, two using <; -- 2.40.0