From 951130475221562b44b0da575fac8470adb5b555 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2008 21:32:01 +0000 Subject: [PATCH] Rearrange the querytree representation of ORDER BY/GROUP BY/DISTINCT items as per my recent proposal: 1. Fold SortClause and GroupClause into a single node type SortGroupClause. We were already relying on them to be struct-equivalent, so using two node tags wasn't accomplishing much except to get in the way of comparing items with equal(). 2. Add an "eqop" field to SortGroupClause to carry the associated equality operator. This is cheap for the parser to get at the same time it's looking up the sort operator, and storing it eliminates the need for repeated not-so-cheap lookups during planning. In future this will also let us represent GROUP/DISTINCT operations on datatypes that have hash opclasses but no btree opclasses (ie, they have equality but no natural sort order). The previous representation simply didn't work for that, since its only indicator of comparison semantics was a sort operator. 3. Add a hasDistinctOn boolean to struct Query to explicitly record whether the distinctClause came from DISTINCT or DISTINCT ON. This allows removing some complicated and not 100% bulletproof code that attempted to figure that out from the distinctClause alone. This patch doesn't in itself create any new capability, but it's necessary infrastructure for future attempts to use hash-based grouping for DISTINCT and UNION/INTERSECT/EXCEPT. --- src/backend/catalog/dependency.c | 52 ++- src/backend/commands/analyze.c | 32 +- src/backend/executor/nodeAgg.c | 13 +- src/backend/nodes/copyfuncs.c | 29 +- src/backend/nodes/equalfuncs.c | 14 +- src/backend/nodes/outfuncs.c | 25 +- src/backend/nodes/readfuncs.c | 33 +- src/backend/optimizer/README | 6 +- src/backend/optimizer/path/allpaths.c | 17 +- src/backend/optimizer/path/equivclass.c | 4 +- src/backend/optimizer/path/pathkeys.c | 13 +- src/backend/optimizer/plan/createplan.c | 45 ++- src/backend/optimizer/plan/planagg.c | 11 +- src/backend/optimizer/plan/planner.c | 104 ++++- src/backend/optimizer/prep/prepunion.c | 22 +- src/backend/optimizer/util/clauses.c | 81 +--- src/backend/optimizer/util/pathnode.c | 41 +- src/backend/optimizer/util/tlist.c | 32 +- src/backend/parser/analyze.c | 28 +- src/backend/parser/parse_agg.c | 4 +- src/backend/parser/parse_clause.c | 497 +++++++++++++----------- src/backend/parser/parse_oper.c | 264 +++---------- src/backend/utils/adt/ruleutils.c | 14 +- src/backend/utils/cache/lsyscache.c | 46 ++- src/include/catalog/catversion.h | 4 +- src/include/nodes/nodes.h | 5 +- src/include/nodes/parsenodes.h | 88 +++-- src/include/nodes/primnodes.h | 18 +- src/include/optimizer/clauses.h | 5 +- src/include/optimizer/tlist.h | 8 +- src/include/parser/parse_clause.h | 13 +- src/include/parser/parse_oper.h | 13 +- src/include/utils/lsyscache.h | 6 +- 33 files changed, 747 insertions(+), 840 deletions(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 793071b0f0..d58b9d4a2f 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.76 2008/06/14 18:04:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.77 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1335,7 +1335,7 @@ find_expr_references_walker(Node *node, } return false; } - if (IsA(node, Const)) + else if (IsA(node, Const)) { Const *con = (Const *) node; Oid objoid; @@ -1408,7 +1408,7 @@ find_expr_references_walker(Node *node, } return false; } - if (IsA(node, Param)) + else if (IsA(node, Param)) { Param *param = (Param *) node; @@ -1416,7 +1416,7 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, param->paramtype, 0, context->addrs); } - if (IsA(node, FuncExpr)) + else if (IsA(node, FuncExpr)) { FuncExpr *funcexpr = (FuncExpr *) node; @@ -1424,7 +1424,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, OpExpr)) + else if (IsA(node, OpExpr)) { OpExpr *opexpr = (OpExpr *) node; @@ -1432,7 +1432,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, DistinctExpr)) + else if (IsA(node, DistinctExpr)) { DistinctExpr *distinctexpr = (DistinctExpr *) node; @@ -1440,7 +1440,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, ScalarArrayOpExpr)) + else if (IsA(node, ScalarArrayOpExpr)) { ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node; @@ -1448,7 +1448,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, NullIfExpr)) + else if (IsA(node, NullIfExpr)) { NullIfExpr *nullifexpr = (NullIfExpr *) node; @@ -1456,7 +1456,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, Aggref)) + else if (IsA(node, Aggref)) { Aggref *aggref = (Aggref *) node; @@ -1464,12 +1464,12 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (is_subplan(node)) + else if (is_subplan(node)) { /* Extra work needed here if we ever need this case */ elog(ERROR, "already-planned subqueries not supported"); } - if (IsA(node, RelabelType)) + else if (IsA(node, RelabelType)) { RelabelType *relab = (RelabelType *) node; @@ -1477,7 +1477,7 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, relab->resulttype, 0, context->addrs); } - if (IsA(node, CoerceViaIO)) + else if (IsA(node, CoerceViaIO)) { CoerceViaIO *iocoerce = (CoerceViaIO *) node; @@ -1485,7 +1485,7 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, iocoerce->resulttype, 0, context->addrs); } - if (IsA(node, ArrayCoerceExpr)) + else if (IsA(node, ArrayCoerceExpr)) { ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node; @@ -1496,7 +1496,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, ConvertRowtypeExpr)) + else if (IsA(node, ConvertRowtypeExpr)) { ConvertRowtypeExpr *cvt = (ConvertRowtypeExpr *) node; @@ -1504,14 +1504,14 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, cvt->resulttype, 0, context->addrs); } - if (IsA(node, RowExpr)) + else if (IsA(node, RowExpr)) { RowExpr *rowexpr = (RowExpr *) node; add_object_address(OCLASS_TYPE, rowexpr->row_typeid, 0, context->addrs); } - if (IsA(node, RowCompareExpr)) + else if (IsA(node, RowCompareExpr)) { RowCompareExpr *rcexpr = (RowCompareExpr *) node; ListCell *l; @@ -1528,14 +1528,25 @@ find_expr_references_walker(Node *node, } /* fall through to examine arguments */ } - if (IsA(node, CoerceToDomain)) + else if (IsA(node, CoerceToDomain)) { CoerceToDomain *cd = (CoerceToDomain *) node; add_object_address(OCLASS_TYPE, cd->resulttype, 0, context->addrs); } - if (IsA(node, Query)) + else if (IsA(node, SortGroupClause)) + { + SortGroupClause *sgc = (SortGroupClause *) node; + + add_object_address(OCLASS_OPERATOR, sgc->eqop, 0, + context->addrs); + if (OidIsValid(sgc->sortop)) + add_object_address(OCLASS_OPERATOR, sgc->sortop, 0, + context->addrs); + return false; + } + else if (IsA(node, Query)) { /* Recurse into RTE subquery or not-yet-planned sublink subquery */ Query *query = (Query *) node; @@ -1572,6 +1583,11 @@ find_expr_references_walker(Node *node, } } + /* query_tree_walker ignores ORDER BY etc, but we need those opers */ + find_expr_references_walker((Node *) query->sortClause, context); + find_expr_references_walker((Node *) query->groupClause, context); + find_expr_references_walker((Node *) query->distinctClause, context); + /* Examine substructure of query */ context->rtables = lcons(query->rtable, context->rtables); result = query_tree_walker(query, diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 4845e6a381..3b8423999a 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.123 2008/07/01 10:33:09 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.124 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1490,10 +1490,8 @@ static bool std_typanalyze(VacAttrStats *stats) { Form_pg_attribute attr = stats->attr; - Operator func_operator; - Oid eqopr = InvalidOid; - Oid eqfunc = InvalidOid; - Oid ltopr = InvalidOid; + Oid ltopr; + Oid eqopr; StdAnalyzeData *mystats; /* If the attstattarget column is negative, use the default value */ @@ -1501,29 +1499,19 @@ std_typanalyze(VacAttrStats *stats) if (attr->attstattarget < 0) attr->attstattarget = default_statistics_target; + /* Look for default "<" and "=" operators for column's type */ + get_sort_group_operators(attr->atttypid, + false, false, false, + <opr, &eqopr, NULL); + /* If column has no "=" operator, we can't do much of anything */ - func_operator = equality_oper(attr->atttypid, true); - if (func_operator != NULL) - { - eqopr = oprid(func_operator); - eqfunc = oprfuncid(func_operator); - ReleaseSysCache(func_operator); - } - if (!OidIsValid(eqfunc)) + if (!OidIsValid(eqopr)) return false; - /* Is there a "<" operator with suitable semantics? */ - func_operator = ordering_oper(attr->atttypid, true); - if (func_operator != NULL) - { - ltopr = oprid(func_operator); - ReleaseSysCache(func_operator); - } - /* Save the operator info for compute_stats routines */ mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData)); mystats->eqopr = eqopr; - mystats->eqfunc = eqfunc; + mystats->eqfunc = get_opcode(eqopr); mystats->ltopr = ltopr; stats->extra_data = mystats; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 00bd21ba6f..9bcff0f8df 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -61,7 +61,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.158 2008/05/12 00:00:49 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.159 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1496,7 +1496,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) if (aggref->aggdistinct) { - Oid eq_function; + Oid lt_opr; + Oid eq_opr; /* We don't implement DISTINCT aggs in the HASHED case */ Assert(node->aggstrategy != AGG_HASHED); @@ -1524,9 +1525,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) * record it in the Aggref node ... or at latest, do it in the * planner. */ - eq_function = equality_oper_funcid(inputTypes[0]); - fmgr_info(eq_function, &(peraggstate->equalfn)); - peraggstate->sortOperator = ordering_oper_opid(inputTypes[0]); + get_sort_group_operators(inputTypes[0], + true, true, false, + <_opr, &eq_opr, NULL); + fmgr_info(get_opcode(eq_opr), &(peraggstate->equalfn)); + peraggstate->sortOperator = lt_opr; peraggstate->sortstate = NULL; } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 2e1ce4cb0d..652c726a67 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.395 2008/07/16 01:30:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.396 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1542,24 +1542,13 @@ _copyFkConstraint(FkConstraint *from) return newnode; } -static SortClause * -_copySortClause(SortClause *from) +static SortGroupClause * +_copySortGroupClause(SortGroupClause *from) { - SortClause *newnode = makeNode(SortClause); - - COPY_SCALAR_FIELD(tleSortGroupRef); - COPY_SCALAR_FIELD(sortop); - COPY_SCALAR_FIELD(nulls_first); - - return newnode; -} - -static GroupClause * -_copyGroupClause(GroupClause *from) -{ - GroupClause *newnode = makeNode(GroupClause); + SortGroupClause *newnode = makeNode(SortGroupClause); COPY_SCALAR_FIELD(tleSortGroupRef); + COPY_SCALAR_FIELD(eqop); COPY_SCALAR_FIELD(sortop); COPY_SCALAR_FIELD(nulls_first); @@ -1861,6 +1850,7 @@ _copyQuery(Query *from) COPY_NODE_FIELD(intoClause); COPY_SCALAR_FIELD(hasAggs); COPY_SCALAR_FIELD(hasSubLinks); + COPY_SCALAR_FIELD(hasDistinctOn); COPY_NODE_FIELD(rtable); COPY_NODE_FIELD(jointree); COPY_NODE_FIELD(targetList); @@ -3586,11 +3576,8 @@ copyObject(void *from) case T_RangeTblEntry: retval = _copyRangeTblEntry(from); break; - case T_SortClause: - retval = _copySortClause(from); - break; - case T_GroupClause: - retval = _copyGroupClause(from); + case T_SortGroupClause: + retval = _copySortGroupClause(from); break; case T_RowMarkClause: retval = _copyRowMarkClause(from); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 41999226b6..4e011947ef 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.324 2008/07/16 01:30:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.325 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -756,6 +756,7 @@ _equalQuery(Query *a, Query *b) COMPARE_NODE_FIELD(intoClause); COMPARE_SCALAR_FIELD(hasAggs); COMPARE_SCALAR_FIELD(hasSubLinks); + COMPARE_SCALAR_FIELD(hasDistinctOn); COMPARE_NODE_FIELD(rtable); COMPARE_NODE_FIELD(jointree); COMPARE_NODE_FIELD(targetList); @@ -1885,9 +1886,10 @@ _equalRangeTblEntry(RangeTblEntry *a, RangeTblEntry *b) } static bool -_equalSortClause(SortClause *a, SortClause *b) +_equalSortGroupClause(SortGroupClause *a, SortGroupClause *b) { COMPARE_SCALAR_FIELD(tleSortGroupRef); + COMPARE_SCALAR_FIELD(eqop); COMPARE_SCALAR_FIELD(sortop); COMPARE_SCALAR_FIELD(nulls_first); @@ -2515,12 +2517,8 @@ equal(void *a, void *b) case T_RangeTblEntry: retval = _equalRangeTblEntry(a, b); break; - case T_SortClause: - retval = _equalSortClause(a, b); - break; - case T_GroupClause: - /* GroupClause is equivalent to SortClause */ - retval = _equalSortClause(a, b); + case T_SortGroupClause: + retval = _equalSortGroupClause(a, b); break; case T_RowMarkClause: retval = _equalRowMarkClause(a, b); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 9c0726e42e..66630ae661 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.328 2008/07/17 16:02:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.329 2008/08/02 21:31:59 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1732,6 +1732,7 @@ _outQuery(StringInfo str, Query *node) WRITE_NODE_FIELD(intoClause); WRITE_BOOL_FIELD(hasAggs); WRITE_BOOL_FIELD(hasSubLinks); + WRITE_BOOL_FIELD(hasDistinctOn); WRITE_NODE_FIELD(rtable); WRITE_NODE_FIELD(jointree); WRITE_NODE_FIELD(targetList); @@ -1747,21 +1748,12 @@ _outQuery(StringInfo str, Query *node) } static void -_outSortClause(StringInfo str, SortClause *node) +_outSortGroupClause(StringInfo str, SortGroupClause *node) { - WRITE_NODE_TYPE("SORTCLAUSE"); - - WRITE_UINT_FIELD(tleSortGroupRef); - WRITE_OID_FIELD(sortop); - WRITE_BOOL_FIELD(nulls_first); -} - -static void -_outGroupClause(StringInfo str, GroupClause *node) -{ - WRITE_NODE_TYPE("GROUPCLAUSE"); + WRITE_NODE_TYPE("SORTGROUPCLAUSE"); WRITE_UINT_FIELD(tleSortGroupRef); + WRITE_OID_FIELD(eqop); WRITE_OID_FIELD(sortop); WRITE_BOOL_FIELD(nulls_first); } @@ -2398,11 +2390,8 @@ _outNode(StringInfo str, void *obj) case T_Query: _outQuery(str, obj); break; - case T_SortClause: - _outSortClause(str, obj); - break; - case T_GroupClause: - _outGroupClause(str, obj); + case T_SortGroupClause: + _outSortGroupClause(str, obj); break; case T_RowMarkClause: _outRowMarkClause(str, obj); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index de1f1b5348..e7de4898c0 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.210 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.211 2008/08/02 21:31:59 tgl Exp $ * * NOTES * Path and Plan nodes do not have any readfuncs support, because we @@ -142,6 +142,7 @@ _readQuery(void) READ_NODE_FIELD(intoClause); READ_BOOL_FIELD(hasAggs); READ_BOOL_FIELD(hasSubLinks); + READ_BOOL_FIELD(hasDistinctOn); READ_NODE_FIELD(rtable); READ_NODE_FIELD(jointree); READ_NODE_FIELD(targetList); @@ -187,29 +188,15 @@ _readDeclareCursorStmt(void) } /* - * _readSortClause + * _readSortGroupClause */ -static SortClause * -_readSortClause(void) +static SortGroupClause * +_readSortGroupClause(void) { - READ_LOCALS(SortClause); - - READ_UINT_FIELD(tleSortGroupRef); - READ_OID_FIELD(sortop); - READ_BOOL_FIELD(nulls_first); - - READ_DONE(); -} - -/* - * _readGroupClause - */ -static GroupClause * -_readGroupClause(void) -{ - READ_LOCALS(GroupClause); + READ_LOCALS(SortGroupClause); READ_UINT_FIELD(tleSortGroupRef); + READ_OID_FIELD(eqop); READ_OID_FIELD(sortop); READ_BOOL_FIELD(nulls_first); @@ -1030,10 +1017,8 @@ parseNodeString(void) if (MATCH("QUERY", 5)) return_value = _readQuery(); - else if (MATCH("SORTCLAUSE", 10)) - return_value = _readSortClause(); - else if (MATCH("GROUPCLAUSE", 11)) - return_value = _readGroupClause(); + else if (MATCH("SORTGROUPCLAUSE", 15)) + return_value = _readSortGroupClause(); else if (MATCH("ROWMARKCLAUSE", 13)) return_value = _readRowMarkClause(); else if (MATCH("SETOPERATIONSTMT", 16)) diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 9a9aba2a41..ba18db0dd9 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.46 2008/04/09 01:00:46 momjian Exp $ +$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.47 2008/08/02 21:31:59 tgl Exp $ Optimizer ========= @@ -563,8 +563,8 @@ competing Paths are equivalently sorted. Pathkeys are also useful to represent an ordering that we wish to achieve, since they are easily compared to the pathkeys of a potential candidate -path. So, SortClause lists are turned into pathkeys lists for use inside -the optimizer. +path. So, SortGroupClause lists are turned into pathkeys lists for use +inside the optimizer. Because we have to generate pathkeys lists from the sort clauses before we've finished EquivalenceClass merging, we cannot use the pointer-equality diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c833586813..b13371e00e 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.171 2008/06/27 03:56:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.172 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -963,11 +963,12 @@ compare_tlist_datatypes(List *tlist, List *colTypes, * * 4. If the subquery uses DISTINCT ON, we must not push down any quals that * refer to non-DISTINCT output columns, because that could change the set - * of rows returned. This condition is vacuous for DISTINCT, because then - * there are no non-DISTINCT output columns, but unfortunately it's fairly - * expensive to tell the difference between DISTINCT and DISTINCT ON in the - * parsetree representation. It's cheaper to just make sure all the Vars - * in the qual refer to DISTINCT columns. + * of rows returned. (This condition is vacuous for DISTINCT, because then + * there are no non-DISTINCT output columns, so we needn't check. But note + * we are assuming that the qual can't distinguish values that the DISTINCT + * operator sees as equal. This is a bit shaky but we have no way to test + * for the case, and it's unlikely enough that we shouldn't refuse the + * optimization just because it could theoretically happen.) * * 5. We must not push down any quals that refer to subselect outputs that * return sets, else we'd introduce functions-returning-sets into the @@ -1030,8 +1031,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, Assert(tle != NULL); Assert(!tle->resjunk); - /* If subquery uses DISTINCT or DISTINCT ON, check point 4 */ - if (subquery->distinctClause != NIL && + /* If subquery uses DISTINCT ON, check point 4 */ + if (subquery->hasDistinctOn && !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) { /* non-DISTINCT column, so fail */ diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index b289ea6e65..c4459e6e3e 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.10 2008/03/31 16:59:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.11 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -355,7 +355,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, * class it is a member of; if none, build a new single-member * EquivalenceClass for it. * - * sortref is the SortGroupRef of the originating SortClause, if any, + * sortref is the SortGroupRef of the originating SortGroupClause, if any, * or zero if not. * * This can be used safely both before and after EquivalenceClass merging; diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index f0141b2366..ccb23834de 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.93 2008/01/09 20:42:28 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.94 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -227,8 +227,8 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) * a PathKey. If canonicalize = true, the result is a "canonical" * PathKey, otherwise not. (But note it might be redundant anyway.) * - * If the PathKey is being generated from a SortClause, sortref should be - * the SortClause's SortGroupRef; otherwise zero. + * If the PathKey is being generated from a SortGroupClause, sortref should be + * the SortGroupClause's SortGroupRef; otherwise zero. * * canonicalize should always be TRUE after EquivalenceClass merging has * been performed, but FALSE if we haven't done EquivalenceClass merging yet. @@ -823,7 +823,7 @@ build_join_pathkeys(PlannerInfo *root, /* * make_pathkeys_for_sortclauses * Generate a pathkeys list that represents the sort order specified - * by a list of SortClauses (GroupClauses will work too!) + * by a list of SortGroupClauses * * If canonicalize is TRUE, the resulting PathKeys are all in canonical form; * otherwise not. canonicalize should always be TRUE after EquivalenceClass @@ -832,7 +832,7 @@ build_join_pathkeys(PlannerInfo *root, * be able to represent requested pathkeys before the equivalence classes have * been created for the query.) * - * 'sortclauses' is a list of SortClause or GroupClause nodes + * 'sortclauses' is a list of SortGroupClause nodes * 'tlist' is the targetlist to find the referenced tlist entries in */ List * @@ -846,11 +846,12 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, foreach(l, sortclauses) { - SortClause *sortcl = (SortClause *) lfirst(l); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); Expr *sortkey; PathKey *pathkey; sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); + Assert(OidIsValid(sortcl->sortop)); pathkey = make_pathkey_from_sortinfo(root, sortkey, sortcl->sortop, diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 0884cda607..80b6ed2edf 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.241 2008/06/27 03:56:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.242 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -760,7 +760,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) Oid in_oper = lfirst_oid(l); Oid sortop; TargetEntry *tle; - SortClause *sortcl; + SortGroupClause *sortcl; sortop = get_ordering_op_for_equality_op(in_oper, false); if (!OidIsValid(sortop)) /* shouldn't happen */ @@ -769,9 +769,10 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) tle = get_tle_by_resno(subplan->targetlist, groupColIdx[groupColPos]); Assert(tle != NULL); - sortcl = makeNode(SortClause); + sortcl = makeNode(SortGroupClause); sortcl->tleSortGroupRef = assignSortGroupRef(tle, subplan->targetlist); + sortcl->eqop = in_oper; sortcl->sortop = sortop; sortcl->nulls_first = false; sortList = lappend(sortList, sortcl); @@ -2531,6 +2532,8 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first, { int i; + Assert(OidIsValid(sortOp)); + for (i = 0; i < numCols; i++) { /* @@ -2753,7 +2756,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, * make_sort_from_sortclauses * Create sort plan to sort according to given sortclauses * - * 'sortcls' is a list of SortClauses + * 'sortcls' is a list of SortGroupClauses * 'lefttree' is the node which yields input tuples */ Sort * @@ -2778,7 +2781,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree) foreach(l, sortcls) { - SortClause *sortcl = (SortClause *) lfirst(l); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); TargetEntry *tle = get_sortgroupclause_tle(sortcl, sub_tlist); /* @@ -2802,14 +2805,14 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree) * make_sort_from_groupcols * Create sort plan to sort based on grouping columns * - * 'groupcls' is the list of GroupClauses + * 'groupcls' is the list of SortGroupClauses * 'grpColIdx' gives the column numbers to use * * This might look like it could be merged with make_sort_from_sortclauses, * but presently we *must* use the grpColIdx[] array to locate sort columns, * because the child plan's tlist is not marked with ressortgroupref info * appropriate to the grouping node. So, only the sort ordering info - * is used from the GroupClause entries. + * is used from the SortGroupClause entries. */ Sort * make_sort_from_groupcols(PlannerInfo *root, @@ -2837,7 +2840,7 @@ make_sort_from_groupcols(PlannerInfo *root, foreach(l, groupcls) { - GroupClause *grpcl = (GroupClause *) lfirst(l); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(l); TargetEntry *tle = get_tle_by_resno(sub_tlist, grpColIdx[grpno]); /* @@ -3038,7 +3041,7 @@ make_group(PlannerInfo *root, } /* - * distinctList is a list of SortClauses, identifying the targetlist items + * distinctList is a list of SortGroupClauses, identifying the targetlist items * that should be considered by the Unique filter. The input path must * already be sorted accordingly. */ @@ -3074,7 +3077,7 @@ make_unique(Plan *lefttree, List *distinctList) plan->righttree = NULL; /* - * convert SortClause list into arrays of attr indexes and equality + * convert SortGroupClause list into arrays of attr indexes and equality * operators, as wanted by executor */ Assert(numCols > 0); @@ -3083,14 +3086,12 @@ make_unique(Plan *lefttree, List *distinctList) foreach(slitem, distinctList) { - SortClause *sortcl = (SortClause *) lfirst(slitem); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); uniqColIdx[keyno] = tle->resno; - uniqOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop); - if (!OidIsValid(uniqOperators[keyno])) /* shouldn't happen */ - elog(ERROR, "could not find equality operator for ordering operator %u", - sortcl->sortop); + uniqOperators[keyno] = sortcl->eqop; + Assert(OidIsValid(uniqOperators[keyno])); keyno++; } @@ -3102,8 +3103,8 @@ make_unique(Plan *lefttree, List *distinctList) } /* - * distinctList is a list of SortClauses, identifying the targetlist items - * that should be considered by the SetOp filter. The input path must + * distinctList is a list of SortGroupClauses, identifying the targetlist + * items that should be considered by the SetOp filter. The input path must * already be sorted accordingly. */ SetOp * @@ -3140,7 +3141,7 @@ make_setop(SetOpCmd cmd, Plan *lefttree, plan->righttree = NULL; /* - * convert SortClause list into arrays of attr indexes and equality + * convert SortGroupClause list into arrays of attr indexes and equality * operators, as wanted by executor */ Assert(numCols > 0); @@ -3149,14 +3150,12 @@ make_setop(SetOpCmd cmd, Plan *lefttree, foreach(slitem, distinctList) { - SortClause *sortcl = (SortClause *) lfirst(slitem); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); dupColIdx[keyno] = tle->resno; - dupOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop); - if (!OidIsValid(dupOperators[keyno])) /* shouldn't happen */ - elog(ERROR, "could not find equality operator for ordering operator %u", - sortcl->sortop); + dupOperators[keyno] = sortcl->eqop; + Assert(OidIsValid(dupOperators[keyno])); keyno++; } diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index af140d4acb..a6b4b1df66 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.41 2008/07/10 02:14:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.42 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -477,7 +477,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) Plan *plan; Plan *iplan; TargetEntry *tle; - SortClause *sortcl; + SortGroupClause *sortcl; /* * Generate a suitably modified query. Much of the work here is probably @@ -492,6 +492,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) subparse->utilityStmt = NULL; subparse->intoClause = NULL; subparse->hasAggs = false; + subparse->hasDistinctOn = false; subparse->groupClause = NIL; subparse->havingQual = NULL; subparse->distinctClause = NIL; @@ -505,8 +506,12 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) subparse->targetList = list_make1(tle); /* set up the appropriate ORDER BY entry */ - sortcl = makeNode(SortClause); + sortcl = makeNode(SortGroupClause); sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList); + sortcl->eqop = get_equality_op_for_ordering_op(info->aggsortop, NULL); + if (!OidIsValid(sortcl->eqop)) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + info->aggsortop); sortcl->sortop = info->aggsortop; sortcl->nulls_first = info->nulls_first; subparse->sortClause = list_make1(sortcl); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 6ebbc5b7bc..735a77ac4e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.235 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -67,6 +67,7 @@ static bool is_dummy_plan(Plan *plan); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); +static void preprocess_groupclause(PlannerInfo *root); static Oid *extract_grouping_ops(List *groupClause); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, double limit_tuples, @@ -846,11 +847,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) Path *best_path; long numGroups = 0; AggClauseCounts agg_counts; - int numGroupCols = list_length(parse->groupClause); + int numGroupCols; bool use_hashed_grouping = false; MemSet(&agg_counts, 0, sizeof(AggClauseCounts)); + /* Preprocess GROUP BY clause, if any */ + if (parse->groupClause) + preprocess_groupclause(root); + numGroupCols = list_length(parse->groupClause); + /* Preprocess targetlist */ tlist = preprocess_targetlist(root, tlist); @@ -1476,6 +1482,88 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction, return tuple_fraction; } + +/* + * preprocess_groupclause - do preparatory work on GROUP BY clause + * + * The idea here is to adjust the ordering of the GROUP BY elements + * (which in itself is semantically insignificant) to match ORDER BY, + * thereby allowing a single sort operation to both implement the ORDER BY + * requirement and set up for a Unique step that implements GROUP BY. + * + * In principle it might be interesting to consider other orderings of the + * GROUP BY elements, which could match the sort ordering of other + * possible plans (eg an indexscan) and thereby reduce cost. We don't + * bother with that, though. Hashed grouping will frequently win anyway. + */ +static void +preprocess_groupclause(PlannerInfo *root) +{ + Query *parse = root->parse; + List *new_groupclause; + bool partial_match; + ListCell *sl; + ListCell *gl; + + /* If no ORDER BY, nothing useful to do here anyway */ + if (parse->sortClause == NIL) + return; + + /* + * Scan the ORDER BY clause and construct a list of matching GROUP BY + * items, but only as far as we can make a matching prefix. + * + * This code assumes that the sortClause contains no duplicate items. + */ + new_groupclause = NIL; + foreach(sl, parse->sortClause) + { + SortGroupClause *sc = (SortGroupClause *) lfirst(sl); + + foreach(gl, parse->groupClause) + { + SortGroupClause *gc = (SortGroupClause *) lfirst(gl); + + if (equal(gc, sc)) + { + new_groupclause = lappend(new_groupclause, gc); + break; + } + } + if (gl == NULL) + break; /* no match, so stop scanning */ + } + + /* Did we match all of the ORDER BY list, or just some of it? */ + partial_match = (sl != NULL); + + /* If no match at all, no point in reordering GROUP BY */ + if (new_groupclause == NIL) + return; + + /* + * Add any remaining GROUP BY items to the new list, but only if we + * were able to make a complete match. In other words, we only + * rearrange the GROUP BY list if the result is that one list is a + * prefix of the other --- otherwise there's no possibility of a + * common sort. + */ + foreach(gl, parse->groupClause) + { + SortGroupClause *gc = (SortGroupClause *) lfirst(gl); + + if (list_member_ptr(new_groupclause, gc)) + continue; /* it matched an ORDER BY item */ + if (partial_match) + return; /* give up, no common sort possible */ + new_groupclause = lappend(new_groupclause, gc); + } + + /* Success --- install the rearranged GROUP BY list */ + Assert(list_length(parse->groupClause) == list_length(new_groupclause)); + parse->groupClause = new_groupclause; +} + /* * extract_grouping_ops - make an array of the equality operator OIDs * for the GROUP BY clause @@ -1492,12 +1580,10 @@ extract_grouping_ops(List *groupClause) foreach(glitem, groupClause) { - GroupClause *groupcl = (GroupClause *) lfirst(glitem); + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); - groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop); - if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */ - elog(ERROR, "could not find equality operator for ordering operator %u", - groupcl->sortop); + groupOperators[colno] = groupcl->eqop; + Assert(OidIsValid(groupOperators[colno])); colno++; } @@ -1738,7 +1824,7 @@ make_subplanTargetList(PlannerInfo *root, foreach(gl, parse->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(gl); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); TargetEntry *te = NULL; ListCell *sl; @@ -1797,7 +1883,7 @@ locate_grouping_columns(PlannerInfo *root, foreach(gl, root->parse->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(gl); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); TargetEntry *te = NULL; ListCell *sl; diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index b30cedee9f..31ba005edb 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.148 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -98,7 +98,7 @@ static List *adjust_inherited_tlist(List *tlist, * zero means "all the tuples will be fetched". Any LIMIT present at the * top level has already been factored into tuple_fraction. * - * *sortClauses is an output argument: it is set to a list of SortClauses + * *sortClauses is an output argument: it is set to a list of SortGroupClauses * representing the result ordering of the topmost set operation. */ Plan * @@ -152,7 +152,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction, * junkOK: if true, child resjunk columns may be left in the result * flag: if >= 0, add a resjunk output column indicating value of flag * refnames_tlist: targetlist to take column names from - * *sortClauses: receives list of SortClauses for result plan, if any + * *sortClauses: receives list of SortGroupClauses for result plan, if any * * We don't have to care about typmods here: the only allowed difference * between set-op input and output typmods is input is a specific typmod @@ -678,8 +678,11 @@ generate_append_tlist(List *colTypes, bool flag, /* * generate_setop_sortlist - * Build a SortClause list enumerating all the non-resjunk tlist entries, - * using default ordering properties. + * Build a SortGroupClause list enumerating all the non-resjunk + * tlist entries, using default ordering properties. + * + * For now, we require all the items to be sortable. Eventually we + * should implement hashing setops and allow hash-only datatypes. */ static List * generate_setop_sortlist(List *targetlist) @@ -692,11 +695,10 @@ generate_setop_sortlist(List *targetlist) TargetEntry *tle = (TargetEntry *) lfirst(l); if (!tle->resjunk) - sortlist = addTargetToSortList(NULL, tle, - sortlist, targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, false); + sortlist = addTargetToGroupList(NULL, tle, + sortlist, targetlist, + true, /* XXX fixme someday */ + false); } return sortlist; } diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index ea92422279..04ef2224c9 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.259 2008/05/15 17:37:49 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -1334,85 +1334,6 @@ is_pseudo_constant_clause_relids(Node *clause, Relids relids) } -/***************************************************************************** - * Tests on clauses of queries - * - * Possibly this code should go someplace else, since this isn't quite the - * same meaning of "clause" as is used elsewhere in this module. But I can't - * think of a better place for it... - *****************************************************************************/ - -/* - * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is - * not the same as the set of output columns. - */ -bool -has_distinct_on_clause(Query *query) -{ - ListCell *l; - - /* Is there a DISTINCT clause at all? */ - if (query->distinctClause == NIL) - return false; - - /* - * If the DISTINCT list contains all the nonjunk targetlist items, and - * nothing else (ie, no junk tlist items), then it's a simple DISTINCT, - * else it's DISTINCT ON. We do not require the lists to be in the same - * order (since the parser may have adjusted the DISTINCT clause ordering - * to agree with ORDER BY). Furthermore, a non-DISTINCT junk tlist item - * that is in the sortClause is also evidence of DISTINCT ON, since we - * don't allow ORDER BY on junk tlist items when plain DISTINCT is used. - * - * This code assumes that the DISTINCT list is valid, ie, all its entries - * match some entry of the tlist. - */ - foreach(l, query->targetList) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (tle->ressortgroupref == 0) - { - if (tle->resjunk) - continue; /* we can ignore unsorted junk cols */ - return true; /* definitely not in DISTINCT list */ - } - if (targetIsInSortList(tle, InvalidOid, query->distinctClause)) - { - if (tle->resjunk) - return true; /* junk TLE in DISTINCT means DISTINCT ON */ - /* else this TLE is okay, keep looking */ - } - else - { - /* This TLE is not in DISTINCT list */ - if (!tle->resjunk) - return true; /* non-junk, non-DISTINCT, so DISTINCT ON */ - if (targetIsInSortList(tle, InvalidOid, query->sortClause)) - return true; /* sorted, non-distinct junk */ - /* unsorted junk is okay, keep looking */ - } - } - /* It's a simple DISTINCT */ - return false; -} - -/* - * Test whether a query uses simple DISTINCT, ie, has a distinct-list that - * is the same as the set of output columns. - */ -bool -has_distinct_clause(Query *query) -{ - /* Is there a DISTINCT clause at all? */ - if (query->distinctClause == NIL) - return false; - - /* It's DISTINCT if it's not DISTINCT ON */ - return !has_distinct_on_clause(query); -} - - /***************************************************************************** * * * General clause-manipulating routines * diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index c5b6c2ed61..ad3c10ac27 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.143 2008/04/21 20:54:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -935,8 +935,10 @@ translate_sub_tlist(List *tlist, int relid) * corresponding upper-level equality operators listed in opids would think * the values are distinct. (Note: the opids entries could be cross-type * operators, and thus not exactly the equality operators that the subquery - * would use itself. We assume that the subquery is compatible if these - * operators appear in the same btree opfamily as the ones the subquery uses.) + * would use itself. We use equality_ops_are_compatible() to check + * compatibility. That looks at btree or hash opfamily membership, and so + * should give trustworthy answers for all operators that we might need + * to deal with here.) */ static bool query_is_distinct_for(Query *query, List *colnos, List *opids) @@ -955,13 +957,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) { foreach(l, query->distinctClause) { - SortClause *scl = (SortClause *) lfirst(l); - TargetEntry *tle = get_sortgroupclause_tle(scl, + SortGroupClause *sgc = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sgc, query->targetList); opid = distinct_col_search(tle->resno, colnos, opids); if (!OidIsValid(opid) || - !ops_in_same_btree_opfamily(opid, scl->sortop)) + !equality_ops_are_compatible(opid, sgc->eqop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -976,13 +978,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) { foreach(l, query->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(l); - TargetEntry *tle = get_sortgroupclause_tle(grpcl, + SortGroupClause *sgc = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sgc, query->targetList); opid = distinct_col_search(tle->resno, colnos, opids); if (!OidIsValid(opid) || - !ops_in_same_btree_opfamily(opid, grpcl->sortop)) + !equality_ops_are_compatible(opid, sgc->eqop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -1002,10 +1004,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, * except with ALL. * - * XXX this code knows that prepunion.c will adopt the default ordering - * operator for each column datatype as the sortop. It'd probably be - * better if these operators were chosen at parse time and stored into the - * parsetree, instead of leaving bits of the planner to decide semantics. + * XXX this code knows that prepunion.c will adopt the default sort/group + * operators for each column datatype to determine uniqueness. It'd + * probably be better if these operators were chosen at parse time and + * stored into the parsetree, instead of leaving bits of the planner to + * decide semantics. */ if (query->setOperations) { @@ -1020,14 +1023,20 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) foreach(l, query->targetList) { TargetEntry *tle = (TargetEntry *) lfirst(l); + Oid tle_eq_opr; if (tle->resjunk) continue; /* ignore resjunk columns */ opid = distinct_col_search(tle->resno, colnos, opids); - if (!OidIsValid(opid) || - !ops_in_same_btree_opfamily(opid, - ordering_oper_opid(exprType((Node *) tle->expr)))) + if (!OidIsValid(opid)) + break; /* exit early if no match */ + /* check for compatible semantics */ + get_sort_group_operators(exprType((Node *) tle->expr), + false, false, false, + NULL, &tle_eq_opr, NULL); + if (!OidIsValid(tle_eq_opr) || + !equality_ops_are_compatible(opid, tle_eq_opr)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 618d65925c..fc0880ad56 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.78 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -156,49 +156,43 @@ get_sortgroupref_tle(Index sortref, List *targetList) /* * get_sortgroupclause_tle - * Find the targetlist entry matching the given SortClause - * (or GroupClause) by ressortgroupref, and return it. - * - * Because GroupClause is typedef'd as SortClause, either kind of - * node can be passed without casting. + * Find the targetlist entry matching the given SortGroupClause + * by ressortgroupref, and return it. */ TargetEntry * -get_sortgroupclause_tle(SortClause *sortClause, +get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList) { - return get_sortgroupref_tle(sortClause->tleSortGroupRef, targetList); + return get_sortgroupref_tle(sgClause->tleSortGroupRef, targetList); } /* * get_sortgroupclause_expr - * Find the targetlist entry matching the given SortClause - * (or GroupClause) by ressortgroupref, and return its expression. - * - * Because GroupClause is typedef'd as SortClause, either kind of - * node can be passed without casting. + * Find the targetlist entry matching the given SortGroupClause + * by ressortgroupref, and return its expression. */ Node * -get_sortgroupclause_expr(SortClause *sortClause, List *targetList) +get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList) { - TargetEntry *tle = get_sortgroupclause_tle(sortClause, targetList); + TargetEntry *tle = get_sortgroupclause_tle(sgClause, targetList); return (Node *) tle->expr; } /* * get_sortgrouplist_exprs - * Given a list of SortClauses (or GroupClauses), build a list + * Given a list of SortGroupClauses, build a list * of the referenced targetlist expressions. */ List * -get_sortgrouplist_exprs(List *sortClauses, List *targetList) +get_sortgrouplist_exprs(List *sgClauses, List *targetList) { List *result = NIL; ListCell *l; - foreach(l, sortClauses) + foreach(l, sgClauses) { - SortClause *sortcl = (SortClause *) lfirst(l); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); Node *sortexpr; sortexpr = get_sortgroupclause_expr(sortcl, targetList); diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 329b1bc881..2cc3f28d20 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -17,7 +17,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.374 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.375 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -724,10 +724,28 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) &qry->targetList, qry->sortClause); - qry->distinctClause = transformDistinctClause(pstate, - stmt->distinctClause, - &qry->targetList, - qry->sortClause); + if (stmt->distinctClause == NIL) + { + qry->distinctClause = NIL; + qry->hasDistinctOn = false; + } + else if (linitial(stmt->distinctClause) == NULL) + { + /* We had SELECT DISTINCT */ + qry->distinctClause = transformDistinctClause(pstate, + &qry->targetList, + qry->sortClause); + qry->hasDistinctOn = false; + } + else + { + /* We had SELECT DISTINCT ON */ + qry->distinctClause = transformDistinctOnClause(pstate, + stmt->distinctClause, + &qry->targetList, + qry->sortClause); + qry->hasDistinctOn = true; + } /* transform LIMIT */ qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset, diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 03a765e96e..b5fbd0f78d 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.79 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.80 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -131,7 +131,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry) */ foreach(l, qry->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(l); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(l); Node *expr; expr = get_sortgroupclause_expr(grpcl, qry->targetList); diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index d82573fd03..0a3b544b10 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.171 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.172 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -66,6 +66,10 @@ static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype, Var *l_colvar, Var *r_colvar); static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause); +static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, + List *sortlist, List *targetlist, + SortByDir sortby_dir, SortByNulls sortby_nulls, + List *sortby_opname, bool resolveUnknown); /* @@ -1289,129 +1293,75 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause) return target_result; } -static GroupClause * -make_group_clause(TargetEntry *tle, List *targetlist, - Oid sortop, bool nulls_first) -{ - GroupClause *result; - - result = makeNode(GroupClause); - result->tleSortGroupRef = assignSortGroupRef(tle, targetlist); - result->sortop = sortop; - result->nulls_first = nulls_first; - return result; -} - /* * transformGroupClause - * transform a GROUP BY clause * * GROUP BY items will be added to the targetlist (as resjunk columns) * if not already present, so the targetlist must be passed by reference. - * - * The order of the elements of the grouping clause does not affect - * the semantics of the query. However, the optimizer is not currently - * smart enough to reorder the grouping clause, so we try to do some - * primitive reordering here. */ List * transformGroupClause(ParseState *pstate, List *grouplist, List **targetlist, List *sortClause) { List *result = NIL; - List *tle_list = NIL; - ListCell *l; + ListCell *gl; - /* Preprocess the grouping clause, lookup TLEs */ - foreach(l, grouplist) + foreach(gl, grouplist) { + Node *gexpr = (Node *) lfirst(gl); TargetEntry *tle; - Oid restype; + bool found = false; - tle = findTargetlistEntry(pstate, lfirst(l), + tle = findTargetlistEntry(pstate, gexpr, targetlist, GROUP_CLAUSE); - /* if tlist item is an UNKNOWN literal, change it to TEXT */ - restype = exprType((Node *) tle->expr); - - if (restype == UNKNOWNOID) - tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr, - restype, TEXTOID, -1, - COERCION_IMPLICIT, - COERCE_IMPLICIT_CAST); - - tle_list = lappend(tle_list, tle); - } - - /* - * Now iterate through the ORDER BY clause. If we find a grouping element - * that matches the ORDER BY element, append the grouping element to the - * result set immediately. Otherwise, stop iterating. The effect of this - * is to look for a prefix of the ORDER BY list in the grouping clauses, - * and to move that prefix to the front of the GROUP BY. - */ - foreach(l, sortClause) - { - SortClause *sc = (SortClause *) lfirst(l); - ListCell *prev = NULL; - ListCell *tl; - bool found = false; + /* Eliminate duplicates (GROUP BY x, x) */ + if (targetIsInSortList(tle, InvalidOid, result)) + continue; - foreach(tl, tle_list) + /* + * If the GROUP BY tlist entry also appears in ORDER BY, copy operator + * info from the (first) matching ORDER BY item. This means that if + * you write something like "GROUP BY foo ORDER BY foo USING <<<", the + * GROUP BY operation silently takes on the equality semantics implied + * by the ORDER BY. There are two reasons to do this: it improves + * the odds that we can implement both GROUP BY and ORDER BY with a + * single sort step, and it allows the user to choose the equality + * semantics used by GROUP BY, should she be working with a datatype + * that has more than one equality operator. + */ + if (tle->ressortgroupref > 0) { - TargetEntry *tle = (TargetEntry *) lfirst(tl); + ListCell *sl; - if (sc->tleSortGroupRef == tle->ressortgroupref) + foreach(sl, sortClause) { - GroupClause *gc; + SortGroupClause *sc = (SortGroupClause *) lfirst(sl); - tle_list = list_delete_cell(tle_list, tl, prev); - - /* Use the sort clause's sorting information */ - gc = make_group_clause(tle, *targetlist, - sc->sortop, sc->nulls_first); - result = lappend(result, gc); - found = true; - break; + if (sc->tleSortGroupRef == tle->ressortgroupref) + { + result = lappend(result, copyObject(sc)); + found = true; + break; + } } - - prev = tl; } - /* As soon as we've failed to match an ORDER BY element, stop */ - if (!found) - break; - } - - /* - * Now add any remaining elements of the GROUP BY list in the order we - * received them. - * - * XXX: are there any additional criteria to consider when ordering - * grouping clauses? - */ - foreach(l, tle_list) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - GroupClause *gc; - Oid sort_op; - /* - * Avoid making duplicate grouplist entries. Note that we don't - * enforce a particular sortop here. Along with the copying of sort - * information above, this means that if you write something like - * "GROUP BY foo ORDER BY foo USING <<<", the GROUP BY operation - * silently takes on the equality semantics implied by the ORDER BY. + * If no match in ORDER BY, just add it to the result using + * default sort/group semantics. + * + * XXX for now, the planner requires groupClause to be sortable, + * so we have to insist on that here. */ - if (targetIsInSortList(tle, InvalidOid, result)) - continue; - - sort_op = ordering_oper_opid(exprType((Node *) tle->expr)); - gc = make_group_clause(tle, *targetlist, sort_op, false); - result = lappend(result, gc); + if (!found) + result = addTargetToGroupList(pstate, tle, + result, *targetlist, + true, /* XXX for now */ + true); } - list_free(tle_list); return result; } @@ -1433,7 +1383,7 @@ transformSortClause(ParseState *pstate, foreach(olitem, orderlist) { - SortBy *sortby = lfirst(olitem); + SortBy *sortby = (SortBy *) lfirst(olitem); TargetEntry *tle; tle = findTargetlistEntry(pstate, sortby->node, @@ -1452,159 +1402,165 @@ transformSortClause(ParseState *pstate, /* * transformDistinctClause - - * transform a DISTINCT or DISTINCT ON clause + * transform a DISTINCT clause * * Since we may need to add items to the query's targetlist, that list * is passed by reference. * * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as * possible into the distinctClause. This avoids a possible need to re-sort, - * and allows the user to determine the equality semantics used by DISTINCT, - * should she be working with a datatype that has more than one btree equality + * and allows the user to choose the equality semantics used by DISTINCT, + * should she be working with a datatype that has more than one equality * operator. */ List * -transformDistinctClause(ParseState *pstate, List *distinctlist, +transformDistinctClause(ParseState *pstate, List **targetlist, List *sortClause) { List *result = NIL; ListCell *slitem; - ListCell *dlitem; ListCell *tlitem; - /* No work if there was no DISTINCT clause */ - if (distinctlist == NIL) - return NIL; - - if (linitial(distinctlist) == NULL) + /* + * The distinctClause should consist of all ORDER BY items followed + * by all other non-resjunk targetlist items. There must not be any + * resjunk ORDER BY items --- that would imply that we are sorting + * by a value that isn't necessarily unique within a DISTINCT group, + * so the results wouldn't be well-defined. This construction + * ensures we follow the rule that sortClause and distinctClause match; + * in fact the sortClause will always be a prefix of distinctClause. + * + * Note a corner case: the same TLE could be in the ORDER BY list + * multiple times with different sortops. We have to include it in + * the distinctClause the same way to preserve the prefix property. + * The net effect will be that the TLE value will be made unique + * according to both sortops. + */ + foreach(slitem, sortClause) { - /* We had SELECT DISTINCT */ - - /* - * The distinctClause should consist of all ORDER BY items followed - * by all other non-resjunk targetlist items. There must not be any - * resjunk ORDER BY items --- that would imply that we are sorting - * by a value that isn't necessarily unique within a DISTINCT group, - * so the results wouldn't be well-defined. This construction - * ensures we follow the rule that sortClause and distinctClause match; - * in fact the sortClause will always be a prefix of distinctClause. - * - * Note a corner case: the same TLE could be in the ORDER BY list - * multiple times with different sortops. We have to include it in - * the distinctClause the same way to preserve the prefix property. - * The net effect will be that the TLE value will be made unique - * according to both sortops. - */ - foreach(slitem, sortClause) - { - SortClause *scl = (SortClause *) lfirst(slitem); - TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist); - - if (tle->resjunk) - ereport(ERROR, - (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), - errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list"))); - else - result = lappend(result, copyObject(scl)); - } + SortGroupClause *scl = (SortGroupClause *) lfirst(slitem); + TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist); - /* - * Now add any remaining non-resjunk tlist items, using default - * sorting semantics for their data types. - */ - foreach(tlitem, *targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(tlitem); - - if (tle->resjunk) - continue; /* ignore junk */ - if (targetIsInSortList(tle, InvalidOid, result)) - continue; /* already in list (with some semantics) */ - result = addTargetToSortList(pstate, tle, - result, *targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, true); - } + if (tle->resjunk) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list"))); + result = lappend(result, copyObject(scl)); } - else + + /* + * Now add any remaining non-resjunk tlist items, using default + * sort/group semantics for their data types. + * + * XXX for now, the planner requires distinctClause to be sortable, + * so we have to insist on that here. + */ + foreach(tlitem, *targetlist) { - /* We had SELECT DISTINCT ON (expr, ...) */ - Bitmapset *refnos = NULL; - int sortgroupref; - bool skipped_sortitem; + TargetEntry *tle = (TargetEntry *) lfirst(tlitem); + + if (tle->resjunk) + continue; /* ignore junk */ + result = addTargetToGroupList(pstate, tle, + result, *targetlist, + true, /* XXX for now */ + true); + } - /* - * Add all the DISTINCT ON expressions to the tlist (if not already - * present, they are added as resjunk items). Assign sortgroupref - * numbers to them, and form a bitmapset of these numbers. (A - * bitmapset is convenient here because we don't care about order - * and we can discard duplicates.) - */ - foreach(dlitem, distinctlist) - { - Node *dexpr = (Node *) lfirst(dlitem); - TargetEntry *tle; + return result; +} - tle = findTargetlistEntry(pstate, dexpr, - targetlist, DISTINCT_ON_CLAUSE); - sortgroupref = assignSortGroupRef(tle, *targetlist); - refnos = bms_add_member(refnos, sortgroupref); - } +/* + * transformDistinctOnClause - + * transform a DISTINCT ON clause + * + * Since we may need to add items to the query's targetlist, that list + * is passed by reference. + * + * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as + * possible into the distinctClause. This avoids a possible need to re-sort, + * and allows the user to choose the equality semantics used by DISTINCT, + * should she be working with a datatype that has more than one equality + * operator. + */ +List * +transformDistinctOnClause(ParseState *pstate, List *distinctlist, + List **targetlist, List *sortClause) +{ + List *result = NIL; + ListCell *slitem; + ListCell *dlitem; + Bitmapset *refnos = NULL; + int sortgroupref; + bool skipped_sortitem; - /* - * If the user writes both DISTINCT ON and ORDER BY, adopt the - * sorting semantics from ORDER BY items that match DISTINCT ON - * items, and also adopt their column sort order. We insist that - * the distinctClause and sortClause match, so throw error if we - * find the need to add any more distinctClause items after we've - * skipped an ORDER BY item that wasn't in DISTINCT ON. - */ - skipped_sortitem = false; - foreach(slitem, sortClause) - { - SortClause *scl = (SortClause *) lfirst(slitem); + /* + * Add all the DISTINCT ON expressions to the tlist (if not already + * present, they are added as resjunk items). Assign sortgroupref + * numbers to them, and form a bitmapset of these numbers. (A + * bitmapset is convenient here because we don't care about order + * and we can discard duplicates.) + */ + foreach(dlitem, distinctlist) + { + Node *dexpr = (Node *) lfirst(dlitem); + TargetEntry *tle; - if (bms_is_member(scl->tleSortGroupRef, refnos)) - { - if (skipped_sortitem) - ereport(ERROR, - (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), - errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); - else - result = lappend(result, copyObject(scl)); - } - else - { - skipped_sortitem = true; - } - } + tle = findTargetlistEntry(pstate, dexpr, + targetlist, DISTINCT_ON_CLAUSE); + sortgroupref = assignSortGroupRef(tle, *targetlist); + refnos = bms_add_member(refnos, sortgroupref); + } - /* - * Now add any remaining DISTINCT ON items, using default sorting - * semantics for their data types. (Note: this is pretty - * questionable; if the ORDER BY list doesn't include all the DISTINCT - * ON items and more besides, you certainly aren't using DISTINCT ON - * in the intended way, and you probably aren't going to get - * consistent results. It might be better to throw an error or warning - * here. But historically we've allowed it, so keep doing so.) - */ - while ((sortgroupref = bms_first_member(refnos)) >= 0) - { - TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist); + /* + * If the user writes both DISTINCT ON and ORDER BY, adopt the + * sorting semantics from ORDER BY items that match DISTINCT ON + * items, and also adopt their column sort order. We insist that + * the distinctClause and sortClause match, so throw error if we + * find the need to add any more distinctClause items after we've + * skipped an ORDER BY item that wasn't in DISTINCT ON. + */ + skipped_sortitem = false; + foreach(slitem, sortClause) + { + SortGroupClause *scl = (SortGroupClause *) lfirst(slitem); - if (targetIsInSortList(tle, InvalidOid, result)) - continue; /* already in list (with some semantics) */ + if (bms_is_member(scl->tleSortGroupRef, refnos)) + { if (skipped_sortitem) ereport(ERROR, (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); - result = addTargetToSortList(pstate, tle, - result, *targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, true); + else + result = lappend(result, copyObject(scl)); } + else + skipped_sortitem = true; + } + + /* + * Now add any remaining DISTINCT ON items, using default sort/group + * semantics for their data types. (Note: this is pretty questionable; + * if the ORDER BY list doesn't include all the DISTINCT ON items and more + * besides, you certainly aren't using DISTINCT ON in the intended way, + * and you probably aren't going to get consistent results. It might be + * better to throw an error or warning here. But historically we've + * allowed it, so keep doing so.) + */ + while ((sortgroupref = bms_first_member(refnos)) >= 0) + { + TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist); + + if (targetIsInSortList(tle, InvalidOid, result)) + continue; /* already in list (with some semantics) */ + if (skipped_sortitem) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); + result = addTargetToGroupList(pstate, tle, + result, *targetlist, + true, /* someday allow hash-only? */ + true); } return result; @@ -1612,17 +1568,18 @@ transformDistinctClause(ParseState *pstate, List *distinctlist, /* * addTargetToSortList - * If the given targetlist entry isn't already in the SortClause list, - * add it to the end of the list, using the given sort ordering info. + * If the given targetlist entry isn't already in the SortGroupClause + * list, add it to the end of the list, using the given sort ordering + * info. * * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not, * do nothing (which implies the search for a sort operator will fail). * pstate should be provided if resolveUnknown is TRUE, but can be NULL * otherwise. * - * Returns the updated SortClause list. + * Returns the updated SortGroupClause list. */ -List * +static List * addTargetToSortList(ParseState *pstate, TargetEntry *tle, List *sortlist, List *targetlist, SortByDir sortby_dir, SortByNulls sortby_nulls, @@ -1630,7 +1587,7 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, { Oid restype = exprType((Node *) tle->expr); Oid sortop; - Oid cmpfunc; + Oid eqop; bool reverse; /* if tlist item is an UNKNOWN literal, change it to TEXT */ @@ -1643,16 +1600,20 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, restype = TEXTOID; } - /* determine the sortop */ + /* determine the sortop, eqop, and directionality */ switch (sortby_dir) { case SORTBY_DEFAULT: case SORTBY_ASC: - sortop = ordering_oper_opid(restype); + get_sort_group_operators(restype, + true, true, false, + &sortop, &eqop, NULL); reverse = false; break; case SORTBY_DESC: - sortop = reverse_ordering_oper_opid(restype); + get_sort_group_operators(restype, + false, true, true, + NULL, &eqop, &sortop); reverse = true; break; case SORTBY_USING: @@ -1663,11 +1624,12 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, false); /* - * Verify it's a valid ordering operator, and determine whether to - * consider it like ASC or DESC. + * Verify it's a valid ordering operator, fetch the corresponding + * equality operator, and determine whether to consider it like + * ASC or DESC. */ - if (!get_compare_function_for_ordering_op(sortop, - &cmpfunc, &reverse)) + eqop = get_equality_op_for_ordering_op(sortop, &reverse); + if (!OidIsValid(eqop)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("operator %s is not a valid ordering operator", @@ -1677,6 +1639,7 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, default: elog(ERROR, "unrecognized sortby_dir: %d", sortby_dir); sortop = InvalidOid; /* keep compiler quiet */ + eqop = InvalidOid; reverse = false; break; } @@ -1684,10 +1647,11 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, /* avoid making duplicate sortlist entries */ if (!targetIsInSortList(tle, sortop, sortlist)) { - SortClause *sortcl = makeNode(SortClause); + SortGroupClause *sortcl = makeNode(SortGroupClause); sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist); + sortcl->eqop = eqop; sortcl->sortop = sortop; switch (sortby_nulls) @@ -1713,6 +1677,68 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, return sortlist; } +/* + * addTargetToGroupList + * If the given targetlist entry isn't already in the SortGroupClause + * list, add it to the end of the list, using default sort/group + * semantics. + * + * This is very similar to addTargetToSortList, except that we allow the + * case where only a grouping (equality) operator can be found, and that + * the TLE is considered "already in the list" if it appears there with any + * sorting semantics. + * + * If requireSortOp is TRUE, we require a sorting operator to be found too. + * XXX this argument should eventually be obsolete, but for now there are + * parts of the system that can't support non-sortable grouping lists. + * + * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not, + * do nothing (which implies the search for an equality operator will fail). + * pstate should be provided if resolveUnknown is TRUE, but can be NULL + * otherwise. + * + * Returns the updated SortGroupClause list. + */ +List * +addTargetToGroupList(ParseState *pstate, TargetEntry *tle, + List *grouplist, List *targetlist, + bool requireSortOp, bool resolveUnknown) +{ + Oid restype = exprType((Node *) tle->expr); + Oid sortop; + Oid eqop; + + /* if tlist item is an UNKNOWN literal, change it to TEXT */ + if (restype == UNKNOWNOID && resolveUnknown) + { + tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr, + restype, TEXTOID, -1, + COERCION_IMPLICIT, + COERCE_IMPLICIT_CAST); + restype = TEXTOID; + } + + /* avoid making duplicate grouplist entries */ + if (!targetIsInSortList(tle, InvalidOid, grouplist)) + { + SortGroupClause *grpcl = makeNode(SortGroupClause); + + /* determine the eqop and optional sortop */ + get_sort_group_operators(restype, + requireSortOp, true, false, + &sortop, &eqop, NULL); + + grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist); + grpcl->eqop = eqop; + grpcl->sortop = sortop; + grpcl->nulls_first = false; /* OK with or without sortop */ + + grouplist = lappend(grouplist, grpcl); + } + + return grouplist; +} + /* * assignSortGroupRef * Assign the targetentry an unused ressortgroupref, if it doesn't @@ -1756,9 +1782,10 @@ assignSortGroupRef(TargetEntry *tle, List *tlist) * opposite nulls direction is redundant. Also, we can consider * ORDER BY foo ASC, foo DESC redundant, so check for a commutator match. * - * Works for both SortClause and GroupClause lists. Note that the main - * reason we need this routine (and not just a quick test for nonzeroness - * of ressortgroupref) is that a TLE might be in only one of the lists. + * Works for both ordering and grouping lists (sortop would normally be + * InvalidOid when considering grouping). Note that the main reason we need + * this routine (and not just a quick test for nonzeroness of ressortgroupref) + * is that a TLE might be in only one of the lists. */ bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList) @@ -1772,7 +1799,7 @@ targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList) foreach(l, sortList) { - SortClause *scl = (SortClause *) lfirst(l); + SortGroupClause *scl = (SortGroupClause *) lfirst(l); if (scl->tleSortGroupRef == ref && (sortop == InvalidOid || diff --git a/src/backend/parser/parse_oper.c b/src/backend/parser/parse_oper.c index 54fb63f959..055574ff98 100644 --- a/src/backend/parser/parse_oper.c +++ b/src/backend/parser/parse_oper.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.102 2008/04/22 01:34:34 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.103 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -160,239 +160,93 @@ LookupOperNameTypeNames(ParseState *pstate, List *opername, } /* - * equality_oper - identify a suitable equality operator for a datatype + * get_sort_group_operators - get default sorting/grouping operators for type * - * On failure, return NULL if noError, else report a standard error - */ -Operator -equality_oper(Oid argtype, bool noError) -{ - TypeCacheEntry *typentry; - Oid oproid; - Operator optup; - - /* - * Look for an "=" operator for the datatype. We require it to be an - * exact or binary-compatible match, since most callers are not prepared - * to cope with adding any run-time type coercion steps. - */ - typentry = lookup_type_cache(argtype, TYPECACHE_EQ_OPR); - oproid = typentry->eq_opr; - - /* - * If the datatype is an array, then we can use array_eq ... but only if - * there is a suitable equality operator for the element type. (This check - * is not in the raw typcache.c code ... should it be?) - */ - if (oproid == ARRAY_EQ_OP) - { - Oid elem_type = get_element_type(argtype); - - if (OidIsValid(elem_type)) - { - optup = equality_oper(elem_type, true); - if (optup != NULL) - ReleaseSysCache(optup); - else - oproid = InvalidOid; /* element type has no "=" */ - } - else - oproid = InvalidOid; /* bogus array type? */ - } - - if (OidIsValid(oproid)) - { - optup = SearchSysCache(OPEROID, - ObjectIdGetDatum(oproid), - 0, 0, 0); - if (optup == NULL) /* should not fail */ - elog(ERROR, "cache lookup failed for operator %u", oproid); - return optup; - } - - if (!noError) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("could not identify an equality operator for type %s", - format_type_be(argtype)))); - return NULL; -} - -/* - * ordering_oper - identify a suitable sorting operator ("<") for a datatype + * We fetch the "<", "=", and ">" operators all at once to reduce lookup + * overhead (knowing that most callers will be interested in at least two). + * However, a given datatype might have only an "=" operator, if it is + * hashable but not sortable. (Other combinations of present and missing + * operators shouldn't happen, unless the system catalogs are messed up.) + * + * If an operator is missing and the corresponding needXX flag is true, + * throw a standard error message, else return InvalidOid. + * + * Callers can pass NULL pointers for any results they don't care to get. * - * On failure, return NULL if noError, else report a standard error + * Note: the results are guaranteed to be exact or binary-compatible matches, + * since most callers are not prepared to cope with adding any run-time type + * coercion steps. */ -Operator -ordering_oper(Oid argtype, bool noError) +void +get_sort_group_operators(Oid argtype, + bool needLT, bool needEQ, bool needGT, + Oid *ltOpr, Oid *eqOpr, Oid *gtOpr) { TypeCacheEntry *typentry; - Oid oproid; - Operator optup; + Oid lt_opr; + Oid eq_opr; + Oid gt_opr; /* - * Look for a "<" operator for the datatype. We require it to be an exact - * or binary-compatible match, since most callers are not prepared to cope - * with adding any run-time type coercion steps. + * Look up the operators using the type cache. * - * Note: the search algorithm used by typcache.c ensures that if a "<" - * operator is returned, it will be consistent with the "=" operator - * returned by equality_oper. This is critical for sorting and grouping - * purposes. + * Note: the search algorithm used by typcache.c ensures that the results + * are consistent, ie all from the same opclass. */ - typentry = lookup_type_cache(argtype, TYPECACHE_LT_OPR); - oproid = typentry->lt_opr; + typentry = lookup_type_cache(argtype, + TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR | TYPECACHE_GT_OPR); + lt_opr = typentry->lt_opr; + eq_opr = typentry->eq_opr; + gt_opr = typentry->gt_opr; /* - * If the datatype is an array, then we can use array_lt ... but only if - * there is a suitable less-than operator for the element type. (This - * check is not in the raw typcache.c code ... should it be?) + * If the datatype is an array, then we can use array_lt and friends ... + * but only if there are suitable operators for the element type. (This + * check is not in the raw typcache.c code ... should it be?) Testing + * all three operator IDs here should be redundant. */ - if (oproid == ARRAY_LT_OP) + if (lt_opr == ARRAY_LT_OP || + eq_opr == ARRAY_EQ_OP || + gt_opr == ARRAY_GT_OP) { Oid elem_type = get_element_type(argtype); if (OidIsValid(elem_type)) { - optup = ordering_oper(elem_type, true); - if (optup != NULL) - ReleaseSysCache(optup); - else - oproid = InvalidOid; /* element type has no "<" */ + typentry = lookup_type_cache(elem_type, + TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR | TYPECACHE_GT_OPR); + if (!OidIsValid(typentry->lt_opr)) + lt_opr = InvalidOid; /* element type has no "<" */ + if (!OidIsValid(typentry->eq_opr)) + eq_opr = InvalidOid; /* element type has no "=" */ + if (!OidIsValid(typentry->gt_opr)) + gt_opr = InvalidOid; /* element type has no ">" */ } else - oproid = InvalidOid; /* bogus array type? */ - } - - if (OidIsValid(oproid)) - { - optup = SearchSysCache(OPEROID, - ObjectIdGetDatum(oproid), - 0, 0, 0); - if (optup == NULL) /* should not fail */ - elog(ERROR, "cache lookup failed for operator %u", oproid); - return optup; + lt_opr = eq_opr = gt_opr = InvalidOid; /* bogus array type? */ } - if (!noError) + /* Report errors if needed */ + if ((needLT && !OidIsValid(lt_opr)) || + (needGT && !OidIsValid(gt_opr))) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("could not identify an ordering operator for type %s", format_type_be(argtype)), errhint("Use an explicit ordering operator or modify the query."))); - return NULL; -} - -/* - * reverse_ordering_oper - identify DESC sort operator (">") for a datatype - * - * On failure, return NULL if noError, else report a standard error - */ -Operator -reverse_ordering_oper(Oid argtype, bool noError) -{ - TypeCacheEntry *typentry; - Oid oproid; - Operator optup; - - /* - * Look for a ">" operator for the datatype. We require it to be an exact - * or binary-compatible match, since most callers are not prepared to cope - * with adding any run-time type coercion steps. - * - * Note: the search algorithm used by typcache.c ensures that if a ">" - * operator is returned, it will be consistent with the "=" operator - * returned by equality_oper. This is critical for sorting and grouping - * purposes. - */ - typentry = lookup_type_cache(argtype, TYPECACHE_GT_OPR); - oproid = typentry->gt_opr; - - /* - * If the datatype is an array, then we can use array_gt ... but only if - * there is a suitable greater-than operator for the element type. (This - * check is not in the raw typcache.c code ... should it be?) - */ - if (oproid == ARRAY_GT_OP) - { - Oid elem_type = get_element_type(argtype); - - if (OidIsValid(elem_type)) - { - optup = reverse_ordering_oper(elem_type, true); - if (optup != NULL) - ReleaseSysCache(optup); - else - oproid = InvalidOid; /* element type has no ">" */ - } - else - oproid = InvalidOid; /* bogus array type? */ - } - - if (OidIsValid(oproid)) - { - optup = SearchSysCache(OPEROID, - ObjectIdGetDatum(oproid), - 0, 0, 0); - if (optup == NULL) /* should not fail */ - elog(ERROR, "cache lookup failed for operator %u", oproid); - return optup; - } - - if (!noError) + if (needEQ && !OidIsValid(eq_opr)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("could not identify an ordering operator for type %s", - format_type_be(argtype)), - errhint("Use an explicit ordering operator or modify the query."))); - return NULL; -} - -/* - * equality_oper_funcid - convenience routine for oprfuncid(equality_oper()) - */ -Oid -equality_oper_funcid(Oid argtype) -{ - Operator optup; - Oid result; - - optup = equality_oper(argtype, false); - result = oprfuncid(optup); - ReleaseSysCache(optup); - return result; -} - -/* - * ordering_oper_opid - convenience routine for oprid(ordering_oper()) - * - * This was formerly called any_ordering_op() - */ -Oid -ordering_oper_opid(Oid argtype) -{ - Operator optup; - Oid result; - - optup = ordering_oper(argtype, false); - result = oprid(optup); - ReleaseSysCache(optup); - return result; -} - -/* - * reverse_ordering_oper_opid - convenience routine for oprid(reverse_ordering_oper()) - */ -Oid -reverse_ordering_oper_opid(Oid argtype) -{ - Operator optup; - Oid result; + errmsg("could not identify an equality operator for type %s", + format_type_be(argtype)))); - optup = reverse_ordering_oper(argtype, false); - result = oprid(optup); - ReleaseSysCache(optup); - return result; + /* Return results as needed */ + if (ltOpr) + *ltOpr = lt_opr; + if (eqOpr) + *eqOpr = eq_opr; + if (gtOpr) + *gtOpr = gt_opr; } diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index b3603c53c1..0120353344 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.278 2008/07/18 03:32:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.279 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -157,7 +157,7 @@ static void get_target_list(List *targetList, deparse_context *context, static void get_setop_query(Node *setOp, Query *query, deparse_context *context, TupleDesc resultDesc); -static Node *get_rule_sortgroupclause(SortClause *srt, List *tlist, +static Node *get_rule_sortgroupclause(SortGroupClause *srt, List *tlist, bool force_colno, deparse_context *context); static char *get_variable(Var *var, int levelsup, bool showstar, @@ -2056,7 +2056,7 @@ get_select_query_def(Query *query, deparse_context *context, sep = ""; foreach(l, query->sortClause) { - SortClause *srt = (SortClause *) lfirst(l); + SortGroupClause *srt = (SortGroupClause *) lfirst(l); Node *sortexpr; Oid sortcoltype; TypeCacheEntry *typentry; @@ -2178,13 +2178,13 @@ get_basic_select_query(Query *query, deparse_context *context, /* Add the DISTINCT clause if given */ if (query->distinctClause != NIL) { - if (has_distinct_on_clause(query)) + if (query->hasDistinctOn) { appendStringInfo(buf, " DISTINCT ON ("); sep = ""; foreach(l, query->distinctClause) { - SortClause *srt = (SortClause *) lfirst(l); + SortGroupClause *srt = (SortGroupClause *) lfirst(l); appendStringInfoString(buf, sep); get_rule_sortgroupclause(srt, query->targetList, @@ -2219,7 +2219,7 @@ get_basic_select_query(Query *query, deparse_context *context, sep = ""; foreach(l, query->groupClause) { - GroupClause *grp = (GroupClause *) lfirst(l); + SortGroupClause *grp = (SortGroupClause *) lfirst(l); appendStringInfoString(buf, sep); get_rule_sortgroupclause(grp, query->targetList, @@ -2398,7 +2398,7 @@ get_setop_query(Node *setOp, Query *query, deparse_context *context, * Also returns the expression tree, so caller need not find it again. */ static Node * -get_rule_sortgroupclause(SortClause *srt, List *tlist, bool force_colno, +get_rule_sortgroupclause(SortGroupClause *srt, List *tlist, bool force_colno, deparse_context *context) { StringInfo buf = context->buf; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 79362441f8..ed94af6d49 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.158 2008/07/30 17:05:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.159 2008/08/02 21:32:00 tgl Exp $ * * NOTES * Eventually, the index information should go through here, too. @@ -254,11 +254,14 @@ get_compare_function_for_ordering_op(Oid opno, Oid *cmpfunc, bool *reverse) * Get the OID of the datatype-specific btree equality operator * associated with an ordering operator (a "<" or ">" operator). * + * If "reverse" isn't NULL, also set *reverse to FALSE if the operator is "<", + * TRUE if it's ">" + * * Returns InvalidOid if no matching equality operator can be found. * (This indicates that the operator is not a valid ordering operator.) */ Oid -get_equality_op_for_ordering_op(Oid opno) +get_equality_op_for_ordering_op(Oid opno, bool *reverse) { Oid result = InvalidOid; Oid opfamily; @@ -274,6 +277,8 @@ get_equality_op_for_ordering_op(Oid opno) opcintype, opcintype, BTEqualStrategyNumber); + if (reverse) + *reverse = (strategy == BTGreaterStrategyNumber); } return result; @@ -666,36 +671,49 @@ get_op_btree_interpretation(Oid opno, List **opfamilies, List **opstrats) } /* - * ops_in_same_btree_opfamily - * Return TRUE if there exists a btree opfamily containing both operators. - * (This implies that they have compatible notions of equality.) + * equality_ops_are_compatible + * Return TRUE if the two given equality operators have compatible + * semantics. + * + * This is trivially true if they are the same operator. Otherwise, + * we look to see if they can be found in the same btree or hash opfamily. + * Either finding allows us to assume that they have compatible notions + * of equality. (The reason we need to do these pushups is that one might + * be a cross-type operator; for instance int24eq vs int4eq.) */ bool -ops_in_same_btree_opfamily(Oid opno1, Oid opno2) +equality_ops_are_compatible(Oid opno1, Oid opno2) { - bool result = false; + bool result; CatCList *catlist; int i; + /* Easy if they're the same operator */ + if (opno1 == opno2) + return true; + /* * We search through all the pg_amop entries for opno1. */ catlist = SearchSysCacheList(AMOPOPID, 1, ObjectIdGetDatum(opno1), 0, 0, 0); + + result = false; for (i = 0; i < catlist->n_members; i++) { HeapTuple op_tuple = &catlist->members[i]->tuple; Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); - /* must be btree */ - if (op_form->amopmethod != BTREE_AM_OID) - continue; - - if (op_in_opfamily(opno2, op_form->amopfamily)) + /* must be btree or hash */ + if (op_form->amopmethod == BTREE_AM_OID || + op_form->amopmethod == HASH_AM_OID) { - result = true; - break; + if (op_in_opfamily(opno2, op_form->amopfamily)) + { + result = true; + break; + } } } diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 264183af56..49d2a32431 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.473 2008/07/30 19:35:13 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.474 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200807301 +#define CATALOG_VERSION_NO 200808011 #endif diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 79d679b5be..e35356ab55 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.206 2008/03/20 21:42:48 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.207 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -336,8 +336,7 @@ typedef enum NodeTag T_Constraint, T_DefElem, T_RangeTblEntry, - T_SortClause, - T_GroupClause, + T_SortGroupClause, T_FkConstraint, T_PrivGrantee, T_FuncWithArgs, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index fa78b4b188..f1a1e828fc 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.369 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.370 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -108,6 +108,7 @@ typedef struct Query bool hasAggs; /* has aggregates in tlist or havingQual */ bool hasSubLinks; /* has subquery SubLink */ + bool hasDistinctOn; /* distinctClause is from DISTINCT ON */ List *rtable; /* list of range table entries */ FromExpr *jointree; /* table join tree (FROM and WHERE clauses) */ @@ -116,13 +117,13 @@ typedef struct Query List *returningList; /* return-values list (of TargetEntry) */ - List *groupClause; /* a list of GroupClause's */ + List *groupClause; /* a list of SortGroupClause's */ Node *havingQual; /* qualifications applied to groups */ - List *distinctClause; /* a list of SortClause's */ + List *distinctClause; /* a list of SortGroupClause's */ - List *sortClause; /* a list of SortClause's */ + List *sortClause; /* a list of SortGroupClause's */ Node *limitOffset; /* # of result tuples to skip (int8 expr) */ Node *limitCount; /* # of result tuples to return (int8 expr) */ @@ -607,47 +608,58 @@ typedef struct RangeTblEntry } RangeTblEntry; /* - * SortClause - - * representation of ORDER BY clauses + * SortGroupClause - + * representation of ORDER BY, GROUP BY, DISTINCT, DISTINCT ON items + * + * You might think that ORDER BY is only interested in defining ordering, + * and GROUP/DISTINCT are only interested in defining equality. However, + * one way to implement grouping is to sort and then apply a "uniq"-like + * filter. So it's also interesting to keep track of possible sort operators + * for GROUP/DISTINCT, and in particular to try to sort for the grouping + * in a way that will also yield a requested ORDER BY ordering. So we need + * to be able to compare ORDER BY and GROUP/DISTINCT lists, which motivates + * the decision to give them the same representation. * * tleSortGroupRef must match ressortgroupref of exactly one entry of the - * associated targetlist; that is the expression to be sorted (or grouped) by. - * sortop is the OID of the ordering operator (a "<" or ">" operator). - * nulls_first means about what you'd expect. + * query's targetlist; that is the expression to be sorted or grouped by. + * eqop is the OID of the equality operator. + * sortop is the OID of the ordering operator (a "<" or ">" operator), + * or InvalidOid if not available. + * nulls_first means about what you'd expect. If sortop is InvalidOid + * then nulls_first is meaningless and should be set to false. + * + * In an ORDER BY item, all fields must be valid. (The eqop isn't essential + * here, but it's cheap to get it along with the sortop, and requiring it + * to be valid eases comparisons to grouping items.) + * + * In a grouping item, eqop must be valid. If the eqop is a btree equality + * operator, then sortop should be set to a compatible ordering operator. + * We prefer to set eqop/sortop/nulls_first to match any ORDER BY item that + * the query presents for the same tlist item. If there is none, we just + * use the default ordering op for the datatype. * - * SortClauses are also used to identify targets that we will do a "Unique" - * filter step on (for SELECT DISTINCT and SELECT DISTINCT ON). The - * distinctClause list is a list of SortClauses for the expressions to be - * unique-ified. (As per comment for GroupClause, this overspecifies the - * semantics.) In SELECT DISTINCT, the distinctClause list is typically - * longer than the ORDER BY list, while in SELECT DISTINCT ON it's typically - * shorter. The two lists must match up to the end of the shorter one --- - * the parser rearranges the distinctClause if necessary to make this true. - * (This restriction ensures that only one sort step is needed to both - * satisfy the ORDER BY and set up for the Unique step. This is semantically - * necessary for DISTINCT ON, and offers no real drawback for DISTINCT.) + * If the tlist item's type has a hash opclass but no btree opclass, then + * we will set eqop to the hash equality operator, sortop to InvalidOid, + * and nulls_first to false. A grouping item of this kind can only be + * implemented by hashing, and of course it'll never match an ORDER BY item. + * + * A query might have both ORDER BY and DISTINCT (or DISTINCT ON) clauses. + * In SELECT DISTINCT, the distinctClause list is as long or longer than the + * sortClause list, while in SELECT DISTINCT ON it's typically shorter. + * The two lists must match up to the end of the shorter one --- the parser + * rearranges the distinctClause if necessary to make this true. (This + * restriction ensures that only one sort step is needed to both satisfy the + * ORDER BY and set up for the Unique step. This is semantically necessary + * for DISTINCT ON, and presents no real drawback for DISTINCT.) */ -typedef struct SortClause +typedef struct SortGroupClause { NodeTag type; Index tleSortGroupRef; /* reference into targetlist */ - Oid sortop; /* the ordering operator ('<' op) */ - bool nulls_first; /* do NULLs come before normal values? */ -} SortClause; - -/* - * GroupClause - - * representation of GROUP BY clauses - * - * GroupClause is exactly like SortClause except for the nodetag value. - * We have routines that operate interchangeably on both. - * - * XXX SortClause overspecifies the semantics so far as GROUP BY is concerned - * (ditto for DISTINCT). It'd be better to specify an equality operator not - * an ordering operator. However, the two implementations are tightly entwined - * at the moment ... breaking them apart is work for another day. - */ -typedef SortClause GroupClause; + Oid eqop; /* the equality operator ('=' op) */ + Oid sortop; /* the ordering operator ('<' op), or 0 */ + bool nulls_first; /* do NULLs come before normal values? */ +} SortGroupClause; /* * RowMarkClause - diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 59d7af5044..f0c55112cb 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -10,7 +10,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.137 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.138 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -969,14 +969,14 @@ typedef struct CurrentOfExpr * a specific system-generated name (such as "ctid") or NULL; anything else * risks confusing ExecGetJunkAttribute! * - * ressortgroupref is used in the representation of ORDER BY and - * GROUP BY items. Targetlist entries with ressortgroupref=0 are not - * sort/group items. If ressortgroupref>0, then this item is an ORDER BY or - * GROUP BY value. No two entries in a targetlist may have the same nonzero - * ressortgroupref --- but there is no particular meaning to the nonzero - * values, except as tags. (For example, one must not assume that lower - * ressortgroupref means a more significant sort key.) The order of the - * associated SortClause or GroupClause lists determine the semantics. + * ressortgroupref is used in the representation of ORDER BY, GROUP BY, and + * DISTINCT items. Targetlist entries with ressortgroupref=0 are not + * sort/group items. If ressortgroupref>0, then this item is an ORDER BY, + * GROUP BY, and/or DISTINCT target value. No two entries in a targetlist + * may have the same nonzero ressortgroupref --- but there is no particular + * meaning to the nonzero values, except as tags. (For example, one must + * not assume that lower ressortgroupref means a more significant sort key.) + * The order of the associated SortGroupClause lists determine the semantics. * * resorigtbl/resorigcol identify the source of the column, if it is a * simple reference to a column of a base table (or view). If it is not diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index dce10d6506..69a4f4c774 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.90 2008/04/01 00:48:33 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.91 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -63,9 +63,6 @@ extern Relids find_nonnullable_rels(Node *clause); extern bool is_pseudo_constant_clause(Node *clause); extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids); -extern bool has_distinct_clause(Query *query); -extern bool has_distinct_on_clause(Query *query); - extern int NumRelids(Node *clause); extern void CommuteOpExpr(OpExpr *clause); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index e953d66520..8a8966213b 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.49 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.50 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,11 +25,11 @@ extern List *add_to_flat_tlist(List *tlist, List *vars); extern TargetEntry *get_sortgroupref_tle(Index sortref, List *targetList); -extern TargetEntry *get_sortgroupclause_tle(SortClause *sortClause, +extern TargetEntry *get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList); -extern Node *get_sortgroupclause_expr(SortClause *sortClause, +extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList); -extern List *get_sortgrouplist_exprs(List *sortClauses, +extern List *get_sortgrouplist_exprs(List *sgClauses, List *targetList); extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK); diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index 6fc357522e..357a2947cb 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.50 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.51 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,13 +30,14 @@ extern List *transformGroupClause(ParseState *pstate, List *grouplist, List **targetlist, List *sortClause); extern List *transformSortClause(ParseState *pstate, List *orderlist, List **targetlist, bool resolveUnknown); -extern List *transformDistinctClause(ParseState *pstate, List *distinctlist, +extern List *transformDistinctClause(ParseState *pstate, + List **targetlist, List *sortClause); +extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist, List **targetlist, List *sortClause); -extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, - List *sortlist, List *targetlist, - SortByDir sortby_dir, SortByNulls sortby_nulls, - List *sortby_opname, bool resolveUnknown); +extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, + List *grouplist, List *targetlist, + bool requireSortOp, bool resolveUnknown); extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList); diff --git a/src/include/parser/parse_oper.h b/src/include/parser/parse_oper.h index f0ec2d1f08..ad0d65b3be 100644 --- a/src/include/parser/parse_oper.h +++ b/src/include/parser/parse_oper.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/parse_oper.h,v 1.42 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/parser/parse_oper.h,v 1.43 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -45,16 +45,13 @@ extern Operator compatible_oper(ParseState *pstate, List *op, /* currently no need for compatible_left_oper/compatible_right_oper */ -/* Routines for identifying "=", "<", ">" operators for a type */ -extern Operator equality_oper(Oid argtype, bool noError); -extern Operator ordering_oper(Oid argtype, bool noError); -extern Operator reverse_ordering_oper(Oid argtype, bool noError); +/* Routines for identifying "<", "=", ">" operators for a type */ +extern void get_sort_group_operators(Oid argtype, + bool needLT, bool needEQ, bool needGT, + Oid *ltOpr, Oid *eqOpr, Oid *gtOpr); /* Convenience routines for common calls on the above */ extern Oid compatible_oper_opid(List *op, Oid arg1, Oid arg2, bool noError); -extern Oid equality_oper_funcid(Oid argtype); -extern Oid ordering_oper_opid(Oid argtype); -extern Oid reverse_ordering_oper_opid(Oid argtype); /* Extract operator OID or underlying-function OID from an Operator tuple */ extern Oid oprid(Operator op); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 66e035cffa..0538e0ff56 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.124 2008/07/30 17:05:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.125 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,7 +38,7 @@ extern bool get_ordering_op_properties(Oid opno, Oid *opfamily, Oid *opcintype, int16 *strategy); extern bool get_compare_function_for_ordering_op(Oid opno, Oid *cmpfunc, bool *reverse); -extern Oid get_equality_op_for_ordering_op(Oid opno); +extern Oid get_equality_op_for_ordering_op(Oid opno, bool *reverse); extern Oid get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type); extern List *get_mergejoin_opfamilies(Oid opno); extern bool get_compatible_hash_operators(Oid opno, @@ -47,7 +47,7 @@ extern bool get_op_hash_functions(Oid opno, RegProcedure *lhs_procno, RegProcedure *rhs_procno); extern void get_op_btree_interpretation(Oid opno, List **opfamilies, List **opstrats); -extern bool ops_in_same_btree_opfamily(Oid opno1, Oid opno2); +extern bool equality_ops_are_compatible(Oid opno1, Oid opno2); extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum); extern char *get_attname(Oid relid, AttrNumber attnum); -- 2.40.0