From a191a169d6d0b9558da4519e66510c4540204a51 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 10 Jan 2007 18:06:05 +0000 Subject: [PATCH] Change the planner-to-executor API so that the planner tells the executor which comparison operators to use for plan nodes involving tuple comparison (Agg, Group, Unique, SetOp). Formerly the executor looked up the default equality operator for the datatype, which was really pretty shaky, since it's possible that the data being fed to the node is sorted according to some nondefault operator class that could have an incompatible idea of equality. The planner knows what it has sorted by and therefore can provide the right equality operator to use. Also, this change moves a couple of catalog lookups out of the executor and into the planner, which should help startup time for pre-planned queries by some small amount. Modify the planner to remove some other cavalier assumptions about always being able to use the default operators. Also add "nulls first/last" info to the Plan node for a mergejoin --- neither the executor nor the planner can cope yet, but at least the API is in place. 
--- src/backend/executor/execGrouping.c | 49 +++--- src/backend/executor/nodeAgg.c | 19 ++- src/backend/executor/nodeGroup.c | 7 +- src/backend/executor/nodeMergejoin.c | 40 +++-- src/backend/executor/nodeSetOp.c | 7 +- src/backend/executor/nodeUnique.c | 7 +- src/backend/nodes/copyfuncs.c | 16 +- src/backend/nodes/equalfuncs.c | 3 +- src/backend/nodes/outfuncs.c | 54 +++++- src/backend/optimizer/path/costsize.c | 10 +- src/backend/optimizer/path/joinpath.c | 71 +++++--- src/backend/optimizer/plan/createplan.c | 114 ++++++++++--- src/backend/optimizer/plan/planner.c | 92 +++++----- src/backend/optimizer/plan/subselect.c | 41 ++++- src/backend/optimizer/util/clauses.c | 3 +- src/backend/optimizer/util/pathnode.c | 129 +++++++++----- src/backend/parser/parse_clause.c | 5 +- src/backend/utils/cache/lsyscache.c | 216 +++++++++++++++++++++++- src/backend/utils/sort/tuplesort.c | 14 +- src/include/executor/executor.h | 16 +- src/include/nodes/plannodes.h | 25 ++- src/include/nodes/relation.h | 25 +-- src/include/optimizer/pathnode.h | 7 +- src/include/optimizer/planmain.h | 6 +- src/include/utils/lsyscache.h | 9 +- 25 files changed, 722 insertions(+), 263 deletions(-) diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index eed92f6533..f84c1120db 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.22 2007/01/05 22:19:27 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.23 2007/01/10 18:06:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -183,24 +183,22 @@ execTuplesUnequal(TupleTableSlot *slot1, * The result is a palloc'd array. 
*/ FmgrInfo * -execTuplesMatchPrepare(TupleDesc tupdesc, - int numCols, - AttrNumber *matchColIdx) +execTuplesMatchPrepare(int numCols, + Oid *eqOperators) { - FmgrInfo *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); + FmgrInfo *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); int i; for (i = 0; i < numCols; i++) { - AttrNumber att = matchColIdx[i]; - Oid typid = tupdesc->attrs[att - 1]->atttypid; + Oid eq_opr = eqOperators[i]; Oid eq_function; - eq_function = equality_oper_funcid(typid); - fmgr_info(eq_function, &eqfunctions[i]); + eq_function = get_opcode(eq_opr); + fmgr_info(eq_function, &eqFunctions[i]); } - return eqfunctions; + return eqFunctions; } /* @@ -208,40 +206,33 @@ execTuplesMatchPrepare(TupleDesc tupdesc, * Look up the equality and hashing functions needed for a TupleHashTable. * * This is similar to execTuplesMatchPrepare, but we also need to find the - * hash functions associated with the equality operators. *eqfunctions and - * *hashfunctions receive the palloc'd result arrays. + * hash functions associated with the equality operators. *eqFunctions and + * *hashFunctions receive the palloc'd result arrays. 
*/ void -execTuplesHashPrepare(TupleDesc tupdesc, - int numCols, - AttrNumber *matchColIdx, - FmgrInfo **eqfunctions, - FmgrInfo **hashfunctions) +execTuplesHashPrepare(int numCols, + Oid *eqOperators, + FmgrInfo **eqFunctions, + FmgrInfo **hashFunctions) { int i; - *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); - *hashfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); + *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); + *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); for (i = 0; i < numCols; i++) { - AttrNumber att = matchColIdx[i]; - Oid typid = tupdesc->attrs[att - 1]->atttypid; - Operator optup; - Oid eq_opr; + Oid eq_opr = eqOperators[i]; Oid eq_function; Oid hash_function; - optup = equality_oper(typid, false); - eq_opr = oprid(optup); - eq_function = oprfuncid(optup); - ReleaseSysCache(optup); + eq_function = get_opcode(eq_opr); hash_function = get_op_hash_function(eq_opr); if (!OidIsValid(hash_function)) /* should not happen */ elog(ERROR, "could not find hash function for hash operator %u", eq_opr); - fmgr_info(eq_function, &(*eqfunctions)[i]); - fmgr_info(hash_function, &(*hashfunctions)[i]); + fmgr_info(eq_function, &(*eqFunctions)[i]); + fmgr_info(hash_function, &(*hashFunctions)[i]); } } diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 74a00ac892..00fb3b86e7 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -61,7 +61,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.148 2007/01/09 02:14:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.149 2007/01/10 18:06:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1270,16 +1270,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) if (node->numCols > 0) { if (node->aggstrategy == AGG_HASHED) - 
execTuplesHashPrepare(ExecGetScanType(&aggstate->ss), - node->numCols, - node->grpColIdx, + execTuplesHashPrepare(node->numCols, + node->grpOperators, &aggstate->eqfunctions, &aggstate->hashfunctions); else aggstate->eqfunctions = - execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss), - node->numCols, - node->grpColIdx); + execTuplesMatchPrepare(node->numCols, + node->grpOperators); } /* @@ -1519,6 +1517,13 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) &peraggstate->inputtypeLen, &peraggstate->inputtypeByVal); + /* + * Look up the sorting and comparison operators to use. XXX it's + * pretty bletcherous to be making this sort of semantic decision + * in the executor. Probably the parser should decide this and + * record it in the Aggref node ... or at latest, do it in the + * planner. + */ eq_function = equality_oper_funcid(inputTypes[0]); fmgr_info(eq_function, &(peraggstate->equalfn)); peraggstate->sortOperator = ordering_oper_opid(inputTypes[0]); diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index 417adeda25..da4b2bcdb4 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -15,7 +15,7 @@ * locate group boundaries. 
* * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeGroup.c,v 1.66 2007/01/05 22:19:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeGroup.c,v 1.67 2007/01/10 18:06:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -211,9 +211,8 @@ ExecInitGroup(Group *node, EState *estate, int eflags) * Precompute fmgr lookup data for inner loop */ grpstate->eqfunctions = - execTuplesMatchPrepare(ExecGetScanType(&grpstate->ss), - node->numCols, - node->grpColIdx); + execTuplesMatchPrepare(node->numCols, + node->grpOperators); return grpstate; } diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 63e9035f54..a5f08c28ef 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.84 2007/01/05 22:19:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.85 2007/01/10 18:06:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -156,35 +156,36 @@ typedef struct MergeJoinClauseData * the two expressions from the original clause. * * In addition to the expressions themselves, the planner passes the btree - * opfamily OID and btree strategy number (BTLessStrategyNumber or - * BTGreaterStrategyNumber) that identify the intended merge semantics for - * each merge key. The mergejoinable operator is an equality operator in - * this opfamily, and the two inputs are guaranteed to be ordered in either - * increasing or decreasing (respectively) order according to this opfamily. - * This allows us to obtain the needed comparison functions from the opfamily. + * opfamily OID, btree strategy number (BTLessStrategyNumber or + * BTGreaterStrategyNumber), and nulls-first flag that identify the intended + * merge semantics for each merge key. 
The mergejoinable operator is an + * equality operator in this opfamily, and the two inputs are guaranteed to be + * ordered in either increasing or decreasing (respectively) order according + * to this opfamily. This allows us to obtain the needed comparison functions + * from the opfamily. */ static MergeJoinClause -MJExamineQuals(List *mergeclauses, List *mergefamilies, List *mergestrategies, +MJExamineQuals(List *mergeclauses, + Oid *mergefamilies, + int *mergestrategies, + bool *mergenullsfirst, PlanState *parent) { MergeJoinClause clauses; int nClauses = list_length(mergeclauses); int iClause; ListCell *cl; - ListCell *cf; - ListCell *cs; clauses = (MergeJoinClause) palloc0(nClauses * sizeof(MergeJoinClauseData)); iClause = 0; - cf = list_head(mergefamilies); - cs = list_head(mergestrategies); foreach(cl, mergeclauses) { OpExpr *qual = (OpExpr *) lfirst(cl); MergeJoinClause clause = &clauses[iClause]; - Oid opfamily; - StrategyNumber opstrategy; + Oid opfamily = mergefamilies[iClause]; + StrategyNumber opstrategy = mergestrategies[iClause]; + bool nulls_first = mergenullsfirst[iClause]; int op_strategy; Oid op_lefttype; Oid op_righttype; @@ -192,14 +193,10 @@ MJExamineQuals(List *mergeclauses, List *mergefamilies, List *mergestrategies, RegProcedure cmpproc; AclResult aclresult; - opfamily = lfirst_oid(cf); - cf = lnext(cf); - opstrategy = lfirst_int(cs); - cs = lnext(cs); - /* Later we'll support both ascending and descending sort... 
*/ Assert(opstrategy == BTLessStrategyNumber); clause->cmpstrategy = MERGEFUNC_CMP; + Assert(!nulls_first); if (!IsA(qual, OpExpr)) elog(ERROR, "mergejoin clause is not an OpExpr"); @@ -1525,8 +1522,9 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) */ mergestate->mj_NumClauses = list_length(node->mergeclauses); mergestate->mj_Clauses = MJExamineQuals(node->mergeclauses, - node->mergefamilies, - node->mergestrategies, + node->mergeFamilies, + node->mergeStrategies, + node->mergeNullsFirst, (PlanState *) mergestate); /* diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 6d7c9e3b8b..761f7fc594 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -21,7 +21,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.23 2007/01/05 22:19:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.24 2007/01/10 18:06:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -267,9 +267,8 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags) * Precompute fmgr lookup data for inner loop */ setopstate->eqfunctions = - execTuplesMatchPrepare(ExecGetResultType(&setopstate->ps), - node->numCols, - node->dupColIdx); + execTuplesMatchPrepare(node->numCols, + node->dupOperators); return setopstate; } diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index 5e0edfb57b..6d64c3e334 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeUnique.c,v 1.54 2007/01/05 22:19:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeUnique.c,v 1.55 2007/01/10 18:06:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -159,9 +159,8 @@ ExecInitUnique(Unique *node, EState *estate, int eflags) * Precompute fmgr lookup data for 
inner loop */ uniquestate->eqfunctions = - execTuplesMatchPrepare(ExecGetResultType(&uniquestate->ps), - node->numCols, - node->uniqColIdx); + execTuplesMatchPrepare(node->numCols, + node->uniqOperators); return uniquestate; } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 08817e54a9..4943bb2029 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.360 2007/01/09 02:14:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.361 2007/01/10 18:06:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -439,6 +439,7 @@ static MergeJoin * _copyMergeJoin(MergeJoin *from) { MergeJoin *newnode = makeNode(MergeJoin); + int numCols; /* * copy node superclass fields @@ -449,8 +450,10 @@ _copyMergeJoin(MergeJoin *from) * copy remainder of node */ COPY_NODE_FIELD(mergeclauses); - COPY_NODE_FIELD(mergefamilies); - COPY_NODE_FIELD(mergestrategies); + numCols = list_length(from->mergeclauses); + COPY_POINTER_FIELD(mergeFamilies, numCols * sizeof(Oid)); + COPY_POINTER_FIELD(mergeStrategies, numCols * sizeof(int)); + COPY_POINTER_FIELD(mergeNullsFirst, numCols * sizeof(bool)); return newnode; } @@ -528,6 +531,7 @@ _copyGroup(Group *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); + COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); return newnode; } @@ -545,7 +549,10 @@ _copyAgg(Agg *from) COPY_SCALAR_FIELD(aggstrategy); COPY_SCALAR_FIELD(numCols); if (from->numCols > 0) + { COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); + COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + } COPY_SCALAR_FIELD(numGroups); return newnode; @@ -569,6 +576,7 @@ _copyUnique(Unique *from) */ COPY_SCALAR_FIELD(numCols); 
COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber)); + COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid)); return newnode; } @@ -612,6 +620,7 @@ _copySetOp(SetOp *from) COPY_SCALAR_FIELD(cmd); COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); + COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); COPY_SCALAR_FIELD(flagColIdx); return newnode; @@ -1356,6 +1365,7 @@ _copyInClauseInfo(InClauseInfo *from) COPY_BITMAPSET_FIELD(lefthand); COPY_BITMAPSET_FIELD(righthand); COPY_NODE_FIELD(sub_targetlist); + COPY_NODE_FIELD(in_operators); return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index e1b3bbbbaa..fafd8ae546 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.294 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.295 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -638,6 +638,7 @@ _equalInClauseInfo(InClauseInfo *a, InClauseInfo *b) COMPARE_BITMAPSET_FIELD(lefthand); COMPARE_BITMAPSET_FIELD(righthand); COMPARE_NODE_FIELD(sub_targetlist); + COMPARE_NODE_FIELD(in_operators); return true; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 1ffaa08dfe..6137c39af0 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.292 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.293 2007/01/10 18:06:03 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -437,13 +437,28 @@ _outNestLoop(StringInfo str, NestLoop *node) static void 
_outMergeJoin(StringInfo str, MergeJoin *node) { + int numCols; + int i; + WRITE_NODE_TYPE("MERGEJOIN"); _outJoinPlanInfo(str, (Join *) node); WRITE_NODE_FIELD(mergeclauses); - WRITE_NODE_FIELD(mergefamilies); - WRITE_NODE_FIELD(mergestrategies); + + numCols = list_length(node->mergeclauses); + + appendStringInfo(str, " :mergeFamilies"); + for (i = 0; i < numCols; i++) + appendStringInfo(str, " %u", node->mergeFamilies[i]); + + appendStringInfo(str, " :mergeStrategies"); + for (i = 0; i < numCols; i++) + appendStringInfo(str, " %d", node->mergeStrategies[i]); + + appendStringInfo(str, " :mergeNullsFirst"); + for (i = 0; i < numCols; i++) + appendStringInfo(str, " %d", (int) node->mergeNullsFirst[i]); } static void @@ -482,6 +497,10 @@ _outGroup(StringInfo str, Group *node) appendStringInfo(str, " :grpColIdx"); for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %d", node->grpColIdx[i]); + + appendStringInfo(str, " :grpOperators"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %u", node->grpOperators[i]); } static void @@ -530,6 +549,10 @@ _outUnique(StringInfo str, Unique *node) appendStringInfo(str, " :uniqColIdx"); for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %d", node->uniqColIdx[i]); + + appendStringInfo(str, " :uniqOperators"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %u", node->uniqOperators[i]); } static void @@ -548,6 +571,10 @@ _outSetOp(StringInfo str, SetOp *node) for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %d", node->dupColIdx[i]); + appendStringInfo(str, " :dupOperators"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %u", node->dupOperators[i]); + WRITE_INT_FIELD(flagColIdx); } @@ -1169,13 +1196,29 @@ _outNestPath(StringInfo str, NestPath *node) static void _outMergePath(StringInfo str, MergePath *node) { + int numCols; + int i; + WRITE_NODE_TYPE("MERGEPATH"); _outJoinPathInfo(str, (JoinPath *) node); WRITE_NODE_FIELD(path_mergeclauses); - 
WRITE_NODE_FIELD(path_mergefamilies); - WRITE_NODE_FIELD(path_mergestrategies); + + numCols = list_length(node->path_mergeclauses); + + appendStringInfo(str, " :path_mergeFamilies"); + for (i = 0; i < numCols; i++) + appendStringInfo(str, " %u", node->path_mergeFamilies[i]); + + appendStringInfo(str, " :path_mergeStrategies"); + for (i = 0; i < numCols; i++) + appendStringInfo(str, " %d", node->path_mergeStrategies[i]); + + appendStringInfo(str, " :path_mergeNullsFirst"); + for (i = 0; i < numCols; i++) + appendStringInfo(str, " %d", (int) node->path_mergeNullsFirst[i]); + WRITE_NODE_FIELD(outersortkeys); WRITE_NODE_FIELD(innersortkeys); } @@ -1325,6 +1368,7 @@ _outInClauseInfo(StringInfo str, InClauseInfo *node) WRITE_BITMAPSET_FIELD(lefthand); WRITE_BITMAPSET_FIELD(righthand); WRITE_NODE_FIELD(sub_targetlist); + WRITE_NODE_FIELD(in_operators); } static void diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 69d1a890e4..e1c003b504 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.173 2007/01/08 16:09:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.174 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1258,8 +1258,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; List *mergeclauses = path->path_mergeclauses; - List *mergefamilies = path->path_mergefamilies; - List *mergestrategies = path->path_mergestrategies; + Oid *mergeFamilies = path->path_mergeFamilies; + int *mergeStrategies = path->path_mergeStrategies; List *outersortkeys = path->outersortkeys; List *innersortkeys = path->innersortkeys; Cost startup_cost = 0; 
@@ -1357,8 +1357,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) firstclause = (RestrictInfo *) linitial(mergeclauses); if (firstclause->left_mergescansel < 0) /* not computed yet? */ mergejoinscansel(root, (Node *) firstclause->clause, - linitial_oid(mergefamilies), - linitial_int(mergestrategies), + mergeFamilies[0], + mergeStrategies[0], &firstclause->left_mergescansel, &firstclause->right_mergescansel); diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index bde38af0dd..2885b02154 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.109 2007/01/05 22:19:31 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.110 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,8 +40,10 @@ static List *select_mergejoin_clauses(RelOptInfo *joinrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype); -static void build_mergejoin_strat_lists(List *mergeclauses, - List **mergefamilies, List **mergestrategies); +static void build_mergejoin_strat_arrays(List *mergeclauses, + Oid **mergefamilies, + int **mergestrategies, + bool **mergenullsfirst); /* @@ -228,8 +230,9 @@ sort_inner_and_outer(PlannerInfo *root, List *front_pathkey = (List *) lfirst(l); List *cur_pathkeys; List *cur_mergeclauses; - List *mergefamilies; - List *mergestrategies; + Oid *mergefamilies; + int *mergestrategies; + bool *mergenullsfirst; List *outerkeys; List *innerkeys; List *merge_pathkeys; @@ -275,8 +278,10 @@ sort_inner_and_outer(PlannerInfo *root, outerkeys); /* Build opfamily info for execution */ - build_mergejoin_strat_lists(cur_mergeclauses, - &mergefamilies, &mergestrategies); + build_mergejoin_strat_arrays(cur_mergeclauses, + &mergefamilies, + &mergestrategies, + &mergenullsfirst); /* * And now we can make the 
path. @@ -292,6 +297,7 @@ sort_inner_and_outer(PlannerInfo *root, cur_mergeclauses, mergefamilies, mergestrategies, + mergenullsfirst, outerkeys, innerkeys)); } @@ -421,8 +427,9 @@ match_unsorted_outer(PlannerInfo *root, Path *outerpath = (Path *) lfirst(l); List *merge_pathkeys; List *mergeclauses; - List *mergefamilies; - List *mergestrategies; + Oid *mergefamilies; + int *mergestrategies; + bool *mergenullsfirst; List *innersortkeys; List *trialsortkeys; Path *cheapest_startup_inner; @@ -530,8 +537,10 @@ match_unsorted_outer(PlannerInfo *root, innerrel); /* Build opfamily info for execution */ - build_mergejoin_strat_lists(mergeclauses, - &mergefamilies, &mergestrategies); + build_mergejoin_strat_arrays(mergeclauses, + &mergefamilies, + &mergestrategies, + &mergenullsfirst); /* * Generate a mergejoin on the basis of sorting the cheapest inner. @@ -550,6 +559,7 @@ match_unsorted_outer(PlannerInfo *root, mergeclauses, mergefamilies, mergestrategies, + mergenullsfirst, NIL, innersortkeys)); @@ -610,8 +620,10 @@ match_unsorted_outer(PlannerInfo *root, newclauses = mergeclauses; /* Build opfamily info for execution */ - build_mergejoin_strat_lists(newclauses, - &mergefamilies, &mergestrategies); + build_mergejoin_strat_arrays(newclauses, + &mergefamilies, + &mergestrategies, + &mergenullsfirst); add_path(joinrel, (Path *) create_mergejoin_path(root, @@ -624,6 +636,7 @@ match_unsorted_outer(PlannerInfo *root, newclauses, mergefamilies, mergestrategies, + mergenullsfirst, NIL, NIL)); cheapest_total_inner = innerpath; @@ -661,8 +674,10 @@ match_unsorted_outer(PlannerInfo *root, } /* Build opfamily info for execution */ - build_mergejoin_strat_lists(newclauses, - &mergefamilies, &mergestrategies); + build_mergejoin_strat_arrays(newclauses, + &mergefamilies, + &mergestrategies, + &mergenullsfirst); add_path(joinrel, (Path *) create_mergejoin_path(root, @@ -675,6 +690,7 @@ match_unsorted_outer(PlannerInfo *root, newclauses, mergefamilies, mergestrategies, + 
mergenullsfirst, NIL, NIL)); } @@ -981,20 +997,26 @@ select_mergejoin_clauses(RelOptInfo *joinrel, } /* - * Temporary hack to build opfamily and strategy lists needed for mergejoin + * Temporary hack to build opfamily and strategy info needed for mergejoin * by the executor. We need to rethink the planner's handling of merge * planning so that it can deal with multiple possible merge orders, but * that's not done yet. */ static void -build_mergejoin_strat_lists(List *mergeclauses, - List **mergefamilies, List **mergestrategies) +build_mergejoin_strat_arrays(List *mergeclauses, + Oid **mergefamilies, + int **mergestrategies, + bool **mergenullsfirst) { + int nClauses = list_length(mergeclauses); + int i; ListCell *l; - *mergefamilies = NIL; - *mergestrategies = NIL; + *mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid)); + *mergestrategies = (int *) palloc(nClauses * sizeof(int)); + *mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool)); + i = 0; foreach(l, mergeclauses) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); @@ -1003,11 +1025,16 @@ build_mergejoin_strat_lists(List *mergeclauses, * We do not need to worry about whether the mergeclause will be * commuted at runtime --- it's the same opfamily either way. 
*/ - *mergefamilies = lappend_oid(*mergefamilies, restrictinfo->mergeopfamily); + (*mergefamilies)[i] = restrictinfo->mergeopfamily; /* * For the moment, strategy must always be LessThan --- see * hack version of get_op_mergejoin_info */ - *mergestrategies = lappend_int(*mergestrategies, BTLessStrategyNumber); + (*mergestrategies)[i] = BTLessStrategyNumber; + + /* And we only allow NULLS LAST, too */ + (*mergenullsfirst)[i] = false; + + i++; } } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 8f1d8e81cc..9a8204392a 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.220 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.221 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -113,7 +113,10 @@ static HashJoin *make_hashjoin(List *tlist, static Hash *make_hash(Plan *lefttree); static MergeJoin *make_mergejoin(List *tlist, List *joinclauses, List *otherclauses, - List *mergeclauses, List *mergefamilies, List *mergestrategies, + List *mergeclauses, + Oid *mergefamilies, + int *mergestrategies, + bool *mergenullsfirst, Plan *lefttree, Plan *righttree, JoinType jointype); static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols, @@ -595,6 +598,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) Plan *plan; Plan *subplan; List *uniq_exprs; + List *in_operators; List *newtlist; int nextresno; bool newitems; @@ -626,10 +630,12 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) * To find the correct list of values to unique-ify, we look in the * information saved for IN expressions. If this code is ever used in * other scenarios, some other way of finding what to unique-ify will - * be needed. + * be needed. 
The IN clause's operators are needed too, since they + * determine what the meaning of "unique" is in this context. *---------- */ uniq_exprs = NIL; /* just to keep compiler quiet */ + in_operators = NIL; foreach(l, root->in_info_list) { InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); @@ -637,6 +643,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) if (bms_equal(ininfo->righthand, best_path->path.parent->relids)) { uniq_exprs = ininfo->sub_targetlist; + in_operators = ininfo->in_operators; break; } } @@ -687,8 +694,8 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) newtlist = subplan->targetlist; numGroupCols = list_length(uniq_exprs); groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); - groupColPos = 0; + groupColPos = 0; foreach(l, uniq_exprs) { Node *uniqexpr = lfirst(l); @@ -703,9 +710,30 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) if (best_path->umethod == UNIQUE_PATH_HASH) { long numGroups; + Oid *groupOperators; numGroups = (long) Min(best_path->rows, (double) LONG_MAX); + /* + * Get the (presumed hashable) equality operators for the Agg node + * to use. Normally these are the same as the IN clause operators, + * but if those are cross-type operators then the equality operators + * are the ones for the IN clause operators' RHS datatype. 
+ */ + groupOperators = (Oid *) palloc(numGroupCols * sizeof(Oid)); + groupColPos = 0; + foreach(l, in_operators) + { + Oid in_oper = lfirst_oid(l); + Oid eq_oper; + + eq_oper = get_compatible_hash_operator(in_oper, false); + if (!OidIsValid(eq_oper)) /* shouldn't happen */ + elog(ERROR, "could not find compatible hash operator for operator %u", + in_oper); + groupOperators[groupColPos++] = eq_oper; + } + /* * Since the Agg node is going to project anyway, we can give it the * minimum output tlist, without any stuff we might have added to the @@ -717,6 +745,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) AGG_HASHED, numGroupCols, groupColIdx, + groupOperators, numGroups, 0, subplan); @@ -725,18 +754,29 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) { List *sortList = NIL; - for (groupColPos = 0; groupColPos < numGroupCols; groupColPos++) + /* Create an ORDER BY list to sort the input compatibly */ + groupColPos = 0; + foreach(l, in_operators) { + Oid in_oper = lfirst_oid(l); + Oid sortop; TargetEntry *tle; + SortClause *sortcl; + sortop = get_ordering_op_for_equality_op(in_oper, false); + if (!OidIsValid(sortop)) /* shouldn't happen */ + elog(ERROR, "could not find ordering operator for equality operator %u", + in_oper); tle = get_tle_by_resno(subplan->targetlist, groupColIdx[groupColPos]); Assert(tle != NULL); - sortList = addTargetToSortList(NULL, tle, - sortList, subplan->targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, false); + sortcl = makeNode(SortClause); + sortcl->tleSortGroupRef = assignSortGroupRef(tle, + subplan->targetlist); + sortcl->sortop = sortop; + sortcl->nulls_first = false; + sortList = lappend(sortList, sortcl); + groupColPos++; } plan = (Plan *) make_sort_from_sortclauses(root, sortList, subplan); plan = (Plan *) make_unique(plan, sortList); @@ -1542,8 +1582,9 @@ create_mergejoin_plan(PlannerInfo *root, joinclauses, otherclauses, mergeclauses, - best_path->path_mergefamilies, - 
best_path->path_mergestrategies, + best_path->path_mergeFamilies, + best_path->path_mergeStrategies, + best_path->path_mergeNullsFirst, outer_plan, inner_plan, best_path->jpath.jointype); @@ -2335,8 +2376,9 @@ make_mergejoin(List *tlist, List *joinclauses, List *otherclauses, List *mergeclauses, - List *mergefamilies, - List *mergestrategies, + Oid *mergefamilies, + int *mergestrategies, + bool *mergenullsfirst, Plan *lefttree, Plan *righttree, JoinType jointype) @@ -2350,8 +2392,9 @@ make_mergejoin(List *tlist, plan->lefttree = lefttree; plan->righttree = righttree; node->mergeclauses = mergeclauses; - node->mergefamilies = mergefamilies; - node->mergestrategies = mergestrategies; + node->mergeFamilies = mergefamilies; + node->mergeStrategies = mergestrategies; + node->mergeNullsFirst = mergenullsfirst; node->join.jointype = jointype; node->join.joinqual = joinclauses; @@ -2613,7 +2656,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree) * This might look like it could be merged with make_sort_from_sortclauses, * but presently we *must* use the grpColIdx[] array to locate sort columns, * because the child plan's tlist is not marked with ressortgroupref info - * appropriate to the grouping node. So, only the sort direction info + * appropriate to the grouping node. So, only the sort ordering info * is used from the GroupClause entries. 
*/ Sort * @@ -2716,7 +2759,7 @@ materialize_finished_plan(Plan *subplan) Agg * make_agg(PlannerInfo *root, List *tlist, List *qual, AggStrategy aggstrategy, - int numGroupCols, AttrNumber *grpColIdx, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, long numGroups, int numAggs, Plan *lefttree) { @@ -2728,6 +2771,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, node->aggstrategy = aggstrategy; node->numCols = numGroupCols; node->grpColIdx = grpColIdx; + node->grpOperators = grpOperators; node->numGroups = numGroups; copy_plan_costsize(plan, lefttree); /* only care about copying size */ @@ -2784,6 +2828,7 @@ make_group(PlannerInfo *root, List *qual, int numGroupCols, AttrNumber *grpColIdx, + Oid *grpOperators, double numGroups, Plan *lefttree) { @@ -2794,6 +2839,7 @@ make_group(PlannerInfo *root, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; + node->grpOperators = grpOperators; copy_plan_costsize(plan, lefttree); /* only care about copying size */ cost_group(&group_path, root, @@ -2841,7 +2887,8 @@ make_group(PlannerInfo *root, /* * distinctList is a list of SortClauses, identifying the targetlist items - * that should be considered by the Unique filter. + * that should be considered by the Unique filter. The input path must + * already be sorted accordingly. 
*/ Unique * make_unique(Plan *lefttree, List *distinctList) @@ -2851,6 +2898,7 @@ make_unique(Plan *lefttree, List *distinctList) int numCols = list_length(distinctList); int keyno = 0; AttrNumber *uniqColIdx; + Oid *uniqOperators; ListCell *slitem; copy_plan_costsize(plan, lefttree); @@ -2874,28 +2922,37 @@ make_unique(Plan *lefttree, List *distinctList) plan->righttree = NULL; /* - * convert SortClause list into array of attr indexes, as wanted by exec + * convert SortClause list into arrays of attr indexes and equality + * operators, as wanted by executor */ Assert(numCols > 0); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { SortClause *sortcl = (SortClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); - uniqColIdx[keyno++] = tle->resno; + uniqColIdx[keyno] = tle->resno; + uniqOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop); + if (!OidIsValid(uniqOperators[keyno])) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + sortcl->sortop); + keyno++; } node->numCols = numCols; node->uniqColIdx = uniqColIdx; + node->uniqOperators = uniqOperators; return node; } /* * distinctList is a list of SortClauses, identifying the targetlist items - * that should be considered by the SetOp filter. + * that should be considered by the SetOp filter. The input path must + * already be sorted accordingly. 
*/ SetOp * make_setop(SetOpCmd cmd, Plan *lefttree, @@ -2906,6 +2963,7 @@ make_setop(SetOpCmd cmd, Plan *lefttree, int numCols = list_length(distinctList); int keyno = 0; AttrNumber *dupColIdx; + Oid *dupOperators; ListCell *slitem; copy_plan_costsize(plan, lefttree); @@ -2930,22 +2988,30 @@ make_setop(SetOpCmd cmd, Plan *lefttree, plan->righttree = NULL; /* - * convert SortClause list into array of attr indexes, as wanted by exec + * convert SortClause list into arrays of attr indexes and equality + * operators, as wanted by executor */ Assert(numCols > 0); dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { SortClause *sortcl = (SortClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); - dupColIdx[keyno++] = tle->resno; + dupColIdx[keyno] = tle->resno; + dupOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop); + if (!OidIsValid(dupOperators[keyno])) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + sortcl->sortop); + keyno++; } node->cmd = cmd; node->numCols = numCols; node->dupColIdx = dupColIdx; + node->dupOperators = dupOperators; node->flagColIdx = flagColIdx; return node; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a4de2dd36a..5fcfc58bf9 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.210 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.211 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,6 +38,7 @@ #include "parser/parse_expr.h" #include "parser/parse_oper.h" #include "parser/parsetree.h" +#include "utils/lsyscache.h" #include "utils/syscache.h" 
@@ -62,10 +63,11 @@ static bool is_dummy_plan(Plan *plan); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); +static Oid *extract_grouping_ops(List *groupClause); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, Path *cheapest_path, Path *sorted_path, - double dNumGroups, AggClauseCounts *agg_counts); -static bool hash_safe_grouping(PlannerInfo *root); + Oid *groupOperators, double dNumGroups, + AggClauseCounts *agg_counts); static List *make_subplanTargetList(PlannerInfo *root, List *tlist, AttrNumber **groupColIdx, bool *need_tlist_eval); static void locate_grouping_columns(PlannerInfo *root, @@ -750,6 +752,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) List *sub_tlist; List *group_pathkeys; AttrNumber *groupColIdx = NULL; + Oid *groupOperators = NULL; bool need_tlist_eval = true; QualCost tlist_cost; Path *cheapest_path; @@ -829,14 +832,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) sort_pathkeys = root->sort_pathkeys; /* - * If grouping, decide whether we want to use hashed grouping. + * If grouping, extract the grouping operators and decide whether we + * want to use hashed grouping. */ if (parse->groupClause) { + groupOperators = extract_grouping_ops(parse->groupClause); use_hashed_grouping = choose_hashed_grouping(root, tuple_fraction, cheapest_path, sorted_path, - dNumGroups, &agg_counts); + groupOperators, dNumGroups, + &agg_counts); /* Also convert # groups to long int --- but 'ware overflow! 
*/ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); @@ -956,6 +962,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) AGG_HASHED, numGroupCols, groupColIdx, + groupOperators, numGroups, agg_counts.numAggs, result_plan); @@ -999,6 +1006,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) aggstrategy, numGroupCols, groupColIdx, + groupOperators, numGroups, agg_counts.numAggs, result_plan); @@ -1027,6 +1035,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) (List *) parse->havingQual, numGroupCols, groupColIdx, + groupOperators, dNumGroups, result_plan); /* The Group node won't change sort ordering */ @@ -1337,13 +1346,42 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction, return tuple_fraction; } +/* + * extract_grouping_ops - make an array of the equality operator OIDs + * for the GROUP BY clause + */ +static Oid * +extract_grouping_ops(List *groupClause) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *groupOperators; + ListCell *glitem; + + groupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + GroupClause *groupcl = (GroupClause *) lfirst(glitem); + + groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop); + if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + groupcl->sortop); + colno++; + } + + return groupOperators; +} + /* * choose_hashed_grouping - should we use hashed grouping? 
*/ static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, Path *cheapest_path, Path *sorted_path, - double dNumGroups, AggClauseCounts *agg_counts) + Oid *groupOperators, double dNumGroups, + AggClauseCounts *agg_counts) { int numGroupCols = list_length(root->parse->groupClause); double cheapest_path_rows; @@ -1352,10 +1390,13 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, List *current_pathkeys; Path hashed_p; Path sorted_p; + int i; /* * Check can't-do-it conditions, including whether the grouping operators - * are hashjoinable. + * are hashjoinable. (We assume hashing is OK if they are marked + * oprcanhash. If there isn't actually a supporting hash function, + * the executor will complain at runtime.) * * Executor doesn't support hashed aggregation with DISTINCT aggregates. * (Doing so would imply storing *all* the input values in the hash table, @@ -1365,8 +1406,11 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, return false; if (agg_counts->numDistinctAggs != 0) return false; - if (!hash_safe_grouping(root)) - return false; + for (i = 0; i < numGroupCols; i++) + { + if (!op_hashjoinable(groupOperators[i])) + return false; + } /* * Don't do it if it doesn't look like the hashtable will fit into @@ -1471,36 +1515,6 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, return false; } -/* - * hash_safe_grouping - are grouping operators hashable? - * - * We assume hashed aggregation will work if the datatype's equality operator - * is marked hashjoinable. 
- */ -static bool -hash_safe_grouping(PlannerInfo *root) -{ - ListCell *gl; - - foreach(gl, root->parse->groupClause) - { - GroupClause *grpcl = (GroupClause *) lfirst(gl); - TargetEntry *tle = get_sortgroupclause_tle(grpcl, - root->parse->targetList); - Operator optup; - bool oprcanhash; - - optup = equality_oper(exprType((Node *) tle->expr), true); - if (!optup) - return false; - oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash; - ReleaseSysCache(optup); - if (!oprcanhash) - return false; - } - return true; -} - /*--------------- * make_subplanTargetList * Generate appropriate target list when grouping is required. diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index ed76612dc0..7339445e04 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.116 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.117 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -671,11 +671,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) { Query *parse = root->parse; Query *subselect = (Query *) sublink->subselect; + List *in_operators; Relids left_varnos; int rtindex; RangeTblEntry *rte; RangeTblRef *rtr; InClauseInfo *ininfo; + Node *result; /* * The sublink type must be "= ANY" --- that is, an IN operator. 
We @@ -689,15 +691,31 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) return NULL; if (sublink->testexpr && IsA(sublink->testexpr, OpExpr)) { + Oid opno = ((OpExpr *) sublink->testexpr)->opno; List *opfamilies; List *opstrats; - get_op_btree_interpretation(((OpExpr *) sublink->testexpr)->opno, - &opfamilies, &opstrats); + get_op_btree_interpretation(opno, &opfamilies, &opstrats); if (!list_member_int(opstrats, ROWCOMPARE_EQ)) return NULL; + in_operators = list_make1_oid(opno); + } + else if (and_clause(sublink->testexpr)) + { + ListCell *lc; + + /* OK, but we need to extract the per-column operator OIDs */ + in_operators = NIL; + foreach(lc, ((BoolExpr *) sublink->testexpr)->args) + { + OpExpr *op = (OpExpr *) lfirst(lc); + + if (!IsA(op, OpExpr)) /* probably shouldn't happen */ + return NULL; + in_operators = lappend_oid(in_operators, op->opno); + } } - else if (!and_clause(sublink->testexpr)) + else return NULL; /* @@ -745,16 +763,23 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) ininfo = makeNode(InClauseInfo); ininfo->lefthand = left_varnos; ininfo->righthand = bms_make_singleton(rtindex); - root->in_info_list = lappend(root->in_info_list, ininfo); + ininfo->in_operators = in_operators; /* * Build the result qual expression. As a side effect, * ininfo->sub_targetlist is filled with a list of Vars representing the * subselect outputs. 
*/ - return convert_testexpr(sublink->testexpr, - rtindex, - &ininfo->sub_targetlist); + result = convert_testexpr(sublink->testexpr, + rtindex, + &ininfo->sub_targetlist); + + Assert(list_length(in_operators) == list_length(ininfo->sub_targetlist)); + + /* Add the completed node to the query's list */ + root->in_info_list = lappend(root->in_info_list, ininfo); + + return result; } /* diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 3250beafb6..4858f985cf 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.228 2007/01/09 02:14:13 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.229 2007/01/10 18:06:04 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -3995,6 +3995,7 @@ expression_tree_mutator(Node *node, FLATCOPY(newnode, ininfo, InClauseInfo); MUTATE(newnode->sub_targetlist, ininfo->sub_targetlist, List *); + /* Assume we need not make a copy of in_operators list */ return (Node *) newnode; } break; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e1390aa111..8ee6346e5b 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.135 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.136 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,12 +28,14 @@ #include "parser/parsetree.h" #include "utils/memutils.h" #include "utils/selfuncs.h" +#include "utils/lsyscache.h" #include "utils/syscache.h" static List *translate_sub_tlist(List *tlist, int relid); -static bool query_is_distinct_for(Query *query, List *colnos); -static bool hash_safe_tlist(List *tlist); +static 
bool query_is_distinct_for(Query *query, List *colnos, List *opids); +static Oid distinct_col_search(int colno, List *colnos, List *opids); +static bool hash_safe_operators(List *opids); /***************************************************************************** @@ -733,6 +735,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) Path agg_path; /* dummy for result of cost_agg */ MemoryContext oldcontext; List *sub_targetlist; + List *in_operators; ListCell *l; int numCols; @@ -769,9 +772,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) /* * Try to identify the targetlist that will actually be unique-ified. In * current usage, this routine is only used for sub-selects of IN clauses, - * so we should be able to find the tlist in in_info_list. + * so we should be able to find the tlist in in_info_list. Get the IN + * clause's operators, too, because they determine what "unique" means. */ sub_targetlist = NIL; + in_operators = NIL; foreach(l, root->in_info_list) { InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); @@ -779,6 +784,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) if (bms_equal(ininfo->righthand, rel->relids)) { sub_targetlist = ininfo->sub_targetlist; + in_operators = ininfo->in_operators; break; } } @@ -801,7 +807,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid); if (sub_tlist_colnos && - query_is_distinct_for(rte->subquery, sub_tlist_colnos)) + query_is_distinct_for(rte->subquery, + sub_tlist_colnos, in_operators)) { pathnode->umethod = UNIQUE_PATH_NOOP; pathnode->rows = rel->rows; @@ -847,11 +854,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) /* * Is it safe to use a hashed implementation? If so, estimate and compare - * costs. We only try this if we know the targetlist for sure (else we - * can't be sure about the datatypes involved). + * costs. 
We only try this if we know the IN operators, else we can't + * check their hashability. */ pathnode->umethod = UNIQUE_PATH_SORT; - if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist)) + if (enable_hashagg && in_operators && hash_safe_operators(in_operators)) { /* * Estimate the overhead per hashtable entry at 64 bytes (same as in @@ -924,16 +931,25 @@ translate_sub_tlist(List *tlist, int relid) * * colnos is an integer list of output column numbers (resno's). We are * interested in whether rows consisting of just these columns are certain - * to be distinct. + * to be distinct. "Distinctness" is defined according to whether the + * corresponding upper-level equality operators listed in opids would think + * the values are distinct. (Note: the opids entries could be cross-type + * operators, and thus not exactly the equality operators that the subquery + * would use itself. We assume that the subquery is compatible if these + * operators appear in the same btree opfamily as the ones the subquery uses.) */ static bool -query_is_distinct_for(Query *query, List *colnos) +query_is_distinct_for(Query *query, List *colnos, List *opids) { ListCell *l; + Oid opid; + + Assert(list_length(colnos) == list_length(opids)); /* * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the - * columns in the DISTINCT clause appear in colnos. + * columns in the DISTINCT clause appear in colnos and operator + * semantics match. */ if (query->distinctClause) { @@ -943,7 +959,9 @@ query_is_distinct_for(Query *query, List *colnos) TargetEntry *tle = get_sortgroupclause_tle(scl, query->targetList); - if (!list_member_int(colnos, tle->resno)) + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !ops_in_same_btree_opfamily(opid, scl->sortop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? 
*/ @@ -952,7 +970,7 @@ query_is_distinct_for(Query *query, List *colnos) /* * Similarly, GROUP BY guarantees uniqueness if all the grouped columns - * appear in colnos. + * appear in colnos and operator semantics match. */ if (query->groupClause) { @@ -962,7 +980,9 @@ query_is_distinct_for(Query *query, List *colnos) TargetEntry *tle = get_sortgroupclause_tle(grpcl, query->targetList); - if (!list_member_int(colnos, tle->resno)) + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !ops_in_same_btree_opfamily(opid, grpcl->sortop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -972,7 +992,7 @@ query_is_distinct_for(Query *query, List *colnos) { /* * If we have no GROUP BY, but do have aggregates or HAVING, then the - * result is at most one row so it's surely unique. + * result is at most one row so it's surely unique, for any operators. */ if (query->hasAggs || query->havingQual) return true; @@ -980,7 +1000,13 @@ query_is_distinct_for(Query *query, List *colnos) /* * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, - * except with ALL + * except with ALL. + * + * XXX this code knows that prepunion.c will adopt the default ordering + * operator for each column datatype as the sortop. It'd probably be + * better if these operators were chosen at parse time and stored into + * the parsetree, instead of leaving bits of the planner to decide + * semantics. */ if (query->setOperations) { @@ -996,8 +1022,13 @@ query_is_distinct_for(Query *query, List *colnos) { TargetEntry *tle = (TargetEntry *) lfirst(l); - if (!tle->resjunk && - !list_member_int(colnos, tle->resno)) + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !ops_in_same_btree_opfamily(opid, + ordering_oper_opid(exprType((Node *) tle->expr)))) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? 
*/ @@ -1014,31 +1045,46 @@ query_is_distinct_for(Query *query, List *colnos) } /* - * hash_safe_tlist - can datatypes of given tlist be hashed? + * distinct_col_search - subroutine for query_is_distinct_for * - * We assume hashed aggregation will work if the datatype's equality operator - * is marked hashjoinable. + * If colno is in colnos, return the corresponding element of opids, + * else return InvalidOid. (We expect colnos does not contain duplicates, + * so the result is well-defined.) + */ +static Oid +distinct_col_search(int colno, List *colnos, List *opids) +{ + ListCell *lc1, + *lc2; + + forboth(lc1, colnos, lc2, opids) + { + if (colno == lfirst_int(lc1)) + return lfirst_oid(lc2); + } + return InvalidOid; +} + +/* + * hash_safe_operators - can all the specified IN operators be hashed? * - * XXX this probably should be somewhere else. See also hash_safe_grouping - * in plan/planner.c. + * We assume hashed aggregation will work if each IN operator is marked + * hashjoinable. If the IN operators are cross-type, this could conceivably + * fail: the aggregation will need a hashable equality operator for the RHS + * datatype --- but it's pretty hard to conceive of a hash opclass that has + * cross-type hashing without support for hashing the individual types, so + * we don't expend cycles here to support the case. We could check + * get_compatible_hash_operator() instead of just op_hashjoinable(), but the + * former is a significantly more expensive test. 
*/ static bool -hash_safe_tlist(List *tlist) +hash_safe_operators(List *opids) { - ListCell *tl; + ListCell *lc; - foreach(tl, tlist) + foreach(lc, opids) { - Node *expr = (Node *) lfirst(tl); - Operator optup; - bool oprcanhash; - - optup = equality_oper(exprType(expr), true); - if (!optup) - return false; - oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash; - ReleaseSysCache(optup); - if (!oprcanhash) + if (!op_hashjoinable(lfirst_oid(lc))) return false; } return true; @@ -1156,6 +1202,7 @@ create_nestloop_path(PlannerInfo *root, * ordering for each merge clause * 'mergestrategies' are the btree operator strategies identifying the merge * ordering for each merge clause + * 'mergenullsfirst' are the nulls first/last flags for each merge clause * 'outersortkeys' are the sort varkeys for the outer relation * 'innersortkeys' are the sort varkeys for the inner relation */ @@ -1168,8 +1215,9 @@ create_mergejoin_path(PlannerInfo *root, List *restrict_clauses, List *pathkeys, List *mergeclauses, - List *mergefamilies, - List *mergestrategies, + Oid *mergefamilies, + int *mergestrategies, + bool *mergenullsfirst, List *outersortkeys, List *innersortkeys) { @@ -1210,8 +1258,9 @@ create_mergejoin_path(PlannerInfo *root, pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->jpath.path.pathkeys = pathkeys; pathnode->path_mergeclauses = mergeclauses; - pathnode->path_mergefamilies = mergefamilies; - pathnode->path_mergestrategies = mergestrategies; + pathnode->path_mergeFamilies = mergefamilies; + pathnode->path_mergeStrategies = mergestrategies; + pathnode->path_mergeNullsFirst = mergenullsfirst; pathnode->outersortkeys = outersortkeys; pathnode->innersortkeys = innersortkeys; diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 6db3fce837..9792671fec 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: 
pgsql/src/backend/parser/parse_clause.c,v 1.162 2007/01/09 02:14:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.163 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1676,7 +1676,8 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, * Verify it's a valid ordering operator, and determine * whether to consider it like ASC or DESC. */ - if (!get_op_compare_function(sortop, &cmpfunc, &reverse)) + if (!get_compare_function_for_ordering_op(sortop, + &cmpfunc, &reverse)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("operator %s is not a valid ordering operator", diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 6379e25812..40a74594fa 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.142 2007/01/09 02:14:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.143 2007/01/10 18:06:04 tgl Exp $ * * NOTES * Eventually, the index information should go through here, too. @@ -286,7 +286,7 @@ get_op_mergejoin_info(Oid eq_op, Oid *left_sortop, #endif /* - * get_op_compare_function + * get_compare_function_for_ordering_op * Get the OID of the datatype-specific btree comparison function * associated with an ordering operator (a "<" or ">" operator). * @@ -298,7 +298,7 @@ get_op_mergejoin_info(Oid eq_op, Oid *left_sortop, * (This indicates that the operator is not a valid ordering operator.) 
*/ bool -get_op_compare_function(Oid opno, Oid *cmpfunc, bool *reverse) +get_compare_function_for_ordering_op(Oid opno, Oid *cmpfunc, bool *reverse) { bool result = false; CatCList *catlist; @@ -357,6 +357,177 @@ get_op_compare_function(Oid opno, Oid *cmpfunc, bool *reverse) return result; } +/* + * get_equality_op_for_ordering_op + * Get the OID of the datatype-specific btree equality operator + * associated with an ordering operator (a "<" or ">" operator). + * + * Returns InvalidOid if no matching equality operator can be found. + * (This indicates that the operator is not a valid ordering operator.) + */ +Oid +get_equality_op_for_ordering_op(Oid opno) +{ + Oid result = InvalidOid; + CatCList *catlist; + int i; + + /* + * Search pg_amop to see if the target operator is registered as the "<" + * or ">" operator of any btree opfamily. This is exactly like + * get_compare_function_for_ordering_op except we don't care whether the + * ordering op is "<" or ">" ... the equality operator will be the same + * either way. + */ + catlist = SearchSysCacheList(AMOPOPID, 1, + ObjectIdGetDatum(opno), + 0, 0, 0); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + /* must be btree */ + if (aform->amopmethod != BTREE_AM_OID) + continue; + + if (aform->amopstrategy == BTLessStrategyNumber || + aform->amopstrategy == BTGreaterStrategyNumber) + { + /* Found a suitable opfamily, get matching equality operator */ + result = get_opfamily_member(aform->amopfamily, + aform->amoplefttype, + aform->amoprighttype, + BTEqualStrategyNumber); + if (OidIsValid(result)) + break; + /* failure probably shouldn't happen, but keep looking if so */ + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + +/* + * get_ordering_op_for_equality_op + * Get the OID of a datatype-specific btree ordering operator + * associated with an equality operator. 
(If there are multiple + * possibilities, assume any one will do.) + * + * This function is used when we have to sort data before unique-ifying, + * and don't much care which sorting op is used as long as it's compatible + * with the intended equality operator. Since we need a sorting operator, + * it should be single-data-type even if the given operator is cross-type. + * The caller specifies whether to find an op for the LHS or RHS data type. + * + * Returns InvalidOid if no matching ordering operator can be found. + */ +Oid +get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type) +{ + Oid result = InvalidOid; + CatCList *catlist; + int i; + + /* + * Search pg_amop to see if the target operator is registered as the "=" + * operator of any btree opfamily. + */ + catlist = SearchSysCacheList(AMOPOPID, 1, + ObjectIdGetDatum(opno), + 0, 0, 0); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + /* must be btree */ + if (aform->amopmethod != BTREE_AM_OID) + continue; + + if (aform->amopstrategy == BTEqualStrategyNumber) + { + /* Found a suitable opfamily, get matching ordering operator */ + Oid typid; + + typid = use_lhs_type ? aform->amoplefttype : aform->amoprighttype; + result = get_opfamily_member(aform->amopfamily, + typid, typid, + BTLessStrategyNumber); + if (OidIsValid(result)) + break; + /* failure probably shouldn't happen, but keep looking if so */ + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + +/* + * get_compatible_hash_operator + * Get the OID of a hash equality operator compatible with the given + * operator, but operating on its LHS or RHS datatype as specified. + * + * If the given operator is not cross-type, the result should be the same + * operator, but in cross-type situations it is different. + * + * Returns InvalidOid if no compatible operator can be found. 
(This indicates + * that the operator should not have been marked oprcanhash.) + */ +Oid +get_compatible_hash_operator(Oid opno, bool use_lhs_type) +{ + Oid result = InvalidOid; + CatCList *catlist; + int i; + + /* + * Search pg_amop to see if the target operator is registered as the "=" + * operator of any hash opfamily. If the operator is registered in + * multiple opfamilies, assume we can use any one. + */ + catlist = SearchSysCacheList(AMOPOPID, 1, + ObjectIdGetDatum(opno), + 0, 0, 0); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + if (aform->amopmethod == HASH_AM_OID && + aform->amopstrategy == HTEqualStrategyNumber) + { + /* Found a suitable opfamily, get matching single-type operator */ + Oid typid; + + /* No extra lookup needed if given operator is single-type */ + if (aform->amoplefttype == aform->amoprighttype) + { + result = opno; + break; + } + typid = use_lhs_type ? aform->amoplefttype : aform->amoprighttype; + result = get_opfamily_member(aform->amopfamily, + typid, typid, + HTEqualStrategyNumber); + if (OidIsValid(result)) + break; + /* failure probably shouldn't happen, but keep looking if so */ + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + /* * get_op_hash_function * Get the OID of the datatype-specific hash function associated with @@ -485,6 +656,45 @@ get_op_btree_interpretation(Oid opno, List **opfamilies, List **opstrats) ReleaseSysCacheList(catlist); } +/* + * ops_in_same_btree_opfamily + * Return TRUE if there exists a btree opfamily containing both operators. + * (This implies that they have compatible notions of equality.) + */ +bool +ops_in_same_btree_opfamily(Oid opno1, Oid opno2) +{ + bool result = false; + CatCList *catlist; + int i; + + /* + * We search through all the pg_amop entries for opno1. 
+ */ + catlist = SearchSysCacheList(AMOPOPID, 1, + ObjectIdGetDatum(opno1), + 0, 0, 0); + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple op_tuple = &catlist->members[i]->tuple; + Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); + + /* must be btree */ + if (op_form->amopmethod != BTREE_AM_OID) + continue; + + if (op_in_opfamily(opno2, op_form->amopfamily)) + { + result = true; + break; + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + /* ---------- AMPROC CACHES ---------- */ diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 63c2fec28e..76c59a4238 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -91,7 +91,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.73 2007/01/09 02:14:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.74 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -550,8 +550,8 @@ tuplesort_begin_heap(TupleDesc tupDesc, AssertArg(attNums[i] != 0); AssertArg(sortOperators[i] != 0); - if (!get_op_compare_function(sortOperators[i], - &sortFunction, &reverse)) + if (!get_compare_function_for_ordering_op(sortOperators[i], + &sortFunction, &reverse)) elog(ERROR, "operator %u is not a valid ordering operator", sortOperators[i]); @@ -643,8 +643,8 @@ tuplesort_begin_datum(Oid datumType, state->datumType = datumType; /* lookup the ordering function */ - if (!get_op_compare_function(sortOperator, - &sortFunction, &reverse)) + if (!get_compare_function_for_ordering_op(sortOperator, + &sortFunction, &reverse)) elog(ERROR, "operator %u is not a valid ordering operator", sortOperator); fmgr_info(sortFunction, &state->sortOpFn); @@ -2106,8 +2106,8 @@ SelectSortFunction(Oid sortOperator, { bool reverse; - if (!get_op_compare_function(sortOperator, - 
sortFunction, &reverse)) + if (!get_compare_function_for_ordering_op(sortOperator, + sortFunction, &reverse)) elog(ERROR, "operator %u is not a valid ordering operator", sortOperator); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index ff537e7dde..6e099374cf 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.133 2007/01/05 22:19:54 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.134 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -84,14 +84,12 @@ extern bool execTuplesUnequal(TupleTableSlot *slot1, AttrNumber *matchColIdx, FmgrInfo *eqfunctions, MemoryContext evalContext); -extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc, - int numCols, - AttrNumber *matchColIdx); -extern void execTuplesHashPrepare(TupleDesc tupdesc, - int numCols, - AttrNumber *matchColIdx, - FmgrInfo **eqfunctions, - FmgrInfo **hashfunctions); +extern FmgrInfo *execTuplesMatchPrepare(int numCols, + Oid *eqOperators); +extern void execTuplesHashPrepare(int numCols, + Oid *eqOperators, + FmgrInfo **eqFunctions, + FmgrInfo **hashFunctions); extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx, FmgrInfo *eqfunctions, FmgrInfo *hashfunctions, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 44002a9d45..c9aa190b33 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.88 2007/01/09 02:14:15 tgl Exp $ + * $PostgreSQL: 
pgsql/src/include/nodes/plannodes.h,v 1.89 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -346,14 +346,23 @@ typedef struct NestLoop /* ---------------- * merge join node + * + * The expected ordering of each mergeable column is described by a btree + * opfamily OID, a direction (BTLessStrategyNumber or BTGreaterStrategyNumber) + * and a nulls-first flag. Note that the two sides of each mergeclause may + * be of different datatypes, but they are ordered the same way according to + * the common opfamily. The operator in each mergeclause must be an equality + * operator of the indicated opfamily. * ---------------- */ typedef struct MergeJoin { Join join; List *mergeclauses; /* mergeclauses as expression trees */ - List *mergefamilies; /* OID list of btree opfamilies */ - List *mergestrategies; /* integer list of btree strategies */ + /* these are arrays, but have the same length as the mergeclauses list: */ + Oid *mergeFamilies; /* per-clause OIDs of btree opfamilies */ + int *mergeStrategies; /* per-clause ordering (ASC or DESC) */ + bool *mergeNullsFirst; /* per-clause nulls ordering */ } MergeJoin; /* ---------------- @@ -399,6 +408,7 @@ typedef struct Group Plan plan; int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ + Oid *grpOperators; /* equality operators to compare with */ } Group; /* --------------- @@ -428,6 +438,7 @@ typedef struct Agg AggStrategy aggstrategy; int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ + Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ } Agg; @@ -439,7 +450,8 @@ typedef struct Unique { Plan plan; int numCols; /* number of columns to check for uniqueness */ - AttrNumber *uniqColIdx; /* indexes into the target list */ + AttrNumber *uniqColIdx; /* their indexes in the target list */ + 
Oid *uniqOperators; /* equality operators to compare with */ } Unique; /* ---------------- @@ -470,8 +482,9 @@ typedef struct SetOp SetOpCmd cmd; /* what to do */ int numCols; /* number of columns to check for * duplicate-ness */ - AttrNumber *dupColIdx; /* indexes into the target list */ - AttrNumber flagColIdx; + AttrNumber *dupColIdx; /* their indexes in the target list */ + Oid *dupOperators; /* equality operators to compare with */ + AttrNumber flagColIdx; /* where is the flag column, if any */ } SetOp; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 4e285a765a..f790037739 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.131 2007/01/09 02:14:15 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.132 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -618,9 +618,11 @@ typedef JoinPath NestPath; * A mergejoin path has these fields. * * path_mergeclauses lists the clauses (in the form of RestrictInfos) - * that will be used in the merge. The parallel lists path_mergefamilies - * and path_mergestrategies specify the merge semantics for each clause - * (in effect, defining the relevant sort ordering for each clause). + * that will be used in the merge. The parallel arrays path_mergeFamilies, + * path_mergeStrategies, and path_mergeNullsFirst specify the merge semantics + * for each clause (i.e., define the relevant sort ordering for each clause). + * (XXX is this the most reasonable path-time representation? It's at least + * partially redundant with the pathkeys of the input paths.) * * Note that the mergeclauses are a subset of the parent relation's * restriction-clause list. 
Any join clauses that are not mergejoinable @@ -637,8 +639,10 @@ typedef struct MergePath { JoinPath jpath; List *path_mergeclauses; /* join clauses to be used for merge */ - List *path_mergefamilies; /* OID list of btree opfamilies */ - List *path_mergestrategies; /* integer list of btree strategies */ + /* these are arrays, but have the same length as the mergeclauses list: */ + Oid *path_mergeFamilies; /* per-clause OIDs of opfamilies */ + int *path_mergeStrategies; /* per-clause ordering (ASC or DESC) */ + bool *path_mergeNullsFirst; /* per-clause nulls ordering */ List *outersortkeys; /* keys for explicit sort, if any */ List *innersortkeys; /* keys for explicit sort, if any */ } MergePath; @@ -885,6 +889,9 @@ typedef struct OuterJoinInfo * the order of joining and use special join methods at some join points. * We record information about each such IN clause in an InClauseInfo struct. * These structs are kept in the PlannerInfo node's in_info_list. + * + * Note: sub_targetlist is just a list of Vars or expressions; it does not + * contain TargetEntry nodes. */ typedef struct InClauseInfo @@ -893,11 +900,7 @@ typedef struct InClauseInfo Relids lefthand; /* base relids in lefthand expressions */ Relids righthand; /* base relids coming from the subselect */ List *sub_targetlist; /* targetlist of original RHS subquery */ - - /* - * Note: sub_targetlist is just a list of Vars or expressions; it does not - * contain TargetEntry nodes. 
- */ + List *in_operators; /* OIDs of the IN's equality operator(s) */ } InClauseInfo; /* diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index b1e505cd6a..fd5c78372e 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.74 2007/01/05 22:19:56 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.75 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -71,8 +71,9 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root, List *restrict_clauses, List *pathkeys, List *mergeclauses, - List *mergefamilies, - List *mergestrategies, + Oid *mergefamilies, + int *mergestrategies, + bool *mergenullsfirst, List *outersortkeys, List *innersortkeys); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index cb6ed70c0c..0f6799338b 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.96 2007/01/05 22:19:56 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.97 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -44,11 +44,11 @@ extern Sort *make_sort_from_groupcols(PlannerInfo *root, List *groupcls, AttrNumber *grpColIdx, Plan *lefttree); extern Agg *make_agg(PlannerInfo *root, List *tlist, List *qual, AggStrategy aggstrategy, - int numGroupCols, AttrNumber *grpColIdx, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, long numGroups, int numAggs, 
Plan *lefttree); extern Group *make_group(PlannerInfo *root, List *tlist, List *qual, - int numGroupCols, AttrNumber *grpColIdx, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, double numGroups, Plan *lefttree); extern Material *make_material(Plan *lefttree); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 15d2b8a06d..8d4c0d8e3e 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.111 2007/01/09 02:14:16 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.112 2007/01/10 18:06:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,10 +37,15 @@ extern Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy); extern bool get_op_mergejoin_info(Oid eq_op, Oid *left_sortop, Oid *right_sortop, Oid *opfamily); -extern bool get_op_compare_function(Oid opno, Oid *cmpfunc, bool *reverse); +extern bool get_compare_function_for_ordering_op(Oid opno, + Oid *cmpfunc, bool *reverse); +extern Oid get_equality_op_for_ordering_op(Oid opno); +extern Oid get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type); +extern Oid get_compatible_hash_operator(Oid opno, bool use_lhs_type); extern Oid get_op_hash_function(Oid opno); extern void get_op_btree_interpretation(Oid opno, List **opfamilies, List **opstrats); +extern bool ops_in_same_btree_opfamily(Oid opno1, Oid opno2); extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum); extern char *get_attname(Oid relid, AttrNumber attnum); -- 2.40.0