From: Tom Lane Date: Mon, 21 Apr 2008 20:54:15 +0000 (+0000) Subject: Fix convert_IN_to_join to properly handle the case where the subselect's X-Git-Tag: REL8_4_BETA1~1512 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ff673f558af1d09d006966f4bd779b1f464a4fc8;p=postgresql Fix convert_IN_to_join to properly handle the case where the subselect's output is not of the same type that's needed for the IN comparison (ie, where the parser inserted an implicit coercion above the subselect result). We should record the coerced expression, not just a raw Var referencing the subselect output, as the quantity that needs to be unique-ified if we choose to implement the IN as Unique followed by a plain join. As of 8.3 this error was causing crashes, as seen in bug #4113 from Javier Hernandez, because the executor was being told to hash or sort the raw subselect output column using operators appropriate to the coerced type. In prior versions there was no crash because the executor chose the hash or sort operators for itself based on the column type it saw. However, that's still not really right, because what's unique for one data type might not be unique for another. In corner cases we could get multiple outputs of a row that should appear only once, as demonstrated by the regression test case included in this commit. However, this patch doesn't apply cleanly to 8.2 or before, and the code involved has shifted enough over time that I'm hesitant to try to back-patch. Given the lack of complaints from the field about such corner cases, I think the bug may not be important enough to risk breaking other things with a back-patch. --- diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 9f38f07249..9fa838618a 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.129 2008/01/17 20:35:27 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.130 2008/04/21 20:54:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -725,10 +725,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) Query *parse = root->parse; Query *subselect = (Query *) sublink->subselect; List *in_operators; + List *left_exprs; + List *right_exprs; Relids left_varnos; int rtindex; RangeTblEntry *rte; RangeTblRef *rtr; + List *subquery_vars; InClauseInfo *ininfo; Node *result; @@ -744,28 +747,37 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) return NULL; if (sublink->testexpr && IsA(sublink->testexpr, OpExpr)) { - Oid opno = ((OpExpr *) sublink->testexpr)->opno; + OpExpr *op = (OpExpr *) sublink->testexpr; + Oid opno = op->opno; List *opfamilies; List *opstrats; + if (list_length(op->args) != 2) + return NULL; /* not binary operator? */ get_op_btree_interpretation(opno, &opfamilies, &opstrats); if (!list_member_int(opstrats, ROWCOMPARE_EQ)) return NULL; in_operators = list_make1_oid(opno); + left_exprs = list_make1(linitial(op->args)); + right_exprs = list_make1(lsecond(op->args)); } else if (and_clause(sublink->testexpr)) { ListCell *lc; - /* OK, but we need to extract the per-column operator OIDs */ - in_operators = NIL; + /* OK, but we need to extract the per-column info */ + in_operators = left_exprs = right_exprs = NIL; foreach(lc, ((BoolExpr *) sublink->testexpr)->args) { OpExpr *op = (OpExpr *) lfirst(lc); if (!IsA(op, OpExpr)) /* probably shouldn't happen */ return NULL; + if (list_length(op->args) != 2) + return NULL; /* not binary operator? */ in_operators = lappend_oid(in_operators, op->opno); + left_exprs = lappend(left_exprs, linitial(op->args)); + right_exprs = lappend(right_exprs, lsecond(op->args)); } } else @@ -782,10 +794,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) * The left-hand expressions must contain some Vars of the current query, * else it's not gonna be a join. */ - left_varnos = pull_varnos(sublink->testexpr); + left_varnos = pull_varnos((Node *) left_exprs); if (bms_is_empty(left_varnos)) return NULL; + /* ... and the right-hand expressions better not contain Vars at all */ + Assert(!contain_var_clause((Node *) right_exprs)); + /* * The combining operators and left-hand expressions mustn't be volatile. */ @@ -810,6 +825,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) rtr->rtindex = rtindex; parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr); + /* + * Build a list of Vars representing the subselect outputs. + */ + subquery_vars = generate_subquery_vars(root, + subselect->targetList, + rtindex); + + /* + * Build the result qual expression, replacing Params with these Vars. + */ + result = convert_testexpr(root, + sublink->testexpr, + subquery_vars); + /* * Now build the InClauseInfo node. */ @@ -819,24 +848,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) ininfo->in_operators = in_operators; /* - * ininfo->sub_targetlist is filled with a list of Vars representing the - * subselect outputs. + * ininfo->sub_targetlist must be filled with a list of expressions that + * would need to be unique-ified if we try to implement the IN using a + * regular join to unique-ified subquery output. This is most easily done + * by applying convert_testexpr to just the RHS inputs of the testexpr + * operators. That handles cases like type coercions of the subquery + * outputs, clauses dropped due to const-simplification, etc. */ - ininfo->sub_targetlist = generate_subquery_vars(root, - subselect->targetList, - rtindex); - Assert(list_length(in_operators) == list_length(ininfo->sub_targetlist)); + ininfo->sub_targetlist = (List *) convert_testexpr(root, + (Node *) right_exprs, + subquery_vars); /* Add the completed node to the query's list */ root->in_info_list = lappend(root->in_info_list, ininfo); - /* - * Build the result qual expression. - */ - result = convert_testexpr(root, - sublink->testexpr, - ininfo->sub_targetlist); - return result; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 0808ac07b0..c5b6c2ed61 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.142 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.143 2008/04/21 20:54:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -898,7 +898,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) /* * translate_sub_tlist - get subquery column numbers represented by tlist * - * The given targetlist should contain only Vars referencing the given relid. + * The given targetlist usually contains only Vars referencing the given relid. * Extract their varattnos (ie, the column numbers of the subquery) and return * as an integer List. * diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 9e9e7afbc8..8476d7e85c 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.155 2008/03/24 21:53:04 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.156 2008/04/21 20:54:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1107,8 +1107,10 @@ typedef struct OuterJoinInfo * We record information about each such IN clause in an InClauseInfo struct. * These structs are kept in the PlannerInfo node's in_info_list. * - * Note: sub_targetlist is just a list of Vars or expressions; it does not - * contain TargetEntry nodes. + * Note: sub_targetlist is a bit misnamed; it is a list of the expressions + * on the RHS of the IN's join clauses. (This normally starts out as a list + * of Vars referencing the subquery outputs, but can get mutated if the + * subquery is flattened into the main query.) */ typedef struct InClauseInfo @@ -1116,8 +1118,8 @@ typedef struct InClauseInfo NodeTag type; Relids lefthand; /* base relids in lefthand expressions */ Relids righthand; /* base relids coming from the subselect */ - List *sub_targetlist; /* targetlist of original RHS subquery */ - List *in_operators; /* OIDs of the IN's equality operator(s) */ + List *sub_targetlist; /* RHS expressions of the IN's comparisons */ + List *in_operators; /* OIDs of the IN's equality operators */ } InClauseInfo; /* diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index a37489b495..723dca7007 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -408,3 +408,34 @@ select * from ( 0 (1 row) +-- +-- Test that an IN implemented using a UniquePath does unique-ification +-- with the right semantics, as per bug #4113. (Unfortunately we have +-- no simple way to ensure that this test case actually chooses that type +-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference +-- here might mean that some other plan type is being used, rendering the test +-- pointless.) +-- +create temp table numeric_table (num_col numeric); +insert into numeric_table values (1), (1.000000000000000000001), (2), (3); +create temp table float_table (float_col float8); +insert into float_table values (1), (2), (3); +select * from float_table + where float_col in (select num_col from numeric_table); + float_col +----------- + 1 + 2 + 3 +(3 rows) + +select * from numeric_table + where num_col in (select float_col from float_table); + num_col +------------------------- + 1 + 1.000000000000000000001 + 2 + 3 +(4 rows) + diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 4f824c05f7..d99bf321bd 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -251,3 +251,24 @@ select * from ( select min(unique1) from tenk1 as a where not exists (select 1 from tenk1 as b where b.unique2 = 10000) ) ss; + +-- +-- Test that an IN implemented using a UniquePath does unique-ification +-- with the right semantics, as per bug #4113. (Unfortunately we have +-- no simple way to ensure that this test case actually chooses that type +-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference +-- here might mean that some other plan type is being used, rendering the test +-- pointless.) +-- + +create temp table numeric_table (num_col numeric); +insert into numeric_table values (1), (1.000000000000000000001), (2), (3); + +create temp table float_table (float_col float8); +insert into float_table values (1), (2), (3); + +select * from float_table + where float_col in (select num_col from numeric_table); + +select * from numeric_table + where num_col in (select float_col from float_table);