From 3b0f77601b9f9f3a2e36a813e4cd32c00e0864d6 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 3 Jun 2015 18:02:39 -0400 Subject: [PATCH] Fix some questionable edge-case behaviors in add_path() and friends. add_path_precheck was doing exact comparisons of path costs, but it really needs to do them fuzzily to be sure it won't reject paths that could survive add_path's comparisons. (This can only matter if the initial cost estimate is very close to the final one, but that turns out to often be true.) Also, it should ignore startup cost for this purpose if and only if compare_path_costs_fuzzily would do so. The previous coding always ignored startup cost for parameterized paths, which is wrong as of commit 3f59be836c555fa6; it could result in improper early rejection of paths that we care about for SEMI/ANTI joins. It also always considered startup cost for unparameterized paths, which is just as wrong though the only effect is to waste planner cycles on paths that can't survive. Instead, it should consider startup cost only when directed to by the consider_startup/consider_param_startup relation flags. Likewise, compare_path_costs_fuzzily should have symmetrical behavior for parameterized and unparameterized paths. In this case, the best answer seems to be that after establishing that total costs are fuzzily equal, we should compare startup costs whether or not the consider_xxx flags are on. That is what it's always done for unparameterized paths, so let's make the behavior for parameterized paths match. These issues were noted while developing the SEMI/ANTI join costing fix of commit 3f59be836c555fa6, but we chose not to back-patch these fixes, because they can cause changes in the planner's choices among nearly-same-cost plans. (There is in fact one minor change in plan choice within the core regression tests.) Destabilizing plan choices in back branches without very clear improvements is frowned on, so we'll just fix this in HEAD. 
--- src/backend/optimizer/util/pathnode.c | 50 ++++++++++++++------------ src/test/regress/expected/join.out | 52 +++++++++++++-------------- 2 files changed, 53 insertions(+), 49 deletions(-) diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 5075c8752a..f7f33bbe77 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -38,6 +38,13 @@ typedef enum COSTS_DIFFERENT /* neither path dominates the other on cost */ } PathCostComparison; +/* + * STD_FUZZ_FACTOR is the normal fuzz factor for compare_path_costs_fuzzily. + * XXX is it worth making this user-controllable? It provides a tradeoff + * between planner runtime and the accuracy of path cost comparisons. + */ +#define STD_FUZZ_FACTOR 1.01 + static List *translate_sub_tlist(List *tlist, int relid); @@ -140,8 +147,10 @@ compare_fractional_path_costs(Path *path1, Path *path2, * * This function also enforces a policy rule that paths for which the relevant * one of parent->consider_startup and parent->consider_param_startup is false - * cannot win comparisons on the grounds of good startup cost, so we never - * return COSTS_DIFFERENT when that is true for the total-cost loser. + * cannot survive comparisons solely on the grounds of good startup cost, so + * we never return COSTS_DIFFERENT when that is true for the total-cost loser. + * (But if total costs are fuzzily equal, we compare startup costs anyway, + * in hopes of eliminating one path or the other.) 
*/ static PathCostComparison compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) @@ -177,21 +186,13 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) /* else path1 dominates */ return COSTS_BETTER1; } - - /* - * Fuzzily the same on total cost (so we might as well compare startup - * cost, even when that would otherwise be uninteresting; but - * parameterized paths aren't allowed to win this way, we'd rather move on - * to other comparison heuristics) - */ - if (path1->startup_cost > path2->startup_cost * fuzz_factor && - path2->param_info == NULL) + /* fuzzily the same on total cost ... */ + if (path1->startup_cost > path2->startup_cost * fuzz_factor) { /* ... but path1 fuzzily worse on startup, so path2 wins */ return COSTS_BETTER2; } - if (path2->startup_cost > path1->startup_cost * fuzz_factor && - path1->param_info == NULL) + if (path2->startup_cost > path1->startup_cost * fuzz_factor) { /* ... but path2 fuzzily worse on startup, so path1 wins */ return COSTS_BETTER1; @@ -434,10 +435,10 @@ add_path(RelOptInfo *parent_rel, Path *new_path) p1_next = lnext(p1); /* - * Do a fuzzy cost comparison with 1% fuzziness limit. (XXX does this - * percentage need to be user-configurable?) + * Do a fuzzy cost comparison with standard fuzziness limit. */ - costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01); + costcmp = compare_path_costs_fuzzily(new_path, old_path, + STD_FUZZ_FACTOR); /* * If the two paths compare differently for startup and total cost, @@ -624,11 +625,15 @@ add_path_precheck(RelOptInfo *parent_rel, List *pathkeys, Relids required_outer) { List *new_path_pathkeys; + bool consider_startup; ListCell *p1; /* Pretend parameterized paths have no pathkeys, per add_path policy */ new_path_pathkeys = required_outer ? NIL : pathkeys; + /* Decide whether new path's startup cost is interesting */ + consider_startup = required_outer ? 
parent_rel->consider_param_startup : parent_rel->consider_startup; + foreach(p1, parent_rel->pathlist) { Path *old_path = (Path *) lfirst(p1); @@ -640,16 +645,15 @@ add_path_precheck(RelOptInfo *parent_rel, * pathkeys as well as both cost metrics. If we find one, we can * reject the new path. * - * For speed, we make exact rather than fuzzy cost comparisons. If an - * old path dominates the new path exactly on both costs, it will - * surely do so fuzzily. + * Cost comparisons here should match compare_path_costs_fuzzily. */ - if (total_cost >= old_path->total_cost) + if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR) { - /* can win on startup cost only if unparameterized */ - if (startup_cost >= old_path->startup_cost || required_outer) + /* new path can win on startup cost only if consider_startup */ + if (startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR || + !consider_startup) { - /* new path does not win on cost, so check pathkeys... */ + /* new path loses on cost, so check pathkeys... */ List *old_path_pathkeys; old_path_pathkeys = old_path->param_info ? 
NIL : old_path->pathkeys; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index ed9ad0e135..139f7e0498 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3771,11 +3771,11 @@ explain (costs off) ------------------------------------------ Nested Loop -> Seq Scan on int8_tbl a - -> Hash Left Join - Hash Cond: (x.q2 = (a.q1)) - -> Seq Scan on int8_tbl x + -> Hash Right Join + Hash Cond: ((a.q1) = x.q2) + -> Seq Scan on int4_tbl y -> Hash - -> Seq Scan on int4_tbl y + -> Seq Scan on int8_tbl x (7 rows) select * from int8_tbl a, @@ -3783,62 +3783,62 @@ select * from int8_tbl a, on x.q2 = ss.z; q1 | q2 | q1 | q2 | z ------------------+-------------------+------------------+-------------------+------------------ - 123 | 456 | 123 | 456 | - 123 | 456 | 123 | 4567890123456789 | 123 | 456 | 4567890123456789 | 123 | 123 123 | 456 | 4567890123456789 | 123 | 123 123 | 456 | 4567890123456789 | 123 | 123 123 | 456 | 4567890123456789 | 123 | 123 123 | 456 | 4567890123456789 | 123 | 123 123 | 456 | 4567890123456789 | 4567890123456789 | + 123 | 456 | 123 | 4567890123456789 | + 123 | 456 | 123 | 456 | 123 | 456 | 4567890123456789 | -4567890123456789 | - 123 | 4567890123456789 | 123 | 456 | - 123 | 4567890123456789 | 123 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 123 | 123 123 | 4567890123456789 | 4567890123456789 | 123 | 123 123 | 4567890123456789 | 4567890123456789 | 123 | 123 123 | 4567890123456789 | 4567890123456789 | 123 | 123 123 | 4567890123456789 | 4567890123456789 | 123 | 123 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | + 123 | 4567890123456789 | 123 | 4567890123456789 | + 123 | 4567890123456789 | 123 | 456 | 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | - 4567890123456789 | 123 | 123 | 456 | + 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 + 
4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 4567890123456789 | 123 | 4567890123456789 | 123 | - 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 123 | 456 | 4567890123456789 | 123 | 4567890123456789 | -4567890123456789 | - 4567890123456789 | 4567890123456789 | 123 | 456 | + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 
4567890123456789 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | - 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 123 | 456 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | - 4567890123456789 | -4567890123456789 | 123 | 456 | + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 4567890123456789 | -4567890123456789 | 4567890123456789 | 123 | - 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | -4567890123456789 | 
4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 - 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 123 | 456 | 4567890123456789 | -4567890123456789 | 4567890123456789 | -4567890123456789 | (57 rows) -- 2.40.0