* geqo_eval
*
* Returns cost of a query tree as an individual of the population.
+ *
+ * If no legal join order can be extracted from the proposed tour,
+ * returns DBL_MAX.
*/
Cost
geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
MemoryContext mycontext;
MemoryContext oldcxt;
RelOptInfo *joinrel;
- Path *best_path;
Cost fitness;
int savelength;
struct HTAB *savehash;
/* construct the best path for the given combination of relations */
joinrel = gimme_tree(root, tour, num_gene);
- best_path = joinrel->cheapest_total_path;
/*
- * compute fitness
+ * compute fitness, if we found a valid join
*
* XXX geqo does not currently support optimization for partial result
* retrieval, nor do we take any cognizance of possible use of
* parameterized paths --- how to fix?
*/
- fitness = best_path->total_cost;
+ if (joinrel)
+ {
+ Path *best_path = joinrel->cheapest_total_path;
+
+ fitness = best_path->total_cost;
+ }
+ else
+ fitness = DBL_MAX;
/*
* Restore join_rel_list to its former state, and put back original
* 'tour' is the proposed join order, of length 'num_gene'
*
* Returns a new join relation whose cheapest path is the best plan for
- * this join order.
+ * this join order. NB: will return NULL if join order is invalid and
+ * we can't modify it into a valid order.
*
* The original implementation of this routine always joined in the specified
* order, and so could only build left-sided plans (and right-sided and
* postpones joins that are illegal or seem unsuitable according to some
* heuristic rules. This allows correct bushy plans to be generated at need,
* and as a nice side-effect it seems to materially improve the quality of the
- * generated plans.
+ * generated plans. Note however that since it's just a heuristic, it can
+ * still fail in some cases. (In particular, we might clump together
+ * relations that actually mustn't be joined yet due to LATERAL restrictions;
+ * since there's no provision for un-clumping, this must lead to failure.)
*/
RelOptInfo *
gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
* to; if there is none then it becomes a new clump of its own. When we
* enlarge an existing clump we check to see if it can now be merged with
* any other clumps. After the tour is all scanned, we forget about the
- * heuristics and try to forcibly join any remaining clumps. Some forced
- * joins might still fail due to semantics, but we should always be able
- * to find some join order that works.
+ * heuristics and try to forcibly join any remaining clumps. If we are
+ * unable to merge all the clumps into one, fail.
*/
clumps = NIL;
/* Did we succeed in forming a single join relation? */
if (list_length(clumps) != 1)
- elog(ERROR, "failed to join all relations together");
+ return NULL;
return ((Clump *) linitial(clumps))->joinrel;
}
{
Chromosome *chromo = (Chromosome *) pool->data;
int i;
+ int bad = 0;
- for (i = 0; i < pool->size; i++)
+ /*
+ * We immediately discard any invalid individuals (those that geqo_eval
+ * returns DBL_MAX for), thereby not wasting pool space on them.
+ *
+ * If we have not produced a single valid individual after 10000 tries,
+ * give up; this probably means something is broken, and we shouldn't just
+ * let ourselves get stuck in an infinite loop.
+ */
+ i = 0;
+ while (i < pool->size)
{
init_tour(root, chromo[i].string, pool->string_length);
pool->data[i].worth = geqo_eval(root, chromo[i].string,
pool->string_length);
+ if (pool->data[i].worth < DBL_MAX)
+ i++;
+ else
+ {
+ bad++;
+ if (i == 0 && bad >= 10000)
+ elog(ERROR, "geqo failed to make a valid plan");
+ }
}
+
+#ifdef GEQO_DEBUG
+ if (bad > 0)
+ elog(DEBUG1, "%d invalid tours found while selecting %d pool entries",
+ bad, pool->size);
+#endif
}
/*