/*------------------------------------------------------------------------
*
- * geqo_main.c--
+ * geqo_main.c
* solution of the query optimization problem
* by means of a Genetic Algorithm (GA)
*
- * Copyright (c) 1994, Regents of the University of California
+ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: geqo_main.c,v 1.8 1998/07/18 04:22:27 momjian Exp $
+ * $Id: geqo_main.c,v 1.30 2002/03/02 21:39:26 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "nodes/pg_list.h"
-#include "nodes/relation.h"
-#include "nodes/plannodes.h"
-#include "nodes/primnodes.h"
+#include <time.h>
+#include <math.h>
-#include "utils/palloc.h"
-#include "utils/elog.h"
-
-#include "optimizer/internal.h"
-#include "optimizer/paths.h"
-#include "optimizer/pathnode.h"
-#include "optimizer/clauses.h"
-#include "optimizer/cost.h"
-
-#include "optimizer/geqo_gene.h"
#include "optimizer/geqo.h"
+#include "optimizer/geqo_misc.h"
#include "optimizer/geqo_pool.h"
#include "optimizer/geqo_selection.h"
-#include "optimizer/geqo_recombination.h"
-#include "optimizer/geqo_mutation.h"
-#include "optimizer/geqo_misc.h"
+
+
+/*
+ * Configuration options
+ */
+int Geqo_pool_size;
+int Geqo_effort;
+int Geqo_generations;
+double Geqo_selection_bias;
+int Geqo_random_seed;
+
+
+static int gimme_pool_size(int nr_rel);
+static int gimme_number_generations(int pool_size, int effort);
/* define edge recombination crossover [ERX] per default */
/*
- * geqo--
+ * geqo
* solution of the query optimization problem
* similar to a constrained Traveling Salesman Problem (TSP)
*/
RelOptInfo *
-geqo(Query *root)
+geqo(Query *root, int number_of_rels, List *initial_rels)
{
int generation;
Chromosome *momma;
Chromosome *daddy;
Chromosome *kid;
+ Pool *pool;
+ int pool_size,
+ number_generations,
+ status_interval;
+ Gene *best_tour;
+ RelOptInfo *best_rel;
#if defined(ERX)
Edge *edge_table; /* list of edges */
int edge_failures = 0;
float difference;
-
#endif
-
#if defined(CX) || defined(PX) || defined(OX1) || defined(OX2)
City *city_table; /* list of cities */
-
#endif
-
#if defined(CX)
int cycle_diffs = 0;
int mutations = 0;
-
#endif
-
- int number_of_rels;
-
- Pool *pool;
- int pool_size,
- number_generations,
- status_interval;
-
- Gene *best_tour;
- RelOptInfo *best_rel;
-
-/* Plan *best_plan; */
-
-
-/* set tour size */
- number_of_rels = length(root->base_relation_list_);
-
/* set GA parameters */
- geqo_params(number_of_rels);/* out of "$PGDATA/pg_geqo" file */
- pool_size = PoolSize;
- number_generations = Generations;
+ pool_size = gimme_pool_size(number_of_rels);
+ number_generations = gimme_number_generations(pool_size, Geqo_effort);
status_interval = 10;
/* seed random number generator */
- srandom(RandomSeed);
+/* XXX why is this done every time around? */
+ if (Geqo_random_seed >= 0)
+ srandom((unsigned int) Geqo_random_seed);
+ else
+ srandom((unsigned int) time(NULL));
/* allocate genetic pool memory */
pool = alloc_pool(pool_size, number_of_rels);
/* random initialization of the pool */
- random_init_pool(root, pool, 0, pool->size);
+ random_init_pool(root, initial_rels, pool, 0, pool->size);
/* sort the pool according to cheapest path as fitness */
sort_pool(pool); /* we have to do it only one time, since
daddy = alloc_chromo(pool->string_length);
#if defined (ERX)
- elog(DEBUG, "geqo_main: using edge recombination crossover [ERX]");
+ elog(LOG, "geqo_main: using edge recombination crossover [ERX]");
/* allocate edge table memory */
edge_table = alloc_edge_table(pool->string_length);
#elif defined(PMX)
- elog(DEBUG, "geqo_main: using partially matched crossover [PMX]");
+ elog(LOG, "geqo_main: using partially matched crossover [PMX]");
/* allocate chromosome kid memory */
kid = alloc_chromo(pool->string_length);
#elif defined(CX)
- elog(DEBUG, "geqo_main: using cycle crossover [CX]");
+ elog(LOG, "geqo_main: using cycle crossover [CX]");
/* allocate city table memory */
kid = alloc_chromo(pool->string_length);
city_table = alloc_city_table(pool->string_length);
#elif defined(PX)
- elog(DEBUG, "geqo_main: using position crossover [PX]");
+ elog(LOG, "geqo_main: using position crossover [PX]");
/* allocate city table memory */
kid = alloc_chromo(pool->string_length);
city_table = alloc_city_table(pool->string_length);
#elif defined(OX1)
- elog(DEBUG, "geqo_main: using order crossover [OX1]");
+ elog(LOG, "geqo_main: using order crossover [OX1]");
/* allocate city table memory */
kid = alloc_chromo(pool->string_length);
city_table = alloc_city_table(pool->string_length);
#elif defined(OX2)
- elog(DEBUG, "geqo_main: using order crossover [OX2]");
+ elog(LOG, "geqo_main: using order crossover [OX2]");
/* allocate city table memory */
kid = alloc_chromo(pool->string_length);
city_table = alloc_city_table(pool->string_length);
{
/* SELECTION */
- geqo_selection(momma, daddy, pool, SelectionBias); /* using linear bias
- * function */
+ geqo_selection(momma, daddy, pool, Geqo_selection_bias); /* using linear bias
+ * function */
pmx(momma->string, daddy->string, kid->string, pool->string_length);
#elif defined(CX)
/* CYCLE CROSSOVER */
- cycle_diffs =
- cx(momma->string, daddy->string, kid->string, pool->string_length, city_table);
+ cycle_diffs = cx(momma->string, daddy->string, kid->string, pool->string_length, city_table);
/* mutate the child */
if (cycle_diffs == 0)
{
/* EVALUATE FITNESS */
- kid->worth = geqo_eval(root, kid->string, pool->string_length);
+ kid->worth = geqo_eval(root, initial_rels,
+ kid->string, pool->string_length);
/* push the kid into the wilderness of life according to its worth */
spread_chromo(kid, pool);
#if defined(ERX) && defined(GEQO_DEBUG)
if (edge_failures != 0)
- fprintf(stdout, "\nFailures: %d Avg: %d\n", edge_failures, (int) generation / edge_failures);
-
+ elog(LOG, "[GEQO] failures: %d, average: %d",
+ edge_failures, (int) generation / edge_failures);
else
- fprintf(stdout, "No edge failures detected.\n");
+ elog(LOG, "[GEQO] No edge failures detected.");
#endif
#if defined(CX) && defined(GEQO_DEBUG)
if (mutations != 0)
- fprintf(stdout, "\nMutations: %d Generations: %d\n", mutations, generation);
-
+ elog(LOG, "[GEQO] mutations: %d, generations: %d", mutations, generation);
else
- fprintf(stdout, "No mutations processed.\n");
+ elog(LOG, "[GEQO] No mutations processed.");
#endif
#ifdef GEQO_DEBUG
- fprintf(stdout, "\n");
print_pool(stdout, pool, 0, pool_size - 1);
#endif
best_tour = (Gene *) pool->data[0].string;
-/* root->join_relation_list_ will be modified during this ! */
- best_rel = (RelOptInfo *) gimme_tree(root, best_tour, 0, pool->string_length, NULL);
+/* root->join_rel_list will be modified during this ! */
+ best_rel = gimme_tree(root, initial_rels,
+ best_tour, pool->string_length,
+ 0, NULL);
/* DBG: show the query plan
print_plan(best_plan, root);
free_pool(pool);
- return (best_rel);
+ return best_rel;
+}
+
+
+
+/*
+ * Return either configured pool size or
+ * a good default based on query size (no. of relations)
+ * = 2^(QS+1)
+ * also constrain between 128 and 1024
+ */
+static int
+gimme_pool_size(int nr_rel)
+{
+ double size;
+
+ if (Geqo_pool_size != 0)
+ {
+ if (Geqo_pool_size < MIN_GEQO_POOL_SIZE)
+ return MIN_GEQO_POOL_SIZE;
+ else if (Geqo_pool_size > MAX_GEQO_POOL_SIZE)
+ return MAX_GEQO_POOL_SIZE;
+ else
+ return Geqo_pool_size;
+ }
+
+ size = pow(2.0, nr_rel + 1.0);
+
+ if (size < MIN_GEQO_POOL_SIZE)
+ return MIN_GEQO_POOL_SIZE;
+ else if (size > MAX_GEQO_POOL_SIZE)
+ return MAX_GEQO_POOL_SIZE;
+ else
+ return (int) ceil(size);
+}
+
+
+
+/*
+ * Return either configured number of generations or
+ * some reasonable default calculated on the fly.
+ * = Effort * Log2(PoolSize)
+ */
+static int
+gimme_number_generations(int pool_size, int effort)
+{
+ if (Geqo_generations <= 0)
+ return effort * (int) ceil(log((double) pool_size) / log(2.0));
+ else
+ return Geqo_generations;
}