Restructure parsetree representation of DECLARE CURSOR: now it's a

[postgresql] / src / backend / optimizer / plan / planmain.c
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index 802e5970416558d727334fff1e3408fd34a2ceb3..97f6b76a8e48c61dd12414cfb1bfafa5fce605c8 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -3,318 +3,239 @@
   * planmain.c
   *       Routines to plan a single query
   *
- * Copyright (c) 1994, Regents of the University of California
+ * What's in a name, anyway?  The top-level entry point of the planner/
+ * optimizer is over in planner.c, not here as you might think from the
+ * file name.  But this is the main code for planning a basic join operation,
+ * shorn of features like subselects, inheritance, aggregates, grouping,
+ * and so on.  (Those are the things planner.c deals with.)
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.42 1999/08/22 20:14:48 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.75 2003/03/10 03:53:50 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
-#include <sys/types.h>
-
  #include "postgres.h"
  
-
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  #include "optimizer/planmain.h"
-#include "optimizer/prep.h"
-#include "optimizer/subselect.h"
-#include "optimizer/tlist.h"
-#include "utils/lsyscache.h"
  
  
-static Plan *subplanner(Query *root, List *flat_tlist, List *qual);
-
-/*
+/*--------------------
   * query_planner
- *       Routine to create a query plan.  It does so by first creating a
- *       subplan for the topmost level of attributes in the query.  Then,
- *       it modifies all target list and qualifications to consider the next
- *       level of nesting and creates a plan for this modified query by
- *       recursively calling itself.  The two pieces are then merged together
- *       by creating a result node that indicates which attributes should
- *       be placed where and any relation level qualifications to be
- *       satisfied.
+ *       Generate a path (that is, a simplified plan) for a basic query,
+ *       which may involve joins but not any fancier features.
+ *
+ * Since query_planner does not handle the toplevel processing (grouping,
+ * sorting, etc) it cannot select the best path by itself.  It selects
+ * two paths: the cheapest path that produces all the required tuples,
+ * independent of any ordering considerations, and the cheapest path that
+ * produces the expected fraction of the required tuples in the required
+ * ordering, if there is a path that is cheaper for this than just sorting
+ * the output of the cheapest overall path.  The caller (grouping_planner)
+ * will make the final decision about which to use.
   *
- *       command-type is the query command, e.g., select, delete, etc.
- *       tlist is the target list of the query
- *       qual is the qualification of the query
+ * Input parameters:
+ * root is the query to plan
+ * tlist is the target list the query should produce (NOT root->targetList!)
+ * tuple_fraction is the fraction of tuples we expect will be retrieved
   *
- *       Note: the Query node now also includes a query_pathkeys field, which
- *       signals query_planner that the indicated sort order is wanted in the
- *       final output plan.  If, for some reason, query_planner is unable to
- *       comply, it sets query_pathkeys to NIL before returning.  (The reason
- *       query_pathkeys is a Query field and not a passed parameter is that
- *       the low-level routines in indxpath.c need to see it.)
+ * Output parameters:
+ * *cheapest_path receives the overall-cheapest path for the query
+ * *sorted_path receives the cheapest presorted path for the query,
+ *                             if any (NULL if there is no useful presorted path)
   *
- *       Returns a query plan.
+ * Note: the Query node also includes a query_pathkeys field, which is both
+ * an input and an output of query_planner().  The input value signals
+ * query_planner that the indicated sort order is wanted in the final output
+ * plan.  But this value has not yet been "canonicalized", since the needed
+ * info does not get computed until we scan the qual clauses.  We canonicalize
+ * it as soon as that task is done.  (The main reason query_pathkeys is a
+ * Query field and not a passed parameter is that the low-level routines in
+ * indxpath.c need to see it.)
+ *
+ * tuple_fraction is interpreted as follows:
+ *       0: expect all tuples to be retrieved (normal case)
+ *       0 < tuple_fraction < 1: expect the given fraction of tuples available
+ *             from the plan to be retrieved
+ *       tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
+ *             expected to be retrieved (ie, a LIMIT specification)
+ *--------------------
   */
-Plan *
-query_planner(Query *root,
-                         int command_type,
-                         List *tlist,
-                         List *qual)
+void
+query_planner(Query *root, List *tlist, double tuple_fraction,
+                         Path **cheapest_path, Path **sorted_path)
  {
-       List       *constant_qual = NIL;
-       List       *var_only_tlist;
-       List       *level_tlist;
-       Plan       *subplan;
-
-       if (PlannerQueryLevel > 1)
-       {
-               /* should copy be made ? */
-               tlist = (List *) SS_replace_correlation_vars((Node *) tlist);
-               qual = (List *) SS_replace_correlation_vars((Node *) qual);
-       }
-       if (root->hasSubLinks)
-               qual = (List *) SS_process_sublinks((Node *) qual);
-
-       qual = cnfify((Expr *) qual, true);
-#ifdef OPTIMIZER_DEBUG
-       printf("After cnfify()\n");
-       pprint(qual);
-#endif
-
-       /*
-        * Pull out any non-variable qualifications so these can be put in the
-        * topmost result node.
-        */
-       qual = pull_constant_clauses(qual, &constant_qual);
-
-       /*
-        * Create a target list that consists solely of (resdom var) target
-        * list entries, i.e., contains no arbitrary expressions.
-        */
-       var_only_tlist = flatten_tlist(tlist);
-       if (var_only_tlist)
-               level_tlist = var_only_tlist;
-       else
-               /* from old code. the logic is beyond me. - ay 2/95 */
-               level_tlist = tlist;
+       List       *constant_quals;
+       RelOptInfo *final_rel;
+       Path       *cheapestpath;
+       Path       *sortedpath;
  
         /*
-        * A query may have a non-variable target list and a non-variable
-        * qualification only under certain conditions: - the query creates
-        * all-new tuples, or - the query is a replace (a scan must still be
-        * done in this case).
+        * If the query has an empty join tree, then it's something easy like
+        * "SELECT 2+2;" or "INSERT ... VALUES()".      Fall through quickly.
          */
-       if (var_only_tlist == NULL && qual == NULL)
+       if (root->jointree->fromlist == NIL)
         {
-               root->query_pathkeys = NIL; /* these plans make unordered results */
-
-               switch (command_type)
-               {
-                       case CMD_SELECT:
-                       case CMD_INSERT:
-                               return ((Plan *) make_result(tlist,
-                                                                                        (Node *) constant_qual,
-                                                                                        (Plan *) NULL));
-                               break;
-                       case CMD_DELETE:
-                       case CMD_UPDATE:
-                               {
-                                       SeqScan    *scan = make_seqscan(tlist,
-                                                                                                       NIL,
-                                                                                                       root->resultRelation);
-
-                                       if (constant_qual != NULL)
-                                               return ((Plan *) make_result(tlist,
-                                                                                                        (Node *) constant_qual,
-                                                                                                        (Plan *) scan));
-                                       else
-                                               return (Plan *) scan;
-                               }
-                               break;
-                       default:
-                               return (Plan *) NULL;
-               }
+               *cheapest_path = (Path *) create_result_path(NULL, NULL,
+                                                                                       (List *) root->jointree->quals);
+               *sorted_path = NULL;
+               return;
         }
  
         /*
-        * Choose the best access path and build a plan for it.
+        * Pull out any non-variable WHERE clauses so these can be put in a
+        * toplevel "Result" node, where they will gate execution of the whole
+        * plan (the Result will not invoke its descendant plan unless the
+        * quals are true).  Note that any *really* non-variable quals will
+        * have been optimized away by eval_const_expressions().  What we're
+        * mostly interested in here is quals that depend only on outer-level
+        * vars, although if the qual reduces to "WHERE FALSE" this path will
+        * also be taken.
          */
-       subplan = subplanner(root, level_tlist, qual);
+       root->jointree->quals = (Node *)
+               pull_constant_clauses((List *) root->jointree->quals,
+                                                         &constant_quals);
  
         /*
-        * Build a result node linking the plan if we have constant quals
+        * init planner lists to empty
+        *
+        * NOTE: in_info_list was set up by subquery_planner, do not touch here
          */
-       if (constant_qual)
-       {
-               subplan = (Plan *) make_result(tlist,
-                                                                          (Node *) constant_qual,
-                                                                          subplan);
-
-               root->query_pathkeys = NIL; /* result is unordered, no? */
+       root->base_rel_list = NIL;
+       root->other_rel_list = NIL;
+       root->join_rel_list = NIL;
+       root->equi_key_list = NIL;
  
-               return subplan;
-       }
+       /*
+        * Construct RelOptInfo nodes for all base relations in query.
+        */
+       add_base_rels_to_query(root, (Node *) root->jointree);
  
         /*
-        * Replace the toplevel plan node's flattened target list with the
-        * targetlist given by my caller, so that expressions are evaluated.
+        * Examine the targetlist and qualifications, adding entries to
+        * baserel targetlists for all referenced Vars.  Restrict and join
+        * clauses are added to appropriate lists belonging to the mentioned
+        * relations.  We also build lists of equijoined keys for pathkey
+        * construction.
          *
-        * This implies that all expression evaluations are done at the root
-        * of the plan tree.  Once upon a time there was code to try to push
+        * Note: all subplan nodes will have "flat" (var-only) tlists.
+        * This implies that all expression evaluations are done at the root of
+        * the plan tree.  Once upon a time there was code to try to push
          * expensive function calls down to lower plan nodes, but that's dead
          * code and has been for a long time...
          */
-       else
-       {
-               subplan->targetlist = tlist;
+       build_base_rel_tlists(root, tlist);
  
-               return subplan;
-       }
-
-#ifdef NOT_USED
+       (void) distribute_quals_to_rels(root, (Node *) root->jointree);
  
         /*
-        * Destructively modify the query plan's targetlist to add fjoin lists
-        * to flatten functions that return sets of base types
+        * Use the completed lists of equijoined keys to deduce any implied
+        * but unstated equalities (for example, A=B and B=C imply A=C).
          */
-       subplan->targetlist = generate_fjoin(subplan->targetlist);
-#endif
-
-}
-
-/*
- * subplanner
- *
- *      Subplanner creates an entire plan consisting of joins and scans
- *      for processing a single level of attributes.
- *
- *      flat_tlist is the flattened target list
- *      qual is the qualification to be satisfied
- *
- *      Returns a subplan.
- *
- */
-static Plan *
-subplanner(Query *root,
-                  List *flat_tlist,
-                  List *qual)
-{
-       RelOptInfo *final_rel;
-       Cost            cheapest_cost;
-       Path       *sortedpath;
+       generate_implied_equalities(root);
  
         /*
-        * Initialize the targetlist and qualification, adding entries to
-        * base_rel_list as relation references are found (e.g., in the
-        * qualification, the targetlist, etc.)
+        * We should now have all the pathkey equivalence sets built, so it's
+        * now possible to convert the requested query_pathkeys to canonical
+        * form.
          */
-       root->base_rel_list = NIL;
-       root->join_rel_list = NIL;
-
-       make_var_only_tlist(root, flat_tlist);
-       add_restrict_and_join_to_rels(root, qual);
-       add_missing_vars_to_tlist(root, flat_tlist);
+       root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
  
-       set_joininfo_mergeable_hashable(root->base_rel_list);
+       /*
+        * Ready to do the primary planning.
+        */
+       final_rel = make_one_rel(root);
  
-       final_rel = make_one_rel(root, root->base_rel_list);
+       if (!final_rel || !final_rel->cheapest_total_path)
+               elog(ERROR, "query_planner: failed to construct a relation");
  
-#ifdef NOT_USED                                        /* fix xfunc */
+       /*
+        * Now that we have an estimate of the final rel's size, we can
+        * convert a tuple_fraction specified as an absolute count (ie, a
+        * LIMIT option) into a fraction of the total tuples.
+        */
+       if (tuple_fraction >= 1.0)
+               tuple_fraction /= final_rel->rows;
  
         /*
-        * Perform Predicate Migration on each path, to optimize and correctly
-        * assess the cost of each before choosing the cheapest one. -- JMH,
-        * 11/16/92
+        * Pick out the cheapest-total path and the cheapest presorted path
+        * for the requested pathkeys (if there is one).  We should take the
+        * tuple fraction into account when selecting the cheapest presorted
+        * path, but not when selecting the cheapest-total path, since if we
+        * have to sort then we'll have to fetch all the tuples.  (But there's
+        * a special case: if query_pathkeys is NIL, meaning order doesn't
+        * matter, then the "cheapest presorted" path will be the cheapest
+        * overall for the tuple fraction.)
          *
-        * Needn't do so if the top rel is pruneable: that means there's no
-        * expensive functions left to pull up.  -- JMH, 11/22/92
+        * The cheapest-total path is also the one to use if grouping_planner
+        * decides to use hashed aggregation, so we return it separately even
+        * if this routine thinks the presorted path is the winner.
          */
-       if (XfuncMode != XFUNC_OFF && XfuncMode != XFUNC_NOPM &&
-               XfuncMode != XFUNC_NOPULL && !final_rel->pruneable)
-       {
-               List       *pathnode;
+       cheapestpath = final_rel->cheapest_total_path;
  
-               foreach(pathnode, final_rel->pathlist)
-               {
-                       if (xfunc_do_predmig((Path *) lfirst(pathnode)))
-                               set_cheapest(final_rel, final_rel->pathlist);
-               }
-       }
-#endif
+       sortedpath =
+               get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
+                                                                                                 root->query_pathkeys,
+                                                                                                 tuple_fraction);
  
-       if (! final_rel)
-       {
-               elog(NOTICE, "final relation is null");
-               root->query_pathkeys = NIL; /* result is unordered, no? */
-               return create_plan((Path *) NULL);
-       }
+       /* Don't return same path in both guises; just wastes effort */
+       if (sortedpath == cheapestpath)
+               sortedpath = NULL;
  
         /*
-        * Determine the cheapest path and create a subplan to execute it.
-        *
-        * If no special sort order is wanted, or if the cheapest path is
-        * already appropriately ordered, just use the cheapest path.
+        * Forget about the presorted path if it would be cheaper to sort the
+        * cheapest-total path.  Here we need consider only the behavior at
+        * the tuple fraction point.
          */
-       if (root->query_pathkeys == NIL ||
-               pathkeys_contained_in(root->query_pathkeys,
-                                                         final_rel->cheapestpath->pathkeys))
-               return create_plan(final_rel->cheapestpath);
-       /*
-        * Otherwise, look to see if we have an already-ordered path that is
-        * cheaper than doing an explicit sort on cheapestpath.
-        */
-       cheapest_cost = final_rel->cheapestpath->path_cost +
-               cost_sort(root->query_pathkeys, final_rel->size, final_rel->width);
-
-       sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist,
-                                                                                               root->query_pathkeys,
-                                                                                               false);
         if (sortedpath)
         {
-               if (sortedpath->path_cost <= cheapest_cost)
+               Path            sort_path;      /* dummy for result of cost_sort */
+
+               if (root->query_pathkeys == NIL ||
+                       pathkeys_contained_in(root->query_pathkeys,
+                                                                 cheapestpath->pathkeys))
                 {
-                       /* Found a better presorted path, use it */
-                       return create_plan(sortedpath);
+                       /* No sort needed for cheapest path */
+                       sort_path.startup_cost = cheapestpath->startup_cost;
+                       sort_path.total_cost = cheapestpath->total_cost;
                 }
-               /* otherwise, doing it the hard way is still cheaper */
-       }
-       else
-       {
-               /*
-                * If we found no usable presorted path at all, it is possible
-                * that the user asked for descending sort order.  Check to see
-                * if we can satisfy the pathkeys by using a backwards indexscan.
-                * To do this, we commute all the operators in the pathkeys and
-                * then look for a matching path that is an IndexPath.
-                */
-               List       *commuted_pathkeys = copyObject(root->query_pathkeys);
-
-               if (commute_pathkeys(commuted_pathkeys))
+               else
                 {
-                       /* pass 'true' to force only IndexPaths to be considered */
-                       sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist,
-                                                                                                               commuted_pathkeys,
-                                                                                                               true);
-                       if (sortedpath && sortedpath->path_cost <= cheapest_cost)
-                       {
-                               /*
-                                * Kluge here: since IndexPath has no representation for
-                                * backwards scan, we have to convert to Plan format and
-                                * then poke the result.
-                                */
-                               Plan       *sortedplan = create_plan(sortedpath);
+                       /* Figure cost for sorting */
+                       cost_sort(&sort_path, root, root->query_pathkeys,
+                                         cheapestpath->total_cost,
+                                         final_rel->rows, final_rel->width);
+               }
  
-                               Assert(IsA(sortedplan, IndexScan));
-                               ((IndexScan *) sortedplan)->indxorderdir = BackwardScanDirection;
-                               return sortedplan;
-                       }
+               if (compare_fractional_path_costs(sortedpath, &sort_path,
+                                                                                 tuple_fraction) > 0)
+               {
+                       /* Presorted path is a loser */
+                       sortedpath = NULL;
                 }
         }
  
-       /* Nothing for it but to sort the cheapestpath...
-        *
-        * We indicate we failed to sort the plan, and let the caller
-        * stick the appropriate sort node on top.  union_planner has to be
-        * able to add a sort node anyway, so no need for extra code here.
+       /*
+        * If we have constant quals, add a toplevel Result step to process them.
          */
-       root->query_pathkeys = NIL; /* sorry, it ain't sorted */
+       if (constant_quals)
+       {
+               cheapestpath = (Path *) create_result_path(final_rel,
+                                                                                                  cheapestpath,
+                                                                                                  constant_quals);
+               if (sortedpath)
+                       sortedpath = (Path *) create_result_path(final_rel,
+                                                                                                        sortedpath,
+                                                                                                        constant_quals);
+       }
  
-       return create_plan(final_rel->cheapestpath);
+       *cheapest_path = cheapestpath;
+       *sorted_path = sortedpath;
  }