Restructure parsetree representation of DECLARE CURSOR: now it's a

[postgresql] / src / backend / optimizer / plan / planmain.c
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index 1da01e10911ad600566e19829c34944d1a2b3e30..97f6b76a8e48c61dd12414cfb1bfafa5fce605c8 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -1,424 +1,241 @@
  /*-------------------------------------------------------------------------
   *
- * planmain.c--
- *    Routines to plan a single query
+ * planmain.c
+ *       Routines to plan a single query
   *
- * Copyright (c) 1994, Regents of the University of California
+ * What's in a name, anyway?  The top-level entry point of the planner/
+ * optimizer is over in planner.c, not here as you might think from the
+ * file name.  But this is the main code for planning a basic join operation,
+ * shorn of features like subselects, inheritance, aggregates, grouping,
+ * and so on.  (Those are the things planner.c deals with.)
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
   * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.2 1996/10/31 10:59:14 scrappy Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.75 2003/03/10 03:53:50 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
-#include <sys/types.h>
-
  #include "postgres.h"
  
-#include "nodes/pg_list.h"
-#include "nodes/plannodes.h"
-#include "nodes/parsenodes.h"
-#include "nodes/relation.h"
-
-#include "optimizer/planmain.h"
-#include "optimizer/internal.h"
-#include "optimizer/paths.h"
  #include "optimizer/clauses.h"
-#include "optimizer/keys.h"
-#include "optimizer/tlist.h"
-#include "optimizer/xfunc.h"
  #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
  
-#include "tcop/dest.h"
-#include "utils/elog.h"
-#include "utils/palloc.h"
-#include "nodes/memnodes.h"
-#include "utils/mcxt.h"
-#include "utils/lsyscache.h"
-
-static Plan *subplanner(Query *root, List *flat_tlist, List *qual);
-static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
-
-static Plan *make_groupPlan(List *tlist, bool tuplePerGroup,
-                           List *groupClause, Plan *subplan);
  
-/*    
- * query_planner--
- *    Routine to create a query plan.  It does so by first creating a
- *    subplan for the topmost level of attributes in the query.  Then,
- *    it modifies all target list and qualifications to consider the next
- *    level of nesting and creates a plan for this modified query by
- *    recursively calling itself.  The two pieces are then merged together
- *    by creating a result node that indicates which attributes should
- *    be placed where and any relation level qualifications to be
- *    satisfied.
- *    
- *    command-type is the query command, e.g., retrieve, delete, etc.
- *    tlist is the target list of the query
- *    qual is the qualification of the query
- *    
- *    Returns a query plan.
+/*--------------------
+ * query_planner
+ *       Generate a path (that is, a simplified plan) for a basic query,
+ *       which may involve joins but not any fancier features.
+ *
+ * Since query_planner does not handle the toplevel processing (grouping,
+ * sorting, etc) it cannot select the best path by itself.  It selects
+ * two paths: the cheapest path that produces all the required tuples,
+ * independent of any ordering considerations, and the cheapest path that
+ * produces the expected fraction of the required tuples in the required
+ * ordering, if there is a path that is cheaper for this than just sorting
+ * the output of the cheapest overall path.  The caller (grouping_planner)
+ * will make the final decision about which to use.
+ *
+ * Input parameters:
+ * root is the query to plan
+ * tlist is the target list the query should produce (NOT root->targetList!)
+ * tuple_fraction is the fraction of tuples we expect will be retrieved
+ *
+ * Output parameters:
+ * *cheapest_path receives the overall-cheapest path for the query
+ * *sorted_path receives the cheapest presorted path for the query,
+ *                             if any (NULL if there is no useful presorted path)
+ *
+ * Note: the Query node also includes a query_pathkeys field, which is both
+ * an input and an output of query_planner().  The input value signals
+ * query_planner that the indicated sort order is wanted in the final output
+ * plan.  But this value has not yet been "canonicalized", since the needed
+ * info does not get computed until we scan the qual clauses.  We canonicalize
+ * it as soon as that task is done.  (The main reason query_pathkeys is a
+ * Query field and not a passed parameter is that the low-level routines in
+ * indxpath.c need to see it.)
+ *
+ * tuple_fraction is interpreted as follows:
+ *       0: expect all tuples to be retrieved (normal case)
+ *       0 < tuple_fraction < 1: expect the given fraction of tuples available
+ *             from the plan to be retrieved
+ *       tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
+ *             expected to be retrieved (ie, a LIMIT specification)
+ *--------------------
   */
-Plan *
-query_planner(Query *root,
-             int command_type,
-             List *tlist,
-             List *qual)
+void
+query_planner(Query *root, List *tlist, double tuple_fraction,
+                         Path **cheapest_path, Path **sorted_path)
  {
-    List       *constant_qual = NIL;
-    List       *flattened_tlist = NIL;
-    List       *level_tlist = NIL;
-    Plan       *subplan = (Plan*)NULL;
-    Agg                *aggplan = NULL;
-    
-    /*
-     * A command without a target list or qualification is an error,
-     * except for "delete foo".
-     */
-    if (tlist==NIL && qual==NULL) {
-       if (command_type == CMD_DELETE ||
-           /* Total hack here. I don't know how to handle
-              statements like notify in action bodies.
-              Notify doesn't return anything but
-              scans a system table. */
-           command_type == CMD_NOTIFY) {
-           return ((Plan*)make_seqscan(NIL,
-                                       NIL,
-                                       root->resultRelation,
-                                       (Plan*)NULL));
-       } else
-           return((Plan*)NULL);
-    }
-    
-    /*
-     * Pull out any non-variable qualifications so these can be put in
-     * the topmost result node.  The opids for the remaining
-     * qualifications will be changed to regprocs later.
-     */
-    qual = pull_constant_clauses(qual, &constant_qual);
-    fix_opids(constant_qual);
-    
-    /*
-     * Create a target list that consists solely of (resdom var) target
-     * list entries, i.e., contains no arbitrary expressions.
-     */
-    flattened_tlist = flatten_tlist(tlist);
-    if (flattened_tlist) {
-       level_tlist = flattened_tlist;
-    } else {
-       /* from old code. the logic is beyond me. - ay 2/95 */
-       level_tlist = tlist;
-    }
-
-    /*
-     * Needs to add the group attribute(s) to the target list so that they
-     * are available to either the Group node or the Agg node. (The target
-     * list may not contain the group attribute(s).)
-     */
-    if (root->groupClause) {
-       AddGroupAttrToTlist(level_tlist, root->groupClause);
-    }
-    
-    if (root->qry_aggs) {
-       aggplan = make_agg(tlist, root->qry_numAgg, root->qry_aggs);
-       tlist = level_tlist;
-    }
+       List       *constant_quals;
+       RelOptInfo *final_rel;
+       Path       *cheapestpath;
+       Path       *sortedpath;
  
-    /*
-     * A query may have a non-variable target list and a non-variable
-     * qualification only under certain conditions:
-     *    - the query creates all-new tuples, or
-     *    - the query is a replace (a scan must still be done in this case).
-     */
-    if (flattened_tlist==NULL && qual==NULL) {
-
-       switch (command_type) {
-       case CMD_SELECT: 
-       case CMD_INSERT:
-           return ((Plan*)make_result(tlist,
-                                      (Node*)constant_qual,
-                                      (Plan*)NULL));
-           break;
-
-       case CMD_DELETE:
-       case CMD_UPDATE:
-           {
-               SeqScan *scan = make_seqscan(tlist,
-                                            (List *)NULL,
-                                            root->resultRelation,
-                                            (Plan*)NULL);
-               if (constant_qual!=NULL) {
-                   return ((Plan*)make_result(tlist,
-                                              (Node*)constant_qual,
-                                              (Plan*)scan));
-               } else {
-                   return ((Plan*)scan);
-               } 
-           }
-           break;
-              
-       default:
-           return ((Plan*)NULL);
+       /*
+        * If the query has an empty join tree, then it's something easy like
+        * "SELECT 2+2;" or "INSERT ... VALUES()".      Fall through quickly.
+        */
+       if (root->jointree->fromlist == NIL)
+       {
+               *cheapest_path = (Path *) create_result_path(NULL, NULL,
+                                                                                       (List *) root->jointree->quals);
+               *sorted_path = NULL;
+               return;
         }
-    }
-
-    /*
-     * Find the subplan (access path) and destructively modify the
-     * target list of the newly created subplan to contain the appropriate
-     * join references.
-     */
-    subplan = subplanner(root, level_tlist, qual);
-     
-    set_tlist_references(subplan);
-
-    /*
-     * If we have a GROUP BY clause, insert a group node (with the appropriate
-     * sort node.)
-     */
-    if (root->groupClause != NULL) {
-       bool tuplePerGroup;
  
         /*
-        * decide whether how many tuples per group the Group node needs
-        * to return. (Needs only one tuple per group if no aggregate is
-        * present. Otherwise, need every tuple from the group to do the
-        * aggregation.)
+        * Pull out any non-variable WHERE clauses so these can be put in a
+        * toplevel "Result" node, where they will gate execution of the whole
+        * plan (the Result will not invoke its descendant plan unless the
+        * quals are true).  Note that any *really* non-variable quals will
+        * have been optimized away by eval_const_expressions().  What we're
+        * mostly interested in here is quals that depend only on outer-level
+        * vars, although if the qual reduces to "WHERE FALSE" this path will
+        * also be taken.
          */
-       tuplePerGroup = (aggplan == NULL) ? FALSE : TRUE;
-       
-       subplan =
-           make_groupPlan(tlist, tuplePerGroup, root->groupClause, subplan);
+       root->jointree->quals = (Node *)
+               pull_constant_clauses((List *) root->jointree->quals,
+                                                         &constant_quals);
  
-       /* XXX fake it: this works for the Group node too! very very ugly,
-          please change me -ay 2/95 */
-       set_agg_tlist_references((Agg*)subplan);
-    }
+       /*
+        * init planner lists to empty
+        *
+        * NOTE: in_info_list was set up by subquery_planner, do not touch here
+        */
+       root->base_rel_list = NIL;
+       root->other_rel_list = NIL;
+       root->join_rel_list = NIL;
+       root->equi_key_list = NIL;
  
-    /*
-     * If aggregate is present, insert the agg node 
-     */
-    if (aggplan != NULL) {
-       aggplan->plan.lefttree = subplan;
-       subplan = (Plan*)aggplan;
+       /*
+        * Construct RelOptInfo nodes for all base relations in query.
+        */
+       add_base_rels_to_query(root, (Node *) root->jointree);
  
         /*
-        * set the varno/attno entries to the appropriate references to
-        * the result tuple of the subplans. (We need to set those in the
-        * array of aggreg's in the Agg node also. Even though they're 
-        * pointers, after a few dozen's of copying, they're not the same as
-        * those in the target list.)
+        * Examine the targetlist and qualifications, adding entries to
+        * baserel targetlists for all referenced Vars.  Restrict and join
+        * clauses are added to appropriate lists belonging to the mentioned
+        * relations.  We also build lists of equijoined keys for pathkey
+        * construction.
+        *
+        * Note: all subplan nodes will have "flat" (var-only) tlists.
+        * This implies that all expression evaluations are done at the root of
+        * the plan tree.  Once upon a time there was code to try to push
+        * expensive function calls down to lower plan nodes, but that's dead
+        * code and has been for a long time...
          */
-       set_agg_tlist_references((Agg*)subplan);
-       set_agg_agglist_references((Agg*)subplan);
+       build_base_rel_tlists(root, tlist);
  
-       tlist = aggplan->plan.targetlist;
-    }
-    
-    /*
-     * Build a result node linking the plan if we have constant quals
-     */
-    if (constant_qual) {
-       Plan *plan;
+       (void) distribute_quals_to_rels(root, (Node *) root->jointree);
  
-       plan = (Plan*)make_result(tlist,
-                                 (Node*)constant_qual,
-                                 subplan);
         /*
-        * Change all varno's of the Result's node target list.
+        * Use the completed lists of equijoined keys to deduce any implied
+        * but unstated equalities (for example, A=B and B=C imply A=C).
          */
-       set_result_tlist_references((Result*)plan);
-
-       return (plan);
-    }
+       generate_implied_equalities(root);
  
-    /*
-     * fix up the flattened target list of the plan root node so that
-     * expressions are evaluated.  this forces expression evaluations
-     * that may involve expensive function calls to be delayed to
-     * the very last stage of query execution.  this could be bad.
-     * but it is joey's responsibility to optimally push these
-     * expressions down the plan tree.  -- Wei
-     */
-    subplan->targetlist = flatten_tlist_vars(tlist,
-                                            subplan->targetlist);
+       /*
+        * We should now have all the pathkey equivalence sets built, so it's
+        * now possible to convert the requested query_pathkeys to canonical
+        * form.
+        */
+       root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
  
-    /*
-     * Destructively modify the query plan's targetlist to add fjoin
-     * lists to flatten functions that return sets of base types
-     */
-    subplan->targetlist = generate_fjoin(subplan->targetlist);
+       /*
+        * Ready to do the primary planning.
+        */
+       final_rel = make_one_rel(root);
  
-    return (subplan);
-}
+       if (!final_rel || !final_rel->cheapest_total_path)
+               elog(ERROR, "query_planner: failed to construct a relation");
  
-/*    
- * subplanner
- *    
- *   Subplanner creates an entire plan consisting of joins and scans
- *   for processing a single level of attributes. 
- *    
- *   flat-tlist is the flattened target list
- *   qual is the qualification to be satisfied
- *    
- *   Returns a subplan.
- *    
- */
-static Plan *
-subplanner(Query *root,
-          List *flat_tlist,
-          List *qual)
-{
-    Rel *final_relation;
-    List *final_relation_list;
+       /*
+        * Now that we have an estimate of the final rel's size, we can
+        * convert a tuple_fraction specified as an absolute count (ie, a
+        * LIMIT option) into a fraction of the total tuples.
+        */
+       if (tuple_fraction >= 1.0)
+               tuple_fraction /= final_rel->rows;
  
-    /* Initialize the targetlist and qualification, adding entries to
-     * *query-relation-list* as relation references are found (e.g., in the
-     *  qualification, the targetlist, etc.)
-     */
-    root->base_relation_list_ = NIL; 
-    root->join_relation_list_ = NIL;
-    initialize_base_rels_list(root, flat_tlist);
-    initialize_base_rels_jinfo(root, qual);
-    add_missing_vars_to_base_rels(root, flat_tlist);
+       /*
+        * Pick out the cheapest-total path and the cheapest presorted path
+        * for the requested pathkeys (if there is one).  We should take the
+        * tuple fraction into account when selecting the cheapest presorted
+        * path, but not when selecting the cheapest-total path, since if we
+        * have to sort then we'll have to fetch all the tuples.  (But there's
+        * a special case: if query_pathkeys is NIL, meaning order doesn't
+        * matter, then the "cheapest presorted" path will be the cheapest
+        * overall for the tuple fraction.)
+        *
+        * The cheapest-total path is also the one to use if grouping_planner
+        * decides to use hashed aggregation, so we return it separately even
+        * if this routine thinks the presorted path is the winner.
+        */
+       cheapestpath = final_rel->cheapest_total_path;
  
-    /* Find all possible scan and join paths.
-     * Mark all the clauses and relations that can be processed using special
-     * join methods, then do the exhaustive path search.
-     */
-    initialize_join_clause_info(root->base_relation_list_);
-    final_relation_list = find_paths(root,
-                                    root->base_relation_list_);
+       sortedpath =
+               get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
+                                                                                                 root->query_pathkeys,
+                                                                                                 tuple_fraction);
  
-    if (final_relation_list)
-       final_relation = (Rel*)lfirst (final_relation_list);
-    else
-       final_relation = (Rel*)NIL;
+       /* Don't return same path in both guises; just wastes effort */
+       if (sortedpath == cheapestpath)
+               sortedpath = NULL;
  
-#if 0 /* fix xfunc */
-    /*
-     * Perform Predicate Migration on each path, to optimize and correctly
-     * assess the cost of each before choosing the cheapest one.
-     *                                                 -- JMH, 11/16/92
-     *
-     * Needn't do so if the top rel is pruneable: that means there's no
-     * expensive functions left to pull up.  -- JMH, 11/22/92
-     */
-    if (XfuncMode != XFUNC_OFF && XfuncMode != XFUNC_NOPM && 
-       XfuncMode != XFUNC_NOPULL && !final_relation->pruneable)
+       /*
+        * Forget about the presorted path if it would be cheaper to sort the
+        * cheapest-total path.  Here we need consider only the behavior at
+        * the tuple fraction point.
+        */
+       if (sortedpath)
         {
-           List *pathnode;
-           foreach(pathnode, final_relation->pathlist)
+               Path            sort_path;      /* dummy for result of cost_sort */
+
+               if (root->query_pathkeys == NIL ||
+                       pathkeys_contained_in(root->query_pathkeys,
+                                                                 cheapestpath->pathkeys))
                 {
-                   if (xfunc_do_predmig((Path*)lfirst(pathnode)))
-                       set_cheapest(final_relation, final_relation->pathlist);
+                       /* No sort needed for cheapest path */
+                       sort_path.startup_cost = cheapestpath->startup_cost;
+                       sort_path.total_cost = cheapestpath->total_cost;
+               }
+               else
+               {
+                       /* Figure cost for sorting */
+                       cost_sort(&sort_path, root, root->query_pathkeys,
+                                         cheapestpath->total_cost,
+                                         final_rel->rows, final_rel->width);
                 }
-       }
-#endif
-    
-    /*
-     * Determine the cheapest path and create a subplan corresponding to it.
-     */
-    if (final_relation) {
-       return (create_plan ((Path*)final_relation->cheapestpath));
-    }else {
-       elog(NOTICE, "final relation is nil");
-       return(create_plan ((Path*)NULL));
-    }
-    
-}
-
-/*****************************************************************************
- *
- *****************************************************************************/
-
-static Result *
-make_result(List *tlist,
-           Node *resconstantqual,
-           Plan *subplan)
-{
-    Result *node = makeNode(Result);
-    Plan *plan = &node->plan;
-
-    tlist = generate_fjoin(tlist);
-    plan->cost = 0.0;
-    plan->state = (EState *)NULL;
-    plan->targetlist = tlist;
-    plan->lefttree = subplan;
-    plan->righttree = NULL;
-    node->resconstantqual = resconstantqual;
-    node->resstate = NULL;
-    
-    return(node);
-} 
-
-/*****************************************************************************
- *
- *****************************************************************************/
-
-static Plan *
-make_groupPlan(List *tlist,
-              bool tuplePerGroup,
-              List *groupClause,
-              Plan *subplan)
-{
-    List *sort_tlist;
-    List *gl;
-    int keyno;
-    Sort *sortplan;
-    Group *grpplan;
-    int numCols;
-    AttrNumber *grpColIdx;
-
-    numCols = length(groupClause);
-    grpColIdx = (AttrNumber *)palloc(sizeof(AttrNumber)*numCols);
-
-    /*
-     * first, make a sort node. Group node expects the tuples it gets
-     * from the subplan is in the order as specified by the group columns.
-     */
-    keyno = 1;
-    sort_tlist = new_unsorted_tlist(subplan->targetlist);
  
-    {
-       /* if this is a mergejoin node, varno could be OUTER/INNER */
-       List *l;
-       foreach(l, sort_tlist) {
-           TargetEntry *tle;
-           tle = lfirst(l);
-           ((Var*)tle->expr)->varno = 1;
+               if (compare_fractional_path_costs(sortedpath, &sort_path,
+                                                                                 tuple_fraction) > 0)
+               {
+                       /* Presorted path is a loser */
+                       sortedpath = NULL;
+               }
         }
-    }
-    
-    foreach (gl, groupClause) {
-       GroupClause *grpcl = (GroupClause*)lfirst(gl);
-       TargetEntry *tle;
  
-       tle = match_varid(grpcl->grpAttr, sort_tlist);
         /*
-        * the parser should have checked to make sure the group attribute
-        * is valid but the optimizer might have screwed up and hence we
-        * check again.
+        * If we have constant quals, add a toplevel Result step to process them.
          */
-       if (tle==NULL) {
-           elog(WARN, "group attribute disappeared from target list");
+       if (constant_quals)
+       {
+               cheapestpath = (Path *) create_result_path(final_rel,
+                                                                                                  cheapestpath,
+                                                                                                  constant_quals);
+               if (sortedpath)
+                       sortedpath = (Path *) create_result_path(final_rel,
+                                                                                                        sortedpath,
+                                                                                                        constant_quals);
         }
-       tle->resdom->reskey = keyno;
-       tle->resdom->reskeyop = get_opcode(grpcl->grpOpoid);
-
-       grpColIdx[keyno-1] = tle->resdom->resno;
-       keyno++;
-    }
-    sortplan = make_sort(sort_tlist,
-                        _TEMP_RELATION_ID_,
-                        subplan,
-                        numCols);
-    sortplan->plan.cost = subplan->cost;       /* XXX assume no cost */
  
-    /*
-     * make the Group node
-     */
-    tlist = copyObject(tlist); /* make a copy */
-    grpplan = make_group(tlist, tuplePerGroup, numCols, grpColIdx, sortplan);
-    
-    return (Plan*)grpplan;
+       *cheapest_path = cheapestpath;
+       *sorted_path = sortedpath;
  }