granicus.if.org Git - postgresql/commitdiff
Teach planner how to propagate pathkeys from sub-SELECTs in FROM up to
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 15 Feb 2003 20:12:41 +0000 (20:12 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 15 Feb 2003 20:12:41 +0000 (20:12 +0000)
the outer query.  (The implementation is a bit klugy, but it would take
nontrivial restructuring to make it nicer, which this is probably not
worth.)  This avoids unnecessary sort steps in examples like
SELECT foo,count(*) FROM (SELECT ... ORDER BY foo,bar) sub GROUP BY foo
which means there is now a reasonable technique for controlling the
order of inputs to custom aggregates, even in the grouping case.

src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/pathkeys.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/pathnode.c
src/backend/optimizer/util/relnode.c
src/backend/optimizer/util/tlist.c
src/include/optimizer/pathnode.h
src/include/optimizer/paths.h
src/include/optimizer/tlist.h

index c168ecd3b624361c63c7792635499d32a8b5866f..24a604716b8af704046d4bca6db7d9ca2dff6ef3 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.96 2003/02/08 20:20:54 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.97 2003/02/15 20:12:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -291,6 +291,7 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel,
                                          Index rti, RangeTblEntry *rte)
 {
        Query      *subquery = rte->subquery;
+       List       *pathkeys;
 
        /*
         * If there are any restriction clauses that have been attached to the
@@ -351,8 +352,11 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel,
        /* Mark rel with estimated output rows, width, etc */
        set_baserel_size_estimates(root, rel);
 
+       /* Convert subquery pathkeys to outer representation */
+       pathkeys = build_subquery_pathkeys(root, rel, subquery);
+
        /* Generate appropriate path */
-       add_path(rel, create_subqueryscan_path(rel));
+       add_path(rel, create_subqueryscan_path(rel, pathkeys));
 
        /* Select cheapest path (pretty easy in this case...) */
        set_cheapest(rel);
index c72a635535c61afef09ecacd807af19bf1207dd2..7007ca0cfe5af82103ea4fd5e817c4f5e4708c73 100644 (file)
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.46 2003/02/08 20:20:54 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.47 2003/02/15 20:12:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -366,6 +366,31 @@ canonicalize_pathkeys(Query *root, List *pathkeys)
        return new_pathkeys;
 }
 
+
+/*
+ * count_canonical_peers
+ *       Given a PathKeyItem, scan root->equi_key_list for the first set that
+ *       contains it.  If found, return the number of other members of that
+ *       set; if not, return 0.  The equi_key_list itself is never modified.
+ *
+ * This is a hack: the count serves only as a "score" for the rather bogus
+ * heuristics in build_subquery_pathkeys.
+ */
+static int
+count_canonical_peers(Query *root, PathKeyItem *item)
+{
+       List       *cursetlink;
+
+       foreach(cursetlink, root->equi_key_list)
+       {
+               List       *curset = lfirst(cursetlink);
+
+               if (member(item, curset))
+                       return length(curset) - 1;      /* don't count item itself */
+       }
+       return 0;                                       /* not in any set */
+}
+
 /****************************************************************************
  *             PATHKEY COMPARISONS
  ****************************************************************************/
@@ -597,6 +622,9 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
  *
  * If 'scandir' is BackwardScanDirection, attempt to build pathkeys
  * representing a backwards scan of the index. Return NIL if can't do it.
+ *
+ * We generate the full pathkeys list whether or not all are useful for the
+ * current query.  Caller should do truncate_useless_pathkeys().
  */
 List *
 build_index_pathkeys(Query *root,
@@ -699,9 +727,10 @@ find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno)
 
        foreach(temp, rel->targetlist)
        {
-               Var                *tle_var = get_expr(lfirst(temp));
+               Var                *tle_var = (Var *) ((TargetEntry *) lfirst(temp))->expr;
 
-               if (IsA(tle_var, Var) &&tle_var->varattno == varattno)
+               if (IsA(tle_var, Var) &&
+                       tle_var->varattno == varattno)
                        return tle_var;
        }
 
@@ -714,6 +743,112 @@ find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno)
        return makeVar(relid, varattno, vartypeid, type_mod, 0);
 }
 
+/*
+ * build_subquery_pathkeys
+ *       Build a pathkeys list that describes the ordering of a subquery's
+ *       result (in the terms of the outer query).  The subquery must already
+ *       have been planned, so that its query_pathkeys field has been set.
+ *
+ * It is not necessary for caller to do truncate_useless_pathkeys(): a key
+ * is kept only if its score is positive, i.e. it has canonical peers or
+ * matches the same-position entry of root->query_pathkeys.
+ */
+List *
+build_subquery_pathkeys(Query *root, RelOptInfo *rel, Query *subquery)
+{
+       List       *retval = NIL;
+       int                     retvallen = 0;  /* number of entries in retval */
+       int                     outer_query_keys = length(root->query_pathkeys);
+       List       *l;
+
+       foreach(l, subquery->query_pathkeys)
+       {
+               List       *sub_pathkey = (List *) lfirst(l);
+               List       *j;
+               PathKeyItem *best_item = NULL;
+               int                     best_score = 0; /* only scores > 0 are accepted */
+               List       *cpathkey;
+
+               /*
+                * The sub_pathkey could contain multiple elements (representing
+                * knowledge that multiple items are effectively equal).  Each
+                * element might match none, one, or more of the output columns
+                * that are visible to the outer query.  This means we may have
+                * multiple possible representations of the sub_pathkey in the
+                * context of the outer query.  Ideally we would generate them all
+                * and put them all into a pathkey list of the outer query, thereby
+                * propagating equality knowledge up to the outer query.  Right now
+                * we cannot do so, because the outer query's canonical pathkey
+                * sets are already frozen when this is called.  Instead we prefer
+                * the one that has the highest "score" (number of canonical pathkey
+                * peers, plus one if it matches the outer query_pathkeys).
+                * This is the most likely to be useful in the outer query.
+                */
+               foreach(j, sub_pathkey)
+               {
+                       PathKeyItem *sub_item = (PathKeyItem *) lfirst(j);
+                       Node   *sub_key = sub_item->key;
+                       List   *k;
+
+                       foreach(k, subquery->targetList)
+                       {
+                               TargetEntry *tle = (TargetEntry *) lfirst(k);
+
+                               if (!tle->resdom->resjunk &&
+                                       equal(tle->expr, sub_key))
+                               {
+                                       /* Found a representation for this sub_key */
+                                       Var        *outer_var;
+                                       PathKeyItem *outer_item;
+                                       int             score;
+
+                                       outer_var = makeVar(rel->relid,
+                                                                               tle->resdom->resno,
+                                                                               tle->resdom->restype,
+                                                                               tle->resdom->restypmod,
+                                                                               0);
+                                       outer_item = makePathKeyItem((Node *) outer_var,
+                                                                                                sub_item->sortop);
+                                       /* score = # of mergejoin peers */
+                                       score = count_canonical_peers(root, outer_item);
+                                       /* +1 if it matches the proper query_pathkeys item */
+                                       if (retvallen < outer_query_keys &&
+                                               member(outer_item,
+                                                          nth(retvallen, root->query_pathkeys)))
+                                               score++;
+                                       if (score > best_score) /* ties keep the earlier candidate */
+                                       {
+                                               best_item = outer_item;
+                                               best_score = score;
+                                       }
+                               }
+                       }
+               }
+
+               /*
+                * If no candidate scored above zero (no representation found, or
+                * none judged useful), we're done (can't use ones to its right).
+                */
+               if (!best_item)
+                       break;
+
+               /* Canonicalize the chosen item (we did not before) */
+               cpathkey = make_canonical_pathkey(root, best_item);
+
+               /*
+                * Eliminate redundant ordering info; could happen if outer
+                * query equijoins subquery keys...
+                */
+               if (!ptrMember(cpathkey, retval))
+               {
+                       retval = lappend(retval, cpathkey);
+                       retvallen++;
+               }
+       }
+
+       return retval;
+}
+
 /*
  * build_join_pathkeys
  *       Build the path keys for a join relation constructed by mergejoin or
index 9a0df30613be19fd5288554cd51e110524531279..2b46b4b740102b229051a26c17071cb993422366 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.146 2003/02/09 23:57:19 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.147 2003/02/15 20:12:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -469,6 +469,9 @@ inheritance_planner(Query *parse, List *inheritlist)
        /* Save the target-relations list for the executor, too */
        parse->resultRelations = inheritlist;
 
+       /* Mark result as unordered (probably unnecessary) */
+       parse->query_pathkeys = NIL;
+
        return (Plan *) make_append(subplans, true, tlist);
 }
 
@@ -491,7 +494,8 @@ inheritance_planner(Query *parse, List *inheritlist)
  * The normal case is to pass -1, but some callers pass values >= 0 to
  * override this routine's determination of the appropriate fraction.
  *
- * Returns a query plan.
+ * Returns a query plan.  Also, parse->query_pathkeys is returned as the
+ * actual output ordering of the plan (in pathkey format).
  *--------------------
  */
 static Plan *
@@ -1191,10 +1195,13 @@ grouping_planner(Query *parse, double tuple_fraction)
        if (parse->sortClause)
        {
                if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
+               {
                        result_plan = (Plan *) make_sort_from_sortclauses(parse,
                                                                                                                          tlist,
                                                                                                                          result_plan,
                                                                                                                          parse->sortClause);
+                       current_pathkeys = sort_pathkeys;
+               }
        }
 
        /*
@@ -1232,6 +1239,12 @@ grouping_planner(Query *parse, double tuple_fraction)
                                                                                  parse->limitCount);
        }
 
+       /*
+        * Return the actual output ordering in query_pathkeys for possible
+        * use by an outer query level.
+        */
+       parse->query_pathkeys = current_pathkeys;
+
        return result_plan;
 }
 
index 8a59730aef52773f4aef456770e0e65e934745c4..3984c666f51242e2a6c6a3fe932c35fbe7db2c9a 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.87 2003/02/08 20:20:55 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.88 2003/02/15 20:12:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -676,13 +676,13 @@ hash_safe_tlist(List *tlist)
  *       returning the pathnode.
  */
 Path *
-create_subqueryscan_path(RelOptInfo *rel)
+create_subqueryscan_path(RelOptInfo *rel, List *pathkeys)
 {
        Path       *pathnode = makeNode(Path);
 
        pathnode->pathtype = T_SubqueryScan;
        pathnode->parent = rel;
-       pathnode->pathkeys = NIL;       /* for now, assume unordered result */
+       pathnode->pathkeys = pathkeys;
 
        /* just copy the subplan's cost estimates */
        pathnode->startup_cost = rel->subplan->startup_cost;
index 5b875dfe15511ba0323067ba8b831aec2dbc7167..af44cb7f20616aa9a493c426332b2f72518cc53a 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.47 2003/02/08 20:20:55 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.48 2003/02/15 20:12:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -381,11 +381,11 @@ new_join_tlist(List *tlist,
 
        foreach(i, tlist)
        {
-               TargetEntry *xtl = lfirst(i);
+               TargetEntry *tle = lfirst(i);
 
                resdomno += 1;
                t_list = lappend(t_list,
-                                                create_tl_element(get_expr(xtl), resdomno));
+                                                create_tl_element((Var *) tle->expr, resdomno));
        }
 
        return t_list;
index 6f90ea875686a01c8d6ee7c5993da709468a0a28..53d5615cb0b8879f343d4c88c4a7063a73c227a2 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.54 2003/01/20 18:54:57 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.55 2003/02/15 20:12:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -204,15 +204,6 @@ add_to_flat_tlist(List *tlist, List *vars)
        return tlist;
 }
 
-Var *
-get_expr(TargetEntry *tle)
-{
-       Assert(tle != NULL);
-       Assert(tle->expr != NULL);
-
-       return (Var *) tle->expr;
-}
-
 /*
  * get_sortgroupclause_tle
  *             Find the targetlist entry matching the given SortClause
index e842a699b17391821ce786b5db94bab15f404771..6d112a5cf07a8e2b51fc43938931d67d1fc8ff1e 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pathnode.h,v 1.49 2003/02/08 20:20:55 tgl Exp $
+ * $Id: pathnode.h,v 1.50 2003/02/15 20:12:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -41,7 +41,7 @@ extern ResultPath *create_result_path(RelOptInfo *rel, Path *subpath,
 extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
 extern UniquePath *create_unique_path(Query *root, RelOptInfo *rel,
                                                                          Path *subpath);
-extern Path *create_subqueryscan_path(RelOptInfo *rel);
+extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys);
 extern Path *create_functionscan_path(Query *root, RelOptInfo *rel);
 
 extern NestPath *create_nestloop_path(Query *root,
index 8b4f701744ed6408870be9fb3edd2f1557a0326c..768e493e0c9c0be86ff7d63169519d8beebc7b6d 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: paths.h,v 1.65 2003/01/25 23:10:30 tgl Exp $
+ * $Id: paths.h,v 1.66 2003/02/15 20:12:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -104,6 +104,8 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
 extern List *build_index_pathkeys(Query *root, RelOptInfo *rel,
                                         IndexOptInfo *index,
                                         ScanDirection scandir);
+extern List *build_subquery_pathkeys(Query *root, RelOptInfo *rel,
+                                                                        Query *subquery);
 extern List *build_join_pathkeys(Query *root,
                                        RelOptInfo *joinrel,
                                        List *outer_pathkeys);
index b38f4016f9823951f40e432d52b7ab565ccd97b2..bce5db6c55cf1a378ea81f40de1b9a8414b7ba86 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tlist.h,v 1.33 2003/01/20 18:55:06 tgl Exp $
+ * $Id: tlist.h,v 1.34 2003/02/15 20:12:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -26,8 +26,6 @@ extern List *new_unsorted_tlist(List *targetlist);
 extern List *flatten_tlist(List *tlist);
 extern List *add_to_flat_tlist(List *tlist, List *vars);
 
-extern Var *get_expr(TargetEntry *tle);
-
 extern TargetEntry *get_sortgroupclause_tle(SortClause *sortClause,
                                                List *targetList);
 extern Node *get_sortgroupclause_expr(SortClause *sortClause,