Put back planner's ability to cache the results of mergejoinscansel(),
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 22 Jan 2007 20:00:40 +0000 (20:00 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 22 Jan 2007 20:00:40 +0000 (20:00 +0000)
which I had removed in the first cut of the EquivalenceClass rewrite to
simplify that patch a little.  But it's still important --- in a four-way
join problem mergejoinscansel() was eating about 40% of the planning time
according to gprof.  Also, improve the EquivalenceClass code to re-use
join RestrictInfos rather than generating fresh ones for each join
considered.  This saves some memory space but more importantly improves
the effectiveness of caching planning info in RestrictInfos.

src/backend/nodes/copyfuncs.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/equivclass.c
src/backend/optimizer/prep/prepunion.c
src/backend/optimizer/util/restrictinfo.c
src/backend/utils/adt/selfuncs.c
src/include/nodes/relation.h
src/include/utils/selfuncs.h

index 7b003dc095c8446fb4ef0deea1a527c2d36020cc..1237dc7fe642c8797651cce727d689dbbf720845 100644 (file)
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.362 2007/01/20 20:45:38 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.363 2007/01/22 20:00:39 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1326,6 +1326,10 @@ _copyRestrictInfo(RestrictInfo *from)
        /* EquivalenceClasses are never copied, so shallow-copy the pointers */
        COPY_SCALAR_FIELD(left_ec);
        COPY_SCALAR_FIELD(right_ec);
+       COPY_SCALAR_FIELD(left_em);
+       COPY_SCALAR_FIELD(right_em);
+       /* MergeScanSelCache isn't a Node, so hard to copy; just reset cache */
+       newnode->scansel_cache = NIL;
        COPY_SCALAR_FIELD(outer_is_left);
        COPY_SCALAR_FIELD(hashjoinoperator);
        COPY_SCALAR_FIELD(left_bucketsize);
index f0b72ea0f643987a46ad91a2d05bb973254c67d6..b79b7d1a2d8f8a12017cfc17c428f50fe6d6c212 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.294 2007/01/20 20:45:38 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.295 2007/01/22 20:00:39 tgl Exp $
  *
  * NOTES
  *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1304,6 +1304,7 @@ _outEquivalenceClass(StringInfo str, EquivalenceClass *node)
        WRITE_NODE_FIELD(ec_opfamilies);
        WRITE_NODE_FIELD(ec_members);
        WRITE_NODE_FIELD(ec_sources);
+       WRITE_NODE_FIELD(ec_derives);
        WRITE_BITMAPSET_FIELD(ec_relids);
        WRITE_BOOL_FIELD(ec_has_const);
        WRITE_BOOL_FIELD(ec_has_volatile);
@@ -1354,6 +1355,8 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node)
        WRITE_NODE_FIELD(mergeopfamilies);
        WRITE_NODE_FIELD(left_ec);
        WRITE_NODE_FIELD(right_ec);
+       WRITE_NODE_FIELD(left_em);
+       WRITE_NODE_FIELD(right_em);
        WRITE_BOOL_FIELD(outer_is_left);
        WRITE_OID_FIELD(hashjoinoperator);
 }
index 2d241e774d0174136d203c022141a92bd5f8f9f1..422ef9232226ad82dd22d73f2d712b5eb80a0c90 100644 (file)
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.176 2007/01/22 01:35:20 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.177 2007/01/22 20:00:39 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -108,6 +108,9 @@ bool                enable_mergejoin = true;
 bool           enable_hashjoin = true;
 
 
+static MergeScanSelCache *cached_scansel(PlannerInfo *root,
+                                                                                RestrictInfo *rinfo,
+                                                                                PathKey *pathkey);
 static bool cost_qual_eval_walker(Node *node, QualCost *total);
 static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
                                   JoinType jointype);
@@ -1349,9 +1352,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
         * (unless it's an outer join, in which case the outer side has to be
         * scanned all the way anyway).  Estimate fraction of the left and right
         * inputs that will actually need to be scanned. We use only the first
-        * (most significant) merge clause for this purpose.
-        *
-        * XXX mergejoinscansel is a bit expensive, can we cache its results?
+        * (most significant) merge clause for this purpose.  Since
+        * mergejoinscansel() is a fairly expensive computation, we cache the
+        * results in the merge clause RestrictInfo.
         */
        if (mergeclauses && path->jpath.jointype != JOIN_FULL)
        {
@@ -1360,8 +1363,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                List       *ipathkeys;
                PathKey    *opathkey;
                PathKey    *ipathkey;
-               Selectivity leftscansel,
-                                       rightscansel;
+               MergeScanSelCache *cache;
 
                /* Get the input pathkeys to determine the sort-order details */
                opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys;
@@ -1376,22 +1378,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                        opathkey->pk_nulls_first != ipathkey->pk_nulls_first)
                        elog(ERROR, "left and right pathkeys do not match in mergejoin");
 
-               mergejoinscansel(root, (Node *) firstclause->clause,
-                                                opathkey->pk_opfamily, opathkey->pk_strategy,
-                                                &leftscansel, &rightscansel);
+               /* Get the selectivity with caching */
+               cache = cached_scansel(root, firstclause, opathkey);
 
                if (bms_is_subset(firstclause->left_relids,
                                                  outer_path->parent->relids))
                {
                        /* left side of clause is outer */
-                       outerscansel = leftscansel;
-                       innerscansel = rightscansel;
+                       outerscansel = cache->leftscansel;
+                       innerscansel = cache->rightscansel;
                }
                else
                {
                        /* left side of clause is inner */
-                       outerscansel = rightscansel;
-                       innerscansel = leftscansel;
+                       outerscansel = cache->rightscansel;
+                       innerscansel = cache->leftscansel;
                }
                if (path->jpath.jointype == JOIN_LEFT)
                        outerscansel = 1.0;
@@ -1493,6 +1494,54 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
        path->jpath.path.total_cost = startup_cost + run_cost;
 }
 
+/*
+ * run mergejoinscansel() with caching
+ *
+ * Returns the MergeScanSelCache entry attached to 'rinfo' whose opfamily,
+ * strategy and nulls_first fields equal the corresponding fields of
+ * 'pathkey'.  If rinfo->scansel_cache holds no such entry, mergejoinscansel()
+ * is invoked on rinfo->clause, and the left/right scan selectivities it
+ * reports are stored in a newly allocated entry that is appended to
+ * rinfo->scansel_cache before being returned.
+ *
+ * The caller reads the leftscansel/rightscansel fields of the result.
+ */
+static MergeScanSelCache *
+cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
+{
+       MergeScanSelCache *cache;
+       ListCell   *lc;
+       Selectivity leftscansel,
+                               rightscansel;
+       MemoryContext oldcontext;
+
+       /*
+        * Do we have this result already?
+        *
+        * The cache list is scanned linearly; an entry matches only if all
+        * three sort-order properties (opfamily, strategy, nulls_first) agree
+        * with the pathkey.
+        */
+       foreach(lc, rinfo->scansel_cache)
+       {
+               cache = (MergeScanSelCache *) lfirst(lc);
+               if (cache->opfamily == pathkey->pk_opfamily &&
+                       cache->strategy == pathkey->pk_strategy &&
+                       cache->nulls_first == pathkey->pk_nulls_first)
+                       return cache;
+       }
+
+       /* Nope, do the computation */
+       mergejoinscansel(root,
+                                        (Node *) rinfo->clause,
+                                        pathkey->pk_opfamily,
+                                        pathkey->pk_strategy,
+                                        pathkey->pk_nulls_first,
+                                        &leftscansel,
+                                        &rightscansel);
+
+       /*
+        * Cache the result in suitably long-lived workspace
+        *
+        * Both the palloc of the new entry and the lappend onto the cache
+        * list run while root->planner_cxt is the current memory context; the
+        * previous context is restored before returning.
+        */
+       oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+       cache = (MergeScanSelCache *) palloc(sizeof(MergeScanSelCache));
+       cache->opfamily = pathkey->pk_opfamily;
+       cache->strategy = pathkey->pk_strategy;
+       cache->nulls_first = pathkey->pk_nulls_first;
+       cache->leftscansel = leftscansel;
+       cache->rightscansel = rightscansel;
+
+       rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache);
+
+       MemoryContextSwitchTo(oldcontext);
+
+       return cache;
+}
+
 /*
  * cost_hashjoin
  *       Determines and returns the cost of joining two relations using the
index 063e8d5d014b9037c713253f4977693c64a6e24b..b6503ef193be72ff32c3332e026a1163a9c49897 100644 (file)
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.1 2007/01/20 20:45:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.2 2007/01/22 20:00:39 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -26,8 +26,9 @@
 #include "utils/lsyscache.h"
 
 
-static void add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
-                         bool is_child, Oid datatype);
+static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
+                                                                               Expr *expr, Relids relids,
+                                                                               bool is_child, Oid datatype);
 static void generate_base_implied_equalities_const(PlannerInfo *root,
                                                                                                   EquivalenceClass *ec);
 static void generate_base_implied_equalities_no_const(PlannerInfo *root,
@@ -46,6 +47,11 @@ static List *generate_join_implied_equalities_broken(PlannerInfo *root,
                                                                                RelOptInfo *inner_rel);
 static Oid     select_equality_operator(EquivalenceClass *ec,
                                                                         Oid lefttype, Oid righttype);
+static RestrictInfo *create_join_clause(PlannerInfo *root,
+                                                                               EquivalenceClass *ec, Oid opno,
+                                                                               EquivalenceMember *leftem,
+                                                                               EquivalenceMember *rightem,
+                                                                               EquivalenceClass *parent_ec);
 static void reconsider_outer_join_clause(PlannerInfo *root,
                                                                                 RestrictInfo *rinfo,
                                                                                 bool outer_on_left);
@@ -95,6 +101,8 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
        List       *opfamilies;
        EquivalenceClass *ec1,
                           *ec2;
+       EquivalenceMember *em1,
+                          *em2;
        ListCell   *lc1;
 
        /* Extract info from given clause */
@@ -152,6 +160,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
         * there is no shortcut here for item1 and item2 equal.)
         */
        ec1 = ec2 = NULL;
+       em1 = em2 = NULL;
        foreach(lc1, root->eq_classes)
        {
                EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
@@ -188,6 +197,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
                                equal(item1, cur_em->em_expr))
                        {
                                ec1 = cur_ec;
+                               em1 = cur_em;
                                if (ec2)
                                        break;
                        }
@@ -197,6 +207,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
                                equal(item2, cur_em->em_expr))
                        {
                                ec2 = cur_ec;
+                               em2 = cur_em;
                                if (ec1)
                                        break;
                        }
@@ -215,6 +226,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
                {
                        ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
                        ec1->ec_below_outer_join |= below_outer_join;
+                       /* mark the RI as usable with this pair of EMs */
+                       /* NB: can't set left_ec/right_ec until merging is finished */
+                       restrictinfo->left_em = em1;
+                       restrictinfo->right_em = em2;
                        return true;
                }
 
@@ -227,6 +242,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
                 */
                ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members);
                ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources);
+               ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives);
                ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids);
                ec1->ec_has_const |= ec2->ec_has_const;
                /* can't need to set has_volatile */
@@ -236,23 +252,33 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
                /* just to avoid debugging confusion w/ dangling pointers: */
                ec2->ec_members = NIL;
                ec2->ec_sources = NIL;
+               ec2->ec_derives = NIL;
                ec2->ec_relids = NULL;
                ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
                ec1->ec_below_outer_join |= below_outer_join;
+               /* mark the RI as usable with this pair of EMs */
+               restrictinfo->left_em = em1;
+               restrictinfo->right_em = em2;
        }
        else if (ec1)
        {
                /* Case 3: add item2 to ec1 */
-               add_eq_member(ec1, item2, item2_relids, false, item2_type);
+               em2 = add_eq_member(ec1, item2, item2_relids, false, item2_type);
                ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
                ec1->ec_below_outer_join |= below_outer_join;
+               /* mark the RI as usable with this pair of EMs */
+               restrictinfo->left_em = em1;
+               restrictinfo->right_em = em2;
        }
        else if (ec2)
        {
                /* Case 3: add item1 to ec2 */
-               add_eq_member(ec2, item1, item1_relids, false, item1_type);
+               em1 = add_eq_member(ec2, item1, item1_relids, false, item1_type);
                ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo);
                ec2->ec_below_outer_join |= below_outer_join;
+               /* mark the RI as usable with this pair of EMs */
+               restrictinfo->left_em = em1;
+               restrictinfo->right_em = em2;
        }
        else
        {
@@ -262,16 +288,21 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
                ec->ec_opfamilies = opfamilies;
                ec->ec_members = NIL;
                ec->ec_sources = list_make1(restrictinfo);
+               ec->ec_derives = NIL;
                ec->ec_relids = NULL;
                ec->ec_has_const = false;
                ec->ec_has_volatile = false;
                ec->ec_below_outer_join = below_outer_join;
                ec->ec_broken = false;
                ec->ec_merged = NULL;
-               add_eq_member(ec, item1, item1_relids, false, item1_type);
-               add_eq_member(ec, item2, item2_relids, false, item2_type);
+               em1 = add_eq_member(ec, item1, item1_relids, false, item1_type);
+               em2 = add_eq_member(ec, item2, item2_relids, false, item2_type);
 
                root->eq_classes = lappend(root->eq_classes, ec);
+
+               /* mark the RI as usable with this pair of EMs */
+               restrictinfo->left_em = em1;
+               restrictinfo->right_em = em2;
        }
 
        return true;
@@ -280,7 +311,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
 /*
  * add_eq_member - build a new EquivalenceMember and add it to an EC
  */
-static void
+static EquivalenceMember *
 add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
                          bool is_child, Oid datatype)
 {
@@ -312,6 +343,8 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
                ec->ec_relids = bms_add_members(ec->ec_relids, relids);
        }
        ec->ec_members = lappend(ec->ec_members, em);
+
+       return em;
 }
 
 
@@ -337,6 +370,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
                                                 List *opfamilies)
 {
        EquivalenceClass *newec;
+       EquivalenceMember *newem;
        ListCell   *lc1;
        MemoryContext oldcontext;
 
@@ -383,14 +417,15 @@ get_eclass_for_sort_expr(PlannerInfo *root,
        newec->ec_opfamilies = list_copy(opfamilies);
        newec->ec_members = NIL;
        newec->ec_sources = NIL;
+       newec->ec_derives = NIL;
        newec->ec_relids = NULL;
        newec->ec_has_const = false;
        newec->ec_has_volatile = contain_volatile_functions((Node *) expr);
        newec->ec_below_outer_join = false;
        newec->ec_broken = false;
        newec->ec_merged = NULL;
-       add_eq_member(newec, expr, pull_varnos((Node *) expr),
-                                 false, expr_datatype);
+       newem = add_eq_member(newec, expr, pull_varnos((Node *) expr),
+                                                 false, expr_datatype);
 
        /*
         * add_eq_member doesn't check for volatile functions or aggregates,
@@ -402,7 +437,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
                if (newec->ec_has_volatile || contain_agg_clause((Node *) expr))
                {
                        newec->ec_has_const = false;
-                       ((EquivalenceMember *) linitial(newec->ec_members))->em_is_const = false;
+                       newem->em_is_const = false;
                }
        }
 
@@ -455,6 +490,12 @@ get_eclass_for_sort_expr(PlannerInfo *root,
  * process_implied_equality (in plan/initsplan.c) to be inserted into the
  * restrictinfo datastructures.  Note that this must be called after initial
  * scanning of the quals and before Path construction begins.
+ *
+ * We make no attempt to avoid generating duplicate RestrictInfos here: we
+ * don't search ec_sources for matches, nor put the created RestrictInfos
+ * into ec_derives.  Doing so would require some slightly ugly changes in
+ * initsplan.c's API, and there's no real advantage, because the clauses
+ * generated here can't duplicate anything we will generate for joins anyway.
  */
 void
 generate_base_implied_equalities(PlannerInfo *root)
@@ -664,6 +705,13 @@ generate_base_implied_equalities_broken(PlannerInfo *root,
  * for use in a nestloop-with-inner-indexscan join, however.  indxpath.c makes
  * its own selections of clauses to use, and if the ones we pick here are
  * redundant with those, the extras will be eliminated in createplan.c.
+ *
+ * Because the same join clauses are likely to be needed multiple times as
+ * we consider different join paths, we avoid generating multiple copies:
+ * whenever we select a particular pair of EquivalenceMembers to join,
+ * we check to see if the pair matches any original clause (in ec_sources)
+ * or previously-built clause (in ec_derives).  This saves memory and allows
+ * re-use of information cached in RestrictInfos.
  */
 List *
 generate_join_implied_equalities(PlannerInfo *root,
@@ -818,15 +866,13 @@ generate_join_implied_equalities_normal(PlannerInfo *root,
                        return NIL;
                }
 
-               rinfo = build_implied_join_equality(best_eq_op,
-                                                                                       best_outer_em->em_expr,
-                                                                                       best_inner_em->em_expr,
-                                                                                       ec->ec_relids);
-               /* mark restrictinfo as redundant with other joinclauses */
-               rinfo->parent_ec = ec;
-               /* we can set these too, rather than letting them be looked up later */
-               rinfo->left_ec = ec;
-               rinfo->right_ec = ec;
+               /*
+                * Create clause, setting parent_ec to mark it as redundant with other
+                * joinclauses
+                */
+               rinfo = create_join_clause(root, ec, best_eq_op,
+                                                                  best_outer_em, best_inner_em,
+                                                                  ec);
 
                result = lappend(result, rinfo);
        }
@@ -867,16 +913,10 @@ generate_join_implied_equalities_normal(PlannerInfo *root,
                                        ec->ec_broken = true;
                                        return NIL;
                                }
-                               rinfo = build_implied_join_equality(eq_op,
-                                                                                                       prev_em->em_expr,
-                                                                                                       cur_em->em_expr,
-                                                                                                       ec->ec_relids);
-
                                /* do NOT set parent_ec, this qual is not redundant! */
-
-                               /* we can set these, though */
-                               rinfo->left_ec = ec;
-                               rinfo->right_ec = ec;
+                               rinfo = create_join_clause(root, ec, eq_op,
+                                                                                  prev_em, cur_em,
+                                                                                  NULL);
 
                                result = lappend(result, rinfo);
                        }
@@ -941,6 +981,86 @@ select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype)
 }
 
 
+/*
+ * create_join_clause
+ *       Find or make a RestrictInfo comparing the two given EC members
+ *       with the given operator.
+ *
+ * parent_ec is either equal to ec (if the clause is a potentially-redundant
+ * join clause) or NULL (if not).  We have to treat this as part of the
+ * match requirements --- it's possible that a clause comparing the same two
+ * EMs is a join clause in one join path and a restriction clause in another.
+ *
+ * The search looks first in ec->ec_sources and then in ec->ec_derives, and
+ * returns the first RestrictInfo whose left_em, right_em, parent_ec and
+ * operator OID all match the arguments.  The comparison of EMs is
+ * order-sensitive: a clause recorded for (rightem, leftem) is not treated
+ * as a match for (leftem, rightem).
+ *
+ * If nothing matches, a new clause is built with
+ * build_implied_join_equality(), labeled with parent_ec, with ec as both
+ * left_ec and right_ec, and with the two EMs, and is appended to
+ * ec->ec_derives so that later calls can find it.
+ */
+static RestrictInfo *
+create_join_clause(PlannerInfo *root,
+                                  EquivalenceClass *ec, Oid opno,
+                                  EquivalenceMember *leftem,
+                                  EquivalenceMember *rightem,
+                                  EquivalenceClass *parent_ec)
+{
+       RestrictInfo *rinfo;
+       ListCell   *lc;
+       MemoryContext oldcontext;
+
+       /*
+        * Search to see if we already built a RestrictInfo for this pair of
+        * EquivalenceMembers.  We can use either original source clauses or
+        * previously-derived clauses.  The check on opno is probably redundant,
+        * but be safe ...
+        *
+        * NOTE(review): the opno test casts rinfo->clause to OpExpr without
+        * checking its node type; presumably every clause stored in ec_sources
+        * and ec_derives is an OpExpr --- confirm.
+        */
+       foreach(lc, ec->ec_sources)
+       {
+               rinfo = (RestrictInfo *) lfirst(lc);
+               if (rinfo->left_em == leftem &&
+                       rinfo->right_em == rightem &&
+                       rinfo->parent_ec == parent_ec &&
+                       opno == ((OpExpr *) rinfo->clause)->opno)
+                       return rinfo;
+       }
+
+       foreach(lc, ec->ec_derives)
+       {
+               rinfo = (RestrictInfo *) lfirst(lc);
+               if (rinfo->left_em == leftem &&
+                       rinfo->right_em == rightem &&
+                       rinfo->parent_ec == parent_ec &&
+                       opno == ((OpExpr *) rinfo->clause)->opno)
+                       return rinfo;
+       }
+
+       /*
+        * Not there, so build it, in planner context so we can re-use it.
+        * (Not important in normal planning, but definitely so in GEQO.)
+        */
+       oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+       rinfo = build_implied_join_equality(opno,
+                                                                               leftem->em_expr,
+                                                                               rightem->em_expr,
+                                                                               ec->ec_relids);
+
+       /* Mark the clause as redundant, or not */
+       rinfo->parent_ec = parent_ec;
+
+       /*
+        * We can set these now, rather than letting them be looked up later,
+        * since this is only used after EC merging is complete.
+        */
+       rinfo->left_ec = ec;
+       rinfo->right_ec = ec;
+
+       /* Mark it as usable with these EMs */
+       rinfo->left_em = leftem;
+       rinfo->right_em = rightem;
+       /* and save it for possible re-use */
+       ec->ec_derives = lappend(ec->ec_derives, rinfo);
+
+       MemoryContextSwitchTo(oldcontext);
+
+       return rinfo;
+}
+
+
 /*
  * reconsider_outer_join_clauses
  *       Re-examine any outer-join clauses that were set aside by
@@ -1364,8 +1484,8 @@ add_child_rel_equivalences(PlannerInfo *root,
                                child_expr = (Expr *)
                                        adjust_appendrel_attrs((Node *) cur_em->em_expr,
                                                                                   appinfo);
-                               add_eq_member(cur_ec, child_expr, child_rel->relids,
-                                                         true, cur_em->em_datatype);
+                               (void) add_eq_member(cur_ec, child_expr, child_rel->relids,
+                                                                        true, cur_em->em_datatype);
                        }
                }
        }
@@ -1451,15 +1571,10 @@ find_eclass_clauses_for_index_join(PlannerInfo *root, RelOptInfo *rel,
                                /* Found a suitable joinclause */
                                RestrictInfo *rinfo;
 
-                               rinfo = build_implied_join_equality(best_eq_op,
-                                                                                                       cur_em->em_expr,
-                                                                                                       best_outer_em->em_expr,
-                                                                                                       cur_ec->ec_relids);
-                               /* mark restrictinfo as redundant with other joinclauses */
-                               rinfo->parent_ec = cur_ec;
-                               /* we can set these too */
-                               rinfo->left_ec = cur_ec;
-                               rinfo->right_ec = cur_ec;
+                               /* set parent_ec to mark as redundant with other joinclauses */
+                               rinfo = create_join_clause(root, cur_ec, best_eq_op,
+                                                                                  cur_em, best_outer_em,
+                                                                                  cur_ec);
 
                                result = lappend(result, rinfo);
                                /*
index ec93b0dad07ba5b9f87429f1d4ea2431b8b86917..5ee03b75cffd7e87e9d1322eda94390cb5f8f710 100644 (file)
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.136 2007/01/20 20:45:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.137 2007/01/22 20:00:39 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1197,6 +1197,9 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context)
                newinfo->this_selec = -1;
                newinfo->left_ec = NULL;
                newinfo->right_ec = NULL;
+               newinfo->left_em = NULL;
+               newinfo->right_em = NULL;
+               newinfo->scansel_cache = NIL;
                newinfo->left_bucketsize = -1;
                newinfo->right_bucketsize = -1;
 
index ea8bb5c970b3b376b0c7c4924158398f092751f7..8251e75d650a66bf8cefe98394bc6894788b659b 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.52 2007/01/20 20:45:40 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.53 2007/01/22 20:00:39 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -344,6 +344,9 @@ make_restrictinfo_internal(Expr *clause,
 
        restrictinfo->left_ec = NULL;
        restrictinfo->right_ec = NULL;
+       restrictinfo->left_em = NULL;
+       restrictinfo->right_em = NULL;
+       restrictinfo->scansel_cache = NIL;
 
        restrictinfo->outer_is_left = false;
 
index 493df17b6c22ed9a8b811655430aa9a54740b09f..85e66a3b1c47beaa1fe79fb027b4d1ea0b9c004a 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.220 2007/01/20 20:45:40 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.221 2007/01/22 20:00:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2112,8 +2112,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  * we can estimate how much of the input will actually be read.  This
  * can have a considerable impact on the cost when using indexscans.
  *
- * clause should be a clause already known to be mergejoinable.  opfamily and
- * strategy specify the sort ordering being used.
+ * clause should be a clause already known to be mergejoinable.  opfamily,
+ * strategy, and nulls_first specify the sort ordering being used.
  *
  * *leftscan is set to the fraction of the left-hand variable expected
  * to be scanned (0 to 1), and similarly *rightscan for the right-hand
@@ -2121,7 +2121,7 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  */
 void
 mergejoinscansel(PlannerInfo *root, Node *clause,
-                                Oid opfamily, int strategy,
+                                Oid opfamily, int strategy, bool nulls_first,
                                 Selectivity *leftscan,
                                 Selectivity *rightscan)
 {
@@ -2214,18 +2214,39 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
        /*
         * Now, the fraction of the left variable that will be scanned is the
         * fraction that's <= the right-side maximum value.  But only believe
-        * non-default estimates, else stick with our 1.0.
+        * non-default estimates, else stick with our 1.0.  Also, if the sort
+        * order is nulls-first, we're going to have to read over any nulls too.
         */
        selec = scalarineqsel(root, leop, false, &leftvar,
                                                  rightmax, op_righttype);
        if (selec != DEFAULT_INEQ_SEL)
+       {
+               if (nulls_first && HeapTupleIsValid(leftvar.statsTuple))
+               {
+                       Form_pg_statistic stats;
+
+                       stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
+                       selec += stats->stanullfrac;
+                       CLAMP_PROBABILITY(selec);
+               }
                *leftscan = selec;
+       }
 
        /* And similarly for the right variable. */
        selec = scalarineqsel(root, revleop, false, &rightvar,
                                                  leftmax, op_lefttype);
        if (selec != DEFAULT_INEQ_SEL)
+       {
+               if (nulls_first && HeapTupleIsValid(rightvar.statsTuple))
+               {
+                       Form_pg_statistic stats;
+
+                       stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
+                       selec += stats->stanullfrac;
+                       CLAMP_PROBABILITY(selec);
+               }
                *rightscan = selec;
+       }
 
        /*
         * Only one of the two fractions can really be less than 1.0; believe the
index a83a20d21b59cec6f4c9086cfb360078bf0ce508..c67c067a5f239e9d6de844d21452f402af4fa92b 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.133 2007/01/20 20:45:40 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.134 2007/01/22 20:00:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -397,6 +397,7 @@ typedef struct EquivalenceClass
        List       *ec_opfamilies;              /* btree operator family OIDs */
        List       *ec_members;                 /* list of EquivalenceMembers */
        List       *ec_sources;                 /* list of generating RestrictInfos */
+       List       *ec_derives;                 /* list of derived RestrictInfos */
        Relids          ec_relids;                      /* all relids appearing in ec_members */
        bool            ec_has_const;           /* any pseudoconstants in ec_members? */
        bool            ec_has_volatile;        /* the (sole) member is a volatile expr */
@@ -890,6 +891,9 @@ typedef struct RestrictInfo
        /* cache space for mergeclause processing; NULL if not yet set */
        EquivalenceClass *left_ec;      /* EquivalenceClass containing lefthand */
        EquivalenceClass *right_ec;     /* EquivalenceClass containing righthand */
+       EquivalenceMember *left_em;             /* EquivalenceMember for lefthand */
+       EquivalenceMember *right_em;    /* EquivalenceMember for righthand */
+       List       *scansel_cache;      /* list of MergeScanSelCache structs */
 
        /* transient workspace for use while considering a specific join path */
        bool            outer_is_left;  /* T = outer var on left, F = on right */
@@ -902,6 +906,24 @@ typedef struct RestrictInfo
        Selectivity right_bucketsize;           /* avg bucketsize of right side */
 } RestrictInfo;
 
+/*
+ * Since mergejoinscansel() is a relatively expensive function, and would
+ * otherwise be invoked many times while planning a large join tree,
+ * we go out of our way to cache its results.  Each mergejoinable
+ * RestrictInfo carries a list of the specific sort orderings that have
+ * been considered for use with it, and the resulting selectivities.
+ */
+typedef struct MergeScanSelCache
+{
+       /* Ordering details; these three fields together are the cache lookup key */
+       Oid                     opfamily;               /* btree opfamily defining the ordering */
+       int                     strategy;               /* sort direction (ASC or DESC) */
+       bool            nulls_first;    /* do NULLs come before normal values? */
+       /* Results: the mergejoinscansel() outputs cached for this ordering */
+       Selectivity     leftscansel;    /* fraction of clause left side scanned (0 to 1) */
+       Selectivity     rightscansel;   /* fraction of clause right side scanned (0 to 1) */
+} MergeScanSelCache;
+
 /*
  * Inner indexscan info.
  *
index 84549ca1d32092a2f0cfaa6bcfe2ca307ee8dfff..f0c6f20427c23e57b230f3d1f164e36d0f32e115 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.38 2007/01/05 22:19:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.39 2007/01/22 20:00:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -160,7 +160,7 @@ extern Selectivity rowcomparesel(PlannerInfo *root,
                          int varRelid, JoinType jointype);
 
 extern void mergejoinscansel(PlannerInfo *root, Node *clause,
-                                Oid opfamily, int strategy,
+                                Oid opfamily, int strategy, bool nulls_first,
                                 Selectivity *leftscan,
                                 Selectivity *rightscan);