granicus.if.org Git - postgresql/blobdiff - src/backend/optimizer/path/costsize.c
Desultory de-FastList-ification. RelOptInfo.reltargetlist is back to being a plain List.
[postgresql] / src / backend / optimizer / path / costsize.c
index 454b1db127fec9a26db088d3bf2601b7ab1d2943..46a323fade4609eaa5cc26e7558003ff3ea1fdea 100644 (file)
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.123 2004/01/19 03:52:28 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.129 2004/06/01 03:02:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,8 +102,6 @@ bool                enable_mergejoin = true;
 bool           enable_hashjoin = true;
 
 
-static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
-                                                int nbuckets);
 static bool cost_qual_eval_walker(Node *node, QualCost *total);
 static Selectivity approx_selectivity(Query *root, List *quals,
                                   JoinType jointype);
@@ -414,7 +412,7 @@ cost_tidscan(Path *path, Query *root,
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
        Cost            cpu_per_tuple;
-       int                     ntuples = length(tideval);
+       int                     ntuples = list_length(tideval);
 
        /* Should only be applied to base relations */
        Assert(baserel->relid > 0);
@@ -503,18 +501,18 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
  *       Determines and returns the cost of sorting a relation, including
  *       the cost of reading the input data.
  *
- * If the total volume of data to sort is less than SortMem, we will do
+ * If the total volume of data to sort is less than work_mem, we will do
  * an in-memory sort, which requires no I/O and about t*log2(t) tuple
  * comparisons for t tuples.
  *
- * If the total volume exceeds SortMem, we switch to a tape-style merge
+ * If the total volume exceeds work_mem, we switch to a tape-style merge
  * algorithm.  There will still be about t*log2(t) tuple comparisons in
  * total, but we will also need to write and read each tuple once per
  * merge pass. We expect about ceil(log6(r)) merge passes where r is the
  * number of initial runs formed (log6 because tuplesort.c uses six-tape
- * merging).  Since the average initial run should be about twice SortMem,
+ * merging).  Since the average initial run should be about twice work_mem,
  * we have
- *             disk traffic = 2 * relsize * ceil(log6(p / (2*SortMem)))
+ *             disk traffic = 2 * relsize * ceil(log6(p / (2*work_mem)))
  *             cpu = comparison_cost * t * log2(t)
  *
  * The disk traffic is assumed to be half sequential and half random
@@ -542,7 +540,7 @@ cost_sort(Path *path, Query *root,
        Cost            startup_cost = input_cost;
        Cost            run_cost = 0;
        double          nbytes = relation_byte_size(tuples, width);
-       long            sortmembytes = SortMem * 1024L;
+       long            work_mem_bytes = work_mem * 1024L;
 
        if (!enable_sort)
                startup_cost += disable_cost;
@@ -564,10 +562,10 @@ cost_sort(Path *path, Query *root,
        startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
 
        /* disk costs */
-       if (nbytes > sortmembytes)
+       if (nbytes > work_mem_bytes)
        {
                double          npages = ceil(nbytes / BLCKSZ);
-               double          nruns = nbytes / (sortmembytes * 2);
+               double          nruns = nbytes / (work_mem_bytes * 2);
                double          log_runs = ceil(LOG6(nruns));
                double          npageaccesses;
 
@@ -594,7 +592,7 @@ cost_sort(Path *path, Query *root,
  *       Determines and returns the cost of materializing a relation, including
  *       the cost of reading the input data.
  *
- * If the total volume of data to materialize exceeds SortMem, we will need
+ * If the total volume of data to materialize exceeds work_mem, we will need
  * to write it to disk, so the cost is much higher in that case.
  */
 void
@@ -604,10 +602,10 @@ cost_material(Path *path,
        Cost            startup_cost = input_cost;
        Cost            run_cost = 0;
        double          nbytes = relation_byte_size(tuples, width);
-       long            sortmembytes = SortMem * 1024L;
+       long            work_mem_bytes = work_mem * 1024L;
 
        /* disk costs */
-       if (nbytes > sortmembytes)
+       if (nbytes > work_mem_bytes)
        {
                double          npages = ceil(nbytes / BLCKSZ);
 
@@ -920,23 +918,31 @@ cost_mergejoin(MergePath *path, Query *root)
         * all mergejoin paths associated with the merge clause, we cache the
         * results in the RestrictInfo node.
         */
-       firstclause = (RestrictInfo *) lfirst(mergeclauses);
-       if (firstclause->left_mergescansel < 0)         /* not computed yet? */
-               mergejoinscansel(root, (Node *) firstclause->clause,
-                                                &firstclause->left_mergescansel,
-                                                &firstclause->right_mergescansel);
-
-       if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids))
+       if (mergeclauses)
        {
-               /* left side of clause is outer */
-               outerscansel = firstclause->left_mergescansel;
-               innerscansel = firstclause->right_mergescansel;
+               firstclause = (RestrictInfo *) linitial(mergeclauses);
+               if (firstclause->left_mergescansel < 0) /* not computed yet? */
+                       mergejoinscansel(root, (Node *) firstclause->clause,
+                                                        &firstclause->left_mergescansel,
+                                                        &firstclause->right_mergescansel);
+
+               if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids))
+               {
+                       /* left side of clause is outer */
+                       outerscansel = firstclause->left_mergescansel;
+                       innerscansel = firstclause->right_mergescansel;
+               }
+               else
+               {
+                       /* left side of clause is inner */
+                       outerscansel = firstclause->right_mergescansel;
+                       innerscansel = firstclause->left_mergescansel;
+               }
        }
        else
        {
-               /* left side of clause is inner */
-               outerscansel = firstclause->right_mergescansel;
-               innerscansel = firstclause->left_mergescansel;
+               /* cope with clauseless mergejoin */
+               outerscansel = innerscansel = 1.0;
        }
 
        /* convert selectivity to row count; must scan at least one row */
@@ -1057,13 +1063,13 @@ cost_hashjoin(HashPath *path, Query *root)
                                                                                          outer_path->parent->width);
        double          innerbytes = relation_byte_size(inner_path_rows,
                                                                                          inner_path->parent->width);
-       int                     num_hashclauses = length(hashclauses);
+       int                     num_hashclauses = list_length(hashclauses);
        int                     virtualbuckets;
        int                     physicalbuckets;
        int                     numbatches;
        Selectivity innerbucketsize;
        Selectivity joininfactor;
-       List       *hcl;
+       ListCell   *hcl;
 
        if (!enable_hashjoin)
                startup_cost += disable_cost;
@@ -1152,7 +1158,7 @@ cost_hashjoin(HashPath *path, Query *root)
                                        /* not cached yet */
                                        thisbucketsize =
                                                estimate_hash_bucketsize(root,
-                                                          (Var *) get_rightop(restrictinfo->clause),
+                                                                                                get_rightop(restrictinfo->clause),
                                                                                                 virtualbuckets);
                                        restrictinfo->right_bucketsize = thisbucketsize;
                                }
@@ -1168,7 +1174,7 @@ cost_hashjoin(HashPath *path, Query *root)
                                        /* not cached yet */
                                        thisbucketsize =
                                                estimate_hash_bucketsize(root,
-                                                               (Var *) get_leftop(restrictinfo->clause),
+                                                                                                get_leftop(restrictinfo->clause),
                                                                                                 virtualbuckets);
                                        restrictinfo->left_bucketsize = thisbucketsize;
                                }
@@ -1249,179 +1255,6 @@ cost_hashjoin(HashPath *path, Query *root)
        path->jpath.path.total_cost = startup_cost + run_cost;
 }
 
-/*
- * Estimate hash bucketsize fraction (ie, number of entries in a bucket
- * divided by total tuples in relation) if the specified Var is used
- * as a hash key.
- *
- * XXX This is really pretty bogus since we're effectively assuming that the
- * distribution of hash keys will be the same after applying restriction
- * clauses as it was in the underlying relation.  However, we are not nearly
- * smart enough to figure out how the restrict clauses might change the
- * distribution, so this will have to do for now.
- *
- * We are passed the number of buckets the executor will use for the given
- * input relation.     If the data were perfectly distributed, with the same
- * number of tuples going into each available bucket, then the bucketsize
- * fraction would be 1/nbuckets.  But this happy state of affairs will occur
- * only if (a) there are at least nbuckets distinct data values, and (b)
- * we have a not-too-skewed data distribution. Otherwise the buckets will
- * be nonuniformly occupied.  If the other relation in the join has a key
- * distribution similar to this one's, then the most-loaded buckets are
- * exactly those that will be probed most often.  Therefore, the "average"
- * bucket size for costing purposes should really be taken as something close
- * to the "worst case" bucket size.  We try to estimate this by adjusting the
- * fraction if there are too few distinct data values, and then scaling up
- * by the ratio of the most common value's frequency to the average frequency.
- *
- * If no statistics are available, use a default estimate of 0.1.  This will
- * discourage use of a hash rather strongly if the inner relation is large,
- * which is what we want.  We do not want to hash unless we know that the
- * inner rel is well-dispersed (or the alternatives seem much worse).
- */
-static Selectivity
-estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
-{
-       Oid                     relid;
-       RelOptInfo *rel;
-       HeapTuple       tuple;
-       Form_pg_statistic stats;
-       double          estfract,
-                               ndistinct,
-                               mcvfreq,
-                               avgfreq;
-       float4     *numbers;
-       int                     nnumbers;
-
-       /* Ignore any binary-compatible relabeling */
-       if (var && IsA(var, RelabelType))
-               var = (Var *) ((RelabelType *) var)->arg;
-
-       /*
-        * Lookup info about var's relation and attribute; if none available,
-        * return default estimate.
-        */
-       if (var == NULL || !IsA(var, Var))
-               return 0.1;
-
-       relid = getrelid(var->varno, root->rtable);
-       if (relid == InvalidOid)
-               return 0.1;
-
-       rel = find_base_rel(root, var->varno);
-
-       if (rel->tuples <= 0.0 || rel->rows <= 0.0)
-               return 0.1;                             /* ensure we can divide below */
-
-       tuple = SearchSysCache(STATRELATT,
-                                                  ObjectIdGetDatum(relid),
-                                                  Int16GetDatum(var->varattno),
-                                                  0, 0);
-       if (!HeapTupleIsValid(tuple))
-       {
-               /*
-                * If the attribute is known unique because of an index,
-                * we can treat it as well-distributed.
-                */
-               if (has_unique_index(rel, var->varattno))
-                       return 1.0 / (double) nbuckets;
-
-               /*
-                * Perhaps the Var is a system attribute; if so, it will have no
-                * entry in pg_statistic, but we may be able to guess something
-                * about its distribution anyway.
-                */
-               switch (var->varattno)
-               {
-                       case ObjectIdAttributeNumber:
-                       case SelfItemPointerAttributeNumber:
-                               /* these are unique, so buckets should be well-distributed */
-                               return 1.0 / (double) nbuckets;
-                       case TableOidAttributeNumber:
-                               /* hashing this is a terrible idea... */
-                               return 1.0;
-               }
-               return 0.1;
-       }
-       stats = (Form_pg_statistic) GETSTRUCT(tuple);
-
-       /*
-        * Obtain number of distinct data values in raw relation.
-        */
-       ndistinct = stats->stadistinct;
-       if (ndistinct < 0.0)
-               ndistinct = -ndistinct * rel->tuples;
-
-       if (ndistinct <= 0.0)           /* ensure we can divide */
-       {
-               ReleaseSysCache(tuple);
-               return 0.1;
-       }
-
-       /* Also compute avg freq of all distinct data values in raw relation */
-       avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
-
-       /*
-        * Adjust ndistinct to account for restriction clauses.  Observe we
-        * are assuming that the data distribution is affected uniformly by
-        * the restriction clauses!
-        *
-        * XXX Possibly better way, but much more expensive: multiply by
-        * selectivity of rel's restriction clauses that mention the target
-        * Var.
-        */
-       ndistinct *= rel->rows / rel->tuples;
-
-       /*
-        * Initial estimate of bucketsize fraction is 1/nbuckets as long as
-        * the number of buckets is less than the expected number of distinct
-        * values; otherwise it is 1/ndistinct.
-        */
-       if (ndistinct > (double) nbuckets)
-               estfract = 1.0 / (double) nbuckets;
-       else
-               estfract = 1.0 / ndistinct;
-
-       /*
-        * Look up the frequency of the most common value, if available.
-        */
-       mcvfreq = 0.0;
-
-       if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
-                                                STATISTIC_KIND_MCV, InvalidOid,
-                                                NULL, NULL, &numbers, &nnumbers))
-       {
-               /*
-                * The first MCV stat is for the most common value.
-                */
-               if (nnumbers > 0)
-                       mcvfreq = numbers[0];
-               free_attstatsslot(var->vartype, NULL, 0,
-                                                 numbers, nnumbers);
-       }
-
-       /*
-        * Adjust estimated bucketsize upward to account for skewed
-        * distribution.
-        */
-       if (avgfreq > 0.0 && mcvfreq > avgfreq)
-               estfract *= mcvfreq / avgfreq;
-
-       /*
-        * Clamp bucketsize to sane range (the above adjustment could easily
-        * produce an out-of-range result).  We set the lower bound a little
-        * above zero, since zero isn't a very sane result.
-        */
-       if (estfract < 1.0e-6)
-               estfract = 1.0e-6;
-       else if (estfract > 1.0)
-               estfract = 1.0;
-
-       ReleaseSysCache(tuple);
-
-       return (Selectivity) estfract;
-}
-
 
 /*
  * cost_qual_eval
@@ -1434,7 +1267,7 @@ estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
 void
 cost_qual_eval(QualCost *cost, List *quals)
 {
-       List       *l;
+       ListCell   *l;
 
        cost->startup = 0;
        cost->per_tuple = 0;
@@ -1611,7 +1444,7 @@ static Selectivity
 approx_selectivity(Query *root, List *quals, JoinType jointype)
 {
        Selectivity total = 1.0;
-       List       *l;
+       ListCell   *l;
 
        foreach(l, quals)
        {
@@ -1866,9 +1699,9 @@ static void
 set_rel_width(Query *root, RelOptInfo *rel)
 {
        int32           tuple_width = 0;
-       List       *tllist;
+       ListCell   *tllist;
 
-       foreach(tllist, FastListValue(&rel->reltargetlist))
+       foreach(tllist, rel->reltargetlist)
        {
                Var                *var = (Var *) lfirst(tllist);
                int                     ndx = var->varattno - rel->min_attr;