From: Tom Lane
Date: Sat, 8 Dec 2007 21:05:11 +0000 (+0000)
Subject: Fix mergejoin cost estimation so that we consider the statistical ranges of
X-Git-Tag: REL8_3_RC1~120
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9fd88436470482fc030d4c2328977b3cf66f01f1;p=postgresql

Fix mergejoin cost estimation so that we consider the statistical ranges of
the two join variables at both ends: not only trailing rows that need not be
scanned because there cannot be a match on the other side, but initial rows
that will be scanned without possibly having a match.  This allows a more
realistic estimate of startup cost to be made, per recent pgsql-performance
discussion.  In passing, fix a couple of bugs that had crept into
mergejoinscansel: it was not quite up to speed for the task of estimating
descending-order scans, which is a new requirement in 8.3.
---

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index fc95399b39..c1e1651c79 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.189 2007/11/15 22:25:15 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.190 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1372,12 +1372,16 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
     double      outer_path_rows = PATH_ROWS(outer_path);
     double      inner_path_rows = PATH_ROWS(inner_path);
     double      outer_rows,
-                inner_rows;
+                inner_rows,
+                outer_skip_rows,
+                inner_skip_rows;
     double      mergejointuples,
                 rescannedtuples;
     double      rescanratio;
-    Selectivity outerscansel,
-                innerscansel;
+    Selectivity outerstartsel,
+                outerendsel,
+                innerstartsel,
+                innerendsel;
     Selectivity joininfactor;
     Path        sort_path;      /* dummy for result of cost_sort */
@@ -1444,10 +1448,12 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
      * A merge join will stop as soon as it exhausts either input stream
      * (unless it's an outer join, in which case the outer side has to be
      * scanned all the way anyway).  Estimate fraction of the left and right
-     * inputs that will actually need to be scanned.  We use only the first
-     * (most significant) merge clause for this purpose.  Since
-     * mergejoinscansel() is a fairly expensive computation, we cache the
-     * results in the merge clause RestrictInfo.
+     * inputs that will actually need to be scanned.  Likewise, we can
+     * estimate the number of rows that will be skipped before the first
+     * join pair is found, which should be factored into startup cost.
+     * We use only the first (most significant) merge clause for this purpose.
+     * Since mergejoinscansel() is a fairly expensive computation, we cache
+     * the results in the merge clause RestrictInfo.
      */
     if (mergeclauses && path->jpath.jointype != JOIN_FULL)
     {
@@ -1478,37 +1484,61 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                            outer_path->parent->relids))
         {
             /* left side of clause is outer */
-            outerscansel = cache->leftscansel;
-            innerscansel = cache->rightscansel;
+            outerstartsel = cache->leftstartsel;
+            outerendsel = cache->leftendsel;
+            innerstartsel = cache->rightstartsel;
+            innerendsel = cache->rightendsel;
         }
         else
         {
             /* left side of clause is inner */
-            outerscansel = cache->rightscansel;
-            innerscansel = cache->leftscansel;
+            outerstartsel = cache->rightstartsel;
+            outerendsel = cache->rightendsel;
+            innerstartsel = cache->leftstartsel;
+            innerendsel = cache->leftendsel;
         }
         if (path->jpath.jointype == JOIN_LEFT)
-            outerscansel = 1.0;
+        {
+            outerstartsel = 0.0;
+            outerendsel = 1.0;
+        }
         else if (path->jpath.jointype == JOIN_RIGHT)
-            innerscansel = 1.0;
+        {
+            innerstartsel = 0.0;
+            innerendsel = 1.0;
+        }
     }
     else
     {
         /* cope with clauseless or full mergejoin */
-        outerscansel = innerscansel = 1.0;
+        outerstartsel = innerstartsel = 0.0;
+        outerendsel = innerendsel = 1.0;
     }
 
-    /* convert selectivity to row count; must scan at least one row */
-    outer_rows = clamp_row_est(outer_path_rows * outerscansel);
-    inner_rows = clamp_row_est(inner_path_rows * innerscansel);
+    /*
+     * Convert selectivities to row counts.  We force outer_rows and
+     * inner_rows to be at least 1, but the skip_rows estimates can be zero.
+     */
+    outer_skip_rows = rint(outer_path_rows * outerstartsel);
+    inner_skip_rows = rint(inner_path_rows * innerstartsel);
+    outer_rows = clamp_row_est(outer_path_rows * outerendsel);
+    inner_rows = clamp_row_est(inner_path_rows * innerendsel);
+
+    Assert(outer_skip_rows <= outer_rows);
+    Assert(inner_skip_rows <= inner_rows);
 
     /*
      * Readjust scan selectivities to account for above rounding.  This is
      * normally an insignificant effect, but when there are only a few rows in
      * the inputs, failing to do this makes for a large percentage error.
      */
-    outerscansel = outer_rows / outer_path_rows;
-    innerscansel = inner_rows / inner_path_rows;
+    outerstartsel = outer_skip_rows / outer_path_rows;
+    innerstartsel = inner_skip_rows / inner_path_rows;
+    outerendsel = outer_rows / outer_path_rows;
+    innerendsel = inner_rows / inner_path_rows;
+
+    Assert(outerstartsel <= outerendsel);
+    Assert(innerstartsel <= innerendsel);
 
     /* cost of source data */
 
@@ -1522,14 +1552,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                               outer_path->parent->width,
                               -1.0);
         startup_cost += sort_path.startup_cost;
+        startup_cost += (sort_path.total_cost - sort_path.startup_cost)
+            * outerstartsel;
         run_cost += (sort_path.total_cost - sort_path.startup_cost)
-            * outerscansel;
+            * (outerendsel - outerstartsel);
     }
     else
     {
         startup_cost += outer_path->startup_cost;
+        startup_cost += (outer_path->total_cost - outer_path->startup_cost)
+            * outerstartsel;
         run_cost += (outer_path->total_cost - outer_path->startup_cost)
-            * outerscansel;
+            * (outerendsel - outerstartsel);
     }
 
     if (innersortkeys)          /* do we need to sort inner? */
@@ -1542,14 +1576,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
                               inner_path->parent->width,
                               -1.0);
         startup_cost += sort_path.startup_cost;
+        startup_cost += (sort_path.total_cost - sort_path.startup_cost)
+            * innerstartsel * rescanratio;
         run_cost += (sort_path.total_cost - sort_path.startup_cost)
-            * innerscansel * rescanratio;
+            * (innerendsel - innerstartsel) * rescanratio;
     }
     else
     {
         startup_cost += inner_path->startup_cost;
+        startup_cost += (inner_path->total_cost - inner_path->startup_cost)
+            * innerstartsel * rescanratio;
         run_cost += (inner_path->total_cost - inner_path->startup_cost)
-            * innerscansel * rescanratio;
+            * (innerendsel - innerstartsel) * rescanratio;
     }
 
     /* CPU costs */
@@ -1571,8 +1609,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
      * joininfactor.
      */
     startup_cost += merge_qual_cost.startup;
+    startup_cost += merge_qual_cost.per_tuple *
+        (outer_skip_rows + inner_skip_rows * rescanratio);
     run_cost += merge_qual_cost.per_tuple *
-        (outer_rows + inner_rows * rescanratio);
+        ((outer_rows - outer_skip_rows) +
+         (inner_rows - inner_skip_rows) * rescanratio);
 
     /*
      * For each tuple that gets through the mergejoin proper, we charge
@@ -1597,8 +1638,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
 {
     MergeScanSelCache *cache;
     ListCell   *lc;
-    Selectivity leftscansel,
-                rightscansel;
+    Selectivity leftstartsel,
+                leftendsel,
+                rightstartsel,
+                rightendsel;
     MemoryContext oldcontext;
 
     /* Do we have this result already? */
@@ -1617,8 +1660,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
                       pathkey->pk_opfamily,
                       pathkey->pk_strategy,
                       pathkey->pk_nulls_first,
-                      &leftscansel,
-                      &rightscansel);
+                      &leftstartsel,
+                      &leftendsel,
+                      &rightstartsel,
+                      &rightendsel);
 
     /* Cache the result in suitably long-lived workspace */
     oldcontext = MemoryContextSwitchTo(root->planner_cxt);
@@ -1627,8 +1672,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
     cache->opfamily = pathkey->pk_opfamily;
     cache->strategy = pathkey->pk_strategy;
     cache->nulls_first = pathkey->pk_nulls_first;
-    cache->leftscansel = leftscansel;
-    cache->rightscansel = rightscansel;
+    cache->leftstartsel = leftstartsel;
+    cache->leftendsel = leftendsel;
+    cache->rightstartsel = rightstartsel;
+    cache->rightendsel = rightendsel;
 
     rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache);
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 103f4dc9d7..0b0d992a3b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.241 2007/11/15 22:25:16 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.242 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -128,8 +128,8 @@ static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
                              int rangelo, int rangehi);
 static char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
-static bool get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
-                     Oid sortop, Datum *max);
+static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
+                   Oid sortop, Datum *min, Datum *max);
 static Selectivity prefix_selectivity(VariableStatData *vardata,
                    Oid vartype, Oid opfamily, Const *prefixcon);
 static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
@@ -2172,18 +2172,24 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  * we can estimate how much of the input will actually be read.  This
  * can have a considerable impact on the cost when using indexscans.
  *
+ * Also, we can estimate how much of each input has to be read before the
+ * first join pair is found, which will affect the join's startup time.
+ *
  * clause should be a clause already known to be mergejoinable.  opfamily,
  * strategy, and nulls_first specify the sort ordering being used.
  *
- * *leftscan is set to the fraction of the left-hand variable expected
- * to be scanned (0 to 1), and similarly *rightscan for the right-hand
- * variable.
+ * The outputs are:
+ *  *leftstart is set to the fraction of the left-hand variable expected
+ *   to be scanned before the first join pair is found (0 to 1).
+ *  *leftend is set to the fraction of the left-hand variable expected
+ *   to be scanned before the join terminates (0 to 1).
+ *  *rightstart, *rightend similarly for the right-hand variable.
  */
 void
 mergejoinscansel(PlannerInfo *root, Node *clause,
                  Oid opfamily, int strategy, bool nulls_first,
-                 Selectivity *leftscan,
-                 Selectivity *rightscan)
+                 Selectivity *leftstart, Selectivity *leftend,
+                 Selectivity *rightstart, Selectivity *rightend)
 {
     Node       *left,
                *right;
@@ -2196,14 +2202,23 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
     Oid         opno,
                 lsortop,
                 rsortop,
+                lstatop,
+                rstatop,
+                ltop,
                 leop,
+                revltop,
                 revleop;
-    Datum       leftmax,
+    bool        isgt;
+    Datum       leftmin,
+                leftmax,
+                rightmin,
                 rightmax;
     double      selec;
 
     /* Set default results if we can't figure anything out. */
-    *leftscan = *rightscan = 1.0;
+    /* XXX should default "start" fraction be a bit more than 0? */
+    *leftstart = *rightstart = 0.0;
+    *leftend = *rightend = 1.0;
 
     /* Deconstruct the merge clause */
     if (!is_opclause(clause))
@@ -2229,30 +2244,103 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
 
     /*
      * Look up the various operators we need.  If we don't find them all, it
-     * probably means the opfamily is broken, but we cope anyway.
+     * probably means the opfamily is broken, but we just fail silently.
+     *
+     * Note: we expect that pg_statistic histograms will be sorted by the
+     * '<' operator, regardless of which sort direction we are considering.
      */
     switch (strategy)
     {
         case BTLessStrategyNumber:
-            lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype,
-                                          BTLessStrategyNumber);
-            rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype,
-                                          BTLessStrategyNumber);
-            leop = get_opfamily_member(opfamily, op_lefttype, op_righttype,
-                                       BTLessEqualStrategyNumber);
-            revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype,
-                                          BTLessEqualStrategyNumber);
+            isgt = false;
+            if (op_lefttype == op_righttype)
+            {
+                /* easy case */
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessEqualStrategyNumber);
+                lsortop = ltop;
+                rsortop = ltop;
+                lstatop = lsortop;
+                rstatop = rsortop;
+                revltop = ltop;
+                revleop = leop;
+            }
+            else
+            {
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTLessEqualStrategyNumber);
+                lsortop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                rsortop = get_opfamily_member(opfamily,
+                                              op_righttype, op_righttype,
+                                              BTLessStrategyNumber);
+                lstatop = lsortop;
+                rstatop = rsortop;
+                revltop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                revleop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTLessEqualStrategyNumber);
+            }
             break;
         case BTGreaterStrategyNumber:
             /* descending-order case */
-            lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype,
-                                          BTGreaterStrategyNumber);
-            rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype,
-                                          BTGreaterStrategyNumber);
-            leop = get_opfamily_member(opfamily, op_lefttype, op_righttype,
-                                       BTGreaterEqualStrategyNumber);
-            revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype,
-                                          BTGreaterEqualStrategyNumber);
+            isgt = true;
+            if (op_lefttype == op_righttype)
+            {
+                /* easy case */
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterEqualStrategyNumber);
+                lsortop = ltop;
+                rsortop = ltop;
+                lstatop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                rstatop = lstatop;
+                revltop = ltop;
+                revleop = leop;
+            }
+            else
+            {
+                ltop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterStrategyNumber);
+                leop = get_opfamily_member(opfamily,
+                                           op_lefttype, op_righttype,
+                                           BTGreaterEqualStrategyNumber);
+                lsortop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTGreaterStrategyNumber);
+                rsortop = get_opfamily_member(opfamily,
+                                              op_righttype, op_righttype,
+                                              BTGreaterStrategyNumber);
+                lstatop = get_opfamily_member(opfamily,
+                                              op_lefttype, op_lefttype,
+                                              BTLessStrategyNumber);
+                rstatop = get_opfamily_member(opfamily,
+                                              op_righttype, op_righttype,
+                                              BTLessStrategyNumber);
+                revltop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTGreaterStrategyNumber);
+                revleop = get_opfamily_member(opfamily,
+                                              op_righttype, op_lefttype,
+                                              BTGreaterEqualStrategyNumber);
+            }
             break;
         default:
             goto fail;          /* shouldn't get here */
@@ -2260,66 +2348,133 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
 
     if (!OidIsValid(lsortop) ||
         !OidIsValid(rsortop) ||
+        !OidIsValid(lstatop) ||
+        !OidIsValid(rstatop) ||
+        !OidIsValid(ltop) ||
         !OidIsValid(leop) ||
+        !OidIsValid(revltop) ||
         !OidIsValid(revleop))
         goto fail;              /* insufficient info in catalogs */
 
-    /* Try to get maximum values of both inputs */
-    if (!get_variable_maximum(root, &leftvar, lsortop, &leftmax))
-        goto fail;              /* no max available from stats */
-
-    if (!get_variable_maximum(root, &rightvar, rsortop, &rightmax))
-        goto fail;              /* no max available from stats */
+    /* Try to get ranges of both inputs */
+    if (!isgt)
+    {
+        if (!get_variable_range(root, &leftvar, lstatop,
+                                &leftmin, &leftmax))
+            goto fail;          /* no range available from stats */
+        if (!get_variable_range(root, &rightvar, rstatop,
+                                &rightmin, &rightmax))
+            goto fail;          /* no range available from stats */
+    }
+    else
+    {
+        /* need to swap the max and min */
+        if (!get_variable_range(root, &leftvar, lstatop,
+                                &leftmax, &leftmin))
+            goto fail;          /* no range available from stats */
+        if (!get_variable_range(root, &rightvar, rstatop,
+                                &rightmax, &rightmin))
+            goto fail;          /* no range available from stats */
+    }
 
     /*
      * Now, the fraction of the left variable that will be scanned is the
      * fraction that's <= the right-side maximum value.  But only believe
-     * non-default estimates, else stick with our 1.0.  Also, if the sort
-     * order is nulls-first, we're going to have to read over any nulls too.
+     * non-default estimates, else stick with our 1.0.
      */
-    selec = scalarineqsel(root, leop, false, &leftvar,
+    selec = scalarineqsel(root, leop, isgt, &leftvar,
                           rightmax, op_righttype);
     if (selec != DEFAULT_INEQ_SEL)
-    {
-        if (nulls_first && HeapTupleIsValid(leftvar.statsTuple))
-        {
-            Form_pg_statistic stats;
-
-            stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
-            selec += stats->stanullfrac;
-            CLAMP_PROBABILITY(selec);
-        }
-        *leftscan = selec;
-    }
+        *leftend = selec;
 
     /* And similarly for the right variable. */
-    selec = scalarineqsel(root, revleop, false, &rightvar,
+    selec = scalarineqsel(root, revleop, isgt, &rightvar,
                           leftmax, op_lefttype);
     if (selec != DEFAULT_INEQ_SEL)
+        *rightend = selec;
+
+    /*
+     * Only one of the two "end" fractions can really be less than 1.0;
+     * believe the smaller estimate and reset the other one to exactly 1.0.
+     * If we get exactly equal estimates (as can easily happen with
+     * self-joins), believe neither.
+     */
+    if (*leftend > *rightend)
+        *leftend = 1.0;
+    else if (*leftend < *rightend)
+        *rightend = 1.0;
+    else
+        *leftend = *rightend = 1.0;
+
+    /*
+     * Also, the fraction of the left variable that will be scanned before
+     * the first join pair is found is the fraction that's < the right-side
+     * minimum value.  But only believe non-default estimates, else stick with
+     * our own default.
+     */
+    selec = scalarineqsel(root, ltop, isgt, &leftvar,
+                          rightmin, op_righttype);
+    if (selec != DEFAULT_INEQ_SEL)
+        *leftstart = selec;
+
+    /* And similarly for the right variable. */
+    selec = scalarineqsel(root, revltop, isgt, &rightvar,
+                          leftmin, op_lefttype);
+    if (selec != DEFAULT_INEQ_SEL)
+        *rightstart = selec;
+
+    /*
+     * Only one of the two "start" fractions can really be more than zero;
+     * believe the larger estimate and reset the other one to exactly 0.0.
+     * If we get exactly equal estimates (as can easily happen with
+     * self-joins), believe neither.
+     */
+    if (*leftstart < *rightstart)
+        *leftstart = 0.0;
+    else if (*leftstart > *rightstart)
+        *rightstart = 0.0;
+    else
+        *leftstart = *rightstart = 0.0;
+
+    /*
+     * If the sort order is nulls-first, we're going to have to skip over any
+     * nulls too.  These would not have been counted by scalarineqsel, and
+     * we can safely add in this fraction regardless of whether we believe
+     * scalarineqsel's results or not.  But be sure to clamp the sum to 1.0!
+     */
+    if (nulls_first)
     {
-        if (nulls_first && HeapTupleIsValid(rightvar.statsTuple))
-        {
-            Form_pg_statistic stats;
+        Form_pg_statistic stats;
 
+        if (HeapTupleIsValid(leftvar.statsTuple))
+        {
+            stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
+            *leftstart += stats->stanullfrac;
+            CLAMP_PROBABILITY(*leftstart);
+            *leftend += stats->stanullfrac;
+            CLAMP_PROBABILITY(*leftend);
+        }
+        if (HeapTupleIsValid(rightvar.statsTuple))
+        {
             stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
-            selec += stats->stanullfrac;
-            CLAMP_PROBABILITY(selec);
+            *rightstart += stats->stanullfrac;
+            CLAMP_PROBABILITY(*rightstart);
+            *rightend += stats->stanullfrac;
+            CLAMP_PROBABILITY(*rightend);
         }
-        *rightscan = selec;
     }
 
-    /*
-     * Only one of the two fractions can really be less than 1.0; believe the
-     * smaller estimate and reset the other one to exactly 1.0.  If we get
-     * exactly equal estimates (as can easily happen with self-joins), believe
-     * neither.
-     */
-    if (*leftscan > *rightscan)
-        *leftscan = 1.0;
-    else if (*leftscan < *rightscan)
-        *rightscan = 1.0;
-    else
-        *leftscan = *rightscan = 1.0;
+    /* Disbelieve start >= end, just in case that can happen */
+    if (*leftstart >= *leftend)
+    {
+        *leftstart = 0.0;
+        *leftend = 1.0;
+    }
+    if (*rightstart >= *rightend)
+    {
+        *rightstart = 0.0;
+        *rightend = 1.0;
+    }
 
 fail:
     ReleaseVariableStats(leftvar);
@@ -3778,20 +3933,21 @@ get_variable_numdistinct(VariableStatData *vardata)
 }
 
 /*
- * get_variable_maximum
- *      Estimate the maximum value of the specified variable.
- *      If successful, store value in *max and return TRUE.
+ * get_variable_range
+ *      Estimate the minimum and maximum value of the specified variable.
+ *      If successful, store values in *min and *max, and return TRUE.
  *      If no data available, return FALSE.
  *
- * sortop is the "<" comparison operator to use.  (To extract the
- * minimum instead of the maximum, just pass the ">" operator instead.)
+ * sortop is the "<" comparison operator to use.  This should generally
+ * be "<" not ">", as only the former is likely to be found in pg_statistic.
  */
 static bool
-get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
-                     Oid sortop, Datum *max)
+get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
+                   Datum *min, Datum *max)
 {
+    Datum       tmin = 0;
     Datum       tmax = 0;
-    bool        have_max = false;
+    bool        have_data = false;
     Form_pg_statistic stats;
     int16       typLen;
     bool        typByVal;
@@ -3809,7 +3965,7 @@ get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
     get_typlenbyval(vardata->atttype, &typLen, &typByVal);
 
     /*
-     * If there is a histogram, grab the last or first value as appropriate.
+     * If there is a histogram, grab the first and last values.
      *
      * If there is a histogram that is sorted with some other operator than
     * the one we want, fail --- this suggests that there is data we can't
@@ -3823,42 +3979,24 @@ get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
     {
         if (nvalues > 0)
         {
+            tmin = datumCopy(values[0], typByVal, typLen);
             tmax = datumCopy(values[nvalues - 1], typByVal, typLen);
-            have_max = true;
+            have_data = true;
         }
         free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
     }
-    else
+    else if (get_attstatsslot(vardata->statsTuple,
+                              vardata->atttype, vardata->atttypmod,
+                              STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                              &values, &nvalues,
+                              NULL, NULL))
     {
-        Oid         rsortop = get_commutator(sortop);
-
-        if (OidIsValid(rsortop) &&
-            get_attstatsslot(vardata->statsTuple,
-                             vardata->atttype, vardata->atttypmod,
-                             STATISTIC_KIND_HISTOGRAM, rsortop,
-                             &values, &nvalues,
-                             NULL, NULL))
-        {
-            if (nvalues > 0)
-            {
-                tmax = datumCopy(values[0], typByVal, typLen);
-                have_max = true;
-            }
-            free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
-        }
-        else if (get_attstatsslot(vardata->statsTuple,
-                                  vardata->atttype, vardata->atttypmod,
-                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
-                                  &values, &nvalues,
-                                  NULL, NULL))
-        {
-            free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
-            return false;
-        }
+        free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+        return false;
     }
 
     /*
-     * If we have most-common-values info, look for a large MCV.  This is
+     * If we have most-common-values info, look for extreme MCVs.  This is
      * needed even if we also have a histogram, since the histogram excludes
     * the MCVs.  However, usually the MCVs will not be the extreme values, so
     * avoid unnecessary data copying.
@@ -3869,31 +4007,41 @@ get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
                              &values, &nvalues,
                              NULL, NULL))
     {
-        bool        large_mcv = false;
+        bool        tmin_is_mcv = false;
+        bool        tmax_is_mcv = false;
         FmgrInfo    opproc;
 
         fmgr_info(get_opcode(sortop), &opproc);
 
         for (i = 0; i < nvalues; i++)
         {
-            if (!have_max)
+            if (!have_data)
             {
-                tmax = values[i];
-                large_mcv = have_max = true;
+                tmin = tmax = values[i];
+                tmin_is_mcv = tmax_is_mcv = have_data = true;
+                continue;
+            }
+            if (DatumGetBool(FunctionCall2(&opproc, values[i], tmin)))
+            {
+                tmin = values[i];
+                tmin_is_mcv = true;
             }
-            else if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i])))
+            if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i])))
             {
                 tmax = values[i];
-                large_mcv = true;
+                tmax_is_mcv = true;
             }
         }
-        if (large_mcv)
+        if (tmin_is_mcv)
+            tmin = datumCopy(tmin, typByVal, typLen);
+        if (tmax_is_mcv)
             tmax = datumCopy(tmax, typByVal, typLen);
         free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
     }
 
+    *min = tmin;
     *max = tmax;
-    return have_max;
+    return have_data;
 }
 
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 2dce223d3a..e82b00cdd4 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.150 2007/11/15 22:25:17 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.151 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -993,8 +993,10 @@ typedef struct MergeScanSelCache
     int         strategy;       /* sort direction (ASC or DESC) */
     bool        nulls_first;    /* do NULLs come before normal values? */
     /* Results */
-    Selectivity leftscansel;    /* scan fraction for clause left side */
-    Selectivity rightscansel;   /* scan fraction for clause right side */
+    Selectivity leftstartsel;   /* first-join fraction for clause left side */
+    Selectivity leftendsel;     /* last-join fraction for clause left side */
+    Selectivity rightstartsel;  /* first-join fraction for clause right side */
+    Selectivity rightendsel;    /* last-join fraction for clause right side */
 } MergeScanSelCache;
 
 /*
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index f92bb16d07..ededfdf3c6 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.41 2007/11/07 22:37:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.42 2007/12/08 21:05:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -161,8 +161,8 @@ extern Selectivity rowcomparesel(PlannerInfo *root,
 extern void mergejoinscansel(PlannerInfo *root, Node *clause,
                  Oid opfamily, int strategy, bool nulls_first,
-                 Selectivity *leftscan,
-                 Selectivity *rightscan);
+                 Selectivity *leftstart, Selectivity *leftend,
+                 Selectivity *rightstart, Selectivity *rightend);
 extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
                      double input_rows);
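
To see how the new start/end fractions feed the merge-qual cost formulas in cost_mergejoin(), here is a minimal standalone C sketch (not PostgreSQL code). All row counts, selectivities, and the per-tuple cost are invented for illustration; the real planner gets the fractions from mergejoinscansel() and clamps row counts with clamp_row_est(), which the sketch only approximates with fmax().

/*
 * Standalone sketch of the start/end-fraction arithmetic shown in the patch.
 * Compile with: cc sketch.c -lm
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
    /* hypothetical input sizes and scan fractions */
    double      outer_path_rows = 10000.0;
    double      inner_path_rows = 5000.0;
    double      outerstartsel = 0.20;   /* fraction skipped before first join pair */
    double      outerendsel = 0.90;     /* fraction scanned before the join ends */
    double      innerstartsel = 0.00;
    double      innerendsel = 1.00;
    double      rescanratio = 1.0;      /* assume no mark/restore rescans */
    double      qual_per_tuple = 0.0025;    /* made-up merge-qual CPU cost per tuple */

    /* convert selectivities to row counts, as the patched code does */
    double      outer_skip_rows = rint(outer_path_rows * outerstartsel);
    double      inner_skip_rows = rint(inner_path_rows * innerstartsel);
    double      outer_rows = fmax(rint(outer_path_rows * outerendsel), 1.0);
    double      inner_rows = fmax(rint(inner_path_rows * innerendsel), 1.0);

    /* rows skipped before the first join pair now count toward startup cost */
    double      startup = qual_per_tuple *
        (outer_skip_rows + inner_skip_rows * rescanratio);

    /* only rows between the start and end fractions count toward run cost */
    double      run = qual_per_tuple *
        ((outer_rows - outer_skip_rows) +
         (inner_rows - inner_skip_rows) * rescanratio);

    printf("merge-qual startup contribution: %g\n", startup);
    printf("merge-qual run contribution:     %g\n", run);
    return 0;
}

With the pre-patch formula, the 2000 outer rows scanned before the first possible match would all have been charged to run cost, making the merge join's startup cost look cheaper than it really is; the split above is what allows a realistic startup estimate when the two inputs' value ranges only partially overlap.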