From f7111f72d2fd31979c6d473cdde56ba8e7aba26a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 25 Mar 2019 18:42:52 -0400 Subject: [PATCH] Improve planner's selectivity estimates for inequalities on CTID. We were getting just DEFAULT_INEQ_SEL for comparisons such as "ctid >= constant", but it's possible to do a lot better if we don't mind some assumptions about the table's tuple density being reasonably uniform. There are already assumptions much like that elsewhere in the planner, so that hardly seems like much of an objection. Extracted from a patch set that also proposes to introduce a special executor node type for such queries. Not sure if that's going to make it into v12, but improving the selectivity estimate is useful independently of that. Edmund Horner, reviewed by David Rowley Discussion: https://postgr.es/m/CAMyN-kB-nFTkF=VA_JPwFNo08S0d-Yk0F741S2B7LDmYAi8eyA@mail.gmail.com --- src/backend/utils/adt/selfuncs.c | 75 ++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 12d30d7d63..ecffffbd0c 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -557,6 +557,81 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, if (!HeapTupleIsValid(vardata->statsTuple)) { + /* + * No stats are available. Typically this means we have to fall back + * on the default estimate; but if the variable is CTID then we can + * make an estimate based on comparing the constant to the table size. + */ + if (vardata->var && IsA(vardata->var, Var) && + ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber) + { + ItemPointer itemptr; + double block; + double density; + + /* + * If the relation's empty, we're going to include all of it. + * (This is mostly to avoid divide-by-zero below.) + */ + if (vardata->rel->pages == 0) + return 1.0; + + itemptr = (ItemPointer) DatumGetPointer(constval); + block = ItemPointerGetBlockNumberNoCheck(itemptr); + + /* + * Determine the average number of tuples per page (density). + * + * Since the last page will, on average, be only half full, we can + * estimate it to have half as many tuples as earlier pages. So + * give it half the weight of a regular page. + */ + density = vardata->rel->tuples / (vardata->rel->pages - 0.5); + + /* If target is the last page, use half the density. */ + if (block >= vardata->rel->pages - 1) + density *= 0.5; + + /* + * Using the average tuples per page, calculate how far into the + * page the itemptr is likely to be and adjust block accordingly, + * by adding that fraction of a whole block (but never more than a + * whole block, no matter how high the itemptr's offset is). Here + * we are ignoring the possibility of dead-tuple line pointers, + * which is fairly bogus, but we lack the info to do better. + */ + if (density > 0.0) + { + OffsetNumber offset = ItemPointerGetOffsetNumberNoCheck(itemptr); + + block += Min(offset / density, 1.0); + } + + /* + * Convert relative block number to selectivity. Again, the last + * page has only half weight. + */ + selec = block / (vardata->rel->pages - 0.5); + + /* + * The calculation so far gave us a selectivity for the "<=" case. + * We'll have one less tuple for "<" and one additional tuple for + * ">=", the latter of which we'll reverse the selectivity for + * below, so we can simply subtract one tuple for both cases. The + * cases that need this adjustment can be identified by iseq being + * equal to isgt. + */ + if (iseq == isgt && vardata->rel->tuples >= 1.0) + selec -= (1.0 / vardata->rel->tuples); + + /* Finally, reverse the selectivity for the ">", ">=" cases. */ + if (isgt) + selec = 1.0 - selec; + + CLAMP_PROBABILITY(selec); + return selec; + } + /* no stats available, so default result */ return DEFAULT_INEQ_SEL; } -- 2.40.0