From d4af2a64816ea4639c9614112b0c8a5a3cb4b522 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 16 Aug 2008 00:01:38 +0000 Subject: [PATCH] Clean up the loose ends in selectivity estimation left by my patch for semi and anti joins. To do this, pass the SpecialJoinInfo struct for the current join as an additional optional argument to operator join selectivity estimation functions. This allows the estimator to tell not only what kind of join is being formed, but which variable is on which side of the join; a requirement long recognized but not dealt with till now. This also leaves the door open for future improvements in the estimators, such as accounting for the null-insertion effects of lower outer joins. I didn't do anything about that in the current patch but the information is in principle deducible from what's passed. The patch also clarifies the definition of join selectivity for semi/anti joins: it's the fraction of the left input that has (at least one) match in the right input. This allows getting rid of some very fuzzy thinking that I had committed in the original 7.4-era IN-optimization patch. There's probably room to estimate this better than the present patch does, but at least we know what to estimate. Since I had to touch CREATE OPERATOR anyway to allow a variant signature for join estimator functions, I took the opportunity to add a couple of additional checks that were missing, per my recent message to -hackers: * Check that estimator functions return float8; * Require execute permission at the time of CREATE OPERATOR on the operator's function as well as the estimator functions; * Require ownership of any pre-existing operator that's modified by the command. I also moved the lookup of the functions out of OperatorCreate() and into operatorcmds.c, since that seemed more consistent with most of the other catalog object creation processes, eg CREATE TYPE. --- src/backend/catalog/pg_operator.c | 242 ++++++-------- src/backend/commands/operatorcmds.c | 118 ++++++- src/backend/optimizer/path/clausesel.c | 105 +++--- src/backend/optimizer/path/costsize.c | 235 +++++-------- src/backend/optimizer/util/plancat.c | 10 +- src/backend/utils/adt/selfuncs.c | 400 ++++++++++++++++++----- src/include/catalog/catversion.h | 4 +- src/include/catalog/pg_operator.h | 8 +- src/include/catalog/pg_proc.h | 32 +- src/include/optimizer/plancat.h | 5 +- src/include/utils/selfuncs.h | 6 +- src/test/regress/expected/opr_sanity.out | 9 +- src/test/regress/sql/opr_sanity.sql | 9 +- 13 files changed, 702 insertions(+), 481 deletions(-) diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c index 11690aa8ce..94b3bf2862 100644 --- a/src/backend/catalog/pg_operator.c +++ b/src/backend/catalog/pg_operator.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.104 2008/06/19 00:46:04 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.105 2008/08/16 00:01:35 tgl Exp $ * * NOTES * these routines moved here from commands/define.c and somewhat cleaned up. @@ -21,12 +21,12 @@ #include "access/xact.h" #include "catalog/dependency.h" #include "catalog/indexing.h" +#include "catalog/namespace.h" #include "catalog/pg_namespace.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "miscadmin.h" -#include "parser/parse_func.h" #include "parser/parse_oper.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -273,6 +273,11 @@ OperatorShellMake(const char *operatorName, heap_freetuple(tup); + /* + * Make sure the tuple is visible for subsequent lookups/updates. + */ + CommandCounterIncrement(); + /* * close the operator relation and return the oid. */ @@ -289,14 +294,19 @@ OperatorShellMake(const char *operatorName, * operatorNamespace namespace for new operator * leftTypeId X left type ID * rightTypeId X right type ID - * procedureName procedure for operator + * procedureId procedure ID for operator * commutatorName X commutator operator * negatorName X negator operator - * restrictionName X restriction sel. procedure - * joinName X join sel. procedure + * restrictionId X restriction selectivity procedure ID + * joinId X join selectivity procedure ID * canMerge merge join can be used with this operator * canHash hash join can be used with this operator * + * The caller should have validated properties and permissions for the + * objects passed as OID references. We must handle the commutator and + * negator operator references specially, however, since those need not + * exist beforehand. + * * This routine gets complicated because it allows the user to * specify operators that do not exist. For example, if operator * "op" is being defined, the negator operator "negop" and the @@ -310,59 +320,17 @@ OperatorShellMake(const char *operatorName, * information about the operator (just its name and types). * Forward declaration is used only for this purpose, it is * not available to the user as it is for type definition. - * - * Algorithm: - * - * check if operator already defined - * if so, but oprcode is null, save the Oid -- we are filling in a shell - * otherwise error - * get the attribute types from relation descriptor for pg_operator - * assign values to the fields of the operator: - * operatorName - * owner id (simply the user id of the caller) - * operator "kind" either "b" for binary or "l" for left unary - * canMerge boolean - * canHash boolean - * leftTypeObjectId -- type must already be defined - * rightTypeObjectId -- this is optional, enter ObjectId=0 if none specified - * resultType -- defer this, since it must be determined from - * the pg_procedure catalog - * commutatorObjectId -- if this is NULL, enter ObjectId=0 - * else if this already exists, enter its ObjectId - * else if this does not yet exist, and is not - * the same as the main operatorName, then create - * a shell and enter the new ObjectId - * else if this does not exist but IS the same - * name & types as the main operator, set the ObjectId=0. - * (We are creating a self-commutating operator.) - * The link will be fixed later by OperatorUpd. - * negatorObjectId -- same as for commutatorObjectId - * operatorProcedure -- must access the pg_procedure catalog to get the - * ObjectId of the procedure that actually does the operator - * actions this is required. Do a lookup to find out the - * return type of the procedure - * restrictionProcedure -- must access the pg_procedure catalog to get - * the ObjectId but this is optional - * joinProcedure -- same as restrictionProcedure - * now either insert or replace the operator into the pg_operator catalog - * if the operator shell is being filled in - * access the catalog in order to get a valid buffer - * create a tuple using ModifyHeapTuple - * get the t_self from the modified tuple and call RelationReplaceHeapTuple - * else if a new operator is being created - * create a tuple using heap_formtuple - * call simple_heap_insert */ void OperatorCreate(const char *operatorName, Oid operatorNamespace, Oid leftTypeId, Oid rightTypeId, - List *procedureName, + Oid procedureId, List *commutatorName, List *negatorName, - List *restrictionName, - List *joinName, + Oid restrictionId, + Oid joinId, bool canMerge, bool canHash) { @@ -373,15 +341,10 @@ OperatorCreate(const char *operatorName, Datum values[Natts_pg_operator]; Oid operatorObjectId; bool operatorAlreadyDefined; - Oid procOid; Oid operResultType; Oid commutatorId, - negatorId, - restOid, - joinOid; + negatorId; bool selfCommutator = false; - Oid typeId[4]; /* only need up to 4 args here */ - int nargs; NameData oname; TupleDesc tupDesc; int i; @@ -395,11 +358,6 @@ OperatorCreate(const char *operatorName, errmsg("\"%s\" is not a valid operator name", operatorName))); - if (!OidIsValid(leftTypeId) && !OidIsValid(rightTypeId)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), - errmsg("at least one of leftarg or rightarg must be specified"))); - if (!(OidIsValid(leftTypeId) && OidIsValid(rightTypeId))) { /* If it's not a binary op, these things mustn't be set: */ @@ -407,7 +365,7 @@ OperatorCreate(const char *operatorName, ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("only binary operators can have commutators"))); - if (joinName) + if (OidIsValid(joinId)) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("only binary operators can have join selectivity"))); @@ -421,6 +379,33 @@ OperatorCreate(const char *operatorName, errmsg("only binary operators can hash"))); } + operResultType = get_func_rettype(procedureId); + + if (operResultType != BOOLOID) + { + /* If it's not a boolean op, these things mustn't be set: */ + if (negatorName) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have negators"))); + if (OidIsValid(restrictionId)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have restriction selectivity"))); + if (OidIsValid(joinId)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have join selectivity"))); + if (canMerge) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can merge join"))); + if (canHash) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can hash"))); + } + operatorObjectId = OperatorGet(operatorName, operatorNamespace, leftTypeId, @@ -435,85 +420,13 @@ OperatorCreate(const char *operatorName, /* * At this point, if operatorObjectId is not InvalidOid then we are - * filling in a previously-created shell. - */ - - /* - * Look up registered procedures -- find the return type of procedureName - * to place in "result" field. Do this before shells are created so we - * don't have to worry about deleting them later. - */ - if (!OidIsValid(leftTypeId)) - { - typeId[0] = rightTypeId; - nargs = 1; - } - else if (!OidIsValid(rightTypeId)) - { - typeId[0] = leftTypeId; - nargs = 1; - } - else - { - typeId[0] = leftTypeId; - typeId[1] = rightTypeId; - nargs = 2; - } - procOid = LookupFuncName(procedureName, nargs, typeId, false); - operResultType = get_func_rettype(procOid); - - /* - * find restriction estimator - */ - if (restrictionName) - { - typeId[0] = INTERNALOID; /* Query */ - typeId[1] = OIDOID; /* operator OID */ - typeId[2] = INTERNALOID; /* args list */ - typeId[3] = INT4OID; /* varRelid */ - - restOid = LookupFuncName(restrictionName, 4, typeId, false); - } - else - restOid = InvalidOid; - - /* - * find join estimator - */ - if (joinName) - { - typeId[0] = INTERNALOID; /* Query */ - typeId[1] = OIDOID; /* operator OID */ - typeId[2] = INTERNALOID; /* args list */ - typeId[3] = INT2OID; /* jointype */ - - joinOid = LookupFuncName(joinName, 4, typeId, false); - } - else - joinOid = InvalidOid; - - /* - * set up values in the operator tuple + * filling in a previously-created shell. Insist that the user own + * any such shell. */ - - for (i = 0; i < Natts_pg_operator; ++i) - { - values[i] = (Datum) NULL; - replaces[i] = 'r'; - nulls[i] = ' '; - } - - i = 0; - namestrcpy(&oname, operatorName); - values[i++] = NameGetDatum(&oname); /* oprname */ - values[i++] = ObjectIdGetDatum(operatorNamespace); /* oprnamespace */ - values[i++] = ObjectIdGetDatum(GetUserId()); /* oprowner */ - values[i++] = CharGetDatum(leftTypeId ? (rightTypeId ? 'b' : 'r') : 'l'); /* oprkind */ - values[i++] = BoolGetDatum(canMerge); /* oprcanmerge */ - values[i++] = BoolGetDatum(canHash); /* oprcanhash */ - values[i++] = ObjectIdGetDatum(leftTypeId); /* oprleft */ - values[i++] = ObjectIdGetDatum(rightTypeId); /* oprright */ - values[i++] = ObjectIdGetDatum(operResultType); /* oprresult */ + if (OidIsValid(operatorObjectId) && + !pg_oper_ownercheck(operatorObjectId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER, + operatorName); /* * Set up the other operators. If they do not currently exist, create @@ -529,6 +442,12 @@ OperatorCreate(const char *operatorName, leftTypeId, rightTypeId, true); + /* Permission check: must own other operator */ + if (OidIsValid(commutatorId) && + !pg_oper_ownercheck(commutatorId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER, + NameListToString(commutatorName)); + /* * self-linkage to this operator; will fix below. Note that only * self-linkage for commutation makes sense. @@ -538,7 +457,6 @@ OperatorCreate(const char *operatorName, } else commutatorId = InvalidOid; - values[i++] = ObjectIdGetDatum(commutatorId); /* oprcom */ if (negatorName) { @@ -548,19 +466,48 @@ OperatorCreate(const char *operatorName, operatorName, operatorNamespace, leftTypeId, rightTypeId, false); + + /* Permission check: must own other operator */ + if (OidIsValid(negatorId) && + !pg_oper_ownercheck(negatorId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER, + NameListToString(negatorName)); } else negatorId = InvalidOid; - values[i++] = ObjectIdGetDatum(negatorId); /* oprnegate */ - values[i++] = ObjectIdGetDatum(procOid); /* oprcode */ - values[i++] = ObjectIdGetDatum(restOid); /* oprrest */ - values[i++] = ObjectIdGetDatum(joinOid); /* oprjoin */ + /* + * set up values in the operator tuple + */ + + for (i = 0; i < Natts_pg_operator; ++i) + { + values[i] = (Datum) NULL; + replaces[i] = 'r'; + nulls[i] = ' '; + } + + i = 0; + namestrcpy(&oname, operatorName); + values[i++] = NameGetDatum(&oname); /* oprname */ + values[i++] = ObjectIdGetDatum(operatorNamespace); /* oprnamespace */ + values[i++] = ObjectIdGetDatum(GetUserId()); /* oprowner */ + values[i++] = CharGetDatum(leftTypeId ? (rightTypeId ? 'b' : 'r') : 'l'); /* oprkind */ + values[i++] = BoolGetDatum(canMerge); /* oprcanmerge */ + values[i++] = BoolGetDatum(canHash); /* oprcanhash */ + values[i++] = ObjectIdGetDatum(leftTypeId); /* oprleft */ + values[i++] = ObjectIdGetDatum(rightTypeId); /* oprright */ + values[i++] = ObjectIdGetDatum(operResultType); /* oprresult */ + values[i++] = ObjectIdGetDatum(commutatorId); /* oprcom */ + values[i++] = ObjectIdGetDatum(negatorId); /* oprnegate */ + values[i++] = ObjectIdGetDatum(procedureId); /* oprcode */ + values[i++] = ObjectIdGetDatum(restrictionId); /* oprrest */ + values[i++] = ObjectIdGetDatum(joinId); /* oprjoin */ pg_operator_desc = heap_open(OperatorRelationId, RowExclusiveLock); /* - * If we are adding to an operator shell, update; else insert + * If we are replacing an operator shell, update; else insert */ if (operatorObjectId) { @@ -706,7 +653,8 @@ OperatorUpd(Oid baseId, Oid commId, Oid negId) /* * check and update the commutator & negator, if necessary * - * First make sure we can see them... + * We need a CommandCounterIncrement here in case of a self-commutator + * operator: we'll need to update the tuple that we just inserted. */ CommandCounterIncrement(); diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c index 884181c5cd..391b4d6507 100644 --- a/src/backend/commands/operatorcmds.c +++ b/src/backend/commands/operatorcmds.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.40 2008/06/19 00:46:04 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.41 2008/08/16 00:01:35 tgl Exp $ * * DESCRIPTION * The "DefineFoo" routines take the parse tree and pick out the @@ -39,8 +39,10 @@ #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/pg_operator.h" +#include "catalog/pg_type.h" #include "commands/defrem.h" #include "miscadmin.h" +#include "parser/parse_func.h" #include "parser/parse_oper.h" #include "parser/parse_type.h" #include "utils/acl.h" @@ -76,6 +78,11 @@ DefineOperator(List *names, List *parameters) List *negatorName = NIL; /* optional negator operator name */ List *restrictionName = NIL; /* optional restrict. sel. procedure */ List *joinName = NIL; /* optional join sel. procedure */ + Oid functionOid; /* functions converted to OID */ + Oid restrictionOid; + Oid joinOid; + Oid typeId[5]; /* only need up to 5 args here */ + int nargs; ListCell *pl; /* Convert list of names to a name and namespace */ @@ -154,6 +161,109 @@ DefineOperator(List *names, List *parameters) if (typeName2) typeId2 = typenameTypeId(NULL, typeName2, NULL); + if (!OidIsValid(typeId1) && !OidIsValid(typeId2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("at least one of leftarg or rightarg must be specified"))); + + /* + * Look up the operator's underlying function. + */ + if (!OidIsValid(typeId1)) + { + typeId[0] = typeId2; + nargs = 1; + } + else if (!OidIsValid(typeId2)) + { + typeId[0] = typeId1; + nargs = 1; + } + else + { + typeId[0] = typeId1; + typeId[1] = typeId2; + nargs = 2; + } + functionOid = LookupFuncName(functionName, nargs, typeId, false); + + /* + * We require EXECUTE rights for the function. This isn't strictly + * necessary, since EXECUTE will be checked at any attempted use of + * the operator, but it seems like a good idea anyway. + */ + aclresult = pg_proc_aclcheck(functionOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_PROC, + NameListToString(functionName)); + + /* + * Look up restriction estimator if specified + */ + if (restrictionName) + { + typeId[0] = INTERNALOID; /* PlannerInfo */ + typeId[1] = OIDOID; /* operator OID */ + typeId[2] = INTERNALOID; /* args list */ + typeId[3] = INT4OID; /* varRelid */ + + restrictionOid = LookupFuncName(restrictionName, 4, typeId, false); + + /* estimators must return float8 */ + if (get_func_rettype(restrictionOid) != FLOAT8OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("restriction estimator function %s must return type \"float8\"", + NameListToString(restrictionName)))); + + /* Require EXECUTE rights for the estimator */ + aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_PROC, + NameListToString(restrictionName)); + } + else + restrictionOid = InvalidOid; + + /* + * Look up join estimator if specified + */ + if (joinName) + { + typeId[0] = INTERNALOID; /* PlannerInfo */ + typeId[1] = OIDOID; /* operator OID */ + typeId[2] = INTERNALOID; /* args list */ + typeId[3] = INT2OID; /* jointype */ + typeId[4] = INTERNALOID; /* SpecialJoinInfo */ + + /* + * As of Postgres 8.4, the preferred signature for join estimators + * has 5 arguments, but we still allow the old 4-argument form. + * Try the preferred form first. + */ + joinOid = LookupFuncName(joinName, 5, typeId, true); + if (!OidIsValid(joinOid)) + joinOid = LookupFuncName(joinName, 4, typeId, true); + /* If not found, reference the 5-argument signature in error msg */ + if (!OidIsValid(joinOid)) + joinOid = LookupFuncName(joinName, 5, typeId, false); + + /* estimators must return float8 */ + if (get_func_rettype(joinOid) != FLOAT8OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("join estimator function %s must return type \"float8\"", + NameListToString(joinName)))); + + /* Require EXECUTE rights for the estimator */ + aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_PROC, + NameListToString(joinName)); + } + else + joinOid = InvalidOid; + /* * now have OperatorCreate do all the work.. */ @@ -161,11 +271,11 @@ DefineOperator(List *names, List *parameters) oprNamespace, /* namespace */ typeId1, /* left type id */ typeId2, /* right type id */ - functionName, /* function for operator */ + functionOid, /* function for operator */ commutatorName, /* optional commutator operator name */ negatorName, /* optional negator operator name */ - restrictionName, /* optional restrict. sel. procedure */ - joinName, /* optional join sel. procedure name */ + restrictionOid, /* optional restrict. sel. procedure */ + joinOid, /* optional join sel. procedure name */ canMerge, /* operator merges */ canHash); /* operator hashes */ } diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index e788f27873..6db42c5f58 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -398,6 +398,50 @@ bms_is_subset_singleton(const Bitmapset *s, int x) return false; } +/* + * treat_as_join_clause - + * Decide whether an operator clause is to be handled by the + * restriction or join estimator. Subroutine for clause_selectivity(). + */ +static inline bool +treat_as_join_clause(Node *clause, RestrictInfo *rinfo, + int varRelid, SpecialJoinInfo *sjinfo) +{ + if (varRelid != 0) + { + /* + * Caller is forcing restriction mode (eg, because we are examining + * an inner indexscan qual). + */ + return false; + } + else if (sjinfo == NULL) + { + /* + * It must be a restriction clause, since it's being evaluated at + * a scan node. + */ + return false; + } + else + { + /* + * Otherwise, it's a join if there's more than one relation used. + * We can optimize this calculation if an rinfo was passed. + * + * XXX Since we know the clause is being evaluated at a join, + * the only way it could be single-relation is if it was delayed + * by outer joins. Although we can make use of the restriction + * qual estimators anyway, it seems likely that we ought to account + * for the probability of injected nulls somehow. + */ + if (rinfo) + return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); + else + return (NumRelids(clause) > 1); + } +} + /* * clause_selectivity - @@ -429,9 +473,6 @@ bms_is_subset_singleton(const Bitmapset *s, int x) * root->join_info_list. * 2. For an INNER join, sjinfo is just a transient struct, and only the * relids and jointype fields in it can be trusted. - * 3. XXX sjinfo might be NULL even though it really is a join. This case - * will go away soon, but fixing it requires API changes for oprjoin and - * amcostestimate functions. * It is possible for jointype to be different from sjinfo->jointype. * This indicates we are considering a variant join: either with * the LHS and RHS switched, or with one input unique-ified. @@ -603,36 +644,14 @@ clause_selectivity(PlannerInfo *root, else if (is_opclause(clause) || IsA(clause, DistinctExpr)) { Oid opno = ((OpExpr *) clause)->opno; - bool is_join_clause; - if (varRelid != 0) - { - /* - * If we are considering a nestloop join then all clauses are - * restriction clauses, since we are only interested in the one - * relation. - */ - is_join_clause = false; - } - else - { - /* - * Otherwise, it's a join if there's more than one relation used. - * We can optimize this calculation if an rinfo was passed. - */ - if (rinfo) - is_join_clause = (bms_membership(rinfo->clause_relids) == - BMS_MULTIPLE); - else - is_join_clause = (NumRelids(clause) > 1); - } - - if (is_join_clause) + if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo)) { /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, ((OpExpr *) clause)->args, - jointype); + jointype, + sjinfo); } else { @@ -671,35 +690,11 @@ clause_selectivity(PlannerInfo *root, #endif else if (IsA(clause, ScalarArrayOpExpr)) { - /* First, decide if it's a join clause, same as for OpExpr */ - bool is_join_clause; - - if (varRelid != 0) - { - /* - * If we are considering a nestloop join then all clauses are - * restriction clauses, since we are only interested in the one - * relation. - */ - is_join_clause = false; - } - else - { - /* - * Otherwise, it's a join if there's more than one relation used. - * We can optimize this calculation if an rinfo was passed. - */ - if (rinfo) - is_join_clause = (bms_membership(rinfo->clause_relids) == - BMS_MULTIPLE); - else - is_join_clause = (NumRelids(clause) > 1); - } - /* Use node specific selectivity calculation function */ s1 = scalararraysel(root, (ScalarArrayOpExpr *) clause, - is_join_clause, + treat_as_join_clause(clause, rinfo, + varRelid, sjinfo), varRelid, jointype, sjinfo); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d9d8c89864..14c3cf1f21 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.193 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.194 2008/08/16 00:01:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -118,10 +118,8 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey); static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context); -static Selectivity approx_selectivity(PlannerInfo *root, List *quals, - SpecialJoinInfo *sjinfo); -static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root, - SpecialJoinInfo *sjinfo); +static double approx_tuple_count(PlannerInfo *root, JoinPath *path, + List *quals, SpecialJoinInfo *sjinfo); static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -1288,20 +1286,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = nestloop_inner_path_rows(inner_path); double ntuples; - Selectivity joininfactor; if (!enable_nestloop) startup_cost += disable_cost; - /* - * If we're doing JOIN_IN then we will stop scanning inner tuples for an - * outer tuple as soon as we have one match. Account for the effects of - * this by scaling down the cost estimates in proportion to the JOIN_IN - * selectivity. (This assumes that all the quals attached to the join are - * IN quals, which should be true.) - */ - joininfactor = join_in_selectivity(path, root, sjinfo); - /* cost of source data */ /* @@ -1328,12 +1316,12 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) run_cost += (outer_path_rows - 1) * inner_path->startup_cost; } run_cost += outer_path_rows * - (inner_path->total_cost - inner_path->startup_cost) * joininfactor; + (inner_path->total_cost - inner_path->startup_cost); /* * Compute number of tuples processed (not number emitted!) */ - ntuples = outer_path_rows * inner_path_rows * joininfactor; + ntuples = outer_path_rows * inner_path_rows; /* CPU costs */ cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root); @@ -1369,7 +1357,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; - Selectivity merge_selec; QualCost merge_qual_cost; QualCost qp_qual_cost; double outer_path_rows = PATH_ROWS(outer_path); @@ -1385,7 +1372,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) outerendsel, innerstartsel, innerendsel; - Selectivity joininfactor; Path sort_path; /* dummy for result of cost_sort */ /* Protect some assumptions below that rowcounts aren't zero */ @@ -1398,21 +1384,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) startup_cost += disable_cost; /* - * Compute cost and selectivity of the mergequals and qpquals (other - * restriction clauses) separately. We use approx_selectivity here for - * speed --- in most cases, any errors won't affect the result much. - * - * Note: it's probably bogus to use the normal selectivity calculation - * here when either the outer or inner path is a UniquePath. + * Compute cost of the mergequals and qpquals (other restriction clauses) + * separately. */ - merge_selec = approx_selectivity(root, mergeclauses, sjinfo); cost_qual_eval(&merge_qual_cost, mergeclauses, root); cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root); qp_qual_cost.startup -= merge_qual_cost.startup; qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple; - /* approx # tuples passing the merge quals */ - mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows); + /* + * Get approx # tuples passing the mergequals. We use approx_tuple_count + * here for speed --- in most cases, any errors won't affect the result + * much. + */ + mergejointuples = approx_tuple_count(root, &path->jpath, + mergeclauses, sjinfo); /* * When there are equal merge keys in the outer relation, the mergejoin @@ -1422,11 +1408,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) * but on the other hand we ignore the bookkeeping costs of mark/restore. * Not clear if it's worth developing a more refined model. * - * The number of re-fetches can be estimated approximately as size of - * merge join output minus size of inner relation. Assume that the - * distinct key values are 1, 2, ..., and denote the number of values of - * each key in the outer relation as m1, m2, ...; in the inner relation, - * n1, n2, ... Then we have + * For regular inner and outer joins, the number of re-fetches can be + * estimated approximately as size of merge join output minus size of + * inner relation. Assume that the distinct key values are 1, 2, ..., and + * denote the number of values of each key in the outer relation as m1, + * m2, ...; in the inner relation, n1, n2, ... Then we have * * size of join = m1 * n1 + m2 * n2 + ... * @@ -1439,8 +1425,17 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) * are effectively subtracting those from the number of rescanned tuples, * when we should not. Can we do better without expensive selectivity * computations? + * + * For SEMI and ANTI joins, only one inner tuple need be rescanned for + * each group of same-keyed outer tuples (assuming that all joinquals + * are merge quals). This makes the effect small enough to ignore, + * so we just set rescannedtuples = 0. Likewise, the whole issue is + * moot if we are working from a unique-ified outer input. */ - if (IsA(outer_path, UniquePath)) + if (sjinfo->jointype == JOIN_SEMI || + sjinfo->jointype == JOIN_ANTI) + rescannedtuples = 0; + else if (IsA(outer_path, UniquePath)) rescannedtuples = 0; else { @@ -1505,7 +1500,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) innerstartsel = cache->leftstartsel; innerendsel = cache->leftendsel; } - if (path->jpath.jointype == JOIN_LEFT) + if (path->jpath.jointype == JOIN_LEFT || + path->jpath.jointype == JOIN_ANTI) { outerstartsel = 0.0; outerendsel = 1.0; @@ -1600,21 +1596,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) /* CPU costs */ - /* - * If we're doing JOIN_IN then we will stop outputting inner tuples for an - * outer tuple as soon as we have one match. Account for the effects of - * this by scaling down the cost estimates in proportion to the expected - * output size. (This assumes that all the quals attached to the join are - * IN quals, which should be true.) - */ - joininfactor = join_in_selectivity(&path->jpath, root, sjinfo); - /* * The number of tuple comparisons needed is approximately number of outer * rows plus number of inner rows plus number of rescanned tuples (can we * refine this?). At each one, we need to evaluate the mergejoin quals. - * NOTE: JOIN_IN mode does not save any work here, so do NOT include - * joininfactor. */ startup_cost += merge_qual_cost.startup; startup_cost += merge_qual_cost.per_tuple * @@ -1627,12 +1612,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) * For each tuple that gets through the mergejoin proper, we charge * cpu_tuple_cost plus the cost of evaluating additional restriction * clauses that are to be applied at the join. (This is pessimistic since - * not all of the quals may get evaluated at each tuple.) This work is - * skipped in JOIN_IN mode, so apply the factor. + * not all of the quals may get evaluated at each tuple.) */ startup_cost += qp_qual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple; - run_cost += cpu_per_tuple * mergejointuples * joininfactor; + run_cost += cpu_per_tuple * mergejointuples; path->jpath.path.startup_cost = startup_cost; path->jpath.path.total_cost = startup_cost + run_cost; @@ -1711,7 +1695,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; - Selectivity hash_selec; QualCost hash_qual_cost; QualCost qp_qual_cost; double hashjointuples; @@ -1722,28 +1705,27 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) int numbatches; double virtualbuckets; Selectivity innerbucketsize; - Selectivity joininfactor; ListCell *hcl; if (!enable_hashjoin) startup_cost += disable_cost; /* - * Compute cost and selectivity of the hashquals and qpquals (other - * restriction clauses) separately. We use approx_selectivity here for - * speed --- in most cases, any errors won't affect the result much. - * - * Note: it's probably bogus to use the normal selectivity calculation - * here when either the outer or inner path is a UniquePath. + * Compute cost of the hashquals and qpquals (other restriction clauses) + * separately. */ - hash_selec = approx_selectivity(root, hashclauses, sjinfo); cost_qual_eval(&hash_qual_cost, hashclauses, root); cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root); qp_qual_cost.startup -= hash_qual_cost.startup; qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple; - /* approx # tuples passing the hash quals */ - hashjointuples = clamp_row_est(hash_selec * outer_path_rows * inner_path_rows); + /* + * Get approx # tuples passing the hashquals. We use approx_tuple_count + * here for speed --- in most cases, any errors won't affect the result + * much. + */ + hashjointuples = approx_tuple_count(root, &path->jpath, + hashclauses, sjinfo); /* cost of source data */ startup_cost += outer_path->startup_cost; @@ -1858,15 +1840,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) /* CPU costs */ - /* - * If we're doing JOIN_IN then we will stop comparing inner tuples to an - * outer tuple as soon as we have one match. Account for the effects of - * this by scaling down the cost estimates in proportion to the expected - * output size. (This assumes that all the quals attached to the join are - * IN quals, which should be true.) - */ - joininfactor = join_in_selectivity(&path->jpath, root, sjinfo); - /* * The number of tuple comparisons needed is the number of outer tuples * times the typical number of tuples in a hash bucket, which is the inner @@ -1878,8 +1851,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) */ startup_cost += hash_qual_cost.startup; run_cost += hash_qual_cost.per_tuple * - outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * - joininfactor * 0.5; + outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5; /* * For each tuple that gets through the hashjoin proper, we charge @@ -1889,7 +1861,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) */ startup_cost += qp_qual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple; - run_cost += cpu_per_tuple * hashjointuples * joininfactor; + run_cost += cpu_per_tuple * hashjointuples; path->jpath.path.startup_cost = startup_cost; path->jpath.path.total_cost = startup_cost + run_cost; @@ -2213,13 +2185,12 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan) /* - * approx_selectivity - * Quick-and-dirty estimation of clause selectivities. - * The input can be either an implicitly-ANDed list of boolean - * expressions, or a list of RestrictInfo nodes (typically the latter). + * approx_tuple_count + * Quick-and-dirty estimation of the number of join rows passing + * a set of qual conditions. * - * Currently this is only used in join estimation, so sjinfo should never - * be NULL. + * The quals can be either an implicitly-ANDed list of boolean expressions, + * or a list of RestrictInfo nodes (typically the latter). * * This is quick-and-dirty because we bypass clauselist_selectivity, and * simply multiply the independent clause selectivities together. Now @@ -2232,20 +2203,34 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan) * output tuples are generated and passed through qpqual checking, it * seems OK to live with the approximation. */ -static Selectivity -approx_selectivity(PlannerInfo *root, List *quals, SpecialJoinInfo *sjinfo) +static double +approx_tuple_count(PlannerInfo *root, JoinPath *path, + List *quals, SpecialJoinInfo *sjinfo) { - Selectivity total = 1.0; + double tuples; + double outer_tuples = path->outerjoinpath->parent->rows; + double inner_tuples = path->innerjoinpath->parent->rows; + Selectivity selec = 1.0; ListCell *l; + /* Get the approximate selectivity */ foreach(l, quals) { Node *qual = (Node *) lfirst(l); /* Note that clause_selectivity will be able to cache its result */ - total *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo); + selec *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo); } - return total; + + /* Apply it correctly using the input relation sizes */ + if (sjinfo->jointype == JOIN_SEMI) + tuples = selec * outer_tuples; + else if (sjinfo->jointype == JOIN_ANTI) + tuples = (1.0 - selec) * outer_tuples; + else + tuples = selec * outer_tuples * inner_tuples; + + return clamp_row_est(tuples); } @@ -2315,7 +2300,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, Selectivity jselec; Selectivity pselec; double nrows; - UniquePath *upath; /* * Compute joinclause selectivity. Note that we are only considering @@ -2380,9 +2364,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * pushed-down quals are applied after the outer join, so their * selectivity applies fully. * - * For JOIN_IN and variants, the Cartesian product is figured with respect - * to a unique-ified input, and then we can clamp to the size of the other - * input. + * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction + * of LHS rows that have matches, and we apply that straightforwardly. */ switch (jointype) { @@ -2404,22 +2387,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, nrows *= pselec; break; case JOIN_SEMI: - /* XXX this is unsafe, could Assert? */ - upath = create_unique_path(root, inner_rel, - inner_rel->cheapest_total_path, - sjinfo); - if (upath) - nrows = outer_rel->rows * upath->rows * jselec; - else - nrows = outer_rel->rows * inner_rel->rows * jselec; - if (nrows > outer_rel->rows) - nrows = outer_rel->rows; + nrows = outer_rel->rows * jselec; + nrows *= pselec; break; case JOIN_ANTI: - /* XXX this is utterly wrong */ - nrows = outer_rel->rows * inner_rel->rows * jselec; - if (nrows < outer_rel->rows) - nrows = outer_rel->rows; + nrows = outer_rel->rows * (1.0 - jselec); nrows *= pselec; break; default: @@ -2432,67 +2404,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, rel->rows = clamp_row_est(nrows); } -/* - * join_in_selectivity - * Determines the factor by which a JOIN_IN join's result is expected - * to be smaller than an ordinary inner join. - * - * 'path' is already filled in except for the cost fields - * 'sjinfo' must be the JOIN_SEMI SpecialJoinInfo for the join - */ -static Selectivity -join_in_selectivity(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) -{ - RelOptInfo *innerrel; - UniquePath *innerunique; - Selectivity selec; - double nrows; - - /* Return 1.0 whenever it's not JOIN_IN */ - if (path->jointype != JOIN_SEMI) - return 1.0; - Assert(sjinfo && sjinfo->jointype == JOIN_SEMI); - - /* - * Return 1.0 if the inner side is already known unique. The case where - * the inner path is already a UniquePath probably cannot happen in - * current usage, but check it anyway for completeness. The interesting - * case is where we've determined the inner relation itself is unique, - * which we can check by looking at the rows estimate for its UniquePath. - */ - if (IsA(path->innerjoinpath, UniquePath)) - return 1.0; - innerrel = path->innerjoinpath->parent; - /* XXX might assert if sjinfo doesn't exactly match innerrel? */ - innerunique = create_unique_path(root, - innerrel, - innerrel->cheapest_total_path, - sjinfo); - if (innerunique && innerunique->rows >= innerrel->rows) - return 1.0; - - /* - * Compute same result set_joinrel_size_estimates would compute for - * JOIN_INNER. Note that we use the input rels' absolute size estimates, - * not PATH_ROWS() which might be less; if we used PATH_ROWS() we'd be - * double-counting the effects of any join clauses used in input scans. - */ - selec = clauselist_selectivity(root, - path->joinrestrictinfo, - 0, - JOIN_INNER, - NULL); - nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec; - - nrows = clamp_row_est(nrows); - - /* See if it's larger than the actual JOIN_IN size estimate */ - if (nrows > path->path.parent->rows) - return path->path.parent->rows / nrows; - else - return 1.0; -} - /* * set_function_size_estimates * Set the size estimates for a base relation that is a function call. diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index cb9a07b77d..30e841b487 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.148 2008/07/13 20:45:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.149 2008/08/16 00:01:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -830,7 +830,8 @@ Selectivity join_selectivity(PlannerInfo *root, Oid operator, List *args, - JoinType jointype) + JoinType jointype, + SpecialJoinInfo *sjinfo) { RegProcedure oprjoin = get_oprjoin(operator); float8 result; @@ -842,11 +843,12 @@ join_selectivity(PlannerInfo *root, if (!oprjoin) return (Selectivity) 0.5; - result = DatumGetFloat8(OidFunctionCall4(oprjoin, + result = DatumGetFloat8(OidFunctionCall5(oprjoin, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), - Int16GetDatum(jointype))); + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); if (result < 0.0 || result > 1.0) elog(ERROR, "invalid join selectivity: %f", result); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 7d5609d447..d484221232 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.251 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.252 2008/08/16 00:01:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -55,19 +55,34 @@ * * float8 oprrest (internal, oid, internal, int4); * + * The result is a selectivity, that is, a fraction (0 to 1) of the rows + * of the relation that are expected to produce a TRUE result for the + * given operator. + * * The call convention for a join estimator (oprjoin function) is similar - * except that varRelid is not needed, and instead the join type is + * except that varRelid is not needed, and instead join information is * supplied: * * Selectivity oprjoin (PlannerInfo *root, * Oid operator, * List *args, - * JoinType jointype); + * JoinType jointype, + * SpecialJoinInfo *sjinfo); + * + * float8 oprjoin (internal, oid, internal, int2, internal); * - * float8 oprjoin (internal, oid, internal, int2); + * (Before Postgres 8.4, join estimators had only the first four of these + * parameters. That signature is still allowed, but deprecated.) The + * relationship between jointype and sjinfo is explained in the comments for + * clause_selectivity() --- the short version is that jointype is usually + * best ignored in favor of examining sjinfo. * - * (We deliberately make the SQL signature different to facilitate - * catching errors.) + * Join selectivity for regular inner and outer joins is defined as the + * fraction (0 to 1) of the cross product of the relations that is expected + * to produce a TRUE result for the given operator. For both semi and anti + * joins, however, the selectivity is defined as the fraction of the left-hand + * side relation's rows that are expected to have a match (ie, at least one + * row with a TRUE result) in the right-hand side. *---------- */ @@ -113,6 +128,10 @@ static double var_eq_non_const(VariableStatData *vardata, Oid operator, static double ineq_histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, bool isgt, Datum constval, Oid consttype); +static double eqjoinsel_inner(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2); +static double eqjoinsel_semi(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -1579,7 +1598,6 @@ scalararraysel(PlannerInfo *root, Oid nominal_element_type; RegProcedure oprsel; FmgrInfo oprselproc; - Datum selarg4; Selectivity s1; /* @@ -1587,15 +1605,9 @@ scalararraysel(PlannerInfo *root, * it hasn't got one. */ if (is_join_clause) - { oprsel = get_oprjoin(operator); - selarg4 = Int16GetDatum(jointype); - } else - { oprsel = get_oprrest(operator); - selarg4 = Int32GetDatum(varRelid); - } if (!oprsel) return (Selectivity) 0.5; fmgr_info(oprsel, &oprselproc); @@ -1660,11 +1672,19 @@ scalararraysel(PlannerInfo *root, elem_values[i], elem_nulls[i], elmbyval)); - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - selarg4)); + if (is_join_clause) + s2 = DatumGetFloat8(FunctionCall5(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + else + s2 = DatumGetFloat8(FunctionCall4(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (useOr) s1 = s1 + s2 - s1 * s2; else @@ -1694,11 +1714,19 @@ scalararraysel(PlannerInfo *root, * estimation function would really care ... */ args = list_make2(leftop, elem); - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - selarg4)); + if (is_join_clause) + s2 = DatumGetFloat8(FunctionCall5(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + else + s2 = DatumGetFloat8(FunctionCall4(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (useOr) s1 = s1 + s2 - s1 * s2; else @@ -1721,11 +1749,19 @@ scalararraysel(PlannerInfo *root, dummyexpr->typeId = nominal_element_type; dummyexpr->typeMod = -1; args = list_make2(leftop, dummyexpr); - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - selarg4)); + if (is_join_clause) + s2 = DatumGetFloat8(FunctionCall5(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + else + s2 = DatumGetFloat8(FunctionCall4(&oprselproc, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); s1 = useOr ? 0.0 : 1.0; /* @@ -1803,13 +1839,24 @@ rowcomparesel(PlannerInfo *root, /* Build equivalent arg list for single operator */ opargs = list_make2(linitial(clause->largs), linitial(clause->rargs)); - /* Decide if it's a join clause, same as for OpExpr */ + /* + * Decide if it's a join clause. This should match clausesel.c's + * treat_as_join_clause(), except that we intentionally consider only + * the leading columns and not the rest of the clause. + */ if (varRelid != 0) { /* - * If we are considering a nestloop join then all clauses are - * restriction clauses, since we are only interested in the one - * relation. + * Caller is forcing restriction mode (eg, because we are examining + * an inner indexscan qual). + */ + is_join_clause = false; + } + else if (sjinfo == NULL) + { + /* + * It must be a restriction clause, since it's being evaluated at + * a scan node. */ is_join_clause = false; } @@ -1817,7 +1864,6 @@ rowcomparesel(PlannerInfo *root, { /* * Otherwise, it's a join if there's more than one relation used. - * Notice we ignore the low-order columns here. */ is_join_clause = (NumRelids((Node *) opargs) > 1); } @@ -1827,7 +1873,8 @@ rowcomparesel(PlannerInfo *root, /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, opargs, - jointype); + jointype, + sjinfo); } else { @@ -1849,10 +1896,60 @@ eqjoinsel(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); +#ifdef NOT_USED JoinType jointype = (JoinType) PG_GETARG_INT16(3); +#endif + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); double selec; VariableStatData vardata1; VariableStatData vardata2; + bool join_is_reversed; + + get_join_variables(root, args, sjinfo, + &vardata1, &vardata2, &join_is_reversed); + + switch (sjinfo->jointype) + { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_FULL: + selec = eqjoinsel_inner(operator, &vardata1, &vardata2); + break; + case JOIN_SEMI: + case JOIN_ANTI: + if (!join_is_reversed) + selec = eqjoinsel_semi(operator, &vardata1, &vardata2); + else + selec = eqjoinsel_semi(get_commutator(operator), + &vardata2, &vardata1); + break; + default: + /* other values not expected here */ + elog(ERROR, "unrecognized join type: %d", + (int) sjinfo->jointype); + selec = 0; /* keep compiler quiet */ + break; + } + + ReleaseVariableStats(vardata1); + ReleaseVariableStats(vardata2); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} + +/* + * eqjoinsel_inner --- eqjoinsel for normal inner join + * + * We also use this for LEFT/FULL outer joins; it's not presently clear + * that it's worth trying to distinguish them here. + */ +static double +eqjoinsel_inner(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2) +{ + double selec; double nd1; double nd2; Form_pg_statistic stats1 = NULL; @@ -1868,29 +1965,27 @@ eqjoinsel(PG_FUNCTION_ARGS) float4 *numbers2 = NULL; int nnumbers2 = 0; - get_join_variables(root, args, &vardata1, &vardata2); - - nd1 = get_variable_numdistinct(&vardata1); - nd2 = get_variable_numdistinct(&vardata2); + nd1 = get_variable_numdistinct(vardata1); + nd2 = get_variable_numdistinct(vardata2); - if (HeapTupleIsValid(vardata1.statsTuple)) + if (HeapTupleIsValid(vardata1->statsTuple)) { - stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple); - have_mcvs1 = get_attstatsslot(vardata1.statsTuple, - vardata1.atttype, - vardata1.atttypmod, + stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); + have_mcvs1 = get_attstatsslot(vardata1->statsTuple, + vardata1->atttype, + vardata1->atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values1, &nvalues1, &numbers1, &nnumbers1); } - if (HeapTupleIsValid(vardata2.statsTuple)) + if (HeapTupleIsValid(vardata2->statsTuple)) { - stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple); - have_mcvs2 = get_attstatsslot(vardata2.statsTuple, - vardata2.atttype, - vardata2.atttypmod, + stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple); + have_mcvs2 = get_attstatsslot(vardata2->statsTuple, + vardata2->atttype, + vardata2->atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values2, &nvalues2, @@ -1932,25 +2027,6 @@ eqjoinsel(PG_FUNCTION_ARGS) hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool)); hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool)); - /* - * If we are doing any variant of JOIN_SEMI, pretend all the values of - * the righthand relation are unique (ie, act as if it's been - * DISTINCT'd). - * - * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or - * JOIN_RIGHT here, because we do not have enough information to - * determine which var is really on which side of the join. Perhaps - * someday we should pass in more information. - */ - if (jointype == JOIN_SEMI) - { - float4 oneovern = 1.0 / nd2; - - for (i = 0; i < nvalues2; i++) - numbers2[i] = oneovern; - nullfrac2 = oneovern; - } - /* * Note we assume that each MCV will match at most one member of the * other MCV list. If the operator isn't really equality, there could @@ -2075,18 +2151,170 @@ eqjoinsel(PG_FUNCTION_ARGS) } if (have_mcvs1) - free_attstatsslot(vardata1.atttype, values1, nvalues1, + free_attstatsslot(vardata1->atttype, values1, nvalues1, numbers1, nnumbers1); if (have_mcvs2) - free_attstatsslot(vardata2.atttype, values2, nvalues2, + free_attstatsslot(vardata2->atttype, values2, nvalues2, numbers2, nnumbers2); - ReleaseVariableStats(vardata1); - ReleaseVariableStats(vardata2); + return selec; +} - CLAMP_PROBABILITY(selec); +/* + * eqjoinsel_semi --- eqjoinsel for semi join + * + * (Also used for anti join, which we are supposed to estimate the same way.) + * Caller has ensured that vardata1 is the LHS variable. + */ +static double +eqjoinsel_semi(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2) +{ + double selec; + double nd1; + double nd2; + Form_pg_statistic stats1 = NULL; + Form_pg_statistic stats2 = NULL; + bool have_mcvs1 = false; + Datum *values1 = NULL; + int nvalues1 = 0; + float4 *numbers1 = NULL; + int nnumbers1 = 0; + bool have_mcvs2 = false; + Datum *values2 = NULL; + int nvalues2 = 0; + float4 *numbers2 = NULL; + int nnumbers2 = 0; - PG_RETURN_FLOAT8((float8) selec); + nd1 = get_variable_numdistinct(vardata1); + nd2 = get_variable_numdistinct(vardata2); + + if (HeapTupleIsValid(vardata1->statsTuple)) + { + stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); + have_mcvs1 = get_attstatsslot(vardata1->statsTuple, + vardata1->atttype, + vardata1->atttypmod, + STATISTIC_KIND_MCV, + InvalidOid, + &values1, &nvalues1, + &numbers1, &nnumbers1); + } + + if (HeapTupleIsValid(vardata2->statsTuple)) + { + stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple); + have_mcvs2 = get_attstatsslot(vardata2->statsTuple, + vardata2->atttype, + vardata2->atttypmod, + STATISTIC_KIND_MCV, + InvalidOid, + &values2, &nvalues2, + &numbers2, &nnumbers2); + } + + if (have_mcvs1 && have_mcvs2 && OidIsValid(operator)) + { + /* + * We have most-common-value lists for both relations. Run through + * the lists to see which MCVs actually join to each other with the + * given operator. This allows us to determine the exact join + * selectivity for the portion of the relations represented by the MCV + * lists. We still have to estimate for the remaining population, but + * in a skewed distribution this gives us a big leg up in accuracy. + */ + FmgrInfo eqproc; + bool *hasmatch1; + bool *hasmatch2; + double nullfrac1 = stats1->stanullfrac; + double matchfreq1; + int i, + nmatches; + + fmgr_info(get_opcode(operator), &eqproc); + hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool)); + hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool)); + + /* + * Note we assume that each MCV will match at most one member of the + * other MCV list. If the operator isn't really equality, there could + * be multiple matches --- but we don't look for them, both for speed + * and because the math wouldn't add up... + */ + nmatches = 0; + for (i = 0; i < nvalues1; i++) + { + int j; + + for (j = 0; j < nvalues2; j++) + { + if (hasmatch2[j]) + continue; + if (DatumGetBool(FunctionCall2(&eqproc, + values1[i], + values2[j]))) + { + hasmatch1[i] = hasmatch2[j] = true; + nmatches++; + break; + } + } + } + /* Sum up frequencies of matched MCVs */ + matchfreq1 = 0.0; + for (i = 0; i < nvalues1; i++) + { + if (hasmatch1[i]) + matchfreq1 += numbers1[i]; + } + CLAMP_PROBABILITY(matchfreq1); + pfree(hasmatch1); + pfree(hasmatch2); + + /* + * Now we need to estimate the fraction of relation 1 that has at + * least one join partner. We know for certain that the matched + * MCVs do, so that gives us a lower bound, but we're really in the + * dark about everything else. Our crude approach is: if nd1 <= nd2 + * then assume all non-null rel1 rows have join partners, else assume + * for the uncertain rows that a fraction nd2/nd1 have join partners. + * We can discount the known-matched MCVs from the distinct-values + * counts before doing the division. + */ + nd1 -= nmatches; + nd2 -= nmatches; + if (nd1 <= nd2 || nd2 <= 0) + selec = Max(matchfreq1, 1.0 - nullfrac1); + else + { + double uncertain = 1.0 - matchfreq1 - nullfrac1; + + CLAMP_PROBABILITY(uncertain); + selec = matchfreq1 + (nd2/nd1) * uncertain; + } + } + else + { + /* + * Without MCV lists for both sides, we can only use the heuristic + * about nd1 vs nd2. + */ + double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0; + + if (nd1 <= nd2 || nd2 <= 0) + selec = 1.0 - nullfrac1; + else + selec = (nd2/nd1) * (1.0 - nullfrac1); + } + + if (have_mcvs1) + free_attstatsslot(vardata1->atttype, values1, nvalues1, + numbers1, nnumbers1); + if (have_mcvs2) + free_attstatsslot(vardata2->atttype, values2, nvalues2, + numbers2, nnumbers2); + + return selec; } /* @@ -2099,6 +2327,7 @@ neqjoinsel(PG_FUNCTION_ARGS) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); JoinType jointype = (JoinType) PG_GETARG_INT16(3); + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); Oid eqop; float8 result; @@ -2109,11 +2338,12 @@ neqjoinsel(PG_FUNCTION_ARGS) eqop = get_negator(operator); if (eqop) { - result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel, + result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel, PointerGetDatum(root), ObjectIdGetDatum(eqop), PointerGetDatum(args), - Int16GetDatum(jointype))); + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); } else { @@ -3661,10 +3891,17 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid, /* * get_join_variables * Apply examine_variable() to each side of a join clause. + * Also, attempt to identify whether the join clause has the same + * or reversed sense compared to the SpecialJoinInfo. + * + * We consider the join clause "normal" if it is "lhs_var OP rhs_var", + * or "reversed" if it is "rhs_var OP lhs_var". In complicated cases + * where we can't tell for sure, we default to assuming it's normal. */ void -get_join_variables(PlannerInfo *root, List *args, - VariableStatData *vardata1, VariableStatData *vardata2) +get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, + VariableStatData *vardata1, VariableStatData *vardata2, + bool *join_is_reversed) { Node *left, *right; @@ -3677,6 +3914,15 @@ get_join_variables(PlannerInfo *root, List *args, examine_variable(root, left, 0, vardata1); examine_variable(root, right, 0, vardata2); + + if (vardata1->rel && + bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand)) + *join_is_reversed = true; /* var1 is on RHS */ + else if (vardata2->rel && + bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand)) + *join_is_reversed = true; /* var2 is on LHS */ + else + *join_is_reversed = false; } /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index b2345d1364..27501367f5 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.477 2008/08/11 13:58:46 heikki Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.478 2008/08/16 00:01:37 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200808111 +#define CATALOG_VERSION_NO 200808141 #endif diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index e0f5b48aa4..2fc91e92e3 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.161 2008/07/14 00:51:45 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.162 2008/08/16 00:01:37 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -941,11 +941,11 @@ extern void OperatorCreate(const char *operatorName, Oid operatorNamespace, Oid leftTypeId, Oid rightTypeId, - List *procedureName, + Oid procedureId, List *commutatorName, List *negatorName, - List *restrictionName, - List *joinName, + Oid restrictionId, + Oid joinId, bool canMerge, bool canHash); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index c516af8a16..d00d36e85f 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.509 2008/07/18 03:32:53 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.510 2008/08/16 00:01:37 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -223,13 +223,13 @@ DATA(insert OID = 103 ( scalarltsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "22 DESCR("restriction selectivity of < and related operators on scalar datatypes"); DATA(insert OID = 104 ( scalargtsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ scalargtsel _null_ _null_ _null_ )); DESCR("restriction selectivity of > and related operators on scalar datatypes"); -DATA(insert OID = 105 ( eqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ eqjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 105 ( eqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ eqjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of = and related operators"); -DATA(insert OID = 106 ( neqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ neqjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 106 ( neqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ neqjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of <> and related operators"); -DATA(insert OID = 107 ( scalarltjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ scalarltjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 107 ( scalarltjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ scalarltjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of < and related operators on scalar datatypes"); -DATA(insert OID = 108 ( scalargtjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ scalargtjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 108 ( scalargtjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ scalargtjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of > and related operators on scalar datatypes"); DATA(insert OID = 109 ( unknownin PGNSP PGUID 12 1 0 0 f f t f i 1 705 "2275" _null_ _null_ _null_ unknownin _null_ _null_ _null_ )); @@ -289,7 +289,7 @@ DATA(insert OID = 138 ( box_center PGNSP PGUID 12 1 0 0 f f t f i 1 600 "60 DESCR("center of"); DATA(insert OID = 139 ( areasel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ areasel _null_ _null_ _null_ )); DESCR("restriction selectivity for area-comparison operators"); -DATA(insert OID = 140 ( areajoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ areajoinsel _null_ _null_ _null_ )); +DATA(insert OID = 140 ( areajoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ areajoinsel _null_ _null_ _null_ )); DESCR("join selectivity for area-comparison operators"); DATA(insert OID = 141 ( int4mul PGNSP PGUID 12 1 0 0 f f t f i 2 23 "23 23" _null_ _null_ _null_ int4mul _null_ _null_ _null_ )); DESCR("multiply"); @@ -1630,11 +1630,11 @@ DESCR("current clock time"); DATA(insert OID = 1300 ( positionsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ positionsel _null_ _null_ _null_ )); DESCR("restriction selectivity for position-comparison operators"); -DATA(insert OID = 1301 ( positionjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ positionjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1301 ( positionjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ positionjoinsel _null_ _null_ _null_ )); DESCR("join selectivity for position-comparison operators"); DATA(insert OID = 1302 ( contsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ contsel _null_ _null_ _null_ )); DESCR("restriction selectivity for containment comparison operators"); -DATA(insert OID = 1303 ( contjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ contjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1303 ( contjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ contjoinsel _null_ _null_ _null_ )); DESCR("join selectivity for containment comparison operators"); DATA(insert OID = 1304 ( overlaps PGNSP PGUID 12 1 0 0 f f f f i 4 16 "1184 1184 1184 1184" _null_ _null_ _null_ overlaps_timestamp _null_ _null_ _null_ )); @@ -2684,9 +2684,9 @@ DATA(insert OID = 1814 ( iclikesel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 DESCR("restriction selectivity of ILIKE"); DATA(insert OID = 1815 ( icnlikesel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ icnlikesel _null_ _null_ _null_ )); DESCR("restriction selectivity of NOT ILIKE"); -DATA(insert OID = 1816 ( iclikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ iclikejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1816 ( iclikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ iclikejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of ILIKE"); -DATA(insert OID = 1817 ( icnlikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ icnlikejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1817 ( icnlikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ icnlikejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of NOT ILIKE"); DATA(insert OID = 1818 ( regexeqsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ regexeqsel _null_ _null_ _null_ )); DESCR("restriction selectivity of regex match"); @@ -2700,17 +2700,17 @@ DATA(insert OID = 1822 ( nlikesel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 2 DESCR("restriction selectivity of NOT LIKE"); DATA(insert OID = 1823 ( icregexnesel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ icregexnesel _null_ _null_ _null_ )); DESCR("restriction selectivity of case-insensitive regex non-match"); -DATA(insert OID = 1824 ( regexeqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ regexeqjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1824 ( regexeqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ regexeqjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of regex match"); -DATA(insert OID = 1825 ( likejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ likejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1825 ( likejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ likejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of LIKE"); -DATA(insert OID = 1826 ( icregexeqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ icregexeqjoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1826 ( icregexeqjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ icregexeqjoinsel _null_ _null_ _null_ )); DESCR("join selectivity of case-insensitive regex match"); -DATA(insert OID = 1827 ( regexnejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ regexnejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1827 ( regexnejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ regexnejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of regex non-match"); -DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ nlikejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ nlikejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of NOT LIKE"); -DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ )); +DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ )); DESCR("join selectivity of case-insensitive regex non-match"); /* Aggregate-related functions */ diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index 7471d1b44d..7963b745f5 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.50 2008/06/19 00:46:06 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.51 2008/08/16 00:01:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -50,6 +50,7 @@ extern Selectivity restriction_selectivity(PlannerInfo *root, extern Selectivity join_selectivity(PlannerInfo *root, Oid operator, List *args, - JoinType jointype); + JoinType jointype, + SpecialJoinInfo *sjinfo); #endif /* PLANCAT_H */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index e94e7e5916..120269eee5 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.45 2008/08/14 18:48:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.46 2008/08/16 00:01:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -104,8 +104,10 @@ extern bool get_restriction_variable(PlannerInfo *root, List *args, VariableStatData *vardata, Node **other, bool *varonleft); extern void get_join_variables(PlannerInfo *root, List *args, + SpecialJoinInfo *sjinfo, VariableStatData *vardata1, - VariableStatData *vardata2); + VariableStatData *vardata2, + bool *join_is_reversed); extern double get_variable_numdistinct(VariableStatData *vardata); extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 533bac3ab6..a21c2ba374 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -543,17 +543,20 @@ WHERE p1.oprrest = p2.oid AND -- If oprjoin is set, the operator must be a binary boolean op, -- and it must link to a proc with the right signature -- to be a join selectivity estimator. --- The proc signature we want is: float8 proc(internal, oid, internal, int2) +-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal) +-- (Note: the old signature with only 4 args is still allowed, but no core +-- estimator should be using it.) SELECT p1.oid, p1.oprname, p2.oid, p2.proname FROM pg_operator AS p1, pg_proc AS p2 WHERE p1.oprjoin = p2.oid AND (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR p2.prorettype != 'float8'::regtype OR p2.proretset OR - p2.pronargs != 4 OR + p2.pronargs != 5 OR p2.proargtypes[0] != 'internal'::regtype OR p2.proargtypes[1] != 'oid'::regtype OR p2.proargtypes[2] != 'internal'::regtype OR - p2.proargtypes[3] != 'int2'::regtype); + p2.proargtypes[3] != 'int2'::regtype OR + p2.proargtypes[4] != 'internal'::regtype); oid | oprname | oid | proname -----+---------+-----+--------- (0 rows) diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 5017849830..57c2cad951 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -444,18 +444,21 @@ WHERE p1.oprrest = p2.oid AND -- If oprjoin is set, the operator must be a binary boolean op, -- and it must link to a proc with the right signature -- to be a join selectivity estimator. --- The proc signature we want is: float8 proc(internal, oid, internal, int2) +-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal) +-- (Note: the old signature with only 4 args is still allowed, but no core +-- estimator should be using it.) SELECT p1.oid, p1.oprname, p2.oid, p2.proname FROM pg_operator AS p1, pg_proc AS p2 WHERE p1.oprjoin = p2.oid AND (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR p2.prorettype != 'float8'::regtype OR p2.proretset OR - p2.pronargs != 4 OR + p2.pronargs != 5 OR p2.proargtypes[0] != 'internal'::regtype OR p2.proargtypes[1] != 'oid'::regtype OR p2.proargtypes[2] != 'internal'::regtype OR - p2.proargtypes[3] != 'int2'::regtype); + p2.proargtypes[3] != 'int2'::regtype OR + p2.proargtypes[4] != 'internal'::regtype); -- **************** pg_aggregate **************** -- 2.40.0