]> granicus.if.org Git - postgresql/commitdiff
Clean up the loose ends in selectivity estimation left by my patch for semi
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 16 Aug 2008 00:01:38 +0000 (00:01 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 16 Aug 2008 00:01:38 +0000 (00:01 +0000)
and anti joins.  To do this, pass the SpecialJoinInfo struct for the current
join as an additional optional argument to operator join selectivity
estimation functions.  This allows the estimator to tell not only what kind
of join is being formed, but which variable is on which side of the join;
a requirement long recognized but not dealt with till now.  This also leaves
the door open for future improvements in the estimators, such as accounting
for the null-insertion effects of lower outer joins.  I didn't do anything
about that in the current patch but the information is in principle deducible
from what's passed.

The patch also clarifies the definition of join selectivity for semi/anti
joins: it's the fraction of the left input that has (at least one) match
in the right input.  This allows getting rid of some very fuzzy thinking
that I had committed in the original 7.4-era IN-optimization patch.
There's probably room to estimate this better than the present patch does,
but at least we know what to estimate.

Since I had to touch CREATE OPERATOR anyway to allow a variant signature
for join estimator functions, I took the opportunity to add a couple of
additional checks that were missing, per my recent message to -hackers:
* Check that estimator functions return float8;
* Require execute permission at the time of CREATE OPERATOR on the
operator's function as well as the estimator functions;
* Require ownership of any pre-existing operator that's modified by
the command.
I also moved the lookup of the functions out of OperatorCreate() and
into operatorcmds.c, since that seemed more consistent with most of
the other catalog object creation processes, eg CREATE TYPE.

13 files changed:
src/backend/catalog/pg_operator.c
src/backend/commands/operatorcmds.c
src/backend/optimizer/path/clausesel.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/util/plancat.c
src/backend/utils/adt/selfuncs.c
src/include/catalog/catversion.h
src/include/catalog/pg_operator.h
src/include/catalog/pg_proc.h
src/include/optimizer/plancat.h
src/include/utils/selfuncs.h
src/test/regress/expected/opr_sanity.out
src/test/regress/sql/opr_sanity.sql

index 11690aa8ce62ae64c176d4794ced8e3bbb96d739..94b3bf28625e6b138ca6b9b36b3cb87aa18fb7b3 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.104 2008/06/19 00:46:04 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.105 2008/08/16 00:01:35 tgl Exp $
  *
  * NOTES
  *       these routines moved here from commands/define.c and somewhat cleaned up.
 #include "access/xact.h"
 #include "catalog/dependency.h"
 #include "catalog/indexing.h"
+#include "catalog/namespace.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
 #include "miscadmin.h"
-#include "parser/parse_func.h"
 #include "parser/parse_oper.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
@@ -273,6 +273,11 @@ OperatorShellMake(const char *operatorName,
 
        heap_freetuple(tup);
 
+       /*
+        * Make sure the tuple is visible for subsequent lookups/updates.
+        */
+       CommandCounterIncrement();
+
        /*
         * close the operator relation and return the oid.
         */
@@ -289,14 +294,19 @@ OperatorShellMake(const char *operatorName,
  *             operatorNamespace               namespace for new operator
  *             leftTypeId                              X left type ID
  *             rightTypeId                             X right type ID
- *             procedureName                   procedure for operator
+ *             procedureId                             procedure ID for operator
  *             commutatorName                  X commutator operator
  *             negatorName                             X negator operator
- *             restrictionName                 X restriction sel. procedure
- *             joinName                                X join sel. procedure
+ *             restrictionId                   X restriction selectivity procedure ID
+ *             joinId                                  X join selectivity procedure ID
  *             canMerge                                merge join can be used with this operator
  *             canHash                                 hash join can be used with this operator
  *
+ * The caller should have validated properties and permissions for the
+ * objects passed as OID references.  We must handle the commutator and
+ * negator operator references specially, however, since those need not
+ * exist beforehand.
+ *
  * This routine gets complicated because it allows the user to
  * specify operators that do not exist.  For example, if operator
  * "op" is being defined, the negator operator "negop" and the
@@ -310,59 +320,17 @@ OperatorShellMake(const char *operatorName,
  * information about the operator (just its name and types).
  * Forward declaration is used only for this purpose, it is
  * not available to the user as it is for type definition.
- *
- * Algorithm:
- *
- * check if operator already defined
- *       if so, but oprcode is null, save the Oid -- we are filling in a shell
- *       otherwise error
- * get the attribute types from relation descriptor for pg_operator
- * assign values to the fields of the operator:
- *      operatorName
- *      owner id (simply the user id of the caller)
- *      operator "kind" either "b" for binary or "l" for left unary
- *      canMerge boolean
- *      canHash boolean
- *      leftTypeObjectId -- type must already be defined
- *      rightTypeObjectId -- this is optional, enter ObjectId=0 if none specified
- *      resultType -- defer this, since it must be determined from
- *                                the pg_procedure catalog
- *      commutatorObjectId -- if this is NULL, enter ObjectId=0
- *                                       else if this already exists, enter its ObjectId
- *                                       else if this does not yet exist, and is not
- *                                             the same as the main operatorName, then create
- *                                             a shell and enter the new ObjectId
- *                                       else if this does not exist but IS the same
- *                                             name & types as the main operator, set the ObjectId=0.
- *                                             (We are creating a self-commutating operator.)
- *                                             The link will be fixed later by OperatorUpd.
- *      negatorObjectId   -- same as for commutatorObjectId
- *      operatorProcedure -- must access the pg_procedure catalog to get the
- *                                ObjectId of the procedure that actually does the operator
- *                                actions this is required.  Do a lookup to find out the
- *                                return type of the procedure
- *      restrictionProcedure -- must access the pg_procedure catalog to get
- *                                the ObjectId but this is optional
- *      joinProcedure -- same as restrictionProcedure
- * now either insert or replace the operator into the pg_operator catalog
- * if the operator shell is being filled in
- *      access the catalog in order to get a valid buffer
- *      create a tuple using ModifyHeapTuple
- *      get the t_self from the modified tuple and call RelationReplaceHeapTuple
- * else if a new operator is being created
- *      create a tuple using heap_formtuple
- *      call simple_heap_insert
  */
 void
 OperatorCreate(const char *operatorName,
                           Oid operatorNamespace,
                           Oid leftTypeId,
                           Oid rightTypeId,
-                          List *procedureName,
+                          Oid procedureId,
                           List *commutatorName,
                           List *negatorName,
-                          List *restrictionName,
-                          List *joinName,
+                          Oid restrictionId,
+                          Oid joinId,
                           bool canMerge,
                           bool canHash)
 {
@@ -373,15 +341,10 @@ OperatorCreate(const char *operatorName,
        Datum           values[Natts_pg_operator];
        Oid                     operatorObjectId;
        bool            operatorAlreadyDefined;
-       Oid                     procOid;
        Oid                     operResultType;
        Oid                     commutatorId,
-                               negatorId,
-                               restOid,
-                               joinOid;
+                               negatorId;
        bool            selfCommutator = false;
-       Oid                     typeId[4];              /* only need up to 4 args here */
-       int                     nargs;
        NameData        oname;
        TupleDesc       tupDesc;
        int                     i;
@@ -395,11 +358,6 @@ OperatorCreate(const char *operatorName,
                                 errmsg("\"%s\" is not a valid operator name",
                                                operatorName)));
 
-       if (!OidIsValid(leftTypeId) && !OidIsValid(rightTypeId))
-               ereport(ERROR,
-                               (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
-                  errmsg("at least one of leftarg or rightarg must be specified")));
-
        if (!(OidIsValid(leftTypeId) && OidIsValid(rightTypeId)))
        {
                /* If it's not a binary op, these things mustn't be set: */
@@ -407,7 +365,7 @@ OperatorCreate(const char *operatorName,
                        ereport(ERROR,
                                        (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
                                         errmsg("only binary operators can have commutators")));
-               if (joinName)
+               if (OidIsValid(joinId))
                        ereport(ERROR,
                                        (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
                                 errmsg("only binary operators can have join selectivity")));
@@ -421,6 +379,33 @@ OperatorCreate(const char *operatorName,
                                         errmsg("only binary operators can hash")));
        }
 
+       operResultType = get_func_rettype(procedureId);
+
+       if (operResultType != BOOLOID)
+       {
+               /* If it's not a boolean op, these things mustn't be set: */
+               if (negatorName)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                        errmsg("only boolean operators can have negators")));
+               if (OidIsValid(restrictionId))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                errmsg("only boolean operators can have restriction selectivity")));
+               if (OidIsValid(joinId))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                errmsg("only boolean operators can have join selectivity")));
+               if (canMerge)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                        errmsg("only boolean operators can merge join")));
+               if (canHash)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                        errmsg("only boolean operators can hash")));
+       }
+
        operatorObjectId = OperatorGet(operatorName,
                                                                   operatorNamespace,
                                                                   leftTypeId,
@@ -435,85 +420,13 @@ OperatorCreate(const char *operatorName,
 
        /*
         * At this point, if operatorObjectId is not InvalidOid then we are
-        * filling in a previously-created shell.
-        */
-
-       /*
-        * Look up registered procedures -- find the return type of procedureName
-        * to place in "result" field. Do this before shells are created so we
-        * don't have to worry about deleting them later.
-        */
-       if (!OidIsValid(leftTypeId))
-       {
-               typeId[0] = rightTypeId;
-               nargs = 1;
-       }
-       else if (!OidIsValid(rightTypeId))
-       {
-               typeId[0] = leftTypeId;
-               nargs = 1;
-       }
-       else
-       {
-               typeId[0] = leftTypeId;
-               typeId[1] = rightTypeId;
-               nargs = 2;
-       }
-       procOid = LookupFuncName(procedureName, nargs, typeId, false);
-       operResultType = get_func_rettype(procOid);
-
-       /*
-        * find restriction estimator
-        */
-       if (restrictionName)
-       {
-               typeId[0] = INTERNALOID;        /* Query */
-               typeId[1] = OIDOID;             /* operator OID */
-               typeId[2] = INTERNALOID;        /* args list */
-               typeId[3] = INT4OID;    /* varRelid */
-
-               restOid = LookupFuncName(restrictionName, 4, typeId, false);
-       }
-       else
-               restOid = InvalidOid;
-
-       /*
-        * find join estimator
-        */
-       if (joinName)
-       {
-               typeId[0] = INTERNALOID;        /* Query */
-               typeId[1] = OIDOID;             /* operator OID */
-               typeId[2] = INTERNALOID;        /* args list */
-               typeId[3] = INT2OID;    /* jointype */
-
-               joinOid = LookupFuncName(joinName, 4, typeId, false);
-       }
-       else
-               joinOid = InvalidOid;
-
-       /*
-        * set up values in the operator tuple
+        * filling in a previously-created shell.  Insist that the user own
+        * any such shell.
         */
-
-       for (i = 0; i < Natts_pg_operator; ++i)
-       {
-               values[i] = (Datum) NULL;
-               replaces[i] = 'r';
-               nulls[i] = ' ';
-       }
-
-       i = 0;
-       namestrcpy(&oname, operatorName);
-       values[i++] = NameGetDatum(&oname); /* oprname */
-       values[i++] = ObjectIdGetDatum(operatorNamespace);      /* oprnamespace */
-       values[i++] = ObjectIdGetDatum(GetUserId());            /* oprowner */
-       values[i++] = CharGetDatum(leftTypeId ? (rightTypeId ? 'b' : 'r') : 'l');       /* oprkind */
-       values[i++] = BoolGetDatum(canMerge);           /* oprcanmerge */
-       values[i++] = BoolGetDatum(canHash);            /* oprcanhash */
-       values[i++] = ObjectIdGetDatum(leftTypeId); /* oprleft */
-       values[i++] = ObjectIdGetDatum(rightTypeId);            /* oprright */
-       values[i++] = ObjectIdGetDatum(operResultType);         /* oprresult */
+       if (OidIsValid(operatorObjectId) &&
+               !pg_oper_ownercheck(operatorObjectId, GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER,
+                                          operatorName);
 
        /*
         * Set up the other operators.  If they do not currently exist, create
@@ -529,6 +442,12 @@ OperatorCreate(const char *operatorName,
                                                                                  leftTypeId, rightTypeId,
                                                                                  true);
 
+               /* Permission check: must own other operator */
+               if (OidIsValid(commutatorId) &&
+                       !pg_oper_ownercheck(commutatorId, GetUserId()))
+                       aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER,
+                                                  NameListToString(commutatorName));
+
                /*
                 * self-linkage to this operator; will fix below. Note that only
                 * self-linkage for commutation makes sense.
@@ -538,7 +457,6 @@ OperatorCreate(const char *operatorName,
        }
        else
                commutatorId = InvalidOid;
-       values[i++] = ObjectIdGetDatum(commutatorId);           /* oprcom */
 
        if (negatorName)
        {
@@ -548,19 +466,48 @@ OperatorCreate(const char *operatorName,
                                                                           operatorName, operatorNamespace,
                                                                           leftTypeId, rightTypeId,
                                                                           false);
+
+               /* Permission check: must own other operator */
+               if (OidIsValid(negatorId) &&
+                       !pg_oper_ownercheck(negatorId, GetUserId()))
+                       aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER,
+                                                  NameListToString(negatorName));
        }
        else
                negatorId = InvalidOid;
-       values[i++] = ObjectIdGetDatum(negatorId);      /* oprnegate */
 
-       values[i++] = ObjectIdGetDatum(procOid);        /* oprcode */
-       values[i++] = ObjectIdGetDatum(restOid);        /* oprrest */
-       values[i++] = ObjectIdGetDatum(joinOid);        /* oprjoin */
+       /*
+        * set up values in the operator tuple
+        */
+
+       for (i = 0; i < Natts_pg_operator; ++i)
+       {
+               values[i] = (Datum) NULL;
+               replaces[i] = 'r';
+               nulls[i] = ' ';
+       }
+
+       i = 0;
+       namestrcpy(&oname, operatorName);
+       values[i++] = NameGetDatum(&oname); /* oprname */
+       values[i++] = ObjectIdGetDatum(operatorNamespace);      /* oprnamespace */
+       values[i++] = ObjectIdGetDatum(GetUserId());            /* oprowner */
+       values[i++] = CharGetDatum(leftTypeId ? (rightTypeId ? 'b' : 'r') : 'l');       /* oprkind */
+       values[i++] = BoolGetDatum(canMerge);           /* oprcanmerge */
+       values[i++] = BoolGetDatum(canHash);            /* oprcanhash */
+       values[i++] = ObjectIdGetDatum(leftTypeId); /* oprleft */
+       values[i++] = ObjectIdGetDatum(rightTypeId);            /* oprright */
+       values[i++] = ObjectIdGetDatum(operResultType);         /* oprresult */
+       values[i++] = ObjectIdGetDatum(commutatorId);           /* oprcom */
+       values[i++] = ObjectIdGetDatum(negatorId);              /* oprnegate */
+       values[i++] = ObjectIdGetDatum(procedureId);    /* oprcode */
+       values[i++] = ObjectIdGetDatum(restrictionId);  /* oprrest */
+       values[i++] = ObjectIdGetDatum(joinId);                 /* oprjoin */
 
        pg_operator_desc = heap_open(OperatorRelationId, RowExclusiveLock);
 
        /*
-        * If we are adding to an operator shell, update; else insert
+        * If we are replacing an operator shell, update; else insert
         */
        if (operatorObjectId)
        {
@@ -706,7 +653,8 @@ OperatorUpd(Oid baseId, Oid commId, Oid negId)
        /*
         * check and update the commutator & negator, if necessary
         *
-        * First make sure we can see them...
+        * We need a CommandCounterIncrement here in case of a self-commutator
+        * operator: we'll need to update the tuple that we just inserted.
         */
        CommandCounterIncrement();
 
index 884181c5cd91fae0b32bbc61da53ecd7d221909d..391b4d6507d7c1b8e086263b4f2c88b29d4cdc0f 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.40 2008/06/19 00:46:04 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.41 2008/08/16 00:01:35 tgl Exp $
  *
  * DESCRIPTION
  *       The "DefineFoo" routines take the parse tree and pick out the
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
+#include "parser/parse_func.h"
 #include "parser/parse_oper.h"
 #include "parser/parse_type.h"
 #include "utils/acl.h"
@@ -76,6 +78,11 @@ DefineOperator(List *names, List *parameters)
        List       *negatorName = NIL;          /* optional negator operator name */
        List       *restrictionName = NIL;      /* optional restrict. sel. procedure */
        List       *joinName = NIL; /* optional join sel. procedure */
+       Oid                     functionOid;    /* functions converted to OID */
+       Oid                     restrictionOid;
+       Oid                     joinOid;
+       Oid                     typeId[5];              /* only need up to 5 args here */
+       int                     nargs;
        ListCell   *pl;
 
        /* Convert list of names to a name and namespace */
@@ -154,6 +161,109 @@ DefineOperator(List *names, List *parameters)
        if (typeName2)
                typeId2 = typenameTypeId(NULL, typeName2, NULL);
 
+       if (!OidIsValid(typeId1) && !OidIsValid(typeId2))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                  errmsg("at least one of leftarg or rightarg must be specified")));
+
+       /*
+        * Look up the operator's underlying function.
+        */
+       if (!OidIsValid(typeId1))
+       {
+               typeId[0] = typeId2;
+               nargs = 1;
+       }
+       else if (!OidIsValid(typeId2))
+       {
+               typeId[0] = typeId1;
+               nargs = 1;
+       }
+       else
+       {
+               typeId[0] = typeId1;
+               typeId[1] = typeId2;
+               nargs = 2;
+       }
+       functionOid = LookupFuncName(functionName, nargs, typeId, false);
+
+       /*
+        * We require EXECUTE rights for the function.  This isn't strictly
+        * necessary, since EXECUTE will be checked at any attempted use of
+        * the operator, but it seems like a good idea anyway.
+        */
+       aclresult = pg_proc_aclcheck(functionOid, GetUserId(), ACL_EXECUTE);
+       if (aclresult != ACLCHECK_OK)
+               aclcheck_error(aclresult, ACL_KIND_PROC,
+                                          NameListToString(functionName));
+
+       /*
+        * Look up restriction estimator if specified
+        */
+       if (restrictionName)
+       {
+               typeId[0] = INTERNALOID;        /* PlannerInfo */
+               typeId[1] = OIDOID;             /* operator OID */
+               typeId[2] = INTERNALOID;        /* args list */
+               typeId[3] = INT4OID;    /* varRelid */
+
+               restrictionOid = LookupFuncName(restrictionName, 4, typeId, false);
+
+               /* estimators must return float8 */
+               if (get_func_rettype(restrictionOid) != FLOAT8OID)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("restriction estimator function %s must return type \"float8\"",
+                                                       NameListToString(restrictionName))));
+
+               /* Require EXECUTE rights for the estimator */
+               aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE);
+               if (aclresult != ACLCHECK_OK)
+                       aclcheck_error(aclresult, ACL_KIND_PROC,
+                                                  NameListToString(restrictionName));
+       }
+       else
+               restrictionOid = InvalidOid;
+
+       /*
+        * Look up join estimator if specified
+        */
+       if (joinName)
+       {
+               typeId[0] = INTERNALOID;        /* PlannerInfo */
+               typeId[1] = OIDOID;             /* operator OID */
+               typeId[2] = INTERNALOID;        /* args list */
+               typeId[3] = INT2OID;    /* jointype */
+               typeId[4] = INTERNALOID;        /* SpecialJoinInfo */
+
+               /*
+                * As of Postgres 8.4, the preferred signature for join estimators
+                * has 5 arguments, but we still allow the old 4-argument form.
+                * Try the preferred form first.
+                */
+               joinOid = LookupFuncName(joinName, 5, typeId, true);
+               if (!OidIsValid(joinOid))
+                       joinOid = LookupFuncName(joinName, 4, typeId, true);
+               /* If not found, reference the 5-argument signature in error msg */
+               if (!OidIsValid(joinOid))
+                       joinOid = LookupFuncName(joinName, 5, typeId, false);
+
+               /* estimators must return float8 */
+               if (get_func_rettype(joinOid) != FLOAT8OID)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("join estimator function %s must return type \"float8\"",
+                                                       NameListToString(joinName))));
+
+               /* Require EXECUTE rights for the estimator */
+               aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE);
+               if (aclresult != ACLCHECK_OK)
+                       aclcheck_error(aclresult, ACL_KIND_PROC,
+                                                  NameListToString(joinName));
+       }
+       else
+               joinOid = InvalidOid;
+
        /*
         * now have OperatorCreate do all the work..
         */
@@ -161,11 +271,11 @@ DefineOperator(List *names, List *parameters)
                                   oprNamespace,        /* namespace */
                                   typeId1,             /* left type id */
                                   typeId2,             /* right type id */
-                                  functionName,        /* function for operator */
+                                  functionOid, /* function for operator */
                                   commutatorName,              /* optional commutator operator name */
                                   negatorName, /* optional negator operator name */
-                                  restrictionName,             /* optional restrict. sel. procedure */
-                                  joinName,    /* optional join sel. procedure name */
+                                  restrictionOid,              /* optional restrict. sel. procedure */
+                                  joinOid,             /* optional join sel. procedure name */
                                   canMerge,    /* operator merges */
                                   canHash);    /* operator hashes */
 }
index e788f2787310cd3d8f119738f5a503c149e6329e..6db42c5f5860a5edabbe030fa0b0761737d5cca0 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -398,6 +398,50 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
        return false;
 }
 
+/*
+ * treat_as_join_clause -
+ *       Decide whether an operator clause is to be handled by the
+ *       restriction or join estimator.  Subroutine for clause_selectivity().
+ */
+static inline bool
+treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
+                                        int varRelid, SpecialJoinInfo *sjinfo)
+{
+       if (varRelid != 0)
+       {
+               /*
+                * Caller is forcing restriction mode (eg, because we are examining
+                * an inner indexscan qual).
+                */
+               return false;
+       }
+       else if (sjinfo == NULL)
+       {
+               /*
+                * It must be a restriction clause, since it's being evaluated at
+                * a scan node.
+                */
+               return false;
+       }
+       else
+       {
+               /*
+                * Otherwise, it's a join if there's more than one relation used.
+                * We can optimize this calculation if an rinfo was passed.
+                *
+                * XXX  Since we know the clause is being evaluated at a join,
+                * the only way it could be single-relation is if it was delayed
+                * by outer joins.  Although we can make use of the restriction
+                * qual estimators anyway, it seems likely that we ought to account
+                * for the probability of injected nulls somehow.
+                */
+               if (rinfo)
+                       return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
+               else
+                       return (NumRelids(clause) > 1);
+       }
+}
+
 
 /*
  * clause_selectivity -
@@ -429,9 +473,6 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
  *        root->join_info_list.
  *     2. For an INNER join, sjinfo is just a transient struct, and only the
  *        relids and jointype fields in it can be trusted.
- *     3. XXX sjinfo might be NULL even though it really is a join.  This case
- *        will go away soon, but fixing it requires API changes for oprjoin and
- *        amcostestimate functions.
  * It is possible for jointype to be different from sjinfo->jointype.
  * This indicates we are considering a variant join: either with
  * the LHS and RHS switched, or with one input unique-ified.
@@ -603,36 +644,14 @@ clause_selectivity(PlannerInfo *root,
        else if (is_opclause(clause) || IsA(clause, DistinctExpr))
        {
                Oid                     opno = ((OpExpr *) clause)->opno;
-               bool            is_join_clause;
 
-               if (varRelid != 0)
-               {
-                       /*
-                        * If we are considering a nestloop join then all clauses are
-                        * restriction clauses, since we are only interested in the one
-                        * relation.
-                        */
-                       is_join_clause = false;
-               }
-               else
-               {
-                       /*
-                        * Otherwise, it's a join if there's more than one relation used.
-                        * We can optimize this calculation if an rinfo was passed.
-                        */
-                       if (rinfo)
-                               is_join_clause = (bms_membership(rinfo->clause_relids) ==
-                                                                 BMS_MULTIPLE);
-                       else
-                               is_join_clause = (NumRelids(clause) > 1);
-               }
-
-               if (is_join_clause)
+               if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
                {
                        /* Estimate selectivity for a join clause. */
                        s1 = join_selectivity(root, opno,
                                                                  ((OpExpr *) clause)->args,
-                                                                 jointype);
+                                                                 jointype,
+                                                                 sjinfo);
                }
                else
                {
@@ -671,35 +690,11 @@ clause_selectivity(PlannerInfo *root,
 #endif
        else if (IsA(clause, ScalarArrayOpExpr))
        {
-               /* First, decide if it's a join clause, same as for OpExpr */
-               bool            is_join_clause;
-
-               if (varRelid != 0)
-               {
-                       /*
-                        * If we are considering a nestloop join then all clauses are
-                        * restriction clauses, since we are only interested in the one
-                        * relation.
-                        */
-                       is_join_clause = false;
-               }
-               else
-               {
-                       /*
-                        * Otherwise, it's a join if there's more than one relation used.
-                        * We can optimize this calculation if an rinfo was passed.
-                        */
-                       if (rinfo)
-                               is_join_clause = (bms_membership(rinfo->clause_relids) ==
-                                                                 BMS_MULTIPLE);
-                       else
-                               is_join_clause = (NumRelids(clause) > 1);
-               }
-
                /* Use node specific selectivity calculation function */
                s1 = scalararraysel(root,
                                                        (ScalarArrayOpExpr *) clause,
-                                                       is_join_clause,
+                                                       treat_as_join_clause(clause, rinfo,
+                                                                                                varRelid, sjinfo),
                                                        varRelid,
                                                        jointype,
                                                        sjinfo);
index d9d8c89864064adbdc43ebdf9690f96c47253cfc..14c3cf1f21d3b87add746a4ec13dbee03c99a981 100644 (file)
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.193 2008/08/14 18:47:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.194 2008/08/16 00:01:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,10 +118,8 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
                           RestrictInfo *rinfo,
                           PathKey *pathkey);
 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
-static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
-                                  SpecialJoinInfo *sjinfo);
-static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root,
-                                                                          SpecialJoinInfo *sjinfo);
+static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
+                                                                List *quals, SpecialJoinInfo *sjinfo);
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
@@ -1288,20 +1286,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
        double          outer_path_rows = PATH_ROWS(outer_path);
        double          inner_path_rows = nestloop_inner_path_rows(inner_path);
        double          ntuples;
-       Selectivity joininfactor;
 
        if (!enable_nestloop)
                startup_cost += disable_cost;
 
-       /*
-        * If we're doing JOIN_IN then we will stop scanning inner tuples for an
-        * outer tuple as soon as we have one match.  Account for the effects of
-        * this by scaling down the cost estimates in proportion to the JOIN_IN
-        * selectivity.  (This assumes that all the quals attached to the join are
-        * IN quals, which should be true.)
-        */
-       joininfactor = join_in_selectivity(path, root, sjinfo);
-
        /* cost of source data */
 
        /*
@@ -1328,12 +1316,12 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
        }
        run_cost += outer_path_rows *
-               (inner_path->total_cost - inner_path->startup_cost) * joininfactor;
+               (inner_path->total_cost - inner_path->startup_cost);
 
        /*
         * Compute number of tuples processed (not number emitted!)
         */
-       ntuples = outer_path_rows * inner_path_rows * joininfactor;
+       ntuples = outer_path_rows * inner_path_rows;
 
        /* CPU costs */
        cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
@@ -1369,7 +1357,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
        Cost            cpu_per_tuple;
-       Selectivity merge_selec;
        QualCost        merge_qual_cost;
        QualCost        qp_qual_cost;
        double          outer_path_rows = PATH_ROWS(outer_path);
@@ -1385,7 +1372,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                                outerendsel,
                                innerstartsel,
                                innerendsel;
-       Selectivity joininfactor;
        Path            sort_path;              /* dummy for result of cost_sort */
 
        /* Protect some assumptions below that rowcounts aren't zero */
@@ -1398,21 +1384,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                startup_cost += disable_cost;
 
        /*
-        * Compute cost and selectivity of the mergequals and qpquals (other
-        * restriction clauses) separately.  We use approx_selectivity here for
-        * speed --- in most cases, any errors won't affect the result much.
-        *
-        * Note: it's probably bogus to use the normal selectivity calculation
-        * here when either the outer or inner path is a UniquePath.
+        * Compute cost of the mergequals and qpquals (other restriction clauses)
+        * separately.
         */
-       merge_selec = approx_selectivity(root, mergeclauses, sjinfo);
        cost_qual_eval(&merge_qual_cost, mergeclauses, root);
        cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
        qp_qual_cost.startup -= merge_qual_cost.startup;
        qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
 
-       /* approx # tuples passing the merge quals */
-       mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows);
+       /*
+        * Get approx # tuples passing the mergequals.  We use approx_tuple_count
+        * here for speed --- in most cases, any errors won't affect the result
+        * much.
+        */
+       mergejointuples = approx_tuple_count(root, &path->jpath,
+                                                                                mergeclauses, sjinfo);
 
        /*
         * When there are equal merge keys in the outer relation, the mergejoin
@@ -1422,11 +1408,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         * but on the other hand we ignore the bookkeeping costs of mark/restore.
         * Not clear if it's worth developing a more refined model.
         *
-        * The number of re-fetches can be estimated approximately as size of
-        * merge join output minus size of inner relation.      Assume that the
-        * distinct key values are 1, 2, ..., and denote the number of values of
-        * each key in the outer relation as m1, m2, ...; in the inner relation,
-        * n1, n2, ... Then we have
+        * For regular inner and outer joins, the number of re-fetches can be
+        * estimated approximately as size of merge join output minus size of
+        * inner relation. Assume that the distinct key values are 1, 2, ..., and
+        * denote the number of values of each key in the outer relation as m1,
+        * m2, ...; in the inner relation, n1, n2, ...  Then we have
         *
         * size of join = m1 * n1 + m2 * n2 + ...
         *
@@ -1439,8 +1425,17 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         * are effectively subtracting those from the number of rescanned tuples,
         * when we should not.  Can we do better without expensive selectivity
         * computations?
+        *
+        * For SEMI and ANTI joins, only one inner tuple need be rescanned for
+        * each group of same-keyed outer tuples (assuming that all joinquals
+        * are merge quals).  This makes the effect small enough to ignore,
+        * so we just set rescannedtuples = 0.  Likewise, the whole issue is
+        * moot if we are working from a unique-ified outer input.
         */
-       if (IsA(outer_path, UniquePath))
+       if (sjinfo->jointype == JOIN_SEMI ||
+               sjinfo->jointype == JOIN_ANTI)
+               rescannedtuples = 0;
+       else if (IsA(outer_path, UniquePath))
                rescannedtuples = 0;
        else
        {
@@ -1505,7 +1500,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                        innerstartsel = cache->leftstartsel;
                        innerendsel = cache->leftendsel;
                }
-               if (path->jpath.jointype == JOIN_LEFT)
+               if (path->jpath.jointype == JOIN_LEFT ||
+                       path->jpath.jointype == JOIN_ANTI)
                {
                        outerstartsel = 0.0;
                        outerendsel = 1.0;
@@ -1600,21 +1596,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 
        /* CPU costs */
 
-       /*
-        * If we're doing JOIN_IN then we will stop outputting inner tuples for an
-        * outer tuple as soon as we have one match.  Account for the effects of
-        * this by scaling down the cost estimates in proportion to the expected
-        * output size.  (This assumes that all the quals attached to the join are
-        * IN quals, which should be true.)
-        */
-       joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
-
        /*
         * The number of tuple comparisons needed is approximately number of outer
         * rows plus number of inner rows plus number of rescanned tuples (can we
         * refine this?).  At each one, we need to evaluate the mergejoin quals.
-        * NOTE: JOIN_IN mode does not save any work here, so do NOT include
-        * joininfactor.
         */
        startup_cost += merge_qual_cost.startup;
        startup_cost += merge_qual_cost.per_tuple *
@@ -1627,12 +1612,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         * For each tuple that gets through the mergejoin proper, we charge
         * cpu_tuple_cost plus the cost of evaluating additional restriction
         * clauses that are to be applied at the join.  (This is pessimistic since
-        * not all of the quals may get evaluated at each tuple.)  This work is
-        * skipped in JOIN_IN mode, so apply the factor.
+        * not all of the quals may get evaluated at each tuple.)
         */
        startup_cost += qp_qual_cost.startup;
        cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
-       run_cost += cpu_per_tuple * mergejointuples * joininfactor;
+       run_cost += cpu_per_tuple * mergejointuples;
 
        path->jpath.path.startup_cost = startup_cost;
        path->jpath.path.total_cost = startup_cost + run_cost;
@@ -1711,7 +1695,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
        Cost            cpu_per_tuple;
-       Selectivity hash_selec;
        QualCost        hash_qual_cost;
        QualCost        qp_qual_cost;
        double          hashjointuples;
@@ -1722,28 +1705,27 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
        int                     numbatches;
        double          virtualbuckets;
        Selectivity innerbucketsize;
-       Selectivity joininfactor;
        ListCell   *hcl;
 
        if (!enable_hashjoin)
                startup_cost += disable_cost;
 
        /*
-        * Compute cost and selectivity of the hashquals and qpquals (other
-        * restriction clauses) separately.  We use approx_selectivity here for
-        * speed --- in most cases, any errors won't affect the result much.
-        *
-        * Note: it's probably bogus to use the normal selectivity calculation
-        * here when either the outer or inner path is a UniquePath.
+        * Compute cost of the hashquals and qpquals (other restriction clauses)
+        * separately.
         */
-       hash_selec = approx_selectivity(root, hashclauses, sjinfo);
        cost_qual_eval(&hash_qual_cost, hashclauses, root);
        cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
        qp_qual_cost.startup -= hash_qual_cost.startup;
        qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
 
-       /* approx # tuples passing the hash quals */
-       hashjointuples = clamp_row_est(hash_selec * outer_path_rows * inner_path_rows);
+       /*
+        * Get approx # tuples passing the hashquals.  We use approx_tuple_count
+        * here for speed --- in most cases, any errors won't affect the result
+        * much.
+        */
+       hashjointuples = approx_tuple_count(root, &path->jpath,
+                                                                               hashclauses, sjinfo);
 
        /* cost of source data */
        startup_cost += outer_path->startup_cost;
@@ -1858,15 +1840,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 
        /* CPU costs */
 
-       /*
-        * If we're doing JOIN_IN then we will stop comparing inner tuples to an
-        * outer tuple as soon as we have one match.  Account for the effects of
-        * this by scaling down the cost estimates in proportion to the expected
-        * output size.  (This assumes that all the quals attached to the join are
-        * IN quals, which should be true.)
-        */
-       joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
-
        /*
         * The number of tuple comparisons needed is the number of outer tuples
         * times the typical number of tuples in a hash bucket, which is the inner
@@ -1878,8 +1851,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         */
        startup_cost += hash_qual_cost.startup;
        run_cost += hash_qual_cost.per_tuple *
-               outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
-               joininfactor * 0.5;
+               outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
 
        /*
         * For each tuple that gets through the hashjoin proper, we charge
@@ -1889,7 +1861,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         */
        startup_cost += qp_qual_cost.startup;
        cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
-       run_cost += cpu_per_tuple * hashjointuples * joininfactor;
+       run_cost += cpu_per_tuple * hashjointuples;
 
        path->jpath.path.startup_cost = startup_cost;
        path->jpath.path.total_cost = startup_cost + run_cost;
@@ -2213,13 +2185,12 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
 
 
 /*
- * approx_selectivity
- *             Quick-and-dirty estimation of clause selectivities.
- *             The input can be either an implicitly-ANDed list of boolean
- *             expressions, or a list of RestrictInfo nodes (typically the latter).
+ * approx_tuple_count
+ *             Quick-and-dirty estimation of the number of join rows passing
+ *             a set of qual conditions.
  *
- * Currently this is only used in join estimation, so sjinfo should never
- * be NULL.
+ * The quals can be either an implicitly-ANDed list of boolean expressions,
+ * or a list of RestrictInfo nodes (typically the latter).
  *
  * This is quick-and-dirty because we bypass clauselist_selectivity, and
  * simply multiply the independent clause selectivities together.  Now
@@ -2232,20 +2203,34 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
  * output tuples are generated and passed through qpqual checking, it
  * seems OK to live with the approximation.
  */
-static Selectivity
-approx_selectivity(PlannerInfo *root, List *quals, SpecialJoinInfo *sjinfo)
+static double
+approx_tuple_count(PlannerInfo *root, JoinPath *path,
+                                  List *quals, SpecialJoinInfo *sjinfo)
 {
-       Selectivity total = 1.0;
+       double          tuples;
+       double          outer_tuples = path->outerjoinpath->parent->rows;
+       double          inner_tuples = path->innerjoinpath->parent->rows;
+       Selectivity selec = 1.0;
        ListCell   *l;
 
+       /* Get the approximate selectivity */
        foreach(l, quals)
        {
                Node       *qual = (Node *) lfirst(l);
 
                /* Note that clause_selectivity will be able to cache its result */
-               total *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
+               selec *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
        }
-       return total;
+
+       /* Apply it correctly using the input relation sizes */
+       if (sjinfo->jointype == JOIN_SEMI)
+               tuples = selec * outer_tuples;
+       else if (sjinfo->jointype == JOIN_ANTI)
+               tuples = (1.0 - selec) * outer_tuples;
+       else
+               tuples = selec * outer_tuples * inner_tuples;
+
+       return clamp_row_est(tuples);
 }
 
 
@@ -2315,7 +2300,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
        Selectivity jselec;
        Selectivity pselec;
        double          nrows;
-       UniquePath *upath;
 
        /*
         * Compute joinclause selectivity.      Note that we are only considering
@@ -2380,9 +2364,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
         * pushed-down quals are applied after the outer join, so their
         * selectivity applies fully.
         *
-        * For JOIN_IN and variants, the Cartesian product is figured with respect
-        * to a unique-ified input, and then we can clamp to the size of the other
-        * input.
+        * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction
+        * of LHS rows that have matches, and we apply that straightforwardly.
         */
        switch (jointype)
        {
@@ -2404,22 +2387,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                        nrows *= pselec;
                        break;
                case JOIN_SEMI:
-                       /* XXX this is unsafe, could Assert? */
-                       upath = create_unique_path(root, inner_rel,
-                                                                          inner_rel->cheapest_total_path,
-                                                                          sjinfo);
-                       if (upath)
-                               nrows = outer_rel->rows * upath->rows * jselec;
-                       else
-                               nrows = outer_rel->rows * inner_rel->rows * jselec;
-                       if (nrows > outer_rel->rows)
-                               nrows = outer_rel->rows;
+                       nrows = outer_rel->rows * jselec;
+                       nrows *= pselec;
                        break;
                case JOIN_ANTI:
-                       /* XXX this is utterly wrong */
-                       nrows = outer_rel->rows * inner_rel->rows * jselec;
-                       if (nrows < outer_rel->rows)
-                               nrows = outer_rel->rows;
+                       nrows = outer_rel->rows * (1.0 - jselec);
                        nrows *= pselec;
                        break;
                default:
@@ -2432,67 +2404,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
        rel->rows = clamp_row_est(nrows);
 }
 
-/*
- * join_in_selectivity
- *       Determines the factor by which a JOIN_IN join's result is expected
- *       to be smaller than an ordinary inner join.
- *
- * 'path' is already filled in except for the cost fields
- * 'sjinfo' must be the JOIN_SEMI SpecialJoinInfo for the join
- */
-static Selectivity
-join_in_selectivity(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
-{
-       RelOptInfo *innerrel;
-       UniquePath *innerunique;
-       Selectivity selec;
-       double          nrows;
-
-       /* Return 1.0 whenever it's not JOIN_IN */
-       if (path->jointype != JOIN_SEMI)
-               return 1.0;
-       Assert(sjinfo && sjinfo->jointype == JOIN_SEMI);
-
-       /*
-        * Return 1.0 if the inner side is already known unique.  The case where
-        * the inner path is already a UniquePath probably cannot happen in
-        * current usage, but check it anyway for completeness.  The interesting
-        * case is where we've determined the inner relation itself is unique,
-        * which we can check by looking at the rows estimate for its UniquePath.
-        */
-       if (IsA(path->innerjoinpath, UniquePath))
-               return 1.0;
-       innerrel = path->innerjoinpath->parent;
-       /* XXX might assert if sjinfo doesn't exactly match innerrel? */
-       innerunique = create_unique_path(root,
-                                                                        innerrel,
-                                                                        innerrel->cheapest_total_path,
-                                                                        sjinfo);
-       if (innerunique && innerunique->rows >= innerrel->rows)
-               return 1.0;
-
-       /*
-        * Compute same result set_joinrel_size_estimates would compute for
-        * JOIN_INNER.  Note that we use the input rels' absolute size estimates,
-        * not PATH_ROWS() which might be less; if we used PATH_ROWS() we'd be
-        * double-counting the effects of any join clauses used in input scans.
-        */
-       selec = clauselist_selectivity(root,
-                                                                  path->joinrestrictinfo,
-                                                                  0,
-                                                                  JOIN_INNER,
-                                                                  NULL);
-       nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec;
-
-       nrows = clamp_row_est(nrows);
-
-       /* See if it's larger than the actual JOIN_IN size estimate */
-       if (nrows > path->path.parent->rows)
-               return path->path.parent->rows / nrows;
-       else
-               return 1.0;
-}
-
 /*
  * set_function_size_estimates
  *             Set the size estimates for a base relation that is a function call.
index cb9a07b77d53915c55a10e0d1bd1ec793da41970..30e841b487d3b6ff2b9671ed59106b54477aaccc 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.148 2008/07/13 20:45:47 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.149 2008/08/16 00:01:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -830,7 +830,8 @@ Selectivity
 join_selectivity(PlannerInfo *root,
                                 Oid operator,
                                 List *args,
-                                JoinType jointype)
+                                JoinType jointype,
+                                SpecialJoinInfo *sjinfo)
 {
        RegProcedure oprjoin = get_oprjoin(operator);
        float8          result;
@@ -842,11 +843,12 @@ join_selectivity(PlannerInfo *root,
        if (!oprjoin)
                return (Selectivity) 0.5;
 
-       result = DatumGetFloat8(OidFunctionCall4(oprjoin,
+       result = DatumGetFloat8(OidFunctionCall5(oprjoin,
                                                                                         PointerGetDatum(root),
                                                                                         ObjectIdGetDatum(operator),
                                                                                         PointerGetDatum(args),
-                                                                                        Int16GetDatum(jointype)));
+                                                                                        Int16GetDatum(jointype),
+                                                                                        PointerGetDatum(sjinfo)));
 
        if (result < 0.0 || result > 1.0)
                elog(ERROR, "invalid join selectivity: %f", result);
index 7d5609d447ee48568d3881ac84c8e1326df52301..d484221232c4771c1d3d1a6b0f0ff07728e88605 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.251 2008/08/14 18:47:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.252 2008/08/16 00:01:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
  *
  *             float8 oprrest (internal, oid, internal, int4);
  *
+ * The result is a selectivity, that is, a fraction (0 to 1) of the rows
+ * of the relation that are expected to produce a TRUE result for the
+ * given operator.
+ *
  * The call convention for a join estimator (oprjoin function) is similar
- * except that varRelid is not needed, and instead the join type is
+ * except that varRelid is not needed, and instead join information is
  * supplied:
  *
  *             Selectivity oprjoin (PlannerInfo *root,
  *                                                      Oid operator,
  *                                                      List *args,
- *                                                      JoinType jointype);
+ *                                                      JoinType jointype,
+ *                                                      SpecialJoinInfo *sjinfo);
+ *
+ *             float8 oprjoin (internal, oid, internal, int2, internal);
  *
- *             float8 oprjoin (internal, oid, internal, int2);
+ * (Before Postgres 8.4, join estimators had only the first four of these
+ * parameters.  That signature is still allowed, but deprecated.)  The
+ * relationship between jointype and sjinfo is explained in the comments for
+ * clause_selectivity() --- the short version is that jointype is usually
+ * best ignored in favor of examining sjinfo.
  *
- * (We deliberately make the SQL signature different to facilitate
- * catching errors.)
+ * Join selectivity for regular inner and outer joins is defined as the
+ * fraction (0 to 1) of the cross product of the relations that is expected
+ * to produce a TRUE result for the given operator.  For both semi and anti
+ * joins, however, the selectivity is defined as the fraction of the left-hand
+ * side relation's rows that are expected to have a match (ie, at least one
+ * row with a TRUE result) in the right-hand side.
  *----------
  */
 
@@ -113,6 +128,10 @@ static double var_eq_non_const(VariableStatData *vardata, Oid operator,
 static double ineq_histogram_selectivity(VariableStatData *vardata,
                                                   FmgrInfo *opproc, bool isgt,
                                                   Datum constval, Oid consttype);
+static double eqjoinsel_inner(Oid operator,
+                               VariableStatData *vardata1, VariableStatData *vardata2);
+static double eqjoinsel_semi(Oid operator,
+                               VariableStatData *vardata1, VariableStatData *vardata2);
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                  Datum lobound, Datum hibound, Oid boundstypid,
                                  double *scaledlobound, double *scaledhibound);
@@ -1579,7 +1598,6 @@ scalararraysel(PlannerInfo *root,
        Oid                     nominal_element_type;
        RegProcedure oprsel;
        FmgrInfo        oprselproc;
-       Datum           selarg4;
        Selectivity s1;
 
        /*
@@ -1587,15 +1605,9 @@ scalararraysel(PlannerInfo *root,
         * it hasn't got one.
         */
        if (is_join_clause)
-       {
                oprsel = get_oprjoin(operator);
-               selarg4 = Int16GetDatum(jointype);
-       }
        else
-       {
                oprsel = get_oprrest(operator);
-               selarg4 = Int32GetDatum(varRelid);
-       }
        if (!oprsel)
                return (Selectivity) 0.5;
        fmgr_info(oprsel, &oprselproc);
@@ -1660,11 +1672,19 @@ scalararraysel(PlannerInfo *root,
                                                                                elem_values[i],
                                                                                elem_nulls[i],
                                                                                elmbyval));
-                       s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                         PointerGetDatum(root),
-                                                                                         ObjectIdGetDatum(operator),
-                                                                                         PointerGetDatum(args),
-                                                                                         selarg4));
+                       if (is_join_clause)
+                               s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int16GetDatum(jointype),
+                                                                                                 PointerGetDatum(sjinfo)));
+                       else
+                               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int32GetDatum(varRelid)));
                        if (useOr)
                                s1 = s1 + s2 - s1 * s2;
                        else
@@ -1694,11 +1714,19 @@ scalararraysel(PlannerInfo *root,
                         * estimation function would really care ...
                         */
                        args = list_make2(leftop, elem);
-                       s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                         PointerGetDatum(root),
-                                                                                         ObjectIdGetDatum(operator),
-                                                                                         PointerGetDatum(args),
-                                                                                         selarg4));
+                       if (is_join_clause)
+                               s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int16GetDatum(jointype),
+                                                                                                 PointerGetDatum(sjinfo)));
+                       else
+                               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int32GetDatum(varRelid)));
                        if (useOr)
                                s1 = s1 + s2 - s1 * s2;
                        else
@@ -1721,11 +1749,19 @@ scalararraysel(PlannerInfo *root,
                dummyexpr->typeId = nominal_element_type;
                dummyexpr->typeMod = -1;
                args = list_make2(leftop, dummyexpr);
-               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                 PointerGetDatum(root),
-                                                                                 ObjectIdGetDatum(operator),
-                                                                                 PointerGetDatum(args),
-                                                                                 selarg4));
+               if (is_join_clause)
+                       s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
+                                                                                         PointerGetDatum(root),
+                                                                                         ObjectIdGetDatum(operator),
+                                                                                         PointerGetDatum(args),
+                                                                                         Int16GetDatum(jointype),
+                                                                                         PointerGetDatum(sjinfo)));
+               else
+                       s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
+                                                                                         PointerGetDatum(root),
+                                                                                         ObjectIdGetDatum(operator),
+                                                                                         PointerGetDatum(args),
+                                                                                         Int32GetDatum(varRelid)));
                s1 = useOr ? 0.0 : 1.0;
 
                /*
@@ -1803,13 +1839,24 @@ rowcomparesel(PlannerInfo *root,
        /* Build equivalent arg list for single operator */
        opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
 
-       /* Decide if it's a join clause, same as for OpExpr */
+       /*
+        * Decide if it's a join clause.  This should match clausesel.c's
+        * treat_as_join_clause(), except that we intentionally consider only
+        * the leading columns and not the rest of the clause.
+        */
        if (varRelid != 0)
        {
                /*
-                * If we are considering a nestloop join then all clauses are
-                * restriction clauses, since we are only interested in the one
-                * relation.
+                * Caller is forcing restriction mode (eg, because we are examining
+                * an inner indexscan qual).
+                */
+               is_join_clause = false;
+       }
+       else if (sjinfo == NULL)
+       {
+               /*
+                * It must be a restriction clause, since it's being evaluated at
+                * a scan node.
                 */
                is_join_clause = false;
        }
@@ -1817,7 +1864,6 @@ rowcomparesel(PlannerInfo *root,
        {
                /*
                 * Otherwise, it's a join if there's more than one relation used.
-                * Notice we ignore the low-order columns here.
                 */
                is_join_clause = (NumRelids((Node *) opargs) > 1);
        }
@@ -1827,7 +1873,8 @@ rowcomparesel(PlannerInfo *root,
                /* Estimate selectivity for a join clause. */
                s1 = join_selectivity(root, opno,
                                                          opargs,
-                                                         jointype);
+                                                         jointype,
+                                                         sjinfo);
        }
        else
        {
@@ -1849,10 +1896,60 @@ eqjoinsel(PG_FUNCTION_ARGS)
        PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
+#ifdef NOT_USED
        JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
+#endif
+       SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
        double          selec;
        VariableStatData vardata1;
        VariableStatData vardata2;
+       bool            join_is_reversed;
+
+       get_join_variables(root, args, sjinfo,
+                                          &vardata1, &vardata2, &join_is_reversed);
+
+       switch (sjinfo->jointype)
+       {
+               case JOIN_INNER:
+               case JOIN_LEFT:
+               case JOIN_FULL:
+                       selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
+                       break;
+               case JOIN_SEMI:
+               case JOIN_ANTI:
+                       if (!join_is_reversed)
+                               selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
+                       else
+                               selec = eqjoinsel_semi(get_commutator(operator),
+                                                                          &vardata2, &vardata1);
+                       break;
+               default:
+                       /* other values not expected here */
+                       elog(ERROR, "unrecognized join type: %d",
+                                (int) sjinfo->jointype);
+                       selec = 0;                      /* keep compiler quiet */
+                       break;
+       }
+
+       ReleaseVariableStats(vardata1);
+       ReleaseVariableStats(vardata2);
+
+       CLAMP_PROBABILITY(selec);
+
+       PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * eqjoinsel_inner --- eqjoinsel for normal inner join
+ *
+ * We also use this for LEFT/FULL outer joins; it's not presently clear
+ * that it's worth trying to distinguish them here.
+ */
+static double
+eqjoinsel_inner(Oid operator,
+                               VariableStatData *vardata1, VariableStatData *vardata2)
+{
+       double          selec;
        double          nd1;
        double          nd2;
        Form_pg_statistic stats1 = NULL;
@@ -1868,29 +1965,27 @@ eqjoinsel(PG_FUNCTION_ARGS)
        float4     *numbers2 = NULL;
        int                     nnumbers2 = 0;
 
-       get_join_variables(root, args, &vardata1, &vardata2);
-
-       nd1 = get_variable_numdistinct(&vardata1);
-       nd2 = get_variable_numdistinct(&vardata2);
+       nd1 = get_variable_numdistinct(vardata1);
+       nd2 = get_variable_numdistinct(vardata2);
 
-       if (HeapTupleIsValid(vardata1.statsTuple))
+       if (HeapTupleIsValid(vardata1->statsTuple))
        {
-               stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
-               have_mcvs1 = get_attstatsslot(vardata1.statsTuple,
-                                                                         vardata1.atttype,
-                                                                         vardata1.atttypmod,
+               stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
+               have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
+                                                                         vardata1->atttype,
+                                                                         vardata1->atttypmod,
                                                                          STATISTIC_KIND_MCV,
                                                                          InvalidOid,
                                                                          &values1, &nvalues1,
                                                                          &numbers1, &nnumbers1);
        }
 
-       if (HeapTupleIsValid(vardata2.statsTuple))
+       if (HeapTupleIsValid(vardata2->statsTuple))
        {
-               stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
-               have_mcvs2 = get_attstatsslot(vardata2.statsTuple,
-                                                                         vardata2.atttype,
-                                                                         vardata2.atttypmod,
+               stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
+               have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
+                                                                         vardata2->atttype,
+                                                                         vardata2->atttypmod,
                                                                          STATISTIC_KIND_MCV,
                                                                          InvalidOid,
                                                                          &values2, &nvalues2,
@@ -1932,25 +2027,6 @@ eqjoinsel(PG_FUNCTION_ARGS)
                hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
                hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
 
-               /*
-                * If we are doing any variant of JOIN_SEMI, pretend all the values of
-                * the righthand relation are unique (ie, act as if it's been
-                * DISTINCT'd).
-                *
-                * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or
-                * JOIN_RIGHT here, because we do not have enough information to
-                * determine which var is really on which side of the join. Perhaps
-                * someday we should pass in more information.
-                */
-               if (jointype == JOIN_SEMI)
-               {
-                       float4          oneovern = 1.0 / nd2;
-
-                       for (i = 0; i < nvalues2; i++)
-                               numbers2[i] = oneovern;
-                       nullfrac2 = oneovern;
-               }
-
                /*
                 * Note we assume that each MCV will match at most one member of the
                 * other MCV list.      If the operator isn't really equality, there could
@@ -2075,18 +2151,170 @@ eqjoinsel(PG_FUNCTION_ARGS)
        }
 
        if (have_mcvs1)
-               free_attstatsslot(vardata1.atttype, values1, nvalues1,
+               free_attstatsslot(vardata1->atttype, values1, nvalues1,
                                                  numbers1, nnumbers1);
        if (have_mcvs2)
-               free_attstatsslot(vardata2.atttype, values2, nvalues2,
+               free_attstatsslot(vardata2->atttype, values2, nvalues2,
                                                  numbers2, nnumbers2);
 
-       ReleaseVariableStats(vardata1);
-       ReleaseVariableStats(vardata2);
+       return selec;
+}
 
-       CLAMP_PROBABILITY(selec);
+/*
+ * eqjoinsel_semi --- eqjoinsel for semi join
+ *
+ * (Also used for anti join, which we are supposed to estimate the same way.)
+ * Caller has ensured that vardata1 is the LHS variable.
+ */
+static double
+eqjoinsel_semi(Oid operator,
+                          VariableStatData *vardata1, VariableStatData *vardata2)
+{
+       double          selec;
+       double          nd1;
+       double          nd2;
+       Form_pg_statistic stats1 = NULL;
+       Form_pg_statistic stats2 = NULL;
+       bool            have_mcvs1 = false;
+       Datum      *values1 = NULL;
+       int                     nvalues1 = 0;
+       float4     *numbers1 = NULL;
+       int                     nnumbers1 = 0;
+       bool            have_mcvs2 = false;
+       Datum      *values2 = NULL;
+       int                     nvalues2 = 0;
+       float4     *numbers2 = NULL;
+       int                     nnumbers2 = 0;
 
-       PG_RETURN_FLOAT8((float8) selec);
+       nd1 = get_variable_numdistinct(vardata1);
+       nd2 = get_variable_numdistinct(vardata2);
+
+       if (HeapTupleIsValid(vardata1->statsTuple))
+       {
+               stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
+               have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
+                                                                         vardata1->atttype,
+                                                                         vardata1->atttypmod,
+                                                                         STATISTIC_KIND_MCV,
+                                                                         InvalidOid,
+                                                                         &values1, &nvalues1,
+                                                                         &numbers1, &nnumbers1);
+       }
+
+       if (HeapTupleIsValid(vardata2->statsTuple))
+       {
+               stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
+               have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
+                                                                         vardata2->atttype,
+                                                                         vardata2->atttypmod,
+                                                                         STATISTIC_KIND_MCV,
+                                                                         InvalidOid,
+                                                                         &values2, &nvalues2,
+                                                                         &numbers2, &nnumbers2);
+       }
+
+       if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
+       {
+               /*
+                * We have most-common-value lists for both relations.  Run through
+                * the lists to see which MCVs actually join to each other with the
+                * given operator.      This allows us to determine the exact join
+                * selectivity for the portion of the relations represented by the MCV
+                * lists.  We still have to estimate for the remaining population, but
+                * in a skewed distribution this gives us a big leg up in accuracy.
+                */
+               FmgrInfo        eqproc;
+               bool       *hasmatch1;
+               bool       *hasmatch2;
+               double          nullfrac1 = stats1->stanullfrac;
+               double          matchfreq1;
+               int                     i,
+                                       nmatches;
+
+               fmgr_info(get_opcode(operator), &eqproc);
+               hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
+               hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+
+               /*
+                * Note we assume that each MCV will match at most one member of the
+                * other MCV list.      If the operator isn't really equality, there could
+                * be multiple matches --- but we don't look for them, both for speed
+                * and because the math wouldn't add up...
+                */
+               nmatches = 0;
+               for (i = 0; i < nvalues1; i++)
+               {
+                       int                     j;
+
+                       for (j = 0; j < nvalues2; j++)
+                       {
+                               if (hasmatch2[j])
+                                       continue;
+                               if (DatumGetBool(FunctionCall2(&eqproc,
+                                                                                          values1[i],
+                                                                                          values2[j])))
+                               {
+                                       hasmatch1[i] = hasmatch2[j] = true;
+                                       nmatches++;
+                                       break;
+                               }
+                       }
+               }
+               /* Sum up frequencies of matched MCVs */
+               matchfreq1 = 0.0;
+               for (i = 0; i < nvalues1; i++)
+               {
+                       if (hasmatch1[i])
+                               matchfreq1 += numbers1[i];
+               }
+               CLAMP_PROBABILITY(matchfreq1);
+               pfree(hasmatch1);
+               pfree(hasmatch2);
+
+               /*
+                * Now we need to estimate the fraction of relation 1 that has at
+                * least one join partner.  We know for certain that the matched
+                * MCVs do, so that gives us a lower bound, but we're really in the
+                * dark about everything else.  Our crude approach is: if nd1 <= nd2
+                * then assume all non-null rel1 rows have join partners, else assume
+                * for the uncertain rows that a fraction nd2/nd1 have join partners.
+                * We can discount the known-matched MCVs from the distinct-values
+                * counts before doing the division.
+                */
+               nd1 -= nmatches;
+               nd2 -= nmatches;
+               if (nd1 <= nd2 || nd2 <= 0)
+                       selec = Max(matchfreq1, 1.0 - nullfrac1);
+               else
+               {
+                       double  uncertain = 1.0 - matchfreq1 - nullfrac1;
+
+                       CLAMP_PROBABILITY(uncertain);
+                       selec = matchfreq1 + (nd2/nd1) * uncertain;
+               }
+       }
+       else
+       {
+               /*
+                * Without MCV lists for both sides, we can only use the heuristic
+                * about nd1 vs nd2.
+                */
+               double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
+
+               if (nd1 <= nd2 || nd2 <= 0)
+                       selec = 1.0 - nullfrac1;
+               else
+                       selec = (nd2/nd1) * (1.0 - nullfrac1);
+       }
+
+       if (have_mcvs1)
+               free_attstatsslot(vardata1->atttype, values1, nvalues1,
+                                                 numbers1, nnumbers1);
+       if (have_mcvs2)
+               free_attstatsslot(vardata2->atttype, values2, nvalues2,
+                                                 numbers2, nnumbers2);
+
+       return selec;
 }
 
 /*
@@ -2099,6 +2327,7 @@ neqjoinsel(PG_FUNCTION_ARGS)
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
        JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
+       SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
        Oid                     eqop;
        float8          result;
 
@@ -2109,11 +2338,12 @@ neqjoinsel(PG_FUNCTION_ARGS)
        eqop = get_negator(operator);
        if (eqop)
        {
-               result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel,
+               result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
                                                                                                        PointerGetDatum(root),
                                                                                                        ObjectIdGetDatum(eqop),
                                                                                                        PointerGetDatum(args),
-                                                                                                       Int16GetDatum(jointype)));
+                                                                                                       Int16GetDatum(jointype),
+                                                                                                       PointerGetDatum(sjinfo)));
        }
        else
        {
@@ -3661,10 +3891,17 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
 /*
  * get_join_variables
  *             Apply examine_variable() to each side of a join clause.
+ *             Also, attempt to identify whether the join clause has the same
+ *             or reversed sense compared to the SpecialJoinInfo.
+ *
+ * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
+ * or "reversed" if it is "rhs_var OP lhs_var".  In complicated cases
+ * where we can't tell for sure, we default to assuming it's normal.
  */
 void
-get_join_variables(PlannerInfo *root, List *args,
-                                  VariableStatData *vardata1, VariableStatData *vardata2)
+get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
+                                  VariableStatData *vardata1, VariableStatData *vardata2,
+                                  bool *join_is_reversed)
 {
        Node       *left,
                           *right;
@@ -3677,6 +3914,15 @@ get_join_variables(PlannerInfo *root, List *args,
 
        examine_variable(root, left, 0, vardata1);
        examine_variable(root, right, 0, vardata2);
+
+       if (vardata1->rel &&
+               bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
+               *join_is_reversed = true;                               /* var1 is on RHS */
+       else if (vardata2->rel &&
+                        bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
+               *join_is_reversed = true;                               /* var2 is on LHS */
+       else
+               *join_is_reversed = false;
 }
 
 /*
index b2345d1364c5118d10a13437873b2dfba1e18260..27501367f527a5b852f9ed8c7a9b72f7b214c9c6 100644 (file)
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.477 2008/08/11 13:58:46 heikki Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.478 2008/08/16 00:01:37 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200808111
+#define CATALOG_VERSION_NO     200808141
 
 #endif
index e0f5b48aa4c7608288c1469ec9f570fae2e7343c..2fc91e92e3602c2fc5a7c21316a689d845c9e6c4 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.161 2008/07/14 00:51:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.162 2008/08/16 00:01:37 tgl Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
@@ -941,11 +941,11 @@ extern void OperatorCreate(const char *operatorName,
                           Oid operatorNamespace,
                           Oid leftTypeId,
                           Oid rightTypeId,
-                          List *procedureName,
+                          Oid procedureId,
                           List *commutatorName,
                           List *negatorName,
-                          List *restrictionName,
-                          List *joinName,
+                          Oid restrictionId,
+                          Oid joinId,
                           bool canMerge,
                           bool canHash);
 
index c516af8a164da2e8c652efb33c2715ca81795c78..d00d36e85fd05b85b3aab47b84dd38298e2d4a0b 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.509 2008/07/18 03:32:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.510 2008/08/16 00:01:37 tgl Exp $
  *
  * NOTES
  *       The script catalog/genbki.sh reads this file and generates .bki
@@ -223,13 +223,13 @@ DATA(insert OID = 103 (  scalarltsel         PGNSP PGUID 12 1 0 0 f f t f s 4 701 "22
 DESCR("restriction selectivity of < and related operators on scalar datatypes");
 DATA(insert OID = 104 (  scalargtsel      PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  scalargtsel _null_ _null_ _null_ ));
 DESCR("restriction selectivity of > and related operators on scalar datatypes");
-DATA(insert OID = 105 (  eqjoinsel                PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  eqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 105 (  eqjoinsel                PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     eqjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of = and related operators");
-DATA(insert OID = 106 (  neqjoinsel               PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  neqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 106 (  neqjoinsel               PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     neqjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of <> and related operators");
-DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ scalarltjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_    scalarltjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of < and related operators on scalar datatypes");
-DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ scalargtjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_    scalargtjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of > and related operators on scalar datatypes");
 
 DATA(insert OID =  109 (  unknownin               PGNSP PGUID 12 1 0 0 f f t f i 1 705 "2275" _null_ _null_ _null_ unknownin _null_ _null_ _null_ ));
@@ -289,7 +289,7 @@ DATA(insert OID = 138 (  box_center            PGNSP PGUID 12 1 0 0 f f t f i 1 600 "60
 DESCR("center of");
 DATA(insert OID = 139 (  areasel                  PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  areasel _null_ _null_ _null_ ));
 DESCR("restriction selectivity for area-comparison operators");
-DATA(insert OID = 140 (  areajoinsel      PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  areajoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 140 (  areajoinsel      PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     areajoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity for area-comparison operators");
 DATA(insert OID = 141 (  int4mul                  PGNSP PGUID 12 1 0 0 f f t f i 2 23 "23 23" _null_ _null_ _null_ int4mul _null_ _null_ _null_ ));
 DESCR("multiply");
@@ -1630,11 +1630,11 @@ DESCR("current clock time");
 
 DATA(insert OID = 1300 (  positionsel             PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  positionsel _null_ _null_ _null_ ));
 DESCR("restriction selectivity for position-comparison operators");
-DATA(insert OID = 1301 (  positionjoinsel         PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  positionjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1301 (  positionjoinsel         PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     positionjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity for position-comparison operators");
 DATA(insert OID = 1302 (  contsel                 PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  contsel _null_ _null_ _null_ ));
 DESCR("restriction selectivity for containment comparison operators");
-DATA(insert OID = 1303 (  contjoinsel     PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  contjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1303 (  contjoinsel     PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     contjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity for containment comparison operators");
 
 DATA(insert OID = 1304 ( overlaps                       PGNSP PGUID 12 1 0 0 f f f f i 4 16 "1184 1184 1184 1184" _null_ _null_ _null_ overlaps_timestamp _null_ _null_ _null_ ));
@@ -2684,9 +2684,9 @@ DATA(insert OID = 1814 ( iclikesel                        PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281
 DESCR("restriction selectivity of ILIKE");
 DATA(insert OID = 1815 ( icnlikesel                    PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  icnlikesel _null_ _null_ _null_ ));
 DESCR("restriction selectivity of NOT ILIKE");
-DATA(insert OID = 1816 ( iclikejoinsel         PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  iclikejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1816 ( iclikejoinsel         PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  iclikejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of ILIKE");
-DATA(insert OID = 1817 ( icnlikejoinsel                PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  icnlikejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1817 ( icnlikejoinsel                PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  icnlikejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of NOT ILIKE");
 DATA(insert OID = 1818 ( regexeqsel                    PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  regexeqsel _null_ _null_ _null_ ));
 DESCR("restriction selectivity of regex match");
@@ -2700,17 +2700,17 @@ DATA(insert OID = 1822 ( nlikesel                       PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 2
 DESCR("restriction selectivity of NOT LIKE");
 DATA(insert OID = 1823 ( icregexnesel          PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  icregexnesel _null_ _null_ _null_ ));
 DESCR("restriction selectivity of case-insensitive regex non-match");
-DATA(insert OID = 1824 ( regexeqjoinsel                PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  regexeqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1824 ( regexeqjoinsel                PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  regexeqjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of regex match");
-DATA(insert OID = 1825 ( likejoinsel           PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  likejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1825 ( likejoinsel           PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  likejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of LIKE");
-DATA(insert OID = 1826 ( icregexeqjoinsel      PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  icregexeqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1826 ( icregexeqjoinsel      PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  icregexeqjoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of case-insensitive regex match");
-DATA(insert OID = 1827 ( regexnejoinsel                PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  regexnejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1827 ( regexnejoinsel                PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  regexnejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of regex non-match");
-DATA(insert OID = 1828 ( nlikejoinsel          PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  nlikejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1828 ( nlikejoinsel          PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  nlikejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of NOT LIKE");
-DATA(insert OID = 1829 ( icregexnejoinsel      PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  icregexnejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1829 ( icregexnejoinsel      PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  icregexnejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of case-insensitive regex non-match");
 
 /* Aggregate-related functions */
index 7471d1b44d6a4baad39a11d9bd96dabbd725df77..7963b745f52b5a7492c89d2fd4a7af2a3c514822 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.50 2008/06/19 00:46:06 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.51 2008/08/16 00:01:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,6 +50,7 @@ extern Selectivity restriction_selectivity(PlannerInfo *root,
 extern Selectivity join_selectivity(PlannerInfo *root,
                                 Oid operator,
                                 List *args,
-                                JoinType jointype);
+                                JoinType jointype,
+                                SpecialJoinInfo *sjinfo);
 
 #endif   /* PLANCAT_H */
index e94e7e5916582b83da6cfc4ee2f4e85861a7f4e1..120269eee5dcf6a8b8a4c7499d37a136487218fb 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.45 2008/08/14 18:48:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.46 2008/08/16 00:01:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -104,8 +104,10 @@ extern bool get_restriction_variable(PlannerInfo *root, List *args,
                                                 VariableStatData *vardata, Node **other,
                                                 bool *varonleft);
 extern void get_join_variables(PlannerInfo *root, List *args,
+                                  SpecialJoinInfo *sjinfo,
                                   VariableStatData *vardata1,
-                                  VariableStatData *vardata2);
+                                  VariableStatData *vardata2,
+                                  bool *join_is_reversed);
 extern double get_variable_numdistinct(VariableStatData *vardata);
 extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                                Datum constval, bool varonleft,
index 533bac3ab6705bc6ff5be990b68a3e84f7e4cf11..a21c2ba3744250a84dbc4a175dcf8d095b979125 100644 (file)
@@ -543,17 +543,20 @@ WHERE p1.oprrest = p2.oid AND
 -- If oprjoin is set, the operator must be a binary boolean op,
 -- and it must link to a proc with the right signature
 -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal, int2)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal)
+-- (Note: the old signature with only 4 args is still allowed, but no core
+-- estimator should be using it.)
 SELECT p1.oid, p1.oprname, p2.oid, p2.proname
 FROM pg_operator AS p1, pg_proc AS p2
 WHERE p1.oprjoin = p2.oid AND
     (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
      p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 4 OR
+     p2.pronargs != 5 OR
      p2.proargtypes[0] != 'internal'::regtype OR
      p2.proargtypes[1] != 'oid'::regtype OR
      p2.proargtypes[2] != 'internal'::regtype OR
-     p2.proargtypes[3] != 'int2'::regtype);
+     p2.proargtypes[3] != 'int2'::regtype OR
+     p2.proargtypes[4] != 'internal'::regtype);
  oid | oprname | oid | proname 
 -----+---------+-----+---------
 (0 rows)
index 5017849830aa612d14a1359eb8c2cd3f9e622f7f..57c2cad95151649f0fe9c0ad2df14061749b27bb 100644 (file)
@@ -444,18 +444,21 @@ WHERE p1.oprrest = p2.oid AND
 -- If oprjoin is set, the operator must be a binary boolean op,
 -- and it must link to a proc with the right signature
 -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal, int2)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal)
+-- (Note: the old signature with only 4 args is still allowed, but no core
+-- estimator should be using it.)
 
 SELECT p1.oid, p1.oprname, p2.oid, p2.proname
 FROM pg_operator AS p1, pg_proc AS p2
 WHERE p1.oprjoin = p2.oid AND
     (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
      p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 4 OR
+     p2.pronargs != 5 OR
      p2.proargtypes[0] != 'internal'::regtype OR
      p2.proargtypes[1] != 'oid'::regtype OR
      p2.proargtypes[2] != 'internal'::regtype OR
-     p2.proargtypes[3] != 'int2'::regtype);
+     p2.proargtypes[3] != 'int2'::regtype OR
+     p2.proargtypes[4] != 'internal'::regtype);
 
 -- **************** pg_aggregate ****************