Clean up the loose ends in selectivity estimation left by my patch for semi

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 16 Aug 2008 00:01:38 +0000 (00:01 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 16 Aug 2008 00:01:38 +0000 (00:01 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 16 Aug 2008 00:01:38 +0000 (00:01 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 16 Aug 2008 00:01:38 +0000 (00:01 +0000)
diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c

index 11690aa8ce62ae64c176d4794ced8e3bbb96d739..94b3bf28625e6b138ca6b9b36b3cb87aa18fb7b3 100644 (file)
--- a/src/backend/catalog/pg_operator.c
+++ b/src/backend/catalog/pg_operator.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.104 2008/06/19 00:46:04 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/pg_operator.c,v 1.105 2008/08/16 00:01:35 tgl Exp $
   *
   * NOTES
   *       these routines moved here from commands/define.c and somewhat cleaned up.
@@ -21,12 +21,12 @@
  #include "access/xact.h"
  #include "catalog/dependency.h"
  #include "catalog/indexing.h"
+#include "catalog/namespace.h"
  #include "catalog/pg_namespace.h"
  #include "catalog/pg_operator.h"
  #include "catalog/pg_proc.h"
  #include "catalog/pg_type.h"
  #include "miscadmin.h"
-#include "parser/parse_func.h"
  #include "parser/parse_oper.h"
  #include "utils/acl.h"
  #include "utils/builtins.h"
@@ -273,6 +273,11 @@ OperatorShellMake(const char *operatorName,
  
         heap_freetuple(tup);
  
+       /*
+        * Make sure the tuple is visible for subsequent lookups/updates.
+        */
+       CommandCounterIncrement();
+
         /*
          * close the operator relation and return the oid.
          */
@@ -289,14 +294,19 @@ OperatorShellMake(const char *operatorName,
   *             operatorNamespace               namespace for new operator
   *             leftTypeId                              X left type ID
   *             rightTypeId                             X right type ID
- *             procedureName                   procedure for operator
+ *             procedureId                             procedure ID for operator
   *             commutatorName                  X commutator operator
   *             negatorName                             X negator operator
- *             restrictionName                 X restriction sel. procedure
- *             joinName                                X join sel. procedure
+ *             restrictionId                   X restriction selectivity procedure ID
+ *             joinId                                  X join selectivity procedure ID
   *             canMerge                                merge join can be used with this operator
   *             canHash                                 hash join can be used with this operator
   *
+ * The caller should have validated properties and permissions for the
+ * objects passed as OID references.  We must handle the commutator and
+ * negator operator references specially, however, since those need not
+ * exist beforehand.
+ *
   * This routine gets complicated because it allows the user to
   * specify operators that do not exist.  For example, if operator
   * "op" is being defined, the negator operator "negop" and the
@@ -310,59 +320,17 @@ OperatorShellMake(const char *operatorName,
   * information about the operator (just its name and types).
   * Forward declaration is used only for this purpose, it is
   * not available to the user as it is for type definition.
- *
- * Algorithm:
- *
- * check if operator already defined
- *       if so, but oprcode is null, save the Oid -- we are filling in a shell
- *       otherwise error
- * get the attribute types from relation descriptor for pg_operator
- * assign values to the fields of the operator:
- *      operatorName
- *      owner id (simply the user id of the caller)
- *      operator "kind" either "b" for binary or "l" for left unary
- *      canMerge boolean
- *      canHash boolean
- *      leftTypeObjectId -- type must already be defined
- *      rightTypeObjectId -- this is optional, enter ObjectId=0 if none specified
- *      resultType -- defer this, since it must be determined from
- *                                the pg_procedure catalog
- *      commutatorObjectId -- if this is NULL, enter ObjectId=0
- *                                       else if this already exists, enter its ObjectId
- *                                       else if this does not yet exist, and is not
- *                                             the same as the main operatorName, then create
- *                                             a shell and enter the new ObjectId
- *                                       else if this does not exist but IS the same
- *                                             name & types as the main operator, set the ObjectId=0.
- *                                             (We are creating a self-commutating operator.)
- *                                             The link will be fixed later by OperatorUpd.
- *      negatorObjectId   -- same as for commutatorObjectId
- *      operatorProcedure -- must access the pg_procedure catalog to get the
- *                                ObjectId of the procedure that actually does the operator
- *                                actions this is required.  Do a lookup to find out the
- *                                return type of the procedure
- *      restrictionProcedure -- must access the pg_procedure catalog to get
- *                                the ObjectId but this is optional
- *      joinProcedure -- same as restrictionProcedure
- * now either insert or replace the operator into the pg_operator catalog
- * if the operator shell is being filled in
- *      access the catalog in order to get a valid buffer
- *      create a tuple using ModifyHeapTuple
- *      get the t_self from the modified tuple and call RelationReplaceHeapTuple
- * else if a new operator is being created
- *      create a tuple using heap_formtuple
- *      call simple_heap_insert
   */
  void
  OperatorCreate(const char *operatorName,
                            Oid operatorNamespace,
                            Oid leftTypeId,
                            Oid rightTypeId,
-                          List *procedureName,
+                          Oid procedureId,
                            List *commutatorName,
                            List *negatorName,
-                          List *restrictionName,
-                          List *joinName,
+                          Oid restrictionId,
+                          Oid joinId,
                            bool canMerge,
                            bool canHash)
  {
@@ -373,15 +341,10 @@ OperatorCreate(const char *operatorName,
         Datum           values[Natts_pg_operator];
         Oid                     operatorObjectId;
         bool            operatorAlreadyDefined;
-       Oid                     procOid;
         Oid                     operResultType;
         Oid                     commutatorId,
-                               negatorId,
-                               restOid,
-                               joinOid;
+                               negatorId;
         bool            selfCommutator = false;
-       Oid                     typeId[4];              /* only need up to 4 args here */
-       int                     nargs;
         NameData        oname;
         TupleDesc       tupDesc;
         int                     i;
@@ -395,11 +358,6 @@ OperatorCreate(const char *operatorName,
                                  errmsg("\"%s\" is not a valid operator name",
                                                 operatorName)));
  
-       if (!OidIsValid(leftTypeId) && !OidIsValid(rightTypeId))
-               ereport(ERROR,
-                               (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
-                  errmsg("at least one of leftarg or rightarg must be specified")));
-
         if (!(OidIsValid(leftTypeId) && OidIsValid(rightTypeId)))
         {
                 /* If it's not a binary op, these things mustn't be set: */
@@ -407,7 +365,7 @@ OperatorCreate(const char *operatorName,
                         ereport(ERROR,
                                         (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
                                          errmsg("only binary operators can have commutators")));
-               if (joinName)
+               if (OidIsValid(joinId))
                         ereport(ERROR,
                                         (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
                                  errmsg("only binary operators can have join selectivity")));
@@ -421,6 +379,33 @@ OperatorCreate(const char *operatorName,
                                          errmsg("only binary operators can hash")));
         }
  
+       operResultType = get_func_rettype(procedureId);
+
+       if (operResultType != BOOLOID)
+       {
+               /* If it's not a boolean op, these things mustn't be set: */
+               if (negatorName)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                        errmsg("only boolean operators can have negators")));
+               if (OidIsValid(restrictionId))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                errmsg("only boolean operators can have restriction selectivity")));
+               if (OidIsValid(joinId))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                errmsg("only boolean operators can have join selectivity")));
+               if (canMerge)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                        errmsg("only boolean operators can merge join")));
+               if (canHash)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                        errmsg("only boolean operators can hash")));
+       }
+
         operatorObjectId = OperatorGet(operatorName,
                                                                    operatorNamespace,
                                                                    leftTypeId,
@@ -435,85 +420,13 @@ OperatorCreate(const char *operatorName,
  
         /*
          * At this point, if operatorObjectId is not InvalidOid then we are
-        * filling in a previously-created shell.
-        */
-
-       /*
-        * Look up registered procedures -- find the return type of procedureName
-        * to place in "result" field. Do this before shells are created so we
-        * don't have to worry about deleting them later.
-        */
-       if (!OidIsValid(leftTypeId))
-       {
-               typeId[0] = rightTypeId;
-               nargs = 1;
-       }
-       else if (!OidIsValid(rightTypeId))
-       {
-               typeId[0] = leftTypeId;
-               nargs = 1;
-       }
-       else
-       {
-               typeId[0] = leftTypeId;
-               typeId[1] = rightTypeId;
-               nargs = 2;
-       }
-       procOid = LookupFuncName(procedureName, nargs, typeId, false);
-       operResultType = get_func_rettype(procOid);
-
-       /*
-        * find restriction estimator
-        */
-       if (restrictionName)
-       {
-               typeId[0] = INTERNALOID;        /* Query */
-               typeId[1] = OIDOID;             /* operator OID */
-               typeId[2] = INTERNALOID;        /* args list */
-               typeId[3] = INT4OID;    /* varRelid */
-
-               restOid = LookupFuncName(restrictionName, 4, typeId, false);
-       }
-       else
-               restOid = InvalidOid;
-
-       /*
-        * find join estimator
-        */
-       if (joinName)
-       {
-               typeId[0] = INTERNALOID;        /* Query */
-               typeId[1] = OIDOID;             /* operator OID */
-               typeId[2] = INTERNALOID;        /* args list */
-               typeId[3] = INT2OID;    /* jointype */
-
-               joinOid = LookupFuncName(joinName, 4, typeId, false);
-       }
-       else
-               joinOid = InvalidOid;
-
-       /*
-        * set up values in the operator tuple
+        * filling in a previously-created shell.  Insist that the user own
+        * any such shell.
          */
-
-       for (i = 0; i < Natts_pg_operator; ++i)
-       {
-               values[i] = (Datum) NULL;
-               replaces[i] = 'r';
-               nulls[i] = ' ';
-       }
-
-       i = 0;
-       namestrcpy(&oname, operatorName);
-       values[i++] = NameGetDatum(&oname); /* oprname */
-       values[i++] = ObjectIdGetDatum(operatorNamespace);      /* oprnamespace */
-       values[i++] = ObjectIdGetDatum(GetUserId());            /* oprowner */
-       values[i++] = CharGetDatum(leftTypeId ? (rightTypeId ? 'b' : 'r') : 'l');       /* oprkind */
-       values[i++] = BoolGetDatum(canMerge);           /* oprcanmerge */
-       values[i++] = BoolGetDatum(canHash);            /* oprcanhash */
-       values[i++] = ObjectIdGetDatum(leftTypeId); /* oprleft */
-       values[i++] = ObjectIdGetDatum(rightTypeId);            /* oprright */
-       values[i++] = ObjectIdGetDatum(operResultType);         /* oprresult */
+       if (OidIsValid(operatorObjectId) &&
+               !pg_oper_ownercheck(operatorObjectId, GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER,
+                                          operatorName);
  
         /*
          * Set up the other operators.  If they do not currently exist, create
@@ -529,6 +442,12 @@ OperatorCreate(const char *operatorName,
                                                                                   leftTypeId, rightTypeId,
                                                                                   true);
  
+               /* Permission check: must own other operator */
+               if (OidIsValid(commutatorId) &&
+                       !pg_oper_ownercheck(commutatorId, GetUserId()))
+                       aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER,
+                                                  NameListToString(commutatorName));
+
                 /*
                  * self-linkage to this operator; will fix below. Note that only
                  * self-linkage for commutation makes sense.
@@ -538,7 +457,6 @@ OperatorCreate(const char *operatorName,
         }
         else
                 commutatorId = InvalidOid;
-       values[i++] = ObjectIdGetDatum(commutatorId);           /* oprcom */
  
         if (negatorName)
         {
@@ -548,19 +466,48 @@ OperatorCreate(const char *operatorName,
                                                                            operatorName, operatorNamespace,
                                                                            leftTypeId, rightTypeId,
                                                                            false);
+
+               /* Permission check: must own other operator */
+               if (OidIsValid(negatorId) &&
+                       !pg_oper_ownercheck(negatorId, GetUserId()))
+                       aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_OPER,
+                                                  NameListToString(negatorName));
         }
         else
                 negatorId = InvalidOid;
-       values[i++] = ObjectIdGetDatum(negatorId);      /* oprnegate */
  
-       values[i++] = ObjectIdGetDatum(procOid);        /* oprcode */
-       values[i++] = ObjectIdGetDatum(restOid);        /* oprrest */
-       values[i++] = ObjectIdGetDatum(joinOid);        /* oprjoin */
+       /*
+        * set up values in the operator tuple
+        */
+
+       for (i = 0; i < Natts_pg_operator; ++i)
+       {
+               values[i] = (Datum) NULL;
+               replaces[i] = 'r';
+               nulls[i] = ' ';
+       }
+
+       i = 0;
+       namestrcpy(&oname, operatorName);
+       values[i++] = NameGetDatum(&oname); /* oprname */
+       values[i++] = ObjectIdGetDatum(operatorNamespace);      /* oprnamespace */
+       values[i++] = ObjectIdGetDatum(GetUserId());            /* oprowner */
+       values[i++] = CharGetDatum(leftTypeId ? (rightTypeId ? 'b' : 'r') : 'l');       /* oprkind */
+       values[i++] = BoolGetDatum(canMerge);           /* oprcanmerge */
+       values[i++] = BoolGetDatum(canHash);            /* oprcanhash */
+       values[i++] = ObjectIdGetDatum(leftTypeId); /* oprleft */
+       values[i++] = ObjectIdGetDatum(rightTypeId);            /* oprright */
+       values[i++] = ObjectIdGetDatum(operResultType);         /* oprresult */
+       values[i++] = ObjectIdGetDatum(commutatorId);           /* oprcom */
+       values[i++] = ObjectIdGetDatum(negatorId);              /* oprnegate */
+       values[i++] = ObjectIdGetDatum(procedureId);    /* oprcode */
+       values[i++] = ObjectIdGetDatum(restrictionId);  /* oprrest */
+       values[i++] = ObjectIdGetDatum(joinId);                 /* oprjoin */
  
         pg_operator_desc = heap_open(OperatorRelationId, RowExclusiveLock);
  
         /*
-        * If we are adding to an operator shell, update; else insert
+        * If we are replacing an operator shell, update; else insert
          */
         if (operatorObjectId)
         {
@@ -706,7 +653,8 @@ OperatorUpd(Oid baseId, Oid commId, Oid negId)
         /*
          * check and update the commutator & negator, if necessary
          *
-        * First make sure we can see them...
+        * We need a CommandCounterIncrement here in case of a self-commutator
+        * operator: we'll need to update the tuple that we just inserted.
          */
         CommandCounterIncrement();
  
diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c

index 884181c5cd91fae0b32bbc61da53ecd7d221909d..391b4d6507d7c1b8e086263b4f2c88b29d4cdc0f 100644 (file)
--- a/src/backend/commands/operatorcmds.c
+++ b/src/backend/commands/operatorcmds.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.40 2008/06/19 00:46:04 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/operatorcmds.c,v 1.41 2008/08/16 00:01:35 tgl Exp $
   *
   * DESCRIPTION
   *       The "DefineFoo" routines take the parse tree and pick out the
@@ -39,8 +39,10 @@
  #include "catalog/indexing.h"
  #include "catalog/namespace.h"
  #include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
  #include "commands/defrem.h"
  #include "miscadmin.h"
+#include "parser/parse_func.h"
  #include "parser/parse_oper.h"
  #include "parser/parse_type.h"
  #include "utils/acl.h"
@@ -76,6 +78,11 @@ DefineOperator(List *names, List *parameters)
         List       *negatorName = NIL;          /* optional negator operator name */
         List       *restrictionName = NIL;      /* optional restrict. sel. procedure */
         List       *joinName = NIL; /* optional join sel. procedure */
+       Oid                     functionOid;    /* functions converted to OID */
+       Oid                     restrictionOid;
+       Oid                     joinOid;
+       Oid                     typeId[5];              /* only need up to 5 args here */
+       int                     nargs;
         ListCell   *pl;
  
         /* Convert list of names to a name and namespace */
@@ -154,6 +161,109 @@ DefineOperator(List *names, List *parameters)
         if (typeName2)
                 typeId2 = typenameTypeId(NULL, typeName2, NULL);
  
+       if (!OidIsValid(typeId1) && !OidIsValid(typeId2))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                  errmsg("at least one of leftarg or rightarg must be specified")));
+
+       /*
+        * Look up the operator's underlying function.
+        */
+       if (!OidIsValid(typeId1))
+       {
+               typeId[0] = typeId2;
+               nargs = 1;
+       }
+       else if (!OidIsValid(typeId2))
+       {
+               typeId[0] = typeId1;
+               nargs = 1;
+       }
+       else
+       {
+               typeId[0] = typeId1;
+               typeId[1] = typeId2;
+               nargs = 2;
+       }
+       functionOid = LookupFuncName(functionName, nargs, typeId, false);
+
+       /*
+        * We require EXECUTE rights for the function.  This isn't strictly
+        * necessary, since EXECUTE will be checked at any attempted use of
+        * the operator, but it seems like a good idea anyway.
+        */
+       aclresult = pg_proc_aclcheck(functionOid, GetUserId(), ACL_EXECUTE);
+       if (aclresult != ACLCHECK_OK)
+               aclcheck_error(aclresult, ACL_KIND_PROC,
+                                          NameListToString(functionName));
+
+       /*
+        * Look up restriction estimator if specified
+        */
+       if (restrictionName)
+       {
+               typeId[0] = INTERNALOID;        /* PlannerInfo */
+               typeId[1] = OIDOID;             /* operator OID */
+               typeId[2] = INTERNALOID;        /* args list */
+               typeId[3] = INT4OID;    /* varRelid */
+
+               restrictionOid = LookupFuncName(restrictionName, 4, typeId, false);
+
+               /* estimators must return float8 */
+               if (get_func_rettype(restrictionOid) != FLOAT8OID)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("restriction estimator function %s must return type \"float8\"",
+                                                       NameListToString(restrictionName))));
+
+               /* Require EXECUTE rights for the estimator */
+               aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE);
+               if (aclresult != ACLCHECK_OK)
+                       aclcheck_error(aclresult, ACL_KIND_PROC,
+                                                  NameListToString(restrictionName));
+       }
+       else
+               restrictionOid = InvalidOid;
+
+       /*
+        * Look up join estimator if specified
+        */
+       if (joinName)
+       {
+               typeId[0] = INTERNALOID;        /* PlannerInfo */
+               typeId[1] = OIDOID;             /* operator OID */
+               typeId[2] = INTERNALOID;        /* args list */
+               typeId[3] = INT2OID;    /* jointype */
+               typeId[4] = INTERNALOID;        /* SpecialJoinInfo */
+
+               /*
+                * As of Postgres 8.4, the preferred signature for join estimators
+                * has 5 arguments, but we still allow the old 4-argument form.
+                * Try the preferred form first.
+                */
+               joinOid = LookupFuncName(joinName, 5, typeId, true);
+               if (!OidIsValid(joinOid))
+                       joinOid = LookupFuncName(joinName, 4, typeId, true);
+               /* If not found, reference the 5-argument signature in error msg */
+               if (!OidIsValid(joinOid))
+                       joinOid = LookupFuncName(joinName, 5, typeId, false);
+
+               /* estimators must return float8 */
+               if (get_func_rettype(joinOid) != FLOAT8OID)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("join estimator function %s must return type \"float8\"",
+                                                       NameListToString(joinName))));
+
+               /* Require EXECUTE rights for the estimator */
+               aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE);
+               if (aclresult != ACLCHECK_OK)
+                       aclcheck_error(aclresult, ACL_KIND_PROC,
+                                                  NameListToString(joinName));
+       }
+       else
+               joinOid = InvalidOid;
+
         /*
          * now have OperatorCreate do all the work..
          */
@@ -161,11 +271,11 @@ DefineOperator(List *names, List *parameters)
                                    oprNamespace,        /* namespace */
                                    typeId1,             /* left type id */
                                    typeId2,             /* right type id */
-                                  functionName,        /* function for operator */
+                                  functionOid, /* function for operator */
                                    commutatorName,              /* optional commutator operator name */
                                    negatorName, /* optional negator operator name */
-                                  restrictionName,             /* optional restrict. sel. procedure */
-                                  joinName,    /* optional join sel. procedure name */
+                                  restrictionOid,              /* optional restrict. sel. procedure */
+                                  joinOid,             /* optional join sel. procedure name */
                                    canMerge,    /* operator merges */
                                    canHash);    /* operator hashes */
  }
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c

index e788f2787310cd3d8f119738f5a503c149e6329e..6db42c5f5860a5edabbe030fa0b0761737d5cca0 100644 (file)
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -398,6 +398,50 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
         return false;
  }
  
+/*
+ * treat_as_join_clause -
+ *       Decide whether an operator clause is to be handled by the
+ *       restriction or join estimator.  Subroutine for clause_selectivity().
+ */
+static inline bool
+treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
+                                        int varRelid, SpecialJoinInfo *sjinfo)
+{
+       if (varRelid != 0)
+       {
+               /*
+                * Caller is forcing restriction mode (eg, because we are examining
+                * an inner indexscan qual).
+                */
+               return false;
+       }
+       else if (sjinfo == NULL)
+       {
+               /*
+                * It must be a restriction clause, since it's being evaluated at
+                * a scan node.
+                */
+               return false;
+       }
+       else
+       {
+               /*
+                * Otherwise, it's a join if there's more than one relation used.
+                * We can optimize this calculation if an rinfo was passed.
+                *
+                * XXX  Since we know the clause is being evaluated at a join,
+                * the only way it could be single-relation is if it was delayed
+                * by outer joins.  Although we can make use of the restriction
+                * qual estimators anyway, it seems likely that we ought to account
+                * for the probability of injected nulls somehow.
+                */
+               if (rinfo)
+                       return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
+               else
+                       return (NumRelids(clause) > 1);
+       }
+}
+
  
  /*
   * clause_selectivity -
@@ -429,9 +473,6 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
   *        root->join_info_list.
   *     2. For an INNER join, sjinfo is just a transient struct, and only the
   *        relids and jointype fields in it can be trusted.
- *     3. XXX sjinfo might be NULL even though it really is a join.  This case
- *        will go away soon, but fixing it requires API changes for oprjoin and
- *        amcostestimate functions.
   * It is possible for jointype to be different from sjinfo->jointype.
   * This indicates we are considering a variant join: either with
   * the LHS and RHS switched, or with one input unique-ified.
@@ -603,36 +644,14 @@ clause_selectivity(PlannerInfo *root,
         else if (is_opclause(clause) || IsA(clause, DistinctExpr))
         {
                 Oid                     opno = ((OpExpr *) clause)->opno;
-               bool            is_join_clause;
  
-               if (varRelid != 0)
-               {
-                       /*
-                        * If we are considering a nestloop join then all clauses are
-                        * restriction clauses, since we are only interested in the one
-                        * relation.
-                        */
-                       is_join_clause = false;
-               }
-               else
-               {
-                       /*
-                        * Otherwise, it's a join if there's more than one relation used.
-                        * We can optimize this calculation if an rinfo was passed.
-                        */
-                       if (rinfo)
-                               is_join_clause = (bms_membership(rinfo->clause_relids) ==
-                                                                 BMS_MULTIPLE);
-                       else
-                               is_join_clause = (NumRelids(clause) > 1);
-               }
-
-               if (is_join_clause)
+               if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
                 {
                         /* Estimate selectivity for a join clause. */
                         s1 = join_selectivity(root, opno,
                                                                   ((OpExpr *) clause)->args,
-                                                                 jointype);
+                                                                 jointype,
+                                                                 sjinfo);
                 }
                 else
                 {
@@ -671,35 +690,11 @@ clause_selectivity(PlannerInfo *root,
  #endif
         else if (IsA(clause, ScalarArrayOpExpr))
         {
-               /* First, decide if it's a join clause, same as for OpExpr */
-               bool            is_join_clause;
-
-               if (varRelid != 0)
-               {
-                       /*
-                        * If we are considering a nestloop join then all clauses are
-                        * restriction clauses, since we are only interested in the one
-                        * relation.
-                        */
-                       is_join_clause = false;
-               }
-               else
-               {
-                       /*
-                        * Otherwise, it's a join if there's more than one relation used.
-                        * We can optimize this calculation if an rinfo was passed.
-                        */
-                       if (rinfo)
-                               is_join_clause = (bms_membership(rinfo->clause_relids) ==
-                                                                 BMS_MULTIPLE);
-                       else
-                               is_join_clause = (NumRelids(clause) > 1);
-               }
-
                 /* Use node specific selectivity calculation function */
                 s1 = scalararraysel(root,
                                                         (ScalarArrayOpExpr *) clause,
-                                                       is_join_clause,
+                                                       treat_as_join_clause(clause, rinfo,
+                                                                                                varRelid, sjinfo),
                                                         varRelid,
                                                         jointype,
                                                         sjinfo);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index d9d8c89864064adbdc43ebdf9690f96c47253cfc..14c3cf1f21d3b87add746a4ec13dbee03c99a981 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.193 2008/08/14 18:47:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.194 2008/08/16 00:01:36 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -118,10 +118,8 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
                            RestrictInfo *rinfo,
                            PathKey *pathkey);
  static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
-static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
-                                  SpecialJoinInfo *sjinfo);
-static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root,
-                                                                          SpecialJoinInfo *sjinfo);
+static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
+                                                                List *quals, SpecialJoinInfo *sjinfo);
  static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
  static double relation_byte_size(double tuples, int width);
  static double page_size(double tuples, int width);
@@ -1288,20 +1286,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         double          outer_path_rows = PATH_ROWS(outer_path);
         double          inner_path_rows = nestloop_inner_path_rows(inner_path);
         double          ntuples;
-       Selectivity joininfactor;
  
         if (!enable_nestloop)
                 startup_cost += disable_cost;
  
-       /*
-        * If we're doing JOIN_IN then we will stop scanning inner tuples for an
-        * outer tuple as soon as we have one match.  Account for the effects of
-        * this by scaling down the cost estimates in proportion to the JOIN_IN
-        * selectivity.  (This assumes that all the quals attached to the join are
-        * IN quals, which should be true.)
-        */
-       joininfactor = join_in_selectivity(path, root, sjinfo);
-
         /* cost of source data */
  
         /*
@@ -1328,12 +1316,12 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                 run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
         }
         run_cost += outer_path_rows *
-               (inner_path->total_cost - inner_path->startup_cost) * joininfactor;
+               (inner_path->total_cost - inner_path->startup_cost);
  
         /*
          * Compute number of tuples processed (not number emitted!)
          */
-       ntuples = outer_path_rows * inner_path_rows * joininfactor;
+       ntuples = outer_path_rows * inner_path_rows;
  
         /* CPU costs */
         cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
@@ -1369,7 +1357,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         Cost            startup_cost = 0;
         Cost            run_cost = 0;
         Cost            cpu_per_tuple;
-       Selectivity merge_selec;
         QualCost        merge_qual_cost;
         QualCost        qp_qual_cost;
         double          outer_path_rows = PATH_ROWS(outer_path);
@@ -1385,7 +1372,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                                 outerendsel,
                                 innerstartsel,
                                 innerendsel;
-       Selectivity joininfactor;
         Path            sort_path;              /* dummy for result of cost_sort */
  
         /* Protect some assumptions below that rowcounts aren't zero */
@@ -1398,21 +1384,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                 startup_cost += disable_cost;
  
         /*
-        * Compute cost and selectivity of the mergequals and qpquals (other
-        * restriction clauses) separately.  We use approx_selectivity here for
-        * speed --- in most cases, any errors won't affect the result much.
-        *
-        * Note: it's probably bogus to use the normal selectivity calculation
-        * here when either the outer or inner path is a UniquePath.
+        * Compute cost of the mergequals and qpquals (other restriction clauses)
+        * separately.
          */
-       merge_selec = approx_selectivity(root, mergeclauses, sjinfo);
         cost_qual_eval(&merge_qual_cost, mergeclauses, root);
         cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
         qp_qual_cost.startup -= merge_qual_cost.startup;
         qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
  
-       /* approx # tuples passing the merge quals */
-       mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows);
+       /*
+        * Get approx # tuples passing the mergequals.  We use approx_tuple_count
+        * here for speed --- in most cases, any errors won't affect the result
+        * much.
+        */
+       mergejointuples = approx_tuple_count(root, &path->jpath,
+                                                                                mergeclauses, sjinfo);
  
         /*
          * When there are equal merge keys in the outer relation, the mergejoin
@@ -1422,11 +1408,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
          * but on the other hand we ignore the bookkeeping costs of mark/restore.
          * Not clear if it's worth developing a more refined model.
          *
-        * The number of re-fetches can be estimated approximately as size of
-        * merge join output minus size of inner relation.      Assume that the
-        * distinct key values are 1, 2, ..., and denote the number of values of
-        * each key in the outer relation as m1, m2, ...; in the inner relation,
-        * n1, n2, ... Then we have
+        * For regular inner and outer joins, the number of re-fetches can be
+        * estimated approximately as size of merge join output minus size of
+        * inner relation. Assume that the distinct key values are 1, 2, ..., and
+        * denote the number of values of each key in the outer relation as m1,
+        * m2, ...; in the inner relation, n1, n2, ...  Then we have
          *
          * size of join = m1 * n1 + m2 * n2 + ...
          *
@@ -1439,8 +1425,17 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
          * are effectively subtracting those from the number of rescanned tuples,
          * when we should not.  Can we do better without expensive selectivity
          * computations?
+        *
+        * For SEMI and ANTI joins, only one inner tuple need be rescanned for
+        * each group of same-keyed outer tuples (assuming that all joinquals
+        * are merge quals).  This makes the effect small enough to ignore,
+        * so we just set rescannedtuples = 0.  Likewise, the whole issue is
+        * moot if we are working from a unique-ified outer input.
          */
-       if (IsA(outer_path, UniquePath))
+       if (sjinfo->jointype == JOIN_SEMI ||
+               sjinfo->jointype == JOIN_ANTI)
+               rescannedtuples = 0;
+       else if (IsA(outer_path, UniquePath))
                 rescannedtuples = 0;
         else
         {
@@ -1505,7 +1500,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                         innerstartsel = cache->leftstartsel;
                         innerendsel = cache->leftendsel;
                 }
-               if (path->jpath.jointype == JOIN_LEFT)
+               if (path->jpath.jointype == JOIN_LEFT ||
+                       path->jpath.jointype == JOIN_ANTI)
                 {
                         outerstartsel = 0.0;
                         outerendsel = 1.0;
@@ -1600,21 +1596,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
  
         /* CPU costs */
  
-       /*
-        * If we're doing JOIN_IN then we will stop outputting inner tuples for an
-        * outer tuple as soon as we have one match.  Account for the effects of
-        * this by scaling down the cost estimates in proportion to the expected
-        * output size.  (This assumes that all the quals attached to the join are
-        * IN quals, which should be true.)
-        */
-       joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
-
         /*
          * The number of tuple comparisons needed is approximately number of outer
          * rows plus number of inner rows plus number of rescanned tuples (can we
          * refine this?).  At each one, we need to evaluate the mergejoin quals.
-        * NOTE: JOIN_IN mode does not save any work here, so do NOT include
-        * joininfactor.
          */
         startup_cost += merge_qual_cost.startup;
         startup_cost += merge_qual_cost.per_tuple *
@@ -1627,12 +1612,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
          * For each tuple that gets through the mergejoin proper, we charge
          * cpu_tuple_cost plus the cost of evaluating additional restriction
          * clauses that are to be applied at the join.  (This is pessimistic since
-        * not all of the quals may get evaluated at each tuple.)  This work is
-        * skipped in JOIN_IN mode, so apply the factor.
+        * not all of the quals may get evaluated at each tuple.)
          */
         startup_cost += qp_qual_cost.startup;
         cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
-       run_cost += cpu_per_tuple * mergejointuples * joininfactor;
+       run_cost += cpu_per_tuple * mergejointuples;
  
         path->jpath.path.startup_cost = startup_cost;
         path->jpath.path.total_cost = startup_cost + run_cost;
@@ -1711,7 +1695,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         Cost            startup_cost = 0;
         Cost            run_cost = 0;
         Cost            cpu_per_tuple;
-       Selectivity hash_selec;
         QualCost        hash_qual_cost;
         QualCost        qp_qual_cost;
         double          hashjointuples;
@@ -1722,28 +1705,27 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         int                     numbatches;
         double          virtualbuckets;
         Selectivity innerbucketsize;
-       Selectivity joininfactor;
         ListCell   *hcl;
  
         if (!enable_hashjoin)
                 startup_cost += disable_cost;
  
         /*
-        * Compute cost and selectivity of the hashquals and qpquals (other
-        * restriction clauses) separately.  We use approx_selectivity here for
-        * speed --- in most cases, any errors won't affect the result much.
-        *
-        * Note: it's probably bogus to use the normal selectivity calculation
-        * here when either the outer or inner path is a UniquePath.
+        * Compute cost of the hashquals and qpquals (other restriction clauses)
+        * separately.
          */
-       hash_selec = approx_selectivity(root, hashclauses, sjinfo);
         cost_qual_eval(&hash_qual_cost, hashclauses, root);
         cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
         qp_qual_cost.startup -= hash_qual_cost.startup;
         qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
  
-       /* approx # tuples passing the hash quals */
-       hashjointuples = clamp_row_est(hash_selec * outer_path_rows * inner_path_rows);
+       /*
+        * Get approx # tuples passing the hashquals.  We use approx_tuple_count
+        * here for speed --- in most cases, any errors won't affect the result
+        * much.
+        */
+       hashjointuples = approx_tuple_count(root, &path->jpath,
+                                                                               hashclauses, sjinfo);
  
         /* cost of source data */
         startup_cost += outer_path->startup_cost;
@@ -1858,15 +1840,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
  
         /* CPU costs */
  
-       /*
-        * If we're doing JOIN_IN then we will stop comparing inner tuples to an
-        * outer tuple as soon as we have one match.  Account for the effects of
-        * this by scaling down the cost estimates in proportion to the expected
-        * output size.  (This assumes that all the quals attached to the join are
-        * IN quals, which should be true.)
-        */
-       joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
-
         /*
          * The number of tuple comparisons needed is the number of outer tuples
          * times the typical number of tuples in a hash bucket, which is the inner
@@ -1878,8 +1851,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
          */
         startup_cost += hash_qual_cost.startup;
         run_cost += hash_qual_cost.per_tuple *
-               outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
-               joininfactor * 0.5;
+               outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
  
         /*
          * For each tuple that gets through the hashjoin proper, we charge
@@ -1889,7 +1861,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
          */
         startup_cost += qp_qual_cost.startup;
         cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
-       run_cost += cpu_per_tuple * hashjointuples * joininfactor;
+       run_cost += cpu_per_tuple * hashjointuples;
  
         path->jpath.path.startup_cost = startup_cost;
         path->jpath.path.total_cost = startup_cost + run_cost;
@@ -2213,13 +2185,12 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
  
  
  /*
- * approx_selectivity
- *             Quick-and-dirty estimation of clause selectivities.
- *             The input can be either an implicitly-ANDed list of boolean
- *             expressions, or a list of RestrictInfo nodes (typically the latter).
+ * approx_tuple_count
+ *             Quick-and-dirty estimation of the number of join rows passing
+ *             a set of qual conditions.
   *
- * Currently this is only used in join estimation, so sjinfo should never
- * be NULL.
+ * The quals can be either an implicitly-ANDed list of boolean expressions,
+ * or a list of RestrictInfo nodes (typically the latter).
   *
   * This is quick-and-dirty because we bypass clauselist_selectivity, and
   * simply multiply the independent clause selectivities together.  Now
@@ -2232,20 +2203,34 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
   * output tuples are generated and passed through qpqual checking, it
   * seems OK to live with the approximation.
   */
-static Selectivity
-approx_selectivity(PlannerInfo *root, List *quals, SpecialJoinInfo *sjinfo)
+static double
+approx_tuple_count(PlannerInfo *root, JoinPath *path,
+                                  List *quals, SpecialJoinInfo *sjinfo)
  {
-       Selectivity total = 1.0;
+       double          tuples;
+       double          outer_tuples = path->outerjoinpath->parent->rows;
+       double          inner_tuples = path->innerjoinpath->parent->rows;
+       Selectivity selec = 1.0;
         ListCell   *l;
  
+       /* Get the approximate selectivity */
         foreach(l, quals)
         {
                 Node       *qual = (Node *) lfirst(l);
  
                 /* Note that clause_selectivity will be able to cache its result */
-               total *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
+               selec *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
         }
-       return total;
+
+       /* Apply it correctly using the input relation sizes */
+       if (sjinfo->jointype == JOIN_SEMI)
+               tuples = selec * outer_tuples;
+       else if (sjinfo->jointype == JOIN_ANTI)
+               tuples = (1.0 - selec) * outer_tuples;
+       else
+               tuples = selec * outer_tuples * inner_tuples;
+
+       return clamp_row_est(tuples);
  }
  
  
@@ -2315,7 +2300,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
         Selectivity jselec;
         Selectivity pselec;
         double          nrows;
-       UniquePath *upath;
  
         /*
          * Compute joinclause selectivity.      Note that we are only considering
@@ -2380,9 +2364,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
          * pushed-down quals are applied after the outer join, so their
          * selectivity applies fully.
          *
-        * For JOIN_IN and variants, the Cartesian product is figured with respect
-        * to a unique-ified input, and then we can clamp to the size of the other
-        * input.
+        * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction
+        * of LHS rows that have matches, and we apply that straightforwardly.
          */
         switch (jointype)
         {
@@ -2404,22 +2387,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                         nrows *= pselec;
                         break;
                 case JOIN_SEMI:
-                       /* XXX this is unsafe, could Assert? */
-                       upath = create_unique_path(root, inner_rel,
-                                                                          inner_rel->cheapest_total_path,
-                                                                          sjinfo);
-                       if (upath)
-                               nrows = outer_rel->rows * upath->rows * jselec;
-                       else
-                               nrows = outer_rel->rows * inner_rel->rows * jselec;
-                       if (nrows > outer_rel->rows)
-                               nrows = outer_rel->rows;
+                       nrows = outer_rel->rows * jselec;
+                       nrows *= pselec;
                         break;
                 case JOIN_ANTI:
-                       /* XXX this is utterly wrong */
-                       nrows = outer_rel->rows * inner_rel->rows * jselec;
-                       if (nrows < outer_rel->rows)
-                               nrows = outer_rel->rows;
+                       nrows = outer_rel->rows * (1.0 - jselec);
                         nrows *= pselec;
                         break;
                 default:
@@ -2432,67 +2404,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
         rel->rows = clamp_row_est(nrows);
  }
  
-/*
- * join_in_selectivity
- *       Determines the factor by which a JOIN_IN join's result is expected
- *       to be smaller than an ordinary inner join.
- *
- * 'path' is already filled in except for the cost fields
- * 'sjinfo' must be the JOIN_SEMI SpecialJoinInfo for the join
- */
-static Selectivity
-join_in_selectivity(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
-{
-       RelOptInfo *innerrel;
-       UniquePath *innerunique;
-       Selectivity selec;
-       double          nrows;
-
-       /* Return 1.0 whenever it's not JOIN_IN */
-       if (path->jointype != JOIN_SEMI)
-               return 1.0;
-       Assert(sjinfo && sjinfo->jointype == JOIN_SEMI);
-
-       /*
-        * Return 1.0 if the inner side is already known unique.  The case where
-        * the inner path is already a UniquePath probably cannot happen in
-        * current usage, but check it anyway for completeness.  The interesting
-        * case is where we've determined the inner relation itself is unique,
-        * which we can check by looking at the rows estimate for its UniquePath.
-        */
-       if (IsA(path->innerjoinpath, UniquePath))
-               return 1.0;
-       innerrel = path->innerjoinpath->parent;
-       /* XXX might assert if sjinfo doesn't exactly match innerrel? */
-       innerunique = create_unique_path(root,
-                                                                        innerrel,
-                                                                        innerrel->cheapest_total_path,
-                                                                        sjinfo);
-       if (innerunique && innerunique->rows >= innerrel->rows)
-               return 1.0;
-
-       /*
-        * Compute same result set_joinrel_size_estimates would compute for
-        * JOIN_INNER.  Note that we use the input rels' absolute size estimates,
-        * not PATH_ROWS() which might be less; if we used PATH_ROWS() we'd be
-        * double-counting the effects of any join clauses used in input scans.
-        */
-       selec = clauselist_selectivity(root,
-                                                                  path->joinrestrictinfo,
-                                                                  0,
-                                                                  JOIN_INNER,
-                                                                  NULL);
-       nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec;
-
-       nrows = clamp_row_est(nrows);
-
-       /* See if it's larger than the actual JOIN_IN size estimate */
-       if (nrows > path->path.parent->rows)
-               return path->path.parent->rows / nrows;
-       else
-               return 1.0;
-}
-
  /*
   * set_function_size_estimates
   *             Set the size estimates for a base relation that is a function call.
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c

index cb9a07b77d53915c55a10e0d1bd1ec793da41970..30e841b487d3b6ff2b9671ed59106b54477aaccc 100644 (file)
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.148 2008/07/13 20:45:47 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.149 2008/08/16 00:01:36 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -830,7 +830,8 @@ Selectivity
  join_selectivity(PlannerInfo *root,
                                  Oid operator,
                                  List *args,
-                                JoinType jointype)
+                                JoinType jointype,
+                                SpecialJoinInfo *sjinfo)
  {
         RegProcedure oprjoin = get_oprjoin(operator);
         float8          result;
@@ -842,11 +843,12 @@ join_selectivity(PlannerInfo *root,
         if (!oprjoin)
                 return (Selectivity) 0.5;
  
-       result = DatumGetFloat8(OidFunctionCall4(oprjoin,
+       result = DatumGetFloat8(OidFunctionCall5(oprjoin,
                                                                                          PointerGetDatum(root),
                                                                                          ObjectIdGetDatum(operator),
                                                                                          PointerGetDatum(args),
-                                                                                        Int16GetDatum(jointype)));
+                                                                                        Int16GetDatum(jointype),
+                                                                                        PointerGetDatum(sjinfo)));
  
         if (result < 0.0 || result > 1.0)
                 elog(ERROR, "invalid join selectivity: %f", result);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 7d5609d447ee48568d3881ac84c8e1326df52301..d484221232c4771c1d3d1a6b0f0ff07728e88605 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.251 2008/08/14 18:47:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.252 2008/08/16 00:01:36 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -55,19 +55,34 @@
   *
   *             float8 oprrest (internal, oid, internal, int4);
   *
+ * The result is a selectivity, that is, a fraction (0 to 1) of the rows
+ * of the relation that are expected to produce a TRUE result for the
+ * given operator.
+ *
   * The call convention for a join estimator (oprjoin function) is similar
- * except that varRelid is not needed, and instead the join type is
+ * except that varRelid is not needed, and instead join information is
   * supplied:
   *
   *             Selectivity oprjoin (PlannerInfo *root,
   *                                                      Oid operator,
   *                                                      List *args,
- *                                                      JoinType jointype);
+ *                                                      JoinType jointype,
+ *                                                      SpecialJoinInfo *sjinfo);
+ *
+ *             float8 oprjoin (internal, oid, internal, int2, internal);
   *
- *             float8 oprjoin (internal, oid, internal, int2);
+ * (Before Postgres 8.4, join estimators had only the first four of these
+ * parameters.  That signature is still allowed, but deprecated.)  The
+ * relationship between jointype and sjinfo is explained in the comments for
+ * clause_selectivity() --- the short version is that jointype is usually
+ * best ignored in favor of examining sjinfo.
   *
- * (We deliberately make the SQL signature different to facilitate
- * catching errors.)
+ * Join selectivity for regular inner and outer joins is defined as the
+ * fraction (0 to 1) of the cross product of the relations that is expected
+ * to produce a TRUE result for the given operator.  For both semi and anti
+ * joins, however, the selectivity is defined as the fraction of the left-hand
+ * side relation's rows that are expected to have a match (ie, at least one
+ * row with a TRUE result) in the right-hand side.
   *----------
   */
  
@@ -113,6 +128,10 @@ static double var_eq_non_const(VariableStatData *vardata, Oid operator,
  static double ineq_histogram_selectivity(VariableStatData *vardata,
                                                    FmgrInfo *opproc, bool isgt,
                                                    Datum constval, Oid consttype);
+static double eqjoinsel_inner(Oid operator,
+                               VariableStatData *vardata1, VariableStatData *vardata2);
+static double eqjoinsel_semi(Oid operator,
+                               VariableStatData *vardata1, VariableStatData *vardata2);
  static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                   Datum lobound, Datum hibound, Oid boundstypid,
                                   double *scaledlobound, double *scaledhibound);
@@ -1579,7 +1598,6 @@ scalararraysel(PlannerInfo *root,
         Oid                     nominal_element_type;
         RegProcedure oprsel;
         FmgrInfo        oprselproc;
-       Datum           selarg4;
         Selectivity s1;
  
         /*
@@ -1587,15 +1605,9 @@ scalararraysel(PlannerInfo *root,
          * it hasn't got one.
          */
         if (is_join_clause)
-       {
                 oprsel = get_oprjoin(operator);
-               selarg4 = Int16GetDatum(jointype);
-       }
         else
-       {
                 oprsel = get_oprrest(operator);
-               selarg4 = Int32GetDatum(varRelid);
-       }
         if (!oprsel)
                 return (Selectivity) 0.5;
         fmgr_info(oprsel, &oprselproc);
@@ -1660,11 +1672,19 @@ scalararraysel(PlannerInfo *root,
                                                                                 elem_values[i],
                                                                                 elem_nulls[i],
                                                                                 elmbyval));
-                       s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                         PointerGetDatum(root),
-                                                                                         ObjectIdGetDatum(operator),
-                                                                                         PointerGetDatum(args),
-                                                                                         selarg4));
+                       if (is_join_clause)
+                               s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int16GetDatum(jointype),
+                                                                                                 PointerGetDatum(sjinfo)));
+                       else
+                               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int32GetDatum(varRelid)));
                         if (useOr)
                                 s1 = s1 + s2 - s1 * s2;
                         else
@@ -1694,11 +1714,19 @@ scalararraysel(PlannerInfo *root,
                          * estimation function would really care ...
                          */
                         args = list_make2(leftop, elem);
-                       s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                         PointerGetDatum(root),
-                                                                                         ObjectIdGetDatum(operator),
-                                                                                         PointerGetDatum(args),
-                                                                                         selarg4));
+                       if (is_join_clause)
+                               s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int16GetDatum(jointype),
+                                                                                                 PointerGetDatum(sjinfo)));
+                       else
+                               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int32GetDatum(varRelid)));
                         if (useOr)
                                 s1 = s1 + s2 - s1 * s2;
                         else
@@ -1721,11 +1749,19 @@ scalararraysel(PlannerInfo *root,
                 dummyexpr->typeId = nominal_element_type;
                 dummyexpr->typeMod = -1;
                 args = list_make2(leftop, dummyexpr);
-               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                 PointerGetDatum(root),
-                                                                                 ObjectIdGetDatum(operator),
-                                                                                 PointerGetDatum(args),
-                                                                                 selarg4));
+               if (is_join_clause)
+                       s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
+                                                                                         PointerGetDatum(root),
+                                                                                         ObjectIdGetDatum(operator),
+                                                                                         PointerGetDatum(args),
+                                                                                         Int16GetDatum(jointype),
+                                                                                         PointerGetDatum(sjinfo)));
+               else
+                       s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
+                                                                                         PointerGetDatum(root),
+                                                                                         ObjectIdGetDatum(operator),
+                                                                                         PointerGetDatum(args),
+                                                                                         Int32GetDatum(varRelid)));
                 s1 = useOr ? 0.0 : 1.0;
  
                 /*
@@ -1803,13 +1839,24 @@ rowcomparesel(PlannerInfo *root,
         /* Build equivalent arg list for single operator */
         opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
  
-       /* Decide if it's a join clause, same as for OpExpr */
+       /*
+        * Decide if it's a join clause.  This should match clausesel.c's
+        * treat_as_join_clause(), except that we intentionally consider only
+        * the leading columns and not the rest of the clause.
+        */
         if (varRelid != 0)
         {
                 /*
-                * If we are considering a nestloop join then all clauses are
-                * restriction clauses, since we are only interested in the one
-                * relation.
+                * Caller is forcing restriction mode (eg, because we are examining
+                * an inner indexscan qual).
+                */
+               is_join_clause = false;
+       }
+       else if (sjinfo == NULL)
+       {
+               /*
+                * It must be a restriction clause, since it's being evaluated at
+                * a scan node.
                  */
                 is_join_clause = false;
         }
@@ -1817,7 +1864,6 @@ rowcomparesel(PlannerInfo *root,
         {
                 /*
                  * Otherwise, it's a join if there's more than one relation used.
-                * Notice we ignore the low-order columns here.
                  */
                 is_join_clause = (NumRelids((Node *) opargs) > 1);
         }
@@ -1827,7 +1873,8 @@ rowcomparesel(PlannerInfo *root,
                 /* Estimate selectivity for a join clause. */
                 s1 = join_selectivity(root, opno,
                                                           opargs,
-                                                         jointype);
+                                                         jointype,
+                                                         sjinfo);
         }
         else
         {
@@ -1849,10 +1896,60 @@ eqjoinsel(PG_FUNCTION_ARGS)
         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
         Oid                     operator = PG_GETARG_OID(1);
         List       *args = (List *) PG_GETARG_POINTER(2);
+#ifdef NOT_USED
         JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
+#endif
+       SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
         double          selec;
         VariableStatData vardata1;
         VariableStatData vardata2;
+       bool            join_is_reversed;
+
+       get_join_variables(root, args, sjinfo,
+                                          &vardata1, &vardata2, &join_is_reversed);
+
+       switch (sjinfo->jointype)
+       {
+               case JOIN_INNER:
+               case JOIN_LEFT:
+               case JOIN_FULL:
+                       selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
+                       break;
+               case JOIN_SEMI:
+               case JOIN_ANTI:
+                       if (!join_is_reversed)
+                               selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
+                       else
+                               selec = eqjoinsel_semi(get_commutator(operator),
+                                                                          &vardata2, &vardata1);
+                       break;
+               default:
+                       /* other values not expected here */
+                       elog(ERROR, "unrecognized join type: %d",
+                                (int) sjinfo->jointype);
+                       selec = 0;                      /* keep compiler quiet */
+                       break;
+       }
+
+       ReleaseVariableStats(vardata1);
+       ReleaseVariableStats(vardata2);
+
+       CLAMP_PROBABILITY(selec);
+
+       PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * eqjoinsel_inner --- eqjoinsel for normal inner join
+ *
+ * We also use this for LEFT/FULL outer joins; it's not presently clear
+ * that it's worth trying to distinguish them here.
+ */
+static double
+eqjoinsel_inner(Oid operator,
+                               VariableStatData *vardata1, VariableStatData *vardata2)
+{
+       double          selec;
         double          nd1;
         double          nd2;
         Form_pg_statistic stats1 = NULL;
@@ -1868,29 +1965,27 @@ eqjoinsel(PG_FUNCTION_ARGS)
         float4     *numbers2 = NULL;
         int                     nnumbers2 = 0;
  
-       get_join_variables(root, args, &vardata1, &vardata2);
-
-       nd1 = get_variable_numdistinct(&vardata1);
-       nd2 = get_variable_numdistinct(&vardata2);
+       nd1 = get_variable_numdistinct(vardata1);
+       nd2 = get_variable_numdistinct(vardata2);
  
-       if (HeapTupleIsValid(vardata1.statsTuple))
+       if (HeapTupleIsValid(vardata1->statsTuple))
         {
-               stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
-               have_mcvs1 = get_attstatsslot(vardata1.statsTuple,
-                                                                         vardata1.atttype,
-                                                                         vardata1.atttypmod,
+               stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
+               have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
+                                                                         vardata1->atttype,
+                                                                         vardata1->atttypmod,
                                                                           STATISTIC_KIND_MCV,
                                                                           InvalidOid,
                                                                           &values1, &nvalues1,
                                                                           &numbers1, &nnumbers1);
         }
  
-       if (HeapTupleIsValid(vardata2.statsTuple))
+       if (HeapTupleIsValid(vardata2->statsTuple))
         {
-               stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
-               have_mcvs2 = get_attstatsslot(vardata2.statsTuple,
-                                                                         vardata2.atttype,
-                                                                         vardata2.atttypmod,
+               stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
+               have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
+                                                                         vardata2->atttype,
+                                                                         vardata2->atttypmod,
                                                                           STATISTIC_KIND_MCV,
                                                                           InvalidOid,
                                                                           &values2, &nvalues2,
@@ -1932,25 +2027,6 @@ eqjoinsel(PG_FUNCTION_ARGS)
                 hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
                 hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
  
-               /*
-                * If we are doing any variant of JOIN_SEMI, pretend all the values of
-                * the righthand relation are unique (ie, act as if it's been
-                * DISTINCT'd).
-                *
-                * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or
-                * JOIN_RIGHT here, because we do not have enough information to
-                * determine which var is really on which side of the join. Perhaps
-                * someday we should pass in more information.
-                */
-               if (jointype == JOIN_SEMI)
-               {
-                       float4          oneovern = 1.0 / nd2;
-
-                       for (i = 0; i < nvalues2; i++)
-                               numbers2[i] = oneovern;
-                       nullfrac2 = oneovern;
-               }
-
                 /*
                  * Note we assume that each MCV will match at most one member of the
                  * other MCV list.      If the operator isn't really equality, there could
@@ -2075,18 +2151,170 @@ eqjoinsel(PG_FUNCTION_ARGS)
         }
  
         if (have_mcvs1)
-               free_attstatsslot(vardata1.atttype, values1, nvalues1,
+               free_attstatsslot(vardata1->atttype, values1, nvalues1,
                                                   numbers1, nnumbers1);
         if (have_mcvs2)
-               free_attstatsslot(vardata2.atttype, values2, nvalues2,
+               free_attstatsslot(vardata2->atttype, values2, nvalues2,
                                                   numbers2, nnumbers2);
  
-       ReleaseVariableStats(vardata1);
-       ReleaseVariableStats(vardata2);
+       return selec;
+}
  
-       CLAMP_PROBABILITY(selec);
+/*
+ * eqjoinsel_semi --- eqjoinsel for semi join
+ *
+ * (Also used for anti join, which we are supposed to estimate the same way.)
+ * Caller has ensured that vardata1 is the LHS variable.
+ */
+static double
+eqjoinsel_semi(Oid operator,
+                          VariableStatData *vardata1, VariableStatData *vardata2)
+{
+       double          selec;
+       double          nd1;
+       double          nd2;
+       Form_pg_statistic stats1 = NULL;
+       Form_pg_statistic stats2 = NULL;
+       bool            have_mcvs1 = false;
+       Datum      *values1 = NULL;
+       int                     nvalues1 = 0;
+       float4     *numbers1 = NULL;
+       int                     nnumbers1 = 0;
+       bool            have_mcvs2 = false;
+       Datum      *values2 = NULL;
+       int                     nvalues2 = 0;
+       float4     *numbers2 = NULL;
+       int                     nnumbers2 = 0;
  
-       PG_RETURN_FLOAT8((float8) selec);
+       nd1 = get_variable_numdistinct(vardata1);
+       nd2 = get_variable_numdistinct(vardata2);
+
+       if (HeapTupleIsValid(vardata1->statsTuple))
+       {
+               stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
+               have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
+                                                                         vardata1->atttype,
+                                                                         vardata1->atttypmod,
+                                                                         STATISTIC_KIND_MCV,
+                                                                         InvalidOid,
+                                                                         &values1, &nvalues1,
+                                                                         &numbers1, &nnumbers1);
+       }
+
+       if (HeapTupleIsValid(vardata2->statsTuple))
+       {
+               stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
+               have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
+                                                                         vardata2->atttype,
+                                                                         vardata2->atttypmod,
+                                                                         STATISTIC_KIND_MCV,
+                                                                         InvalidOid,
+                                                                         &values2, &nvalues2,
+                                                                         &numbers2, &nnumbers2);
+       }
+
+       if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
+       {
+               /*
+                * We have most-common-value lists for both relations.  Run through
+                * the lists to see which MCVs actually join to each other with the
+                * given operator.      This allows us to determine the exact join
+                * selectivity for the portion of the relations represented by the MCV
+                * lists.  We still have to estimate for the remaining population, but
+                * in a skewed distribution this gives us a big leg up in accuracy.
+                */
+               FmgrInfo        eqproc;
+               bool       *hasmatch1;
+               bool       *hasmatch2;
+               double          nullfrac1 = stats1->stanullfrac;
+               double          matchfreq1;
+               int                     i,
+                                       nmatches;
+
+               fmgr_info(get_opcode(operator), &eqproc);
+               hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
+               hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+
+               /*
+                * Note we assume that each MCV will match at most one member of the
+                * other MCV list.      If the operator isn't really equality, there could
+                * be multiple matches --- but we don't look for them, both for speed
+                * and because the math wouldn't add up...
+                */
+               nmatches = 0;
+               for (i = 0; i < nvalues1; i++)
+               {
+                       int                     j;
+
+                       for (j = 0; j < nvalues2; j++)
+                       {
+                               if (hasmatch2[j])
+                                       continue;
+                               if (DatumGetBool(FunctionCall2(&eqproc,
+                                                                                          values1[i],
+                                                                                          values2[j])))
+                               {
+                                       hasmatch1[i] = hasmatch2[j] = true;
+                                       nmatches++;
+                                       break;
+                               }
+                       }
+               }
+               /* Sum up frequencies of matched MCVs */
+               matchfreq1 = 0.0;
+               for (i = 0; i < nvalues1; i++)
+               {
+                       if (hasmatch1[i])
+                               matchfreq1 += numbers1[i];
+               }
+               CLAMP_PROBABILITY(matchfreq1);
+               pfree(hasmatch1);
+               pfree(hasmatch2);
+
+               /*
+                * Now we need to estimate the fraction of relation 1 that has at
+                * least one join partner.  We know for certain that the matched
+                * MCVs do, so that gives us a lower bound, but we're really in the
+                * dark about everything else.  Our crude approach is: if nd1 <= nd2
+                * then assume all non-null rel1 rows have join partners, else assume
+                * for the uncertain rows that a fraction nd2/nd1 have join partners.
+                * We can discount the known-matched MCVs from the distinct-values
+                * counts before doing the division.
+                */
+               nd1 -= nmatches;
+               nd2 -= nmatches;
+               if (nd1 <= nd2 || nd2 <= 0)
+                       selec = Max(matchfreq1, 1.0 - nullfrac1);
+               else
+               {
+                       double  uncertain = 1.0 - matchfreq1 - nullfrac1;
+
+                       CLAMP_PROBABILITY(uncertain);
+                       selec = matchfreq1 + (nd2/nd1) * uncertain;
+               }
+       }
+       else
+       {
+               /*
+                * Without MCV lists for both sides, we can only use the heuristic
+                * about nd1 vs nd2.
+                */
+               double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
+
+               if (nd1 <= nd2 || nd2 <= 0)
+                       selec = 1.0 - nullfrac1;
+               else
+                       selec = (nd2/nd1) * (1.0 - nullfrac1);
+       }
+
+       if (have_mcvs1)
+               free_attstatsslot(vardata1->atttype, values1, nvalues1,
+                                                 numbers1, nnumbers1);
+       if (have_mcvs2)
+               free_attstatsslot(vardata2->atttype, values2, nvalues2,
+                                                 numbers2, nnumbers2);
+
+       return selec;
  }
  
  /*
@@ -2099,6 +2327,7 @@ neqjoinsel(PG_FUNCTION_ARGS)
         Oid                     operator = PG_GETARG_OID(1);
         List       *args = (List *) PG_GETARG_POINTER(2);
         JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
+       SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
         Oid                     eqop;
         float8          result;
  
@@ -2109,11 +2338,12 @@ neqjoinsel(PG_FUNCTION_ARGS)
         eqop = get_negator(operator);
         if (eqop)
         {
-               result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel,
+               result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
                                                                                                         PointerGetDatum(root),
                                                                                                         ObjectIdGetDatum(eqop),
                                                                                                         PointerGetDatum(args),
-                                                                                                       Int16GetDatum(jointype)));
+                                                                                                       Int16GetDatum(jointype),
+                                                                                                       PointerGetDatum(sjinfo)));
         }
         else
         {
@@ -3661,10 +3891,17 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
  /*
   * get_join_variables
   *             Apply examine_variable() to each side of a join clause.
+ *             Also, attempt to identify whether the join clause has the same
+ *             or reversed sense compared to the SpecialJoinInfo.
+ *
+ * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
+ * or "reversed" if it is "rhs_var OP lhs_var".  In complicated cases
+ * where we can't tell for sure, we default to assuming it's normal.
   */
  void
-get_join_variables(PlannerInfo *root, List *args,
-                                  VariableStatData *vardata1, VariableStatData *vardata2)
+get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
+                                  VariableStatData *vardata1, VariableStatData *vardata2,
+                                  bool *join_is_reversed)
  {
         Node       *left,
                            *right;
@@ -3677,6 +3914,15 @@ get_join_variables(PlannerInfo *root, List *args,
  
         examine_variable(root, left, 0, vardata1);
         examine_variable(root, right, 0, vardata2);
+
+       if (vardata1->rel &&
+               bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
+               *join_is_reversed = true;                               /* var1 is on RHS */
+       else if (vardata2->rel &&
+                        bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
+               *join_is_reversed = true;                               /* var2 is on LHS */
+       else
+               *join_is_reversed = false;
  }
  
  /*
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index b2345d1364c5118d10a13437873b2dfba1e18260..27501367f527a5b852f9ed8c7a9b72f7b214c9c6 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.477 2008/08/11 13:58:46 heikki Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.478 2008/08/16 00:01:37 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200808111
+#define CATALOG_VERSION_NO     200808141
  
  #endif
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h

index e0f5b48aa4c7608288c1469ec9f570fae2e7343c..2fc91e92e3602c2fc5a7c21316a689d845c9e6c4 100644 (file)
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.161 2008/07/14 00:51:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.162 2008/08/16 00:01:37 tgl Exp $
   *
   * NOTES
   *       the genbki.sh script reads this file and generates .bki
@@ -941,11 +941,11 @@ extern void OperatorCreate(const char *operatorName,
                            Oid operatorNamespace,
                            Oid leftTypeId,
                            Oid rightTypeId,
-                          List *procedureName,
+                          Oid procedureId,
                            List *commutatorName,
                            List *negatorName,
-                          List *restrictionName,
-                          List *joinName,
+                          Oid restrictionId,
+                          Oid joinId,
                            bool canMerge,
                            bool canHash);
  
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index c516af8a164da2e8c652efb33c2715ca81795c78..d00d36e85fd05b85b3aab47b84dd38298e2d4a0b 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.509 2008/07/18 03:32:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.510 2008/08/16 00:01:37 tgl Exp $
   *
   * NOTES
   *       The script catalog/genbki.sh reads this file and generates .bki
@@ -223,13 +223,13 @@ DATA(insert OID = 103 (  scalarltsel         PGNSP PGUID 12 1 0 0 f f t f s 4 701 "22
  DESCR("restriction selectivity of < and related operators on scalar datatypes");
  DATA(insert OID = 104 (  scalargtsel      PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  scalargtsel _null_ _null_ _null_ ));
  DESCR("restriction selectivity of > and related operators on scalar datatypes");
-DATA(insert OID = 105 (  eqjoinsel                PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  eqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 105 (  eqjoinsel                PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     eqjoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of = and related operators");
-DATA(insert OID = 106 (  neqjoinsel               PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  neqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 106 (  neqjoinsel               PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     neqjoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of <> and related operators");
-DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ scalarltjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_    scalarltjoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of < and related operators on scalar datatypes");
-DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_ scalargtjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_    scalargtjoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of > and related operators on scalar datatypes");
  
  DATA(insert OID =  109 (  unknownin               PGNSP PGUID 12 1 0 0 f f t f i 1 705 "2275" _null_ _null_ _null_ unknownin _null_ _null_ _null_ ));
@@ -289,7 +289,7 @@ DATA(insert OID = 138 (  box_center            PGNSP PGUID 12 1 0 0 f f t f i 1 600 "60
  DESCR("center of");
  DATA(insert OID = 139 (  areasel                  PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  areasel _null_ _null_ _null_ ));
  DESCR("restriction selectivity for area-comparison operators");
-DATA(insert OID = 140 (  areajoinsel      PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  areajoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 140 (  areajoinsel      PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     areajoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity for area-comparison operators");
  DATA(insert OID = 141 (  int4mul                  PGNSP PGUID 12 1 0 0 f f t f i 2 23 "23 23" _null_ _null_ _null_ int4mul _null_ _null_ _null_ ));
  DESCR("multiply");
@@ -1630,11 +1630,11 @@ DESCR("current clock time");
  
  DATA(insert OID = 1300 (  positionsel             PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  positionsel _null_ _null_ _null_ ));
  DESCR("restriction selectivity for position-comparison operators");
-DATA(insert OID = 1301 (  positionjoinsel         PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  positionjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1301 (  positionjoinsel         PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     positionjoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity for position-comparison operators");
  DATA(insert OID = 1302 (  contsel                 PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  contsel _null_ _null_ _null_ ));
  DESCR("restriction selectivity for containment comparison operators");
-DATA(insert OID = 1303 (  contjoinsel     PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  contjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1303 (  contjoinsel     PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_     contjoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity for containment comparison operators");
  
  DATA(insert OID = 1304 ( overlaps                       PGNSP PGUID 12 1 0 0 f f f f i 4 16 "1184 1184 1184 1184" _null_ _null_ _null_ overlaps_timestamp _null_ _null_ _null_ ));
@@ -2684,9 +2684,9 @@ DATA(insert OID = 1814 ( iclikesel                        PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281
  DESCR("restriction selectivity of ILIKE");
  DATA(insert OID = 1815 ( icnlikesel                    PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  icnlikesel _null_ _null_ _null_ ));
  DESCR("restriction selectivity of NOT ILIKE");
-DATA(insert OID = 1816 ( iclikejoinsel         PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  iclikejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1816 ( iclikejoinsel         PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  iclikejoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of ILIKE");
-DATA(insert OID = 1817 ( icnlikejoinsel                PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  icnlikejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1817 ( icnlikejoinsel                PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  icnlikejoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of NOT ILIKE");
  DATA(insert OID = 1818 ( regexeqsel                    PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  regexeqsel _null_ _null_ _null_ ));
  DESCR("restriction selectivity of regex match");
@@ -2700,17 +2700,17 @@ DATA(insert OID = 1822 ( nlikesel                       PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 2
  DESCR("restriction selectivity of NOT LIKE");
  DATA(insert OID = 1823 ( icregexnesel          PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_  icregexnesel _null_ _null_ _null_ ));
  DESCR("restriction selectivity of case-insensitive regex non-match");
-DATA(insert OID = 1824 ( regexeqjoinsel                PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  regexeqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1824 ( regexeqjoinsel                PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  regexeqjoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of regex match");
-DATA(insert OID = 1825 ( likejoinsel           PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  likejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1825 ( likejoinsel           PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  likejoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of LIKE");
-DATA(insert OID = 1826 ( icregexeqjoinsel      PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  icregexeqjoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1826 ( icregexeqjoinsel      PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  icregexeqjoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of case-insensitive regex match");
-DATA(insert OID = 1827 ( regexnejoinsel                PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  regexnejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1827 ( regexnejoinsel                PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  regexnejoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of regex non-match");
-DATA(insert OID = 1828 ( nlikejoinsel          PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  nlikejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1828 ( nlikejoinsel          PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  nlikejoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of NOT LIKE");
-DATA(insert OID = 1829 ( icregexnejoinsel      PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 21" _null_ _null_ _null_  icregexnejoinsel _null_ _null_ _null_ ));
+DATA(insert OID = 1829 ( icregexnejoinsel      PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_  icregexnejoinsel _null_ _null_ _null_ ));
  DESCR("join selectivity of case-insensitive regex non-match");
  
  /* Aggregate-related functions */
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h

index 7471d1b44d6a4baad39a11d9bd96dabbd725df77..7963b745f52b5a7492c89d2fd4a7af2a3c514822 100644 (file)
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.50 2008/06/19 00:46:06 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.51 2008/08/16 00:01:38 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -50,6 +50,7 @@ extern Selectivity restriction_selectivity(PlannerInfo *root,
  extern Selectivity join_selectivity(PlannerInfo *root,
                                  Oid operator,
                                  List *args,
-                                JoinType jointype);
+                                JoinType jointype,
+                                SpecialJoinInfo *sjinfo);
  
  #endif   /* PLANCAT_H */
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h

index e94e7e5916582b83da6cfc4ee2f4e85861a7f4e1..120269eee5dcf6a8b8a4c7499d37a136487218fb 100644 (file)
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.45 2008/08/14 18:48:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.46 2008/08/16 00:01:38 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -104,8 +104,10 @@ extern bool get_restriction_variable(PlannerInfo *root, List *args,
                                                  VariableStatData *vardata, Node **other,
                                                  bool *varonleft);
  extern void get_join_variables(PlannerInfo *root, List *args,
+                                  SpecialJoinInfo *sjinfo,
                                    VariableStatData *vardata1,
-                                  VariableStatData *vardata2);
+                                  VariableStatData *vardata2,
+                                  bool *join_is_reversed);
  extern double get_variable_numdistinct(VariableStatData *vardata);
  extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                                 Datum constval, bool varonleft,
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 533bac3ab6705bc6ff5be990b68a3e84f7e4cf11..a21c2ba3744250a84dbc4a175dcf8d095b979125 100644 (file)
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -543,17 +543,20 @@ WHERE p1.oprrest = p2.oid AND
  -- If oprjoin is set, the operator must be a binary boolean op,
  -- and it must link to a proc with the right signature
  -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal, int2)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal)
+-- (Note: the old signature with only 4 args is still allowed, but no core
+-- estimator should be using it.)
  SELECT p1.oid, p1.oprname, p2.oid, p2.proname
  FROM pg_operator AS p1, pg_proc AS p2
  WHERE p1.oprjoin = p2.oid AND
      (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
       p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 4 OR
+     p2.pronargs != 5 OR
       p2.proargtypes[0] != 'internal'::regtype OR
       p2.proargtypes[1] != 'oid'::regtype OR
       p2.proargtypes[2] != 'internal'::regtype OR
-     p2.proargtypes[3] != 'int2'::regtype);
+     p2.proargtypes[3] != 'int2'::regtype OR
+     p2.proargtypes[4] != 'internal'::regtype);
   oid | oprname | oid | proname 
  -----+---------+-----+---------
  (0 rows)
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql

index 5017849830aa612d14a1359eb8c2cd3f9e622f7f..57c2cad95151649f0fe9c0ad2df14061749b27bb 100644 (file)
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -444,18 +444,21 @@ WHERE p1.oprrest = p2.oid AND
  -- If oprjoin is set, the operator must be a binary boolean op,
  -- and it must link to a proc with the right signature
  -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal, int2)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2, internal)
+-- (Note: the old signature with only 4 args is still allowed, but no core
+-- estimator should be using it.)
  
  SELECT p1.oid, p1.oprname, p2.oid, p2.proname
  FROM pg_operator AS p1, pg_proc AS p2
  WHERE p1.oprjoin = p2.oid AND
      (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
       p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 4 OR
+     p2.pronargs != 5 OR
       p2.proargtypes[0] != 'internal'::regtype OR
       p2.proargtypes[1] != 'oid'::regtype OR
       p2.proargtypes[2] != 'internal'::regtype OR
-     p2.proargtypes[3] != 'int2'::regtype);
+     p2.proargtypes[3] != 'int2'::regtype OR
+     p2.proargtypes[4] != 'internal'::regtype);
  
  -- **************** pg_aggregate ****************
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 16 Aug 2008 00:01:38 +0000 (00:01 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 16 Aug 2008 00:01:38 +0000 (00:01 +0000)
src/backend/catalog/pg_operator.c		patch \| blob \| history
src/backend/commands/operatorcmds.c		patch \| blob \| history
src/backend/optimizer/path/clausesel.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/util/plancat.c		patch \| blob \| history
src/backend/utils/adt/selfuncs.c		patch \| blob \| history
src/include/catalog/catversion.h		patch \| blob \| history
src/include/catalog/pg_operator.h		patch \| blob \| history
src/include/catalog/pg_proc.h		patch \| blob \| history
src/include/optimizer/plancat.h		patch \| blob \| history
src/include/utils/selfuncs.h		patch \| blob \| history
src/test/regress/expected/opr_sanity.out		patch \| blob \| history
src/test/regress/sql/opr_sanity.sql		patch \| blob \| history