List *args = (List *) PG_GETARG_POINTER(2);
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
- Oid eqop;
float8 result;
- /*
- * We want 1 - eqjoinsel() where the equality operator is the one
- * associated with this != operator, that is, its negator.
- */
- eqop = get_negator(operator);
- if (eqop)
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
{
- result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
- PointerGetDatum(root),
- ObjectIdGetDatum(eqop),
- PointerGetDatum(args),
- Int16GetDatum(jointype),
- PointerGetDatum(sjinfo)));
+ /*
+ * For semi-joins, if there is more than one distinct value in the RHS
+ * relation then every non-null LHS row must find a row to join since
+ * it can only be equal to one of them. We'll assume that there is
+ * always more than one distinct RHS value for the sake of stability,
+ * though in theory we could have special cases for empty RHS
+ * (selectivity = 0) and single-distinct-value RHS (selectivity =
+ * fraction of LHS that has the same value as the single RHS value).
+ *
+ * For anti-joins, if we use the same assumption that there is more
+ * than one distinct key in the RHS relation, then every non-null LHS
+ * row must be suppressed by the anti-join.
+ *
+ * So either way, the selectivity estimate should be 1 - nullfrac.
+ */
+ VariableStatData leftvar;
+ VariableStatData rightvar;
+ bool reversed;
+ HeapTuple statsTuple;
+ double nullfrac;
+
+ get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
+ statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
+ if (HeapTupleIsValid(statsTuple))
+ nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
+ else
+ nullfrac = 0.0;
+ ReleaseVariableStats(leftvar);
+ ReleaseVariableStats(rightvar);
+
+ result = 1.0 - nullfrac;
}
else
{
- /* Use default selectivity (should we raise an error instead?) */
- result = DEFAULT_EQ_SEL;
+ /*
+ * We want 1 - eqjoinsel() where the equality operator is the one
+ * associated with this != operator, that is, its negator.
+ */
+ Oid eqop = get_negator(operator);
+
+ if (eqop)
+ {
+ result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(eqop),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
+ }
+ else
+ {
+ /* Use default selectivity (should we raise an error instead?) */
+ result = DEFAULT_EQ_SEL;
+ }
+ result = 1.0 - result;
}
- result = 1.0 - result;
+
PG_RETURN_FLOAT8(result);
}
| 1 | 4 | one | -1
(1 row)
+--
+-- semijoin selectivity for <>
+--
+explain (costs off)
+select * from int4_tbl i4, tenk1 a
+where exists(select * from tenk1 b
+ where a.twothousand = b.twothousand and a.fivethous <> b.fivethous)
+ and i4.f1 = a.tenthous;
+ QUERY PLAN
+----------------------------------------------
+ Hash Semi Join
+ Hash Cond: (a.twothousand = b.twothousand)
+ Join Filter: (a.fivethous <> b.fivethous)
+ -> Hash Join
+ Hash Cond: (a.tenthous = i4.f1)
+ -> Seq Scan on tenk1 a
+ -> Hash
+ -> Seq Scan on int4_tbl i4
+ -> Hash
+ -> Seq Scan on tenk1 b
+(10 rows)
+
--
-- More complicated constructs
--