From 0f0deb71948321efc89cf4e3e8cbd9750cc9e566 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 21 Mar 2018 18:30:46 -0400
Subject: [PATCH] Improve predtest.c's handling of cases with NULL-constant
 inputs.

Currently, if operator_predicate_proof() is given an operator clause like
"something op NULL", it just throws up its hands and reports it can't prove
anything.  But we can often do better than that, if the operator is strict,
because then we know that the clause returns NULL overall.  Depending on
whether we're trying to prove or refute something, and whether we need
weak or strong semantics for NULL, this may be enough to prove the
implication, especially when we rely on the standard rule that "false
implies anything".  In particular, this lets us do something useful with
questions like "does X IN (1,3,5,NULL) imply X <= 5?"  The null entry
in the IN list can effectively be ignored for this purpose, but the
proof rules were not previously smart enough to deduce that.

This patch is by me, but it owes something to previous work by
Amit Langote to try to solve problems of the form mentioned.
Thanks also to Emre Hasegeli and Ashutosh Bapat for review.

Discussion: https://postgr.es/m/3bad48fc-f257-c445-feeb-8a2b2fb622ba@lab.ntt.co.jp
---
 src/backend/optimizer/util/predtest.c         | 59 ++++++++++++++-----
 .../test_predtest/expected/test_predtest.out  |  3 +-
 .../test_predtest/sql/test_predtest.sql       |  1 -
 src/test/regress/expected/inherit.out         |  6 +-
 4 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/src/backend/optimizer/util/predtest.c b/src/backend/optimizer/util/predtest.c
index fb963d0a8f..446207de30 100644
--- a/src/backend/optimizer/util/predtest.c
+++ b/src/backend/optimizer/util/predtest.c
@@ -100,7 +100,7 @@ static Node *extract_not_arg(Node *clause);
 static Node *extract_strong_not_arg(Node *clause);
 static bool clause_is_strict_for(Node *clause, Node *subexpr);
 static bool operator_predicate_proof(Expr *predicate, Node *clause,
-						 bool refute_it);
+						 bool refute_it, bool weak);
 static bool operator_same_subexprs_proof(Oid pred_op, Oid clause_op,
 							 bool refute_it);
 static bool operator_same_subexprs_lookup(Oid pred_op, Oid clause_op,
@@ -1137,7 +1137,7 @@ predicate_implied_by_simple_clause(Expr *predicate, Node *clause,
 	}
 
 	/* Else try operator-related knowledge */
-	return operator_predicate_proof(predicate, clause, false);
+	return operator_predicate_proof(predicate, clause, false, weak);
 }
 
 /*----------
@@ -1232,7 +1232,7 @@ predicate_refuted_by_simple_clause(Expr *predicate, Node *clause,
 	}
 
 	/* Else try operator-related knowledge */
-	return operator_predicate_proof(predicate, clause, true);
+	return operator_predicate_proof(predicate, clause, true, weak);
 }
 
 
@@ -1498,9 +1498,8 @@ static const StrategyNumber BT_refute_table[6][6] = {
  * values, since then the operators aren't being given identical inputs.  But
  * we only support that for btree operators, for which we can assume that all
  * non-null inputs result in non-null outputs, so that it doesn't matter which
- * two non-null constants we consider.  Currently the code below just reports
- * "proof failed" if either constant is NULL, but in some cases we could be
- * smarter (and that likely would require checking strong vs. weak proofs).
+ * two non-null constants we consider.  If either constant is NULL, we have
+ * to think harder, but sometimes the proof still works, as explained below.
  *
  * We can make proofs involving several expression forms (here "foo" and "bar"
  * represent subexpressions that are identical according to equal()):
@@ -1528,7 +1527,8 @@ static const StrategyNumber BT_refute_table[6][6] = {
  * and we dare not make deductions with those.
  */
 static bool
-operator_predicate_proof(Expr *predicate, Node *clause, bool refute_it)
+operator_predicate_proof(Expr *predicate, Node *clause,
+						 bool refute_it, bool weak)
 {
 	OpExpr	   *pred_opexpr,
 			   *clause_opexpr;
@@ -1675,17 +1675,46 @@ operator_predicate_proof(Expr *predicate, Node *clause, bool refute_it)
 	 * We have two identical subexpressions, and two other subexpressions that
 	 * are not identical but are both Consts; and we have commuted the
 	 * operators if necessary so that the Consts are on the right.  We'll need
-	 * to compare the Consts' values.  If either is NULL, fail.
-	 *
-	 * Future work: in some cases the desired proof might hold even with NULL
-	 * constants.  But beware that we've not yet identified the operators as
-	 * btree ops, so for instance it'd be quite unsafe to assume they are
-	 * strict without checking.
+	 * to compare the Consts' values.  If either is NULL, we can't do that, so
+	 * usually the proof fails ... but in some cases we can claim success.
 	 */
-	if (pred_const->constisnull)
-		return false;
 	if (clause_const->constisnull)
+	{
+		/* If clause_op isn't strict, we can't prove anything */
+		if (!op_strict(clause_op))
+			return false;
+
+		/*
+		 * At this point we know that the clause returns NULL.  For proof
+		 * types that assume truth of the clause, this means the proof is
+		 * vacuously true (a/k/a "false implies anything").  That's all proof
+		 * types except weak implication.
+		 */
+		if (!(weak && !refute_it))
+			return true;
+
+		/*
+		 * For weak implication, it's still possible for the proof to succeed,
+		 * if the predicate can also be proven NULL.  In that case we've got
+		 * NULL => NULL which is valid for this proof type.
+		 */
+		if (pred_const->constisnull && op_strict(pred_op))
+			return true;
+		/* Else the proof fails */
+		return false;
+	}
+	if (pred_const->constisnull)
+	{
+		/*
+		 * If the pred_op is strict, we know the predicate yields NULL, which
+		 * means the proof succeeds for either weak implication or weak
+		 * refutation.
+		 */
+		if (weak && op_strict(pred_op))
+			return true;
+		/* Else the proof fails */
 		return false;
+	}
 
 	/*
 	 * Lookup the constant-comparison operator using the system catalogs and
diff --git a/src/test/modules/test_predtest/expected/test_predtest.out b/src/test/modules/test_predtest/expected/test_predtest.out
index 9dd8aecefd..5574e03204 100644
--- a/src/test/modules/test_predtest/expected/test_predtest.out
+++ b/src/test/modules/test_predtest/expected/test_predtest.out
@@ -781,13 +781,12 @@ w_i_holds         | f
 s_r_holds         | f
 w_r_holds         | f
 
--- XXX ideally, we could prove this case too, for strong implication
 select * from test_predtest($$
 select x <= 5, x in (1,3,5,null)
 from integers
 $$);
 -[ RECORD 1 ]-----+--
-strong_implied_by | f
+strong_implied_by | t
 weak_implied_by   | f
 strong_refuted_by | f
 weak_refuted_by   | f
diff --git a/src/test/modules/test_predtest/sql/test_predtest.sql b/src/test/modules/test_predtest/sql/test_predtest.sql
index 298b8bf5ea..2734735843 100644
--- a/src/test/modules/test_predtest/sql/test_predtest.sql
+++ b/src/test/modules/test_predtest/sql/test_predtest.sql
@@ -306,7 +306,6 @@ select x <= 5, x in (1,3,5,7)
 from integers
 $$);
 
--- XXX ideally, we could prove this case too, for strong implication
 select * from test_predtest($$
 select x <= 5, x in (1,3,5,null)
 from integers
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index d768dc0215..f56151fc1e 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1739,11 +1739,7 @@ explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd'
  Append
    ->  Seq Scan on part_ab_cd
          Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
-   ->  Seq Scan on part_ef_gh
-         Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
-   ->  Seq Scan on part_null_xy
-         Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
-(7 rows)
+(3 rows)
 
 explain (costs off) select * from list_parted where a = 'ab';
                 QUERY PLAN                
-- 
2.40.0