From 25bfa7efd037a3c44d6a2989d18f55758090e8a9 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Fri, 25 Dec 2015 13:05:13 +0300 Subject: [PATCH] Improve the gin index scan performance in pg_trgm. Previous coding assumes too pessimistic upper bound of similarity in consistent method of GIN. Author: Fornaroli Christophe with comments by me. --- contrib/pg_trgm/trgm_gin.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c index 6a0731d44e..baa4cc70a2 100644 --- a/contrib/pg_trgm/trgm_gin.c +++ b/contrib/pg_trgm/trgm_gin.c @@ -190,11 +190,23 @@ gin_trgm_consistent(PG_FUNCTION_ARGS) if (check[i]) ntrue++; } -#ifdef DIVUNION - res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false); -#else + + /*-------------------- + * If DIVUNION is defined then similarity formula is: + * c / (len1 + len2 - c) + * where c is number of common trigrams and it stands as ntrue in + * this code. Here we don't know value of len2 but we can assume + * that c (ntrue) is a lower bound of len2, so upper bound of + * similarity is: + * c / (len1 + c - c) => c / len1 + * If DIVUNION is not defined then similarity formula is: + * c / max(len1, len2) + * And again, c (ntrue) is a lower bound of len2, but c <= len1 + * just by definition and, consequently, upper bound of + * similarity is just c / len1. + * So, independly on DIVUNION the upper bound formula is the same. + */ res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false); -#endif break; case ILikeStrategyNumber: #ifndef IGNORECASE @@ -267,11 +279,11 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS) if (check[i] != GIN_FALSE) ntrue++; } -#ifdef DIVUNION - res = (nkeys == ntrue) ? GIN_MAYBE : (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE); -#else + + /* + * See comment in gin_trgm_consistent() about * upper bound formula + */ res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE); -#endif break; case ILikeStrategyNumber: #ifndef IGNORECASE -- 2.40.0