From 3dbbd0f02a257d8d5c4cba14726371505f2e7266 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Mon, 27 Jun 2016 20:47:32 +0300 Subject: [PATCH] Do not fallback to AND for FTS phrase operator. If there is no positional information of lexemes then phrase operator will not fallback to AND operator. This change makes needing to modify TS_execute() interface, because somewhere (in indexes, for example) positional information is unaccesible and in this cases we need to force fallback to AND. Per discussion c19fcfec308e6ccd952cdde9e648b505@mail.gmail.com --- src/backend/utils/adt/tsginidx.c | 2 +- src/backend/utils/adt/tsgistidx.c | 6 +++-- src/backend/utils/adt/tsrank.c | 6 +++-- src/backend/utils/adt/tsvector_op.c | 35 +++++++++++++++------------ src/include/tsearch/ts_utils.h | 19 ++++++++++++++- src/test/regress/expected/tsearch.out | 9 ++++--- src/test/regress/sql/tsearch.sql | 3 ++- 7 files changed, 53 insertions(+), 27 deletions(-) diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index b096329143..c953f531ff 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -308,7 +308,7 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS) res = TS_execute(GETQUERY(query), &gcv, - true, + TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND, checkcondition_gin); } diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index cdd5d43fce..6cdfb13f6d 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -361,7 +361,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS) PG_RETURN_BOOL(TS_execute( GETQUERY(query), - (void *) GETSIGN(key), false, + (void *) GETSIGN(key), + TS_EXEC_PHRASE_AS_AND, checkcondition_bit )); } @@ -373,7 +374,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS) chkval.arre = chkval.arrb + ARRNELEM(key); PG_RETURN_BOOL(TS_execute( GETQUERY(query), - (void *) &chkval, true, + (void *) &chkval, + TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT, checkcondition_arr )); } diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index 32023821b3..d887a14d05 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -662,7 +662,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext) { fillQueryRepresentationData(qr, ptr); - if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand)) + if (TS_execute(GETQUERY(qr->query), (void *) qr, + TS_EXEC_EMPTY, checkcondition_QueryOperand)) { if (WEP_GETPOS(ptr->pos) > ext->q) { @@ -691,7 +692,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext) */ fillQueryRepresentationData(qr, ptr); - if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand)) + if (TS_execute(GETQUERY(qr->query), (void *) qr, + TS_EXEC_CALC_NOT, checkcondition_QueryOperand)) { if (WEP_GETPOS(ptr->pos) < ext->p) { diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 04718829a0..242b7e137b 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -1360,7 +1360,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) */ static bool TS_phrase_execute(QueryItem *curitem, - void *checkval, bool calcnot, ExecPhraseData *data, + void *checkval, uint32 flags, ExecPhraseData *data, bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *)) { /* since this function recurses, it could be driven to stack overflow */ @@ -1382,18 +1382,19 @@ TS_phrase_execute(QueryItem *curitem, Assert(curitem->qoperator.oper == OP_PHRASE); if (!TS_phrase_execute(curitem + curitem->qoperator.left, - checkval, calcnot, &Ldata, chkcond)) + checkval, flags, &Ldata, chkcond)) return false; - if (!TS_phrase_execute(curitem + 1, checkval, calcnot, &Rdata, chkcond)) + if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond)) return false; /* * if at least one of the operands has no position information, - * fallback to AND operation. + * then return false. But if TS_EXEC_PHRASE_AS_AND flag is set then + * we return true as it is a AND operation */ if (Ldata.npos == 0 || Rdata.npos == 0) - return true; + return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false; /* * Result of the operation is a list of the corresponding positions of @@ -1498,13 +1499,11 @@ TS_phrase_execute(QueryItem *curitem, * chkcond is a callback function used to evaluate each VAL node in the query. * checkval can be used to pass information to the callback. TS_execute doesn't * do anything with it. - * if calcnot is false, NOT expressions are always evaluated to be true. This - * is used in ranking. * It believes that ordinary operators are always closier to root than phrase * operator, so, TS_execute() may not take care of lexeme's position at all. */ bool -TS_execute(QueryItem *curitem, void *checkval, bool calcnot, +TS_execute(QueryItem *curitem, void *checkval, uint32 flags, bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data)) { /* since this function recurses, it could be driven to stack overflow */ @@ -1517,25 +1516,29 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot, switch (curitem->qoperator.oper) { case OP_NOT: - if (calcnot) - return !TS_execute(curitem + 1, checkval, calcnot, chkcond); + if (flags & TS_EXEC_CALC_NOT) + return !TS_execute(curitem + 1, checkval, flags, chkcond); else return true; case OP_AND: - if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond)) - return TS_execute(curitem + 1, checkval, calcnot, chkcond); + if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond)) + return TS_execute(curitem + 1, checkval, flags, chkcond); else return false; case OP_OR: - if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond)) + if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond)) return true; else - return TS_execute(curitem + 1, checkval, calcnot, chkcond); + return TS_execute(curitem + 1, checkval, flags, chkcond); case OP_PHRASE: - return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond); + /* + * do not check TS_EXEC_PHRASE_AS_AND here because chkcond() + * could do something more if it's called from TS_phrase_execute() + */ + return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond); default: elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); @@ -1633,7 +1636,7 @@ ts_match_vq(PG_FUNCTION_ARGS) result = TS_execute( GETQUERY(query), &chkval, - true, + TS_EXEC_CALC_NOT, checkcondition_str ); diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index e16ddaf72f..e09a9c636f 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -111,8 +111,25 @@ typedef struct ExecPhraseData WordEntryPos *pos; } ExecPhraseData; -extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot, +/* + * Evaluates tsquery, flags are followe below + */ +extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags, bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *)); + +#define TS_EXEC_EMPTY (0x00) +/* + * if TS_EXEC_CALC_NOT is not set then NOT expression evaluated to be true, + * used in cases where NOT cannot be accurately computed (GiST) or + * it isn't important (ranking) + */ +#define TS_EXEC_CALC_NOT (0x01) +/* + * Treat OP_PHRASE as OP_AND. Used when posiotional information is not + * accessible, like in consistent methods of GIN/GiST indexes + */ +#define TS_EXEC_PHRASE_AS_AND (0x02) + extern bool tsquery_requires_match(QueryItem *curitem); /* diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 2c3aa1a293..3a13ad985f 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -1459,13 +1459,14 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts; --check OP_PHRASE on index create temp table phrase_index_test(fts tsvector); -insert into phrase_index_test values('A fat cat has just eaten a rat.'); +insert into phrase_index_test values ('A fat cat has just eaten a rat.'); +insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.')); create index phrase_index_test_idx on phrase_index_test using gin(fts); set enable_seqscan = off; select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat'); - fts -------------------------------------------------- - 'A' 'a' 'cat' 'eaten' 'fat' 'has' 'just' 'rat.' + fts +----------------------------------- + 'cat':3 'eaten':6 'fat':2 'rat':8 (1 row) set enable_seqscan = on; diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index 34b46fa324..5f3d335fc3 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -482,7 +482,8 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts; --check OP_PHRASE on index create temp table phrase_index_test(fts tsvector); -insert into phrase_index_test values('A fat cat has just eaten a rat.'); +insert into phrase_index_test values ('A fat cat has just eaten a rat.'); +insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.')); create index phrase_index_test_idx on phrase_index_test using gin(fts); set enable_seqscan = off; select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat'); -- 2.40.0