From: Tom Lane Date: Fri, 16 Dec 2016 16:50:07 +0000 (-0500) Subject: Improve documentation around TS_execute(). X-Git-Tag: REL_10_BETA1~1244 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=23c75b55aaccddea79545ffaf1cbfc9f1edeaa8c;p=postgresql Improve documentation around TS_execute(). I got frustrated by the lack of commentary in this area, so here is some reverse-engineered documentation, along with minor stylistic cleanup. No code changes more significant than removal of unused variables. Back-patch to 9.6, not because that's useful in itself, but because we have some bugs to fix in phrase search and this would cause merge failures if it's only in HEAD. --- diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index ac1c4d2316..95f61a0854 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -2123,7 +2123,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q) ch.words = &(prs->words[*p]); ch.len = *q - *p + 1; - if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL)) + if (TS_execute(GETQUERY(query), &ch, TS_EXEC_EMPTY, checkcondition_HL)) return true; else { diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index c953f531ff..efc111e379 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -188,7 +188,7 @@ checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *d * information then set recheck flag */ if (val->weight != 0 || data != NULL) - *gcv->need_recheck = true; + *(gcv->need_recheck) = true; /* convert item's number to corresponding entry's (operand's) number */ j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; @@ -289,19 +289,18 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS) bool *recheck = (bool *) PG_GETARG_POINTER(5); bool res = FALSE; - /* The query requires recheck only if it involves weights */ + /* Initially assume query doesn't require recheck */ *recheck = false; if (query->size > 0) { - QueryItem *item; GinChkVal gcv; /* * check-parameter array has one entry for each value (operand) in the * query. */ - gcv.first_item = item = GETQUERY(query); + gcv.first_item = GETQUERY(query); gcv.check = check; gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = recheck; @@ -328,19 +327,18 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) GinTernaryValue res = GIN_FALSE; bool recheck; - /* The query requires recheck only if it involves weights */ + /* Initially assume query doesn't require recheck */ recheck = false; if (query->size > 0) { - QueryItem *item; GinChkVal gcv; /* * check-parameter array has one entry for each value (operand) in the * query. */ - gcv.first_item = item = GETQUERY(query); + gcv.first_item = GETQUERY(query); gcv.check = check; gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = &recheck; diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index c9d5060f2c..36cc10c901 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -1405,20 +1405,26 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) } /* - * Check for phrase condition. Fallback to the AND operation - * if there is no positional information. + * Execute tsquery at or below an OP_PHRASE operator. + * + * This handles the recursion at levels where we need to care about + * match locations. In addition to the same arguments used for TS_execute, + * the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to + * be filled with lexeme match positions on success. data == NULL if no + * match data need be returned. (In practice, outside callers pass NULL, + * and only the internal recursion cases pass a data pointer.) */ static bool -TS_phrase_execute(QueryItem *curitem, - void *checkval, uint32 flags, ExecPhraseData *data, - bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *)) +TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, + ExecPhraseData *data, + TSExecuteCallback chkcond) { /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); if (curitem->type == QI_VAL) { - return chkcond(checkval, (QueryOperand *) curitem, data); + return chkcond(arg, (QueryOperand *) curitem, data); } else { @@ -1432,33 +1438,31 @@ TS_phrase_execute(QueryItem *curitem, Assert(curitem->qoperator.oper == OP_PHRASE); if (!TS_phrase_execute(curitem + curitem->qoperator.left, - checkval, flags, &Ldata, chkcond)) + arg, flags, &Ldata, chkcond)) return false; - if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond)) + if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond)) return false; /* - * if at least one of the operands has no position information, then - * return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we - * return true as it is a AND operation + * If either operand has no position information, then we normally + * return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we + * return true, treating OP_PHRASE as if it were OP_AND. */ if (Ldata.npos == 0 || Rdata.npos == 0) return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false; /* - * Result of the operation is a list of the corresponding positions of - * RIGHT operand. + * Prepare output position array if needed. */ if (data) { + /* + * We can recycle the righthand operand's result array if it was + * palloc'd, else must allocate our own. The number of matches + * couldn't be more than the smaller of the two operands' matches. + */ if (!Rdata.allocated) - - /* - * OP_PHRASE is based on the OP_AND, so the number of - * resulting positions could not be greater than the total - * amount of operands. - */ data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos)); else data->pos = Rdata.pos; @@ -1469,10 +1473,12 @@ TS_phrase_execute(QueryItem *curitem, } /* - * Find matches by distance, WEP_GETPOS() is needed because - * ExecPhraseData->data can point to the tsvector's WordEntryPosVector + * Find matches by distance. WEP_GETPOS() is needed because + * ExecPhraseData->data can point to a tsvector's WordEntryPosVector. + * + * Note that the output positions are those of the matching RIGHT + * operands. */ - Rpos = Rdata.pos; LposStart = Ldata.pos; while (Rpos < Rdata.pos + Rdata.npos) @@ -1505,8 +1511,9 @@ TS_phrase_execute(QueryItem *curitem, else { /* - * We are in the root of the phrase tree and hence we - * don't have to store the resulting positions + * We are at the root of the phrase tree and hence we + * don't have to identify all the match positions. + * Just report success. */ return true; } @@ -1546,42 +1553,45 @@ TS_phrase_execute(QueryItem *curitem, /* * Evaluate tsquery boolean expression. * - * chkcond is a callback function used to evaluate each VAL node in the query. - * checkval can be used to pass information to the callback. TS_execute doesn't - * do anything with it. - * It believes that ordinary operators are always closier to root than phrase - * operator, so, TS_execute() may not take care of lexeme's position at all. + * curitem: current tsquery item (initially, the first one) + * arg: opaque value to pass through to callback function + * flags: bitmask of flag bits shown in ts_utils.h + * chkcond: callback function to check whether a primitive value is present + * + * The logic here deals only with operators above any phrase operator, for + * which we do not need to worry about lexeme positions. As soon as we hit an + * OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry. */ bool -TS_execute(QueryItem *curitem, void *checkval, uint32 flags, - bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data)) +TS_execute(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallback chkcond) { /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); if (curitem->type == QI_VAL) - return chkcond(checkval, (QueryOperand *) curitem, + return chkcond(arg, (QueryOperand *) curitem, NULL /* we don't need position info */ ); switch (curitem->qoperator.oper) { case OP_NOT: if (flags & TS_EXEC_CALC_NOT) - return !TS_execute(curitem + 1, checkval, flags, chkcond); + return !TS_execute(curitem + 1, arg, flags, chkcond); else return true; case OP_AND: - if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond)) - return TS_execute(curitem + 1, checkval, flags, chkcond); + if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond)) + return TS_execute(curitem + 1, arg, flags, chkcond); else return false; case OP_OR: - if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond)) + if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond)) return true; else - return TS_execute(curitem + 1, checkval, flags, chkcond); + return TS_execute(curitem + 1, arg, flags, chkcond); case OP_PHRASE: @@ -1589,7 +1599,7 @@ TS_execute(QueryItem *curitem, void *checkval, uint32 flags, * do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could * do something more if it's called from TS_phrase_execute() */ - return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond); + return TS_phrase_execute(curitem, arg, flags, NULL, chkcond); default: elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); @@ -1684,12 +1694,10 @@ ts_match_vq(PG_FUNCTION_ARGS) chkval.arre = chkval.arrb + val->size; chkval.values = STRPTR(val); chkval.operand = GETOPERAND(query); - result = TS_execute( - GETQUERY(query), + result = TS_execute(GETQUERY(query), &chkval, TS_EXEC_CALC_NOT, - checkcondition_str - ); + checkcondition_str); PG_FREE_IF_COPY(val, 0); PG_FREE_IF_COPY(query, 1); diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index e09a9c636f..1fbd983898 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -12,9 +12,9 @@ #ifndef _PG_TS_UTILS_H_ #define _PG_TS_UTILS_H_ -#include "tsearch/ts_type.h" -#include "tsearch/ts_public.h" #include "nodes/pg_list.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_type.h" /* * Common parse definitions for tsvector and tsquery @@ -102,34 +102,67 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, extern text *generateHeadline(HeadlineParsedText *prs); /* - * Common check function for tsvector @@ tsquery + * TSQuery execution support + * + * TS_execute() executes a tsquery against data that can be represented in + * various forms. The TSExecuteCallback callback function is called to check + * whether a given primitive tsquery value is matched in the data. + */ + +/* + * struct ExecPhraseData is passed to a TSExecuteCallback function if we need + * lexeme position data (because of a phrase-match operator in the tsquery). + * The callback should fill in position data when it returns true (success). + * If it cannot return position data, it may ignore its "data" argument, but + * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_AS_AND flag + * and must arrange for a later recheck with position data available. + * + * The reported lexeme positions must be sorted and unique. Callers must only + * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]). + * This allows the returned "pos" to point directly to the WordEntryPos + * portion of a tsvector value. If "allocated" is true then the pos array + * is palloc'd workspace and caller may free it when done. + * + * All fields of the ExecPhraseData struct are initially zeroed by caller. */ typedef struct ExecPhraseData { - int npos; - bool allocated; - WordEntryPos *pos; + int npos; /* number of positions reported */ + bool allocated; /* pos points to palloc'd data? */ + WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */ } ExecPhraseData; /* - * Evaluates tsquery, flags are followe below + * Signature for TSQuery lexeme check functions + * + * arg: opaque value passed through from caller of TS_execute + * val: lexeme to test for presence of + * data: to be filled with lexeme positions; NULL if position data not needed + * + * Return TRUE if lexeme is present in data, else FALSE */ -extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags, - bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *)); +typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, + ExecPhraseData *data); +/* + * Flag bits for TS_execute + */ #define TS_EXEC_EMPTY (0x00) /* - * if TS_EXEC_CALC_NOT is not set then NOT expression evaluated to be true, - * used in cases where NOT cannot be accurately computed (GiST) or - * it isn't important (ranking) + * If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically + * evaluated to be true. Useful in cases where NOT cannot be accurately + * computed (GiST) or it isn't important (ranking). */ #define TS_EXEC_CALC_NOT (0x01) /* - * Treat OP_PHRASE as OP_AND. Used when posiotional information is not - * accessible, like in consistent methods of GIN/GiST indexes + * Treat OP_PHRASE as OP_AND. Used when positional information is not + * accessible, like in consistent methods of GIN/GiST indexes; rechecking + * must occur later. */ #define TS_EXEC_PHRASE_AS_AND (0x02) +extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallback chkcond); extern bool tsquery_requires_match(QueryItem *curitem); /*