}
/*
- * Check for phrase condition. Fallback to the AND operation
- * if there is no positional information.
+ * Execute tsquery at or below an OP_PHRASE operator.
+ *
+ * This handles the recursion at levels where we need to care about
+ * match locations. In addition to the same arguments used for TS_execute,
+ * the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to
+ * be filled with lexeme match positions on success. data == NULL if no
+ * match data need be returned. (In practice, outside callers pass NULL,
+ * and only the internal recursion cases pass a data pointer.)
*/
static bool
-TS_phrase_execute(QueryItem *curitem,
- void *checkval, uint32 flags, ExecPhraseData *data,
- bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
+TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
+ ExecPhraseData *data,
+ TSExecuteCallback chkcond)
{
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == QI_VAL)
{
- return chkcond(checkval, (QueryOperand *) curitem, data);
+ return chkcond(arg, (QueryOperand *) curitem, data);
}
else
{
Assert(curitem->qoperator.oper == OP_PHRASE);
if (!TS_phrase_execute(curitem + curitem->qoperator.left,
- checkval, flags, &Ldata, chkcond))
+ arg, flags, &Ldata, chkcond))
return false;
- if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond))
+ if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond))
return false;
/*
- * if at least one of the operands has no position information, then
- * return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
- * return true as it is a AND operation
+ * If either operand has no position information, then we normally
+ * return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
+ * return true, treating OP_PHRASE as if it were OP_AND.
*/
if (Ldata.npos == 0 || Rdata.npos == 0)
return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
/*
- * Result of the operation is a list of the corresponding positions of
- * RIGHT operand.
+ * Prepare output position array if needed.
*/
if (data)
{
+ /*
+ * We can recycle the righthand operand's result array if it was
+ * palloc'd, else must allocate our own. The number of matches
+ * cannot exceed the smaller of the two operands' position counts.
+ */
if (!Rdata.allocated)
-
- /*
- * OP_PHRASE is based on the OP_AND, so the number of
- * resulting positions could not be greater than the total
- * amount of operands.
- */
data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos));
else
data->pos = Rdata.pos;
}
/*
- * Find matches by distance, WEP_GETPOS() is needed because
- * ExecPhraseData->data can point to the tsvector's WordEntryPosVector
+ * Find matches by distance. WEP_GETPOS() is needed because
+ * ExecPhraseData->pos can point to a tsvector's WordEntryPosVector.
+ *
+ * Note that the output positions are those of the matching RIGHT
+ * operands.
*/
-
Rpos = Rdata.pos;
LposStart = Ldata.pos;
while (Rpos < Rdata.pos + Rdata.npos)
else
{
/*
- * We are in the root of the phrase tree and hence we
- * don't have to store the resulting positions
+ * We are at the root of the phrase tree and hence we
+ * don't have to identify all the match positions.
+ * Just report success.
*/
return true;
}
/*
* Evaluate tsquery boolean expression.
*
- * chkcond is a callback function used to evaluate each VAL node in the query.
- * checkval can be used to pass information to the callback. TS_execute doesn't
- * do anything with it.
- * It believes that ordinary operators are always closier to root than phrase
- * operator, so, TS_execute() may not take care of lexeme's position at all.
+ * curitem: current tsquery item (initially, the first one)
+ * arg: opaque value to pass through to callback function
+ * flags: bitmask of flag bits shown in ts_utils.h
+ * chkcond: callback function to check whether a primitive value is present
+ *
+ * The logic here deals only with operators above any phrase operator, for
+ * which we do not need to worry about lexeme positions. As soon as we hit an
+ * OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
*/
bool
-TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
- bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
+TS_execute(QueryItem *curitem, void *arg, uint32 flags,
+ TSExecuteCallback chkcond)
{
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == QI_VAL)
- return chkcond(checkval, (QueryOperand *) curitem,
+ return chkcond(arg, (QueryOperand *) curitem,
NULL /* we don't need position info */ );
switch (curitem->qoperator.oper)
{
case OP_NOT:
if (flags & TS_EXEC_CALC_NOT)
- return !TS_execute(curitem + 1, checkval, flags, chkcond);
+ return !TS_execute(curitem + 1, arg, flags, chkcond);
else
return true;
case OP_AND:
- if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
- return TS_execute(curitem + 1, checkval, flags, chkcond);
+ if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
+ return TS_execute(curitem + 1, arg, flags, chkcond);
else
return false;
case OP_OR:
- if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
+ if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
return true;
else
- return TS_execute(curitem + 1, checkval, flags, chkcond);
+ return TS_execute(curitem + 1, arg, flags, chkcond);
case OP_PHRASE:
* do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could
* do something more if it's called from TS_phrase_execute()
*/
- return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond);
+ return TS_phrase_execute(curitem, arg, flags, NULL, chkcond);
default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
chkval.arre = chkval.arrb + val->size;
chkval.values = STRPTR(val);
chkval.operand = GETOPERAND(query);
- result = TS_execute(
- GETQUERY(query),
+ result = TS_execute(GETQUERY(query),
&chkval,
TS_EXEC_CALC_NOT,
- checkcondition_str
- );
+ checkcondition_str);
PG_FREE_IF_COPY(val, 0);
PG_FREE_IF_COPY(query, 1);
#ifndef _PG_TS_UTILS_H_
#define _PG_TS_UTILS_H_
-#include "tsearch/ts_type.h"
-#include "tsearch/ts_public.h"
#include "nodes/pg_list.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_type.h"
/*
* Common parse definitions for tsvector and tsquery
extern text *generateHeadline(HeadlineParsedText *prs);
/*
- * Common check function for tsvector @@ tsquery
+ * TSQuery execution support
+ *
+ * TS_execute() executes a tsquery against data that can be represented in
+ * various forms. The TSExecuteCallback callback function is called to check
+ * whether a given primitive tsquery value is matched in the data.
+ */
+
+/*
+ * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
+ * lexeme position data (because of a phrase-match operator in the tsquery).
+ * The callback should fill in position data when it returns true (success).
+ * If it cannot return position data, it may ignore its "data" argument, but
+ * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_AS_AND flag
+ * and must arrange for a later recheck with position data available.
+ *
+ * The reported lexeme positions must be sorted and unique. Callers must only
+ * consult the position bits of the pos array, i.e., WEP_GETPOS(data->pos[i]).
+ * This allows the returned "pos" to point directly to the WordEntryPos
+ * portion of a tsvector value. If "allocated" is true then the pos array
+ * is palloc'd workspace and caller may free it when done.
+ *
+ * All fields of the ExecPhraseData struct are initially zeroed by caller.
*/
typedef struct ExecPhraseData
{
- int npos;
- bool allocated;
- WordEntryPos *pos;
+ int npos; /* number of positions reported */
+ bool allocated; /* pos points to palloc'd data? */
+ WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
} ExecPhraseData;
/*
- * Evaluates tsquery, flags are followe below
+ * Signature for TSQuery lexeme check functions
+ *
+ * arg: opaque value passed through from caller of TS_execute
+ * val: lexeme to test for presence of
+ * data: to be filled with lexeme positions; NULL if position data not needed
+ *
+ * Return TRUE if lexeme is present in data, else FALSE
*/
-extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
- bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
+typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
+ ExecPhraseData *data);
+/*
+ * Flag bits for TS_execute
+ */
#define TS_EXEC_EMPTY (0x00)
/*
- * if TS_EXEC_CALC_NOT is not set then NOT expression evaluated to be true,
- * used in cases where NOT cannot be accurately computed (GiST) or
- * it isn't important (ranking)
+ * If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically
+ * evaluated to be true. Useful in cases where NOT cannot be accurately
+ * computed (GiST) or it isn't important (ranking).
*/
#define TS_EXEC_CALC_NOT (0x01)
/*
- * Treat OP_PHRASE as OP_AND. Used when posiotional information is not
- * accessible, like in consistent methods of GIN/GiST indexes
+ * Treat OP_PHRASE as OP_AND. Used when positional information is not
+ * accessible, like in consistent methods of GIN/GiST indexes; rechecking
+ * must occur later.
*/
#define TS_EXEC_PHRASE_AS_AND (0x02)
+extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
+ TSExecuteCallback chkcond);
extern bool tsquery_requires_match(QueryItem *curitem);
/*