From 6734a1cacd44f5b731933cbc93182b135b167d0c Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Mon, 27 Jun 2016 20:55:24 +0300 Subject: [PATCH] Change predecence of phrase operator. <-> operator now have higher predecence than & (AND) operator. This change was motivated by unexpected difference of similar queries: 'a & b <-> c'::tsquery and 'b <-> c & a'. Before first query means (a & b) <-> c and second one - '(b <-> c) & a', now phrase operator evaluates first. Per suggestion from Tom Lane 32260.1465402409@sss.pgh.pa.us --- src/backend/utils/adt/tsquery.c | 114 +++++++++++------------- src/backend/utils/adt/tsquery_cleanup.c | 17 +++- src/include/tsearch/ts_type.h | 16 +--- src/test/regress/expected/tsdicts.out | 12 +-- src/test/regress/expected/tsearch.out | 18 ++-- src/test/regress/expected/tstypes.out | 78 ++++++++-------- 6 files changed, 121 insertions(+), 134 deletions(-) diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 21a18bfbc4..72e608eb91 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -26,10 +26,10 @@ /* FTS operator priorities, see ts_type.h */ const int tsearch_op_priority[OP_COUNT] = { - 3, /* OP_NOT */ + 4, /* OP_NOT */ 2, /* OP_AND */ 1, /* OP_OR */ - 4 /* OP_PHRASE */ + 3 /* OP_PHRASE */ }; struct TSQueryParserStateData @@ -430,6 +430,40 @@ pushStop(TSQueryParserState state) #define STACKDEPTH 32 +typedef struct OperatorElement { + int8 op; + int16 distance; +} OperatorElement; + +static void +pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance) +{ + if (*lenstack == STACKDEPTH) /* internal error */ + elog(ERROR, "tsquery stack too small"); + + stack[*lenstack].op = op; + stack[*lenstack].distance = distance; + + (*lenstack)++; +} + +static void +cleanOpStack(TSQueryParserState state, + OperatorElement *stack, int *lenstack, int8 op) +{ + int opPriority = OP_PRIORITY(op); + + while(*lenstack) + { + if (opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) + break; + + (*lenstack)--; + pushOperator(state, stack[*lenstack].op, + stack[*lenstack].distance); + } +} + /* * Make polish (prefix) notation of query. * @@ -444,11 +478,7 @@ makepol(TSQueryParserState state, ts_tokentype type; int lenval = 0; char *strval = NULL; - struct - { - int8 op; - int16 distance; - } opstack[STACKDEPTH]; + OperatorElement opstack[STACKDEPTH]; int lenstack = 0; int16 weight = 0; bool prefix; @@ -462,49 +492,16 @@ makepol(TSQueryParserState state, { case PT_VAL: pushval(opaque, state, strval, lenval, weight, prefix); - while (lenstack && (opstack[lenstack - 1].op == OP_AND || - opstack[lenstack - 1].op == OP_PHRASE || - opstack[lenstack - 1].op == OP_NOT)) - { - lenstack--; - pushOperator(state, - opstack[lenstack].op, - opstack[lenstack].distance); - } break; case PT_OPR: - if (lenstack && operator == OP_OR) - pushOperator(state, OP_OR, 0); - else - { - if (lenstack == STACKDEPTH) /* internal error */ - elog(ERROR, "tsquery stack too small"); - opstack[lenstack].op = operator; - opstack[lenstack].distance = weight; - lenstack++; - } + cleanOpStack(state, opstack, &lenstack, operator); + pushOpStack(opstack, &lenstack, operator, weight); break; case PT_OPEN: makepol(state, pushval, opaque); - - while (lenstack && (opstack[lenstack - 1].op == OP_AND || - opstack[lenstack - 1].op == OP_PHRASE || - opstack[lenstack - 1].op == OP_NOT)) - { - lenstack--; - pushOperator(state, - opstack[lenstack].op, - opstack[lenstack].distance); - } break; case PT_CLOSE: - while (lenstack) - { - lenstack--; - pushOperator(state, - opstack[lenstack].op, - opstack[lenstack].distance); - }; + cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */); return; case PT_ERR: default: @@ -514,13 +511,8 @@ makepol(TSQueryParserState state, state->buffer))); } } - while (lenstack) - { - lenstack--; - pushOperator(state, - opstack[lenstack].op, - opstack[lenstack].distance); - } + + cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */); } static void @@ -750,7 +742,7 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ * print it in infix (human-readable) form */ static void -infix(INFIX *in, int parentPriority) +infix(INFIX *in, int parentPriority, bool rightPhraseOp) { /* since this function recurses, it could be driven to stack overflow. */ check_stack_depth(); @@ -819,7 +811,7 @@ infix(INFIX *in, int parentPriority) } else if (in->curpol->qoperator.oper == OP_NOT) { - int priority = PRINT_PRIORITY(in->curpol); + int priority = QO_PRIORITY(in->curpol); if (priority < parentPriority) { @@ -833,7 +825,7 @@ infix(INFIX *in, int parentPriority) *(in->cur) = '\0'; in->curpol++; - infix(in, priority); + infix(in, priority, false); if (priority < parentPriority) { RESIZEBUF(in, 2); @@ -844,17 +836,15 @@ infix(INFIX *in, int parentPriority) else { int8 op = in->curpol->qoperator.oper; - int priority = PRINT_PRIORITY(in->curpol); + int priority = QO_PRIORITY(in->curpol); int16 distance = in->curpol->qoperator.distance; INFIX nrm; bool needParenthesis = false; in->curpol++; if (priority < parentPriority || - (op == OP_PHRASE && - (priority == parentPriority || /* phrases are not - * commutative! */ - parentPriority == OP_PRIORITY(OP_AND)))) + /* phrase operator depends on order */ + (op == OP_PHRASE && rightPhraseOp)) { needParenthesis = true; RESIZEBUF(in, 2); @@ -868,11 +858,11 @@ infix(INFIX *in, int parentPriority) nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); /* get right operand */ - infix(&nrm, priority); + infix(&nrm, priority, (op == OP_PHRASE)); /* get & print left operand */ in->curpol = nrm.curpol; - infix(in, priority); + infix(in, priority, false); /* print operator & right operand */ RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf)); @@ -924,7 +914,7 @@ tsqueryout(PG_FUNCTION_ARGS) nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); *(nrm.cur) = '\0'; nrm.op = GETOPERAND(query); - infix(&nrm, -1 /* lowest priority */ ); + infix(&nrm, -1 /* lowest priority */, false); PG_FREE_IF_COPY(query, 0); PG_RETURN_CSTRING(nrm.buf); @@ -1151,7 +1141,7 @@ tsquerytree(PG_FUNCTION_ARGS) nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); *(nrm.cur) = '\0'; nrm.op = GETOPERAND(query); - infix(&nrm, true); + infix(&nrm, -1, false); res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf); pfree(q); } diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c index 6c74070e45..1a90964ce7 100644 --- a/src/backend/utils/adt/tsquery_cleanup.c +++ b/src/backend/utils/adt/tsquery_cleanup.c @@ -25,11 +25,18 @@ typedef struct NODE QueryItem *valnode; } NODE; -/* Non-operator nodes have fake (but highest) priority */ +/* + * To simplify walking on query tree and pushing down of phrase operator + * we define some fake priority here: phrase operator has highest priority + * of any other operators (and we believe here that OP_PHRASE is a highest + * code of operations) and value node has ever highest priority. + * Priority values of other operations don't matter until they are less than + * phrase operator and value node. + */ +#define VALUE_PRIORITY (OP_COUNT + 1) #define NODE_PRIORITY(x) \ ( ((x)->valnode->qoperator.type == QI_OPR) ? \ - QO_PRIORITY((x)->valnode) : \ - TOP_PRIORITY ) + (x)->valnode->qoperator.oper : VALUE_PRIORITY ) /* * make query tree from plain view of query @@ -416,6 +423,10 @@ normalize_phrase_tree(NODE *node) node->left = normalize_phrase_tree(node->left); node->right = normalize_phrase_tree(node->right); + /* + * if subtree contains only nodes with higher "priority" then + * we are done. See comment near NODE_PRIORITY() + */ if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) && NODE_PRIORITY(node) <= NODE_PRIORITY(node->left)) return node; diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h index 80eb75c14e..d1f4114d21 100644 --- a/src/include/tsearch/ts_type.h +++ b/src/include/tsearch/ts_type.h @@ -217,33 +217,19 @@ typedef struct /* * Legal values for QueryOperator.operator. - * They should be ordered by priority! We assume that phrase - * has highest priority, but this agreement is only - * for query transformation! That's need to simplify - * algorithm of query transformation. */ #define OP_NOT 1 #define OP_AND 2 #define OP_OR 3 -#define OP_PHRASE 4 +#define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */ #define OP_COUNT 4 extern const int tsearch_op_priority[OP_COUNT]; -#define NOT_PHRASE_P 5 /* OP_PHRASE negation operations must have - * greater priority in order to force infix() - * to surround the whole OP_PHRASE expression - * with parentheses. */ - -#define TOP_PRIORITY 6 /* highest priority for val nodes */ - /* get operation priority by its code*/ #define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] ) /* get QueryOperator priority */ #define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper) -/* special case: get QueryOperator priority for correct printing !(a <-> b>) */ -#define PRINT_PRIORITY(x) \ - ( (((QueryOperator *) (x))->oper == OP_NOT) ? NOT_PHRASE_P : QO_PRIORITY(x) ) typedef struct { diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out index 5ddbe80234..c55591a678 100644 --- a/src/test/regress/expected/tsdicts.out +++ b/src/test/regress/expected/tsdicts.out @@ -470,15 +470,15 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); (1 row) SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky'); - to_tsquery ------------------------------------------------------------------------------ - ( 'foot':B <-> 'sky' ) & ( 'ball':B <-> 'sky' ) & ( 'klubber':B <-> 'sky' ) + to_tsquery +----------------------------------------------------------------- + 'foot':B <-> 'sky' & 'ball':B <-> 'sky' & 'klubber':B <-> 'sky' (1 row) SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky'); - phraseto_tsquery ------------------------------------------------------------------------ - ( 'foot' <-> 'sky' ) & ( 'ball' <-> 'sky' ) & ( 'klubber' <-> 'sky' ) + phraseto_tsquery +----------------------------------------------------------- + 'foot' <-> 'sky' & 'ball' <-> 'sky' & 'klubber' <-> 'sky' (1 row) -- Test ispell dictionary with hunspell affix with FLAG long in configuration diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 3a13ad985f..2ec3d1b6ab 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -778,9 +778,9 @@ SELECT to_tsquery('english', 'foo <-> a <-> the <-> bar'); (1 row) SELECT phraseto_tsquery('english', 'PostgreSQL can be extended by the user in many ways'); - phraseto_tsquery ------------------------------------------------------------------------ - ( ( ( 'postgresql' <3> 'extend' ) <3> 'user' ) <2> 'mani' ) <-> 'way' + phraseto_tsquery +----------------------------------------------------------- + 'postgresql' <3> 'extend' <3> 'user' <2> 'mani' <-> 'way' (1 row) SELECT ts_rank_cd(to_tsvector('english', ' @@ -1215,15 +1215,15 @@ SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::t (1 row) SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text ); - ts_rewrite ------------------------ - '1' & ( '2' <2> '3' ) + ts_rewrite +------------------- + '1' & '2' <2> '3' (1 row) SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text ); - ts_rewrite ------------------------------------------------ - ( '5' <-> '1' ) & ( '5' <-> ( '2' <-> '3' ) ) + ts_rewrite +--------------------------------------- + '5' <-> '1' & '5' <-> ( '2' <-> '3' ) (1 row) SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text ); diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out index 781be70736..6705e6900f 100644 --- a/src/test/regress/expected/tstypes.out +++ b/src/test/regress/expected/tstypes.out @@ -350,21 +350,21 @@ SELECT '(a|b) <-> (d|c)'::tsquery; (1 row) SELECT 'a <-> (b&c)'::tsquery; - tsquery ------------------------------------ - ( 'a' <-> 'b' ) & ( 'a' <-> 'c' ) + tsquery +--------------------------- + 'a' <-> 'b' & 'a' <-> 'c' (1 row) SELECT '(a&b) <-> c'::tsquery; - tsquery ------------------------------------ - ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) + tsquery +--------------------------- + 'a' <-> 'c' & 'b' <-> 'c' (1 row) SELECT '(a&b) <-> (d&c)'::tsquery; - tsquery ------------------------------------------------------------------------ - ( 'a' <-> 'd' ) & ( 'b' <-> 'd' ) & ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) + tsquery +------------------------------------------------------- + 'a' <-> 'd' & 'b' <-> 'd' & 'a' <-> 'c' & 'b' <-> 'c' (1 row) SELECT 'a <-> !b'::tsquery; @@ -386,9 +386,9 @@ SELECT '!a <-> !b'::tsquery; (1 row) SELECT 'a <-> !(b&c)'::tsquery; - tsquery ----------------------------------------------- - 'a' & !( ( 'a' <-> 'b' ) & ( 'a' <-> 'c' ) ) + tsquery +-------------------------------------- + 'a' & !( 'a' <-> 'b' & 'a' <-> 'c' ) (1 row) SELECT 'a <-> !(b|c)'::tsquery; @@ -398,9 +398,9 @@ SELECT 'a <-> !(b|c)'::tsquery; (1 row) SELECT '!(a&b) <-> c'::tsquery; - tsquery ----------------------------------------------- - !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) ) & 'c' + tsquery +-------------------------------------- + !( 'a' <-> 'c' & 'b' <-> 'c' ) & 'c' (1 row) SELECT '!(a|b) <-> c'::tsquery; @@ -416,9 +416,9 @@ SELECT '(!a|b) <-> c'::tsquery; (1 row) SELECT '(!a&b) <-> c'::tsquery; - tsquery ------------------------------------------- - !( 'a' <-> 'c' ) & 'c' & ( 'b' <-> 'c' ) + tsquery +-------------------------------------- + !( 'a' <-> 'c' ) & 'c' & 'b' <-> 'c' (1 row) SELECT 'c <-> (!a|b)'::tsquery; @@ -428,9 +428,9 @@ SELECT 'c <-> (!a|b)'::tsquery; (1 row) SELECT 'c <-> (!a&b)'::tsquery; - tsquery ------------------------------------------- - 'c' & !( 'c' <-> 'a' ) & ( 'c' <-> 'b' ) + tsquery +-------------------------------------- + 'c' & !( 'c' <-> 'a' ) & 'c' <-> 'b' (1 row) SELECT '(a|b) <-> !c'::tsquery; @@ -440,9 +440,9 @@ SELECT '(a|b) <-> !c'::tsquery; (1 row) SELECT '(a&b) <-> !c'::tsquery; - tsquery ----------------------------------------------------- - 'a' & 'b' & !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) ) + tsquery +-------------------------------------------- + 'a' & 'b' & !( 'a' <-> 'c' & 'b' <-> 'c' ) (1 row) SELECT '!c <-> (a|b)'::tsquery; @@ -532,33 +532,33 @@ SELECT 'foo & bar'::tsquery && 'asd | fg'; (1 row) SELECT 'a' <-> 'b & d'::tsquery; - ?column? ------------------------------------ - ( 'a' <-> 'b' ) & ( 'a' <-> 'd' ) + ?column? +--------------------------- + 'a' <-> 'b' & 'a' <-> 'd' (1 row) SELECT 'a & g' <-> 'b & d'::tsquery; - ?column? ------------------------------------------------------------------------ - ( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) & ( 'a' <-> 'd' ) & ( 'g' <-> 'd' ) + ?column? +------------------------------------------------------- + 'a' <-> 'b' & 'g' <-> 'b' & 'a' <-> 'd' & 'g' <-> 'd' (1 row) SELECT 'a & g' <-> 'b | d'::tsquery; - ?column? ------------------------------------------------------------------------ - ( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) | ( 'a' <-> 'd' ) & ( 'g' <-> 'd' ) + ?column? +------------------------------------------------------- + 'a' <-> 'b' & 'g' <-> 'b' | 'a' <-> 'd' & 'g' <-> 'd' (1 row) SELECT 'a & g' <-> 'b <-> d'::tsquery; - ?column? ------------------------------------------------------------ - ( 'a' <-> ( 'b' <-> 'd' ) ) & ( 'g' <-> ( 'b' <-> 'd' ) ) + ?column? +--------------------------------------------------- + 'a' <-> ( 'b' <-> 'd' ) & 'g' <-> ( 'b' <-> 'd' ) (1 row) SELECT tsquery_phrase('a <3> g', 'b & d', 10); - tsquery_phrase -------------------------------------------------------------- - ( ( 'a' <3> 'g' ) <10> 'b' ) & ( ( 'a' <3> 'g' ) <10> 'd' ) + tsquery_phrase +--------------------------------------------- + 'a' <3> 'g' <10> 'b' & 'a' <3> 'g' <10> 'd' (1 row) -- tsvector-tsquery operations -- 2.40.0