*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.2 2007/09/07 15:09:55 teodor Exp $
*
*-------------------------------------------------------------------------
*/
/*
- * This function is used for morph parsing
+ * This function is used for morph parsing.
+ *
+ * The value is passed to parsetext which will call the right dictionary to
+ * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
+ * to the stack.
+ *
+ * All words belonging to the same variant are pushed as an ANDed list,
+ * and different variants are ORred together.
*/
static void
-pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight)
+pushval_morph(void *opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
{
int4 count = 0;
ParsedText prs;
cntvar = 0,
cntpos = 0,
cnt = 0;
+ Oid cfg_id = (Oid) opaque; /* the input is actually an Oid, not a pointer */
prs.lenwords = 4;
prs.curwords = 0;
prs.pos = 0;
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
- parsetext(state->cfg_id, &prs, strval, lenval);
+ parsetext(cfg_id, &prs, strval, lenval);
if (prs.curwords > 0)
{
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
{
- pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+ pushValue(state, prs.words[count].word, prs.words[count].len, weight);
pfree(prs.words[count].word);
if (cnt)
- pushquery(state, OPR, (int4) '&', 0, 0, 0);
+ pushOperator(state, OP_AND);
cnt++;
count++;
}
if (cntvar)
- pushquery(state, OPR, (int4) '|', 0, 0, 0);
+ pushOperator(state, OP_OR);
cntvar++;
}
if (cntpos)
- pushquery(state, OPR, (int4) '&', 0, 0, 0);
+ pushOperator(state, OP_AND);
cntpos++;
}
}
else
- pushval_asis(state, VALSTOP, NULL, 0, 0);
+ pushStop(state);
}
Datum
QueryItem *res;
int4 len;
- query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false);
+ query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *) cfgid, false);
if (query->size == 0)
PG_RETURN_TSQUERY(query);
QueryItem *res;
int4 len;
- query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true);
+ query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *)cfgid, true);
if (query->size == 0)
PG_RETURN_TSQUERY(query);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.2 2007/08/25 00:03:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
*
*-------------------------------------------------------------------------
*/
}
/*
- * Parse string and lexize words
+ * Parse string and lexize words.
+ *
+ * prs will be filled in.
*/
void
-parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
+parsetext(Oid cfgId, ParsedText * prs, char *buf, int buflen)
{
int type,
lenlemm;
* Headline framework
*/
static void
-hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type)
+hladdword(HeadlineParsedText * prs, char *buf, int buflen, int type)
{
while (prs->curwords >= prs->lenwords)
{
word = &(prs->words[prs->curwords - 1]);
for (i = 0; i < query->size; i++)
{
- if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
+ if (item->type == QI_VAL &&
+ item->operand.length == buflen &&
+ strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
{
if (word->item)
{
memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
- prs->words[prs->curwords].item = item;
+ prs->words[prs->curwords].item = &item->operand;
prs->words[prs->curwords].repeated = 1;
prs->curwords++;
}
else
- word->item = item;
+ word->item = &item->operand;
}
item++;
}
}
void
-hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen)
+hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
{
int type,
lenlemm;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.2 2007/08/22 01:39:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
*
*-------------------------------------------------------------------------
*/
} hlCheck;
static bool
-checkcondition_HL(void *checkval, QueryItem * val)
+checkcondition_HL(void *checkval, QueryOperand * val)
{
int i;
for (j = 0; j < query->size; j++)
{
- if (item->type != VAL)
+ if (item->type != QI_VAL)
{
item++;
continue;
}
for (i = pos; i < prs->curwords; i++)
{
- if (prs->words[i].item == item)
+ if (prs->words[i].item == &item->operand)
{
if (i > *q)
*q = i;
item = GETQUERY(query);
for (j = 0; j < query->size; j++)
{
- if (item->type != VAL)
+ if (item->type != QI_VAL)
{
item++;
continue;
}
for (i = *q; i >= pos; i--)
{
- if (prs->words[i].item == item)
+ if (prs->words[i].item == &item->operand)
{
if (i < *p)
*p = i;
#
# Makefile for utils/adt
#
-# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.66 2007/08/27 01:39:24 tgl Exp $
+# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.67 2007/09/07 15:09:56 teodor Exp $
#
subdir = src/backend/utils/adt
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
- tsvector.o tsvector_op.o \
+ tsvector.o tsvector_op.o tsvector_parser.o\
uuid.o xml.o
like.o: like.c like_match.c
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
- if (item[i].type == VAL)
+ if (item[i].type == QI_VAL)
(*nentries)++;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
for (i = 0; i < query->size; i++)
- if (item[i].type == VAL)
+ if (item[i].type == QI_VAL)
{
text *txt;
+ QueryOperand *val = &item[i].operand;
- txt = (text *) palloc(VARHDRSZ + item[i].length);
+ txt = (text *) palloc(VARHDRSZ + val->length);
- SET_VARSIZE(txt, VARHDRSZ + item[i].length);
- memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
+ SET_VARSIZE(txt, VARHDRSZ + val->length);
+ memcpy(VARDATA(txt), GETOPERAND(query) + val->distance, val->length);
entries[j++] = PointerGetDatum(txt);
- if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
+ if (strategy != TSearchWithClassStrategyNumber && val->weight != 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("@@ operator does not support lexeme class restrictions"),
} GinChkVal;
static bool
-checkcondition_gin(void *checkval, QueryItem * val)
+checkcondition_gin(void *checkval, QueryOperand * val)
{
GinChkVal *gcv = (GinChkVal *) checkval;
- return gcv->mapped_check[val - gcv->frst];
+ return gcv->mapped_check[((QueryItem *) val) - gcv->frst];
}
Datum
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
for (i = 0; i < query->size; i++)
- if (item[i].type == VAL)
+ if (item[i].type == QI_VAL)
gcv.mapped_check[i] = check[j++];
res = TS_execute(
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.2 2007/08/21 06:34:42 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
* is there value 'val' in array or not ?
*/
static bool
-checkcondition_arr(void *checkval, QueryItem * val)
+checkcondition_arr(void *checkval, QueryOperand * val)
{
int4 *StopLow = ((CHKVAL *) checkval)->arrb;
int4 *StopHigh = ((CHKVAL *) checkval)->arre;
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
- if (*StopMiddle == val->val)
+ if (*StopMiddle == val->valcrc)
return (true);
- else if (*StopMiddle < val->val)
+ else if (*StopMiddle < val->valcrc)
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
static bool
-checkcondition_bit(void *checkval, QueryItem * val)
+checkcondition_bit(void *checkval, QueryOperand * val)
{
- return GETBIT(checkval, HASHVAL(val->val));
+ return GETBIT(checkval, HASHVAL(val->valcrc));
}
Datum
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "utils/pg_crc.h"
+struct TSQueryParserStateData
+{
+ /* State for gettoken_query */
+ char *buffer; /* entire string we are scanning */
+ char *buf; /* current scan point */
+ int state; /* current parser state, one of the WAIT* values */
+ int count; /* nesting count, incremented by (,
+ decremented by ) */
+
+ /* polish (prefix) notation in list, filled in by push* functions */
+ List *polstr; /* each push* call prepends its new item with lcons */
+
+ /* Strings from operands are collected in op. curop is a pointer to
+ * the end of used space of op. */
+ char *op; /* start of the operand string buffer */
+ char *curop; /* where the next operand string will be copied */
+ int lenop; /* allocated size of op */
+ int sumlen; /* used size of op */
+
+ /* state for value's parser */
+ TSVectorParseState valstate; /* passed to gettoken_tsvector to scan operands */
+};
+
/* parser's states */
#define WAITOPERAND 1
#define WAITOPERATOR 2
#define WAITSINGLEOPERAND 4
/*
- * node of query tree, also used
- * for storing polish notation in parser
+ * subroutine to parse the weight part, like ':1AB' of a query.
*/
-typedef struct ParseQueryNode
-{
- int2 weight;
- int2 type;
- int4 val;
- int2 distance;
- int2 length;
- struct ParseQueryNode *next;
-} ParseQueryNode;
-
static char *
-get_weight(char *buf, int2 *weight)
+get_weight(char *buf, int16 *weight)
{
*weight = 0;
return buf;
}
+/*
+ * token types for parsing
+ *
+ * These are the values gettoken_query returns to describe the token it has
+ * just scanned from the query string.
+ *
+ * NOTE(review): the comma after the last enumerator (PT_CLOSE) is not
+ * accepted by strict C89 compilers -- confirm the minimum C standard this
+ * file must build with.
+ */
+typedef enum {
+ PT_END = 0, /* no more tokens */
+ PT_ERR = 1, /* syntax error, e.g. unbalanced parentheses */
+ PT_VAL = 2, /* an operand (value) */
+ PT_OPR = 3, /* an operator: !, & or | */
+ PT_OPEN = 4, /* opening parenthesis */
+ PT_CLOSE = 5, /* closing parenthesis */
+} ts_tokentype;
+
/*
* get token from query string
+ *
+ * *operator is filled in with OP_* when the return value is PT_OPR
+ * *strval, *lenval and *weight are filled in when return value is PT_VAL
*/
-static int4
-gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+static ts_tokentype
+gettoken_query(TSQueryParserState state,
+ int8 *operator,
+ int *lenval, char **strval, int16 *weight)
{
while (1)
{
{
(state->buf)++; /* can safely ++, t_iseq guarantee
* that pg_mblen()==1 */
- *val = (int4) '!';
+ *operator = OP_NOT;
state->state = WAITOPERAND;
- return OPR;
+ return PT_OPR;
}
else if (t_iseq(state->buf, '('))
{
state->count++;
(state->buf)++;
state->state = WAITOPERAND;
- return OPEN;
+ return PT_OPEN;
}
else if (t_iseq(state->buf, ':'))
{
}
else if (!t_isspace(state->buf))
{
- state->valstate.prsbuf = state->buf;
- if (gettoken_tsvector(&(state->valstate)))
+ /* We rely on the tsvector parser to parse the value for us */
+ reset_tsvector_parser(state->valstate, state->buf);
+ if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
{
- *strval = state->valstate.word;
- *lenval = state->valstate.curpos - state->valstate.word;
- state->buf = get_weight(state->valstate.prsbuf, weight);
+ state->buf = get_weight(state->buf, weight);
state->state = WAITOPERATOR;
- return VAL;
+ return PT_VAL;
}
else if (state->state == WAITFIRSTOPERAND)
- return END;
+ return PT_END;
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
}
break;
case WAITOPERATOR:
- if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
+ if (t_iseq(state->buf, '&'))
+ {
+ state->state = WAITOPERAND;
+ *operator = OP_AND;
+ (state->buf)++;
+ return PT_OPR;
+ }
+ if (t_iseq(state->buf, '|'))
{
state->state = WAITOPERAND;
- *val = (int4) *(state->buf);
+ *operator = OP_OR;
(state->buf)++;
- return OPR;
+ return PT_OPR;
}
else if (t_iseq(state->buf, ')'))
{
(state->buf)++;
state->count--;
- return (state->count < 0) ? ERR : CLOSE;
+ return (state->count < 0) ? PT_ERR : PT_CLOSE;
}
else if (*(state->buf) == '\0')
- return (state->count) ? ERR : END;
+ return (state->count) ? PT_ERR : PT_END;
else if (!t_isspace(state->buf))
- return ERR;
+ return PT_ERR;
break;
case WAITSINGLEOPERAND:
if (*(state->buf) == '\0')
- return END;
+ return PT_END;
*strval = state->buf;
*lenval = strlen(state->buf);
state->buf += strlen(state->buf);
state->count++;
- return VAL;
+ return PT_VAL;
default:
- return ERR;
+ return PT_ERR;
break;
}
state->buf += pg_mblen(state->buf);
}
- return END;
+ return PT_END;
}
/*
- * push new one in polish notation reverse view
+ * Push an operator to state->polstr
*/
void
-pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+pushOperator(TSQueryParserState state, int8 oper)
{
- ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));
+ QueryOperator *tmp;
+
+ Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
+
+ tmp = (QueryOperator *) palloc(sizeof(QueryOperator));
+ tmp->type = QI_OPR;
+ tmp->oper = oper;
+ /* left is filled in later with findoprnd */
+
+ state->polstr = lcons(tmp, state->polstr);
+}
+
+static void
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
+{
+ QueryOperand *tmp;
- tmp->weight = weight;
- tmp->type = type;
- tmp->val = val;
if (distance >= MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("operand is too long in tsearch query: \"%s\"",
state->buffer)));
- tmp->distance = distance;
+
+ tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+ tmp->type = QI_VAL;
+ tmp->weight = weight;
+ tmp->valcrc = (int32) valcrc;
tmp->length = lenval;
- tmp->next = state->str;
- state->str = tmp;
- state->num++;
+ tmp->distance = distance;
+
+ state->polstr = lcons(tmp, state->polstr);
}
/*
- * This function is used for tsquery parsing
+ * Push an operand to state->polstr.
+ *
+ * strval must point to a string equal to state->curop. lenval is the length
+ * of the string.
*/
void
-pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
+pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
{
- pg_crc32 c;
+ pg_crc32 valcrc;
if (lenval >= MAXSTRLEN)
ereport(ERROR,
errmsg("word is too long in tsearch query: \"%s\"",
state->buffer)));
- INIT_CRC32(c);
- COMP_CRC32(c, strval, lenval);
- FIN_CRC32(c);
- pushquery(state, type, *(int4 *) &c,
- state->curop - state->op, lenval, weight);
+ INIT_CRC32(valcrc);
+ COMP_CRC32(valcrc, strval, lenval);
+ FIN_CRC32(valcrc);
+ pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
+ /* append the value string to state.op, enlarging buffer if needed first */
while (state->curop - state->op + lenval + 1 >= state->lenop)
{
- int4 tmp = state->curop - state->op;
+ int used = state->curop - state->op;
state->lenop *= 2;
state->op = (char *) repalloc((void *) state->op, state->lenop);
- state->curop = state->op + tmp;
+ state->curop = state->op + used;
}
memcpy((void *) state->curop, (void *) strval, lenval);
state->curop += lenval;
*(state->curop) = '\0';
state->curop++;
state->sumlen += lenval + 1 /* \0 */ ;
- return;
}
+
+/*
+ * Push a stopword placeholder to state->polstr
+ *
+ * Only the type field (QI_VALSTOP) of the new item is set here; the other
+ * QueryOperand fields are not assigned.
+ */
+void
+pushStop(TSQueryParserState state)
+{
+ QueryOperand *tmp;
+
+ tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+ tmp->type = QI_VALSTOP;
+
+ state->polstr = lcons(tmp, state->polstr); /* prepend to the list */
+}
+
+
#define STACKDEPTH 32
/*
- * make polish notation of query
+ * Make polish (prefix) notation of query.
+ *
+ * See parse_tsquery for explanation of pushval.
*/
-static int4
-makepol(TSQueryParserState * state,
- void (*pushval) (TSQueryParserState *, int, char *, int, int2))
+static void
+makepol(TSQueryParserState state,
+ PushFunction pushval,
+ void *opaque)
{
- int4 val = 0,
- type;
- int4 lenval = 0;
+ int8 operator = 0;
+ ts_tokentype type;
+ int lenval = 0;
char *strval = NULL;
- int4 stack[STACKDEPTH];
- int4 lenstack = 0;
- int2 weight = 0;
+ int8 opstack[STACKDEPTH];
+ int lenstack = 0;
+ int16 weight = 0;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
- while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+ while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
{
switch (type)
{
- case VAL:
- pushval(state, VAL, strval, lenval, weight);
- while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
- stack[lenstack - 1] == (int4) '!'))
+ case PT_VAL:
+ pushval(opaque, state, strval, lenval, weight);
+ while (lenstack && (opstack[lenstack - 1] == OP_AND ||
+ opstack[lenstack - 1] == OP_NOT))
{
lenstack--;
- pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ pushOperator(state, opstack[lenstack]);
}
break;
- case OPR:
- if (lenstack && val == (int4) '|')
- pushquery(state, OPR, val, 0, 0, 0);
+ case PT_OPR:
+ if (lenstack && operator == OP_OR)
+ pushOperator(state, OP_OR);
else
{
if (lenstack == STACKDEPTH) /* internal error */
elog(ERROR, "tsquery stack too small");
- stack[lenstack] = val;
+ opstack[lenstack] = operator;
lenstack++;
}
break;
- case OPEN:
- if (makepol(state, pushval) == ERR)
- return ERR;
- if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
- stack[lenstack - 1] == (int4) '!'))
+ case PT_OPEN:
+ makepol(state, pushval, opaque);
+
+ if (lenstack && (opstack[lenstack - 1] == OP_AND ||
+ opstack[lenstack - 1] == OP_NOT))
{
lenstack--;
- pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ pushOperator(state, opstack[lenstack]);
}
break;
- case CLOSE:
+ case PT_CLOSE:
while (lenstack)
{
lenstack--;
- pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ pushOperator(state, opstack[lenstack]);
};
- return END;
- break;
- case ERR:
+ return;
+ case PT_ERR:
default:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsearch query: \"%s\"",
state->buffer)));
- return ERR;
-
}
}
while (lenstack)
{
lenstack--;
- pushquery(state, OPR, stack[lenstack], 0, 0, 0);
- };
- return END;
+ pushOperator(state, opstack[lenstack]);
+ }
}
+/*
+ * Fills in the left-fields previously left unfilled. The input
+ * QueryItems must be in polish (prefix) notation.
+ */
static void
-findoprnd(QueryItem * ptr, int4 *pos)
+findoprnd(QueryItem *ptr, int *pos)
{
- if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
- {
- ptr[*pos].left = 0;
- (*pos)++;
- }
- else if (ptr[*pos].val == (int4) '!')
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (ptr[*pos].type == QI_VAL ||
+ ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
+ * they haven't been cleansed
+ * away yet.
+ */
{
- ptr[*pos].left = 1;
(*pos)++;
- findoprnd(ptr, pos);
}
- else
+ else
{
- QueryItem *curitem = &ptr[*pos];
- int4 tmp = *pos;
+ Assert(ptr[*pos].type == QI_OPR);
- (*pos)++;
- findoprnd(ptr, pos);
- curitem->left = *pos - tmp;
- findoprnd(ptr, pos);
+ if (ptr[*pos].operator.oper == OP_NOT)
+ {
+ ptr[*pos].operator.left = 1;
+ (*pos)++;
+ findoprnd(ptr, pos);
+ }
+ else
+ {
+ QueryOperator *curitem = &ptr[*pos].operator;
+ int tmp = *pos;
+
+ Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
+
+ (*pos)++;
+ findoprnd(ptr, pos);
+ curitem->left = *pos - tmp;
+ findoprnd(ptr, pos);
+ }
}
}
-
/*
- * input
+ * Each value (operand) in the query is passed to pushval. pushval can
+ * transform the simple value to an arbitrarily complex expression using
+ * pushValue and pushOperator. It must push a single value with pushValue,
+ * a complete expression with all operands, or a stopword placeholder
+ * with pushStop, otherwise the prefix notation representation will be broken,
+ * having an operator with no operand.
+ *
+ * opaque is passed on to pushval as is, pushval can use it to store its
+ * private state.
+ *
+ * The returned query might contain QI_VALSTOP nodes. The caller is responsible
+ * for cleaning them up (with clean_fakeval)
*/
TSQuery
-parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
+parse_tsquery(char *buf,
+ PushFunction pushval,
+ void *opaque,
+ bool isplain)
{
- TSQueryParserState state;
- int4 i;
+ struct TSQueryParserStateData state;
+ int i;
TSQuery query;
- int4 commonlen;
+ int commonlen;
QueryItem *ptr;
- ParseQueryNode *tmp;
- int4 pos = 0;
+ int pos = 0;
+ ListCell *cell;
/* init state */
state.buffer = buf;
state.buf = buf;
state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
state.count = 0;
- state.num = 0;
- state.str = NULL;
- state.cfg_id = cfg_id;
+ state.polstr = NIL;
/* init value parser's state */
- state.valstate.oprisdelim = true;
- state.valstate.len = 32;
- state.valstate.word = (char *) palloc(state.valstate.len);
+ state.valstate = init_tsvector_parser(NULL, true);
/* init list of operand */
state.sumlen = 0;
*(state.curop) = '\0';
/* parse query & make polish notation (postfix, but in reverse order) */
- makepol(&state, pushval);
- pfree(state.valstate.word);
- if (!state.num)
+ makepol(&state, pushval, opaque);
+
+ close_tsvector_parser(state.valstate);
+
+ if (list_length(state.polstr) == 0)
{
ereport(NOTICE,
(errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
return query;
}
- /* make finish struct */
- commonlen = COMPUTESIZE(state.num, state.sumlen);
- query = (TSQuery) palloc(commonlen);
+ /* Pack the QueryItems in the final TSQuery struct to return to caller */
+ commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
+ query = (TSQuery) palloc0(commonlen);
SET_VARSIZE(query, commonlen);
- query->size = state.num;
+ query->size = list_length(state.polstr);
ptr = GETQUERY(query);
- /* set item in polish notation */
- for (i = 0; i < state.num; i++)
+ /* Copy QueryItems to TSQuery */
+ i = 0;
+ foreach(cell, state.polstr)
{
- ptr[i].weight = state.str->weight;
- ptr[i].type = state.str->type;
- ptr[i].val = state.str->val;
- ptr[i].distance = state.str->distance;
- ptr[i].length = state.str->length;
- tmp = state.str->next;
- pfree(state.str);
- state.str = tmp;
+ QueryItem *item = (QueryItem *) lfirst(cell);
+
+ switch(item->type)
+ {
+ case QI_VAL:
+ memcpy(&ptr[i], item, sizeof(QueryOperand));
+ break;
+ case QI_VALSTOP:
+ ptr[i].type = QI_VALSTOP;
+ break;
+ case QI_OPR:
+ memcpy(&ptr[i], item, sizeof(QueryOperator));
+ break;
+ default:
+ elog(ERROR, "unknown QueryItem type %d", item->type);
+ }
+ i++;
}
- /* set user friendly-operand view */
+ /* Copy all the operand strings to TSQuery */
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
pfree(state.op);
- /* set left operand's position for every operator */
+ /* Set left operand pointers for every operator. */
pos = 0;
findoprnd(ptr, &pos);
return query;
}
+static void
+pushval_asis(void *opaque, TSQueryParserState state, char *strval, int lenval,
+ int16 weight)
+{
+ pushValue(state, strval, lenval, weight);
+}
+
/*
* in without morphology
*/
pg_verifymbstr(in, strlen(in), false);
- PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
+ PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, NULL, false));
}
/*
char *buf;
char *cur;
char *op;
- int4 buflen;
+ int buflen;
} INFIX;
-#define RESIZEBUF(inf,addsize) \
+/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
+#define RESIZEBUF(inf, addsize) \
while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
{ \
- int4 len = (inf)->cur - (inf)->buf; \
+ int len = (inf)->cur - (inf)->buf; \
(inf)->buflen *= 2; \
(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
(inf)->cur = (inf)->buf + len; \
static void
infix(INFIX * in, bool first)
{
- if (in->curpol->type == VAL)
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->curpol->type == QI_VAL)
{
- char *op = in->op + in->curpol->distance;
+ QueryOperand *curpol = &in->curpol->operand;
+ char *op = in->op + curpol->distance;
int clen;
- RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
+ RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
*(in->cur) = '\'';
in->cur++;
while (*op)
}
*(in->cur) = '\'';
in->cur++;
- if (in->curpol->weight)
+ if (curpol->weight)
{
*(in->cur) = ':';
in->cur++;
- if (in->curpol->weight & (1 << 3))
+ if (curpol->weight & (1 << 3))
{
*(in->cur) = 'A';
in->cur++;
}
- if (in->curpol->weight & (1 << 2))
+ if (curpol->weight & (1 << 2))
{
*(in->cur) = 'B';
in->cur++;
}
- if (in->curpol->weight & (1 << 1))
+ if (curpol->weight & (1 << 1))
{
*(in->cur) = 'C';
in->cur++;
}
- if (in->curpol->weight & 1)
+ if (curpol->weight & 1)
{
*(in->cur) = 'D';
in->cur++;
*(in->cur) = '\0';
in->curpol++;
}
- else if (in->curpol->val == (int4) '!')
+ else if (in->curpol->operator.oper == OP_NOT)
{
bool isopr = false;
in->cur++;
*(in->cur) = '\0';
in->curpol++;
- if (in->curpol->type == OPR)
+
+ if (in->curpol->type == QI_OPR)
{
isopr = true;
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0');
}
+
infix(in, isopr);
if (isopr)
{
}
else
{
- int4 op = in->curpol->val;
+ int8 op = in->curpol->operator.oper;
INFIX nrm;
in->curpol++;
- if (op == (int4) '|' && !first)
+ if (op == OP_OR && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
/* print operator & right operand */
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
- sprintf(in->cur, " %c %s", op, nrm.buf);
+ switch(op)
+ {
+ case OP_OR:
+ sprintf(in->cur, " | %s", nrm.buf);
+ break;
+ case OP_AND:
+ sprintf(in->cur, " & %s", nrm.buf);
+ break;
+ default:
+ /* OP_NOT is handled in above if-branch*/
+ elog(ERROR, "unexpected operator type %d", op);
+ }
in->cur = strchr(in->cur, '\0');
pfree(nrm.buf);
- if (op == (int4) '|' && !first)
+ if (op == OP_OR && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, " )");
pq_sendint(&buf, query->size, sizeof(int32));
for (i = 0; i < query->size; i++)
{
- int tmp;
-
pq_sendint(&buf, item->type, sizeof(item->type));
- pq_sendint(&buf, item->weight, sizeof(item->weight));
- pq_sendint(&buf, item->left, sizeof(item->left));
- pq_sendint(&buf, item->val, sizeof(item->val));
-
- /*
- * We are sure that sizeof(WordEntry) == sizeof(int32), and about
- * layout of QueryItem
- */
- tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
- pq_sendint(&buf, tmp, sizeof(tmp));
+ switch(item->type)
+ {
+ case QI_VAL:
+ pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight));
+ pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc));
+ pq_sendint(&buf, item->operand.length, sizeof(int16));
+ /* istrue flag is just for temporary use in tsrank.c/Cover,
+ * so we don't need to transfer that */
+ break;
+ case QI_OPR:
+ pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
+ if (item->operator.oper != OP_NOT)
+ pq_sendint(&buf, item->operator.left, sizeof(item->operator.left));
+ break;
+ default:
+ elog(ERROR, "unknown tsquery node type %d", item->type);
+ }
item++;
}
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
{
- if (item->type == VAL)
- pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
+ if (item->type == QI_VAL)
+ pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length);
item++;
}
TSQuery query;
int i,
size,
- tmp,
- len = HDRSIZETQ;
+ len;
QueryItem *item;
int datalen = 0;
char *ptr;
size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
elog(ERROR, "invalid size of tsquery");
- len += sizeof(QueryItem) * size;
+
+ len = HDRSIZETQ + sizeof(QueryItem) * size;
query = (TSQuery) palloc(len);
query->size = size;
for (i = 0; i < size; i++)
{
item->type = (int8) pq_getmsgint(buf, sizeof(int8));
- item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
- item->left = (int16) pq_getmsgint(buf, sizeof(int16));
- item->val = (int32) pq_getmsgint(buf, sizeof(int32));
- tmp = pq_getmsgint(buf, sizeof(int32));
- memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
-
- /*
- * Sanity checks
- */
- if (item->type == VAL)
- {
- datalen += item->length + 1; /* \0 */
- }
- else if (item->type == OPR)
+
+ switch(item->type)
{
- if (item->val == '|' || item->val == '&')
- {
- if (item->left <= 0 || i + item->left >= size)
- elog(ERROR, "invalid pointer to left operand");
- }
+ case QI_VAL:
+ item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8));
+ item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
+ item->operand.length = pq_getmsgint(buf, sizeof(int16));
+
+ /*
+ * Check that datalen doesn't grow too large. Without the
+ * check, a malicious client could induce a buffer overflow
+ * by sending a tsquery whose size exceeds 2GB. datalen
+ * would overflow, we would allocate a too small buffer below,
+ * and overflow the buffer. Because operand.length is a 20-bit
+ * field, adding one such value to datalen must exceed
+ * MaxAllocSize before wrapping over the 32-bit datalen field,
+ * so this check will protect from it.
+ */
+ if (datalen > MAXSTRLEN)
+ elog(ERROR, "invalid tsquery; total operand length exceeded");
+
+ /* We can calculate distance from datalen, no need to send it
+ * through the wire. If we did, we would have to check that
+ * it's valid anyway.
+ */
+ item->operand.distance = datalen;
+
+ datalen += item->operand.length + 1; /* \0 */
- if (i == size - 1)
- elog(ERROR, "invalid pointer to right operand");
+ break;
+ case QI_OPR:
+ item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8));
+ if (item->operator.oper != OP_NOT &&
+ item->operator.oper != OP_OR &&
+ item->operator.oper != OP_AND)
+ elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
+ if(item->operator.oper != OP_NOT)
+ {
+ item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
+ /*
+ * Sanity checks
+ */
+ if (item->operator.left <= 0 || i + item->operator.left >= size)
+ elog(ERROR, "invalid pointer to left operand");
+
+ /* XXX: Though there's no way to construct a TSQuery that's
+ * not in polish notation, we don't enforce that for
+ * queries received from client in binary mode. Is there
+ * anything that relies on it?
+ *
+ * XXX: The tree could be malformed in other ways too,
+ * a node could have two parents, for example.
+ */
+ }
+
+ if (i == size - 1)
+ elog(ERROR, "invalid pointer to right operand");
+ break;
+ default:
+ elog(ERROR, "unknown tsquery node type %d", item->type);
}
- else
- elog(ERROR, "unknown tsquery node type");
item++;
}
ptr = GETOPERAND(query);
for (i = 0; i < size; i++)
{
- if (item->type == VAL)
+ if (item->type == QI_VAL)
{
- item->distance = ptr - GETOPERAND(query);
memcpy(ptr,
- pq_getmsgbytes(buf, item->length),
- item->length);
- ptr += item->length;
+ pq_getmsgbytes(buf, item->operand.length),
+ item->operand.length);
+ ptr += item->operand.length;
*ptr++ = '\0';
}
item++;
INFIX nrm;
text *res;
QueryItem *q;
- int4 len;
+ int len;
if (query->size == 0)
{
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
node->valnode = in;
node->right = node->left = NULL;
- if (in->type == OPR)
+ if (in->type == QI_OPR)
{
node->right = maketree(in + 1);
- if (in->val != (int4) '!')
- node->left = maketree(in + in->left);
+ if (in->operator.oper != OP_NOT)
+ node->left = maketree(in + in->operator.left);
}
return node;
}
+/*
+ * Internal state for plaintree and plainnode
+ */
typedef struct
{
QueryItem *ptr;
- int4 len;
- int4 cur;
+ int len; /* allocated size of ptr */
+ int cur; /* number of elements in ptr */
} PLAINTREE;
static void
state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
}
memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
- if (node->valnode->type == VAL)
+ if (node->valnode->type == QI_VAL)
state->cur++;
- else if (node->valnode->val == (int4) '!')
+ else if (node->valnode->operator.oper == OP_NOT)
{
- state->ptr[state->cur].left = 1;
+ state->ptr[state->cur].operator.left = 1;
state->cur++;
plainnode(state, node->right);
}
else
{
- int4 cur = state->cur;
+ int cur = state->cur;
state->cur++;
plainnode(state, node->right);
- state->ptr[cur].left = state->cur - cur;
+ state->ptr[cur].operator.left = state->cur - cur;
plainnode(state, node->left);
}
pfree(node);
}
/*
- * make plain view of tree from 'normal' view of tree
+ * make plain view of tree from a NODE-tree representation
*/
static QueryItem *
-plaintree(NODE * root, int4 *len)
+plaintree(NODE * root, int *len)
{
PLAINTREE pl;
pl.cur = 0;
pl.len = 16;
- if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+ if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
{
pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
plainnode(&pl, root);
static NODE *
clean_NOT_intree(NODE * node)
{
- if (node->valnode->type == VAL)
+ if (node->valnode->type == QI_VAL)
return node;
- if (node->valnode->val == (int4) '!')
+ if (node->valnode->operator.oper == OP_NOT)
{
freetree(node);
return NULL;
}
/* operator & or | */
- if (node->valnode->val == (int4) '|')
+ if (node->valnode->operator.oper == OP_OR)
{
if ((node->left = clean_NOT_intree(node->left)) == NULL ||
(node->right = clean_NOT_intree(node->right)) == NULL)
else
{
NODE *res = node;
+
+ Assert(node->valnode->operator.oper == OP_AND);
node->left = clean_NOT_intree(node->left);
node->right = clean_NOT_intree(node->right);
}
QueryItem *
-clean_NOT(QueryItem * ptr, int4 *len)
+clean_NOT(QueryItem * ptr, int *len)
{
NODE *root = maketree(ptr);
#undef V_UNKNOWN
#endif
-#define V_UNKNOWN 0
-#define V_TRUE 1
-#define V_FALSE 2
-#define V_STOP 3
+/*
+ * output values for result output parameter of clean_fakeval_intree
+ */
+#define V_UNKNOWN 0 /* the expression can't be evaluated statically */
+#define V_TRUE 1 /* the expression is always true (not implemented) */
+#define V_FALSE 2 /* the expression is always false (not implemented) */
+#define V_STOP 3 /* the expression is a stop word */
/*
* Clean query tree from values which is always in
char lresult = V_UNKNOWN,
rresult = V_UNKNOWN;
- if (node->valnode->type == VAL)
+ if (node->valnode->type == QI_VAL)
return node;
- else if (node->valnode->type == VALSTOP)
+ else
+ if (node->valnode->type == QI_VALSTOP)
{
pfree(node);
*result = V_STOP;
return NULL;
}
+ Assert(node->valnode->type == QI_OPR);
- if (node->valnode->val == (int4) '!')
+ if (node->valnode->operator.oper == OP_NOT)
{
node->right = clean_fakeval_intree(node->right, &rresult);
if (!node->right)
node->left = clean_fakeval_intree(node->left, &lresult);
node->right = clean_fakeval_intree(node->right, &rresult);
+
if (lresult == V_STOP && rresult == V_STOP)
{
freetree(node);
}
QueryItem *
-clean_fakeval(QueryItem * ptr, int4 *len)
+clean_fakeval(QueryItem * ptr, int *len)
{
NODE *root = maketree(ptr);
char result = V_UNKNOWN;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
}
static QTNode *
-join_tsqueries(TSQuery a, TSQuery b)
+join_tsqueries(TSQuery a, TSQuery b, int8 operator)
{
QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
- res->valnode->type = OPR;
+ res->valnode->type = QI_OPR;
+ res->valnode->operator.oper = operator;
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
PG_RETURN_POINTER(a);
}
- res = join_tsqueries(a, b);
-
- res->valnode->val = '&';
+ res = join_tsqueries(a, b, OP_AND);
query = QTN2QT(res);
PG_RETURN_POINTER(a);
}
- res = join_tsqueries(a, b);
-
- res->valnode->val = '|';
+ res = join_tsqueries(a, b, OP_OR);
query = QTN2QT(res);
res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
- res->valnode->type = OPR;
- res->valnode->val = '!';
+ res->valnode->type = QI_OPR;
+ res->valnode->operator.oper = OP_NOT;
res->child = (QTNode **) palloc0(sizeof(QTNode *));
res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
for (i = 0; i < a->size; i++)
{
- if (ptr->type == VAL)
- sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN);
+ if (ptr->type == QI_VAL)
+ sign |= ((TSQuerySign) 1) << (ptr->operand.valcrc % TSQS_SIGLEN);
ptr++;
}
for (i = 0; i < ex->size; i++)
{
iq = GETQUERY(query);
- if (ie[i].type != VAL)
+ if (ie[i].type != QI_VAL)
continue;
for (j = 0; j < query->size; j++)
- if (iq[j].type == VAL && ie[i].val == iq[j].val)
+ if (iq[j].type == QI_VAL && ie[i].operand.valcrc == iq[j].operand.valcrc)
{
j = query->size + 1;
break;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
return 1;
}
+/*
+ * If node is equal to ex, replace it with subs. Replacement is actually done
+ * by returning either node or a copy of subs.
+ */
static QTNode *
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
{
- if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
+ if ((node->sign & ex->sign) != ex->sign ||
+ node->valnode->type != ex->valnode->type)
return node;
if (node->flags & QTN_NOCHANGE)
return node;
-
- if (node->valnode->type == OPR)
+
+ if (node->valnode->type == QI_OPR)
{
+ if (node->valnode->operator.oper != ex->valnode->operator.oper)
+ return node;
+
if (node->nchild == ex->nchild)
{
if (QTNEq(node, ex))
}
else if (node->nchild > ex->nchild)
{
+ /*
+ * AND and OR are commutative, so we check if a subset of the
+ * children match. For example, if tnode is A | B | C, and
+ * ex is B | C, we have a match after we convert tnode to
+ * A | (B | C).
+ */
int *counters = (int *) palloc(sizeof(int) * node->nchild);
int i;
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
pfree(counters);
}
}
- else if (QTNEq(node, ex))
+ else
{
- QTNFree(node);
- if (subs)
- {
- node = QTNCopy(subs);
- node->flags |= QTN_NOCHANGE;
- }
- else
+ Assert(node->valnode->type == QI_VAL);
+
+ if (node->valnode->operand.valcrc != ex->valnode->operand.valcrc)
+ return node;
+ else if (QTNEq(node, ex))
{
- node = NULL;
+ QTNFree(node);
+ if (subs)
+ {
+ node = QTNCopy(subs);
+ node->flags |= QTN_NOCHANGE;
+ }
+ else
+ {
+ node = NULL;
+ }
+ *isfind = true;
}
- *isfind = true;
}
return node;
{
root = findeq(root, ex, subs, isfind);
- if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR)
+ if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
{
int i;
if (!root)
return NULL;
- if (root->valnode->type == OPR)
+ if (root->valnode->type == QI_OPR)
{
int i,
j = 0;
root->nchild = j;
- if (root->valnode->val == (int4) '!' && root->nchild == 0)
+ if (root->valnode->operator.oper == OP_NOT && root->nchild == 0)
{
QTNFree(root);
root = NULL;
elog(ERROR, "array must be one-dimensional, not %d dimensions",
ARR_NDIM(qa));
if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
- elog(ERROR, "array should have only three elements");
+ elog(ERROR, "array must have three elements");
if (ARR_ELEMTYPE(qa) != TSQUERYOID)
- elog(ERROR, "array should contain tsquery type");
+ elog(ERROR, "array must contain tsquery elements");
deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
tree = findsubquery(tree, qex, subs, NULL);
+
QTNFree(qex);
QTNFree(subs);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
-
QTNode *
QT2QTN(QueryItem * in, char *operand)
{
node->valnode = in;
- if (in->type == OPR)
+ if (in->type == QI_OPR)
{
node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
node->child[0] = QT2QTN(in + 1, operand);
node->sign = node->child[0]->sign;
- if (in->val == (int4) '!')
+ if (in->operator.oper == OP_NOT)
node->nchild = 1;
else
{
node->nchild = 2;
- node->child[1] = QT2QTN(in + in->left, operand);
+ node->child[1] = QT2QTN(in + in->operator.left, operand);
node->sign |= node->child[1]->sign;
}
}
else if (operand)
{
- node->word = operand + in->distance;
- node->sign = 1 << (in->val % 32);
+ node->word = operand + in->operand.distance;
+ node->sign = 1 << (in->operand.valcrc % 32);
}
return node;
if (!in)
return;
- if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
+ if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
pfree(in->word);
if (in->child)
{
if (in->valnode)
{
- if (in->valnode->type == OPR && in->nchild > 0)
+ if (in->valnode->type == QI_OPR && in->nchild > 0)
{
int i;
{
if (an->valnode->type != bn->valnode->type)
return (an->valnode->type > bn->valnode->type) ? -1 : 1;
- else if (an->valnode->val != bn->valnode->val)
- return (an->valnode->val > bn->valnode->val) ? -1 : 1;
- else if (an->valnode->type == VAL)
- {
- if (an->valnode->length == bn->valnode->length)
- return strncmp(an->word, bn->word, an->valnode->length);
- else
- return (an->valnode->length > bn->valnode->length) ? -1 : 1;
- }
- else if (an->nchild != bn->nchild)
+
+ if (an->valnode->type == QI_OPR)
{
- return (an->nchild > bn->nchild) ? -1 : 1;
+ QueryOperator *ao = &an->valnode->operator;
+ QueryOperator *bo = &bn->valnode->operator;
+
+ if(ao->oper != bo->oper)
+ return (ao->oper > bo->oper) ? -1 : 1;
+
+ if (an->nchild != bn->nchild)
+ return (an->nchild > bn->nchild) ? -1 : 1;
+
+ {
+ int i,
+ res;
+
+ for (i = 0; i < an->nchild; i++)
+ if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
+ return res;
+ }
+ return 0;
}
else
{
- int i,
- res;
+ QueryOperand *ao = &an->valnode->operand;
+ QueryOperand *bo = &bn->valnode->operand;
- for (i = 0; i < an->nchild; i++)
- if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
- return res;
- }
+ Assert(an->valnode->type == QI_VAL);
+
+ if (ao->valcrc != bo->valcrc)
+ {
+ return (ao->valcrc > bo->valcrc) ? -1 : 1;
+ }
- return 0;
+ if (ao->length == bo->length)
+ return strncmp(an->word, bn->word, ao->length);
+ else
+ return (ao->length > bo->length) ? -1 : 1;
+ }
}
static int
{
int i;
- if (in->valnode->type != OPR)
+ if (in->valnode->type != QI_OPR)
return;
for (i = 0; i < in->nchild; i++)
return (QTNodeCompare(a, b) == 0) ? true : false;
}
+/*
+ * Remove unnecessary intermediate nodes. For example:
+ *
+ * OR OR
+ * a OR -> a b c
+ * b c
+ */
void
QTNTernary(QTNode * in)
{
int i;
- if (in->valnode->type != OPR)
+ if (in->valnode->type != QI_OPR)
return;
for (i = 0; i < in->nchild; i++)
for (i = 0; i < in->nchild; i++)
{
- if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
+ QTNode *cc = in->child[i];
+
+ if (cc->valnode->type == QI_OPR && in->valnode->operator.oper == cc->valnode->operator.oper)
{
- QTNode *cc = in->child[i];
int oldnchild = in->nchild;
in->nchild += cc->nchild - 1;
memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
i += cc->nchild - 1;
+ if(cc->flags & QTN_NEEDFREE)
+ pfree(cc->valnode);
pfree(cc);
}
}
}
+/*
+ * Convert a tree to binary tree by inserting intermediate nodes.
+ * (Opposite of QTNTernary)
+ */
void
QTNBinary(QTNode * in)
{
int i;
- if (in->valnode->type != OPR)
+ if (in->valnode->type != QI_OPR)
return;
for (i = 0; i < in->nchild; i++)
nn->sign = nn->child[0]->sign | nn->child[1]->sign;
nn->valnode->type = in->valnode->type;
- nn->valnode->val = in->valnode->val;
+ nn->valnode->operator.oper = in->valnode->operator.oper;
in->child[0] = nn;
in->child[1] = in->child[in->nchild - 1];
}
}
+/*
+ * Count the number of nodes in the tree (*nnode) and the total length of
+ * the operand strings in it (*sumlen), including '\0'-terminators.
+ */
static void
-cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
+cntsize(QTNode * in, int *sumlen, int *nnode)
{
*nnode += 1;
- if (in->valnode->type == OPR)
+ if (in->valnode->type == QI_OPR)
{
int i;
}
else
{
- *sumlen += in->valnode->length + 1;
+ *sumlen += in->valnode->operand.length + 1;
}
}
} QTN2QTState;
static void
-fillQT(QTN2QTState * state, QTNode * in)
+fillQT(QTN2QTState *state, QTNode *in)
{
- *(state->curitem) = *(in->valnode);
-
- if (in->valnode->type == VAL)
+ if (in->valnode->type == QI_VAL)
{
- memcpy(state->curoperand, in->word, in->valnode->length);
- state->curitem->distance = state->curoperand - state->operand;
- state->curoperand[in->valnode->length] = '\0';
- state->curoperand += in->valnode->length + 1;
+ memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
+
+ memcpy(state->curoperand, in->word, in->valnode->operand.length);
+ state->curitem->operand.distance = state->curoperand - state->operand;
+ state->curoperand[in->valnode->operand.length] = '\0';
+ state->curoperand += in->valnode->operand.length + 1;
state->curitem++;
}
else
{
QueryItem *curitem = state->curitem;
+ Assert(in->valnode->type == QI_OPR);
+
+ memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
+
Assert(in->nchild <= 2);
state->curitem++;
if (in->nchild == 2)
{
- curitem->left = state->curitem - curitem;
+ curitem->operator.left = state->curitem - curitem;
fillQT(state, in->child[1]);
}
}
*(out->valnode) = *(in->valnode);
out->flags |= QTN_NEEDFREE;
- if (in->valnode->type == VAL)
+ if (in->valnode->type == QI_VAL)
{
- out->word = palloc(in->valnode->length + 1);
- memcpy(out->word, in->word, in->valnode->length);
- out->word[in->valnode->length] = '\0';
+ out->word = palloc(in->valnode->operand.length + 1);
+ memcpy(out->word, in->word, in->valnode->operand.length);
+ out->word[in->valnode->operand.length] = '\0';
out->flags |= QTN_WORDFREE;
}
else
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
}
static int4
-WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
+WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
{
if (ptr->len == item->length)
return strncmp(
}
static WordEntry *
-find_wordentry(TSVector t, TSQuery q, QueryItem * item)
+find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
{
WordEntry *StopLow = ARRPTR(t);
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
}
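+/*
+ * Comparator for qsort_arg: orders QueryOperands by (length, word).
+ * a and b point to QueryOperand pointers; arg is the operand string
+ * buffer that the operands' distance fields index into.
+ */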
static int
-compareQueryItem(const void *a, const void *b, void *arg)
+compareQueryOperand(const void *a, const void *b, void *arg)
{
char *operand = (char *) arg;
+ QueryOperand *qa = (*(QueryOperand **) a);
+ QueryOperand *qb = (*(QueryOperand **) b);
- if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
- return strncmp(operand + (*(QueryItem **) a)->distance,
- operand + (*(QueryItem **) b)->distance,
- (*(QueryItem **) b)->length);
+ if (qa->length == qb->length)
+ return strncmp(operand + qa->distance,
+ operand + qb->distance,
+ qb->length);
- return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
+ return (qa->length > qb->length) ? 1 : -1;
}
-static QueryItem **
-SortAndUniqItems(char *operand, QueryItem * item, int *size)
+/*
+ * Returns a sorted, de-duplicated array of QueryOperands in a query.
+ * The returned QueryOperands are pointers to the original QueryOperands
+ * in the query.
+ *
+ * On entry, *size is the number of QueryItems in the query; on return,
+ * the length of the returned array is stored in *size.
+ */
+static QueryOperand **
+SortAndUniqItems(TSQuery q, int *size)
{
- QueryItem **res,
+ char *operand = GETOPERAND(q);
+ QueryItem * item = GETQUERY(q);
+ QueryOperand **res,
**ptr,
**prevptr;
- ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size);
+ ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
+ /* Collect all operands from the tree to res */
while ((*size)--)
{
- if (item->type == VAL)
+ if (item->type == QI_VAL)
{
- *ptr = item;
+ *ptr = (QueryOperand *) item;
ptr++;
}
item++;
if (*size < 2)
return res;
- qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand);
+ qsort_arg(res, *size, sizeof(QueryOperand **), compareQueryOperand, (void *) operand);
ptr = res + 1;
prevptr = res;
+ /* remove duplicates */
while (ptr - res < *size)
{
- if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0)
+ if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
{
prevptr++;
*prevptr = *ptr;
lenct,
dist;
float res = -1.0;
- QueryItem **item;
+ QueryOperand **item;
int size = q->size;
- item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+ item = SortAndUniqItems(q, &size);
if (size < 2)
{
pfree(item);
j,
i;
float res = 0.0;
- QueryItem **item;
+ QueryOperand **item;
int size = q->size;
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
- item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+ item = SortAndUniqItems(q, &size);
for (i = 0; i < size; i++)
{
if (!t->size || !q->size)
return 0.0;
- res = (item->type != VAL && item->val == (int4) '&') ?
+ /* XXX: What about NOT? */
+ res = (item->type == QI_OPR && item->operator.oper == OP_AND) ?
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
if (res < 0)
}
static bool
-checkcondition_QueryItem(void *checkval, QueryItem * val)
+checkcondition_QueryOperand(void *checkval, QueryOperand *val)
{
return (bool) (val->istrue);
}
/* reset istrue flag */
for (i = 0; i < query->size; i++)
{
- if (item->type == VAL)
- item->istrue = 0;
+ if (item->type == QI_VAL)
+ item->operand.istrue = 0;
item++;
}
}
static bool
-Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
+Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
{
DocRepresentation *ptr;
int lastpos = ext->pos;
while (ptr - doc < len)
{
for (i = 0; i < ptr->nitem; i++)
- ptr->item[i]->istrue = 1;
- if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
+ {
+ if(ptr->item[i]->type == QI_VAL)
+ ptr->item[i]->operand.istrue = 1;
+ }
+ if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryOperand))
{
if (ptr->pos > ext->q)
{
while (ptr >= doc + ext->pos)
{
for (i = 0; i < ptr->nitem; i++)
- ptr->item[i]->istrue = 1;
- if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
+ if(ptr->item[i]->type == QI_VAL) /* XXX */
+ ptr->item[i]->operand.istrue = 1;
+ if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
{
if (ptr->pos < ext->p)
{
for (i = 0; i < query->size; i++)
{
- if (item[i].type != VAL || item[i].istrue)
+ QueryOperand *curoperand;
+
+ if (item[i].type != QI_VAL)
+ continue;
+
+ curoperand = &item[i].operand;
+
+ if(item[i].operand.istrue)
continue;
- entry = find_wordentry(txt, query, &(item[i]));
+ entry = find_wordentry(txt, query, curoperand);
if (!entry)
continue;
{
if (j == 0)
{
- QueryItem *kptr,
- *iptr = item + i;
int k;
doc[cur].needfree = false;
for (k = 0; k < query->size; k++)
{
- kptr = item + k;
+ QueryOperand *kptr = &item[k].operand;
+ QueryOperand *iptr = &item[i].operand;
+
if (k == i ||
- (item[k].type == VAL &&
- compareQueryItem(&kptr, &iptr, operand) == 0))
+ (item[k].type == QI_VAL &&
+ compareQueryOperand(&kptr, &iptr, operand) == 0))
{
+ /* if k == i, we've already checked above that its type == QI_VAL */
doc[cur].item[doc[cur].nitem] = item + k;
doc[cur].nitem++;
- kptr->istrue = 1;
+ item[k].operand.istrue = 1;
}
}
}
if (cur > 0)
{
- if (cur > 1)
- qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+ qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
return doc;
}
{
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1);
- TSQuery query = PG_GETARG_TSQUERY_COPY(2);
+ TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(3);
float res;
{
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1);
- TSQuery query = PG_GETARG_TSQUERY_COPY(2);
+ TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
float res;
res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
ts_rankcd_ttf(PG_FUNCTION_ARGS)
{
TSVector txt = PG_GETARG_TSVECTOR(0);
- TSQuery query = PG_GETARG_TSQUERY_COPY(1);
+ TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(2);
float res;
ts_rankcd_tt(PG_FUNCTION_ARGS)
{
TSVector txt = PG_GETARG_TSVECTOR(0);
- TSQuery query = PG_GETARG_TSQUERY_COPY(1);
+ TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
float res;
res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.2 2007/08/21 01:45:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
+typedef struct
+{
+ WordEntry entry; /* should be first ! */
+ WordEntryPos *pos; /* array of poslen positions; presumably valid only when entry.haspos is set -- TODO confirm */
+ int poslen; /* number of elements in pos */
+} WordEntryIN;
static int
comparePos(const void *a, const void *b)
{
- if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
+ int apos = WEP_GETPOS(*(WordEntryPos *) a);
+ int bpos = WEP_GETPOS(*(WordEntryPos *) b);
+
+ if (apos == bpos)
return 0;
- return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
+ return (apos > bpos) ? 1 : -1;
}
+/*
+ * Removes duplicate pos entries. If there are two entries with the same pos
+ * but different weights, the higher weight is retained.
+ *
+ * Returns new length.
+ */
static int
-uniquePos(WordEntryPos * a, int4 l)
+uniquePos(WordEntryPos * a, int l)
{
WordEntryPos *ptr,
*res;
- if (l == 1)
+ if (l <= 1)
return l;
res = a;
}
static int
-uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
{
WordEntryIN *ptr,
*res;
- res = a;
+ Assert(l >= 1);
+
if (l == 1)
{
if (a->entry.haspos)
{
- *(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
- *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
+ a->poslen = uniquePos(a->pos, a->poslen);
+ *outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
}
return l;
}
+ res = a;
ptr = a + 1;
qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
{
if (res->entry.haspos)
{
- *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
- *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+ res->poslen = uniquePos(res->pos, res->poslen);
+ *outbuflen += res->poslen * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
res++;
{
if (res->entry.haspos)
{
- int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
+ int newlen = ptr->poslen + res->poslen;
+
+ /* Append ptr's positions to the end of res's position array */
- res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
- memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
- &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
- *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
+ res->pos = (WordEntryPos *) repalloc(res->pos, newlen * sizeof(WordEntryPos));
+ memcpy(&res->pos[res->poslen],
+ ptr->pos, ptr->poslen * sizeof(WordEntryPos));
+ res->poslen = newlen;
pfree(ptr->pos);
}
else
}
if (res->entry.haspos)
{
- *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
- *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+ res->poslen = uniquePos(res->pos, res->poslen);
+ *outbuflen += res->poslen * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
return compareentry(a, b, buf);
}
-#define WAITWORD 1
-#define WAITENDWORD 2
-#define WAITNEXTCHAR 3
-#define WAITENDCMPLX 4
-#define WAITPOSINFO 5
-#define INPOSINFO 6
-#define WAITPOSDELIM 7
-#define WAITCHARCMPLX 8
-
-#define RESIZEPRSBUF \
-do { \
- if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
- { \
- int4 clen = state->curpos - state->word; \
- state->len *= 2; \
- state->word = (char*)repalloc( (void*)state->word, state->len ); \
- state->curpos = state->word + clen; \
- } \
-} while (0)
-
-bool
-gettoken_tsvector(TSVectorParseState *state)
-{
- int4 oldstate = 0;
-
- state->curpos = state->word;
- state->state = WAITWORD;
- state->alen = 0;
-
- while (1)
- {
- if (state->state == WAITWORD)
- {
- if (*(state->prsbuf) == '\0')
- return false;
- else if (t_iseq(state->prsbuf, '\''))
- state->state = WAITENDCMPLX;
- else if (t_iseq(state->prsbuf, '\\'))
- {
- state->state = WAITNEXTCHAR;
- oldstate = WAITENDWORD;
- }
- else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- else if (!t_isspace(state->prsbuf))
- {
- COPYCHAR(state->curpos, state->prsbuf);
- state->curpos += pg_mblen(state->prsbuf);
- state->state = WAITENDWORD;
- }
- }
- else if (state->state == WAITNEXTCHAR)
- {
- if (*(state->prsbuf) == '\0')
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("there is no escaped character")));
- else
- {
- RESIZEPRSBUF;
- COPYCHAR(state->curpos, state->prsbuf);
- state->curpos += pg_mblen(state->prsbuf);
- state->state = oldstate;
- }
- }
- else if (state->state == WAITENDWORD)
- {
- if (t_iseq(state->prsbuf, '\\'))
- {
- state->state = WAITNEXTCHAR;
- oldstate = WAITENDWORD;
- }
- else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
- (state->oprisdelim && ISOPERATOR(state->prsbuf)))
- {
- RESIZEPRSBUF;
- if (state->curpos == state->word)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- *(state->curpos) = '\0';
- return true;
- }
- else if (t_iseq(state->prsbuf, ':'))
- {
- if (state->curpos == state->word)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- *(state->curpos) = '\0';
- if (state->oprisdelim)
- return true;
- else
- state->state = INPOSINFO;
- }
- else
- {
- RESIZEPRSBUF;
- COPYCHAR(state->curpos, state->prsbuf);
- state->curpos += pg_mblen(state->prsbuf);
- }
- }
- else if (state->state == WAITENDCMPLX)
- {
- if (t_iseq(state->prsbuf, '\''))
- {
- state->state = WAITCHARCMPLX;
- }
- else if (t_iseq(state->prsbuf, '\\'))
- {
- state->state = WAITNEXTCHAR;
- oldstate = WAITENDCMPLX;
- }
- else if (*(state->prsbuf) == '\0')
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- else
- {
- RESIZEPRSBUF;
- COPYCHAR(state->curpos, state->prsbuf);
- state->curpos += pg_mblen(state->prsbuf);
- }
- }
- else if (state->state == WAITCHARCMPLX)
- {
- if (t_iseq(state->prsbuf, '\''))
- {
- RESIZEPRSBUF;
- COPYCHAR(state->curpos, state->prsbuf);
- state->curpos += pg_mblen(state->prsbuf);
- state->state = WAITENDCMPLX;
- }
- else
- {
- RESIZEPRSBUF;
- *(state->curpos) = '\0';
- if (state->curpos == state->word)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- if (state->oprisdelim)
- {
- /* state->prsbuf+=pg_mblen(state->prsbuf); */
- return true;
- }
- else
- state->state = WAITPOSINFO;
- continue; /* recheck current character */
- }
- }
- else if (state->state == WAITPOSINFO)
- {
- if (t_iseq(state->prsbuf, ':'))
- state->state = INPOSINFO;
- else
- return true;
- }
- else if (state->state == INPOSINFO)
- {
- if (t_isdigit(state->prsbuf))
- {
- if (state->alen == 0)
- {
- state->alen = 4;
- state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
- *(uint16 *) (state->pos) = 0;
- }
- else if (*(uint16 *) (state->pos) + 1 >= state->alen)
- {
- state->alen *= 2;
- state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
- }
- (*(uint16 *) (state->pos))++;
- WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
- if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("wrong position info in tsvector")));
- WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
- state->state = WAITPOSDELIM;
- }
- else
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- }
- else if (state->state == WAITPOSDELIM)
- {
- if (t_iseq(state->prsbuf, ','))
- state->state = INPOSINFO;
- else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
- {
- if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
- }
- else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
- {
- if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
- }
- else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
- {
- if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
- }
- else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
- {
- if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
- }
- else if (t_isspace(state->prsbuf) ||
- *(state->prsbuf) == '\0')
- return true;
- else if (!t_isdigit(state->prsbuf))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsvector")));
- }
- else /* internal error */
- elog(ERROR, "internal error in gettoken_tsvector");
-
- /* get next char */
- state->prsbuf += pg_mblen(state->prsbuf);
- }
-
- return false;
-}
Datum
tsvectorin(PG_FUNCTION_ARGS)
char *buf = PG_GETARG_CSTRING(0);
TSVectorParseState state;
WordEntryIN *arr;
+ int totallen;
+ int arrlen; /* allocated size of arr */
WordEntry *inarr;
- int4 len = 0,
- totallen = 64;
+ int len = 0;
TSVector in;
- char *tmpbuf,
- *cur;
- int4 i,
- buflen = 256;
+ int i;
+ char *token;
+ int toklen;
+ WordEntryPos *pos;
+ int poslen;
+
+ /*
+ * Tokens are appended to tmpbuf; cur points to the
+ * end of the used space in tmpbuf.
+ */
+ char *tmpbuf;
+ char *cur;
+ int buflen = 256; /* allocated size of tmpbuf */
pg_verifymbstr(buf, strlen(buf), false);
- state.prsbuf = buf;
- state.len = 32;
- state.word = (char *) palloc(state.len);
- state.oprisdelim = false;
- arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+ state = init_tsvector_parser(buf, false);
+
+ arrlen = 64;
+ arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
cur = tmpbuf = (char *) palloc(buflen);
- while (gettoken_tsvector(&state))
+ while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
{
- /*
- * Realloc buffers if it's needed
- */
- if (len >= totallen)
- {
- totallen *= 2;
- arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
- }
-
- while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
- {
- int4 dist = cur - tmpbuf;
-
- buflen *= 2;
- tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
- cur = tmpbuf + dist;
- }
- if (state.curpos - state.word >= MAXSTRLEN)
+ if (toklen >= MAXSTRLEN)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long (%ld bytes, max %ld bytes)",
- (long) (state.curpos - state.word),
+ (long) toklen,
(long) MAXSTRLEN)));
- arr[len].entry.len = state.curpos - state.word;
+
if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("position value too large")));
+
+ /*
+ * Enlarge buffers if needed
+ */
+ if (len >= arrlen)
+ {
+ arrlen *= 2;
+ arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
+ }
+ while ((cur - tmpbuf) + toklen >= buflen)
+ {
+ int dist = cur - tmpbuf;
+
+ buflen *= 2;
+ tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+ cur = tmpbuf + dist;
+ }
+ arr[len].entry.len = toklen;
arr[len].entry.pos = cur - tmpbuf;
- memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
- cur += arr[len].entry.len;
+ memcpy((void *) cur, (void *) token, toklen);
+ cur += toklen;
- if (state.alen)
+ if (poslen != 0)
{
arr[len].entry.haspos = 1;
- arr[len].pos = state.pos;
+ arr[len].pos = pos;
+ arr[len].poslen = poslen;
}
else
arr[len].entry.haspos = 0;
len++;
}
- pfree(state.word);
+
+ close_tsvector_parser(state);
if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen);
cur += SHORTALIGN(arr[i].entry.len);
if (arr[i].entry.haspos)
{
- memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
- cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
+ uint16 tmplen;
+
+ if(arr[i].poslen > 0xFFFF)
+ elog(ERROR, "positions array too long");
+
+ tmplen = (uint16) arr[i].poslen;
+
+ /* Copy length to output struct */
+ memcpy(cur, &tmplen, sizeof(uint16));
+ cur += sizeof(uint16);
+
+ /* Copy positions */
+ memcpy(cur, arr[i].pos, (arr[i].poslen) * sizeof(WordEntryPos));
+ cur += arr[i].poslen * sizeof(WordEntryPos);
+
pfree(arr[i].pos);
}
inarr[i] = arr[i].entry;
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
TSVector vec;
- int i,
- size,
- len = DATAHDRSIZE;
+ int i;
+ uint32 size;
WordEntry *weptr;
int datalen = 0;
+ Size len;
size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
elog(ERROR, "invalid size of tsvector");
- len += sizeof(WordEntry) * size;
+ len = DATAHDRSIZE + sizeof(WordEntry) * size;
- len *= 2;
+ len = len * 2; /* times two to make room for lexemes */
vec = (TSVector) palloc0(len);
vec->size = size;
weptr = ARRPTR(vec);
for (i = 0; i < size; i++)
{
- int tmp;
+ int32 tmp;
weptr = ARRPTR(vec) + i;
npos;
WordEntryPos *wepptr;
- npos = (uint16) pq_getmsgint(buf, sizeof(int16));
+ npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
if (npos > MAXNUMPOS)
elog(ERROR, "unexpected number of positions");
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
+/*
+ * Order: haspos, len, word, for all positions (pos, weight)
+ */
static int
silly_cmp_tsvector(const TSVector a, const TSVector b)
{
* compare 2 string values
*/
static int4
-ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
+ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
{
if (ptr->len == item->length)
return strncmp(
* check weight info
*/
static bool
-checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
+checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
{
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
* is there value 'val' in array or not ?
*/
static bool
-checkcondition_str(void *checkval, QueryItem * val)
+checkcondition_str(void *checkval, QueryOperand * val)
{
- WordEntry *StopLow = ((CHKVAL *) checkval)->arrb;
- WordEntry *StopHigh = ((CHKVAL *) checkval)->arre;
+ CHKVAL *chkval = (CHKVAL *) checkval;
+ WordEntry *StopLow = chkval->arrb;
+ WordEntry *StopHigh = chkval->arre;
WordEntry *StopMiddle;
int difference;
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
- difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
+ difference = ValCompare(chkval, StopMiddle, val);
if (difference == 0)
return (val->weight && StopMiddle->haspos) ?
- checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
+ checkclass_str(chkval, StopMiddle, val) : true;
else if (difference < 0)
StopLow = StopMiddle + 1;
else
}
/*
- * check for boolean condition
+ * check for boolean condition.
+ *
+ * If calcnot is false, NOT expressions always evaluate to true; this is used in ranking.
+ * checkval can be used to pass information to the callback. TS_execute doesn't
+ * do anything with it.
+ * chkcond is a callback function used to evaluate each QI_VAL node in the query.
+ *
*/
bool
TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
- bool (*chkcond) (void *checkval, QueryItem * val))
+ bool (*chkcond) (void *checkval, QueryOperand * val))
{
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
- if (curitem->type == VAL)
- return chkcond(checkval, curitem);
- else if (curitem->val == (int4) '!')
- {
- return (calcnot) ?
- !TS_execute(curitem + 1, checkval, calcnot, chkcond)
- : true;
- }
- else if (curitem->val == (int4) '&')
+ if (curitem->type == QI_VAL)
+ return chkcond(checkval, (QueryOperand *) curitem);
+
+ switch(curitem->operator.oper)
{
- if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
- return TS_execute(curitem + 1, checkval, calcnot, chkcond);
- else
- return false;
- }
- else
- { /* |-operator */
- if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
- return true;
- else
- return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+ case OP_NOT:
+ if (calcnot)
+ return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
+ else
+ return true;
+ case OP_AND:
+ if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
+ return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+ else
+ return false;
+
+ case OP_OR:
+ if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
+ return true;
+ else
+ return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+
+ default:
+ elog(ERROR, "unknown operator %d", curitem->operator.oper);
}
+
+ /* not reachable, but keep compiler quiet */
return false;
}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * tsvector_parser.c
+ * Parser for tsvector
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/memutils.h"
+
+struct TSVectorParseStateData
+{
+ char *prsbuf;
+ char *word; /* buffer to hold the current word */
+ int len; /* size in bytes allocated for 'word' */
+ bool oprisdelim;
+};
+
+/*
+ * Initializes parser for the input string. If oprisdelim is set, the
+ * following characters are treated as delimiters in addition to whitespace:
+ * ! | & ( )
+ */
+TSVectorParseState
+init_tsvector_parser(char *input, bool oprisdelim)
+{
+ TSVectorParseState state;
+
+ state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
+ state->prsbuf = input;
+ state->len = 32;
+ state->word = (char *) palloc(state->len);
+ state->oprisdelim = oprisdelim;
+
+ return state;
+}
+
+/*
+ * Reinitializes parser for parsing 'input', instead of previous input.
+ */
+void
+reset_tsvector_parser(TSVectorParseState state, char *input)
+{
+ state->prsbuf = input;
+}
+
+/*
+ * Shuts down a tsvector parser.
+ */
+void
+close_tsvector_parser(TSVectorParseState state)
+{
+ pfree(state->word);
+ pfree(state);
+}
+
+#define RESIZEPRSBUF \
+do { \
+ if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
+ { \
+ int clen = curpos - state->word; \
+ state->len *= 2; \
+ state->word = (char*)repalloc( (void*)state->word, state->len ); \
+ curpos = state->word + clen; \
+ } \
+} while (0)
+
+
+#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+
+/* Fills the output parameters, and returns true */
+#define RETURN_TOKEN \
+do { \
+ if (pos_ptr != NULL) \
+ { \
+ *pos_ptr = pos; \
+ *poslen = npos; \
+ } \
+ else if (pos != NULL) \
+ pfree(pos); \
+ \
+ if (strval != NULL) \
+ *strval = state->word; \
+ if (lenval != NULL) \
+ *lenval = curpos - state->word; \
+ if (endptr != NULL) \
+ *endptr = state->prsbuf; \
+ return true; \
+} while(0)
+
+
+/* State codes used in gettoken_tsvector */
+#define WAITWORD 1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR 3
+#define WAITENDCMPLX 4
+#define WAITPOSINFO 5
+#define INPOSINFO 6
+#define WAITPOSDELIM 7
+#define WAITCHARCMPLX 8
+
+/*
+ * Get next token from string being parsed. Returns false if
+ * end of input string is reached, otherwise strval, lenval, pos_ptr
+ * and poslen output parameters are filled in:
+ *
+ * *strval token
+ * *lenval length of *strval
+ * *pos_ptr pointer to a palloc'd array of positions and weights
+ * associated with the token. If the caller is not interested
+ * in the information, NULL can be supplied. Otherwise
+ * the caller is responsible for pfreeing the array.
+ * *poslen number of elements in *pos_ptr
+ */
+bool
+gettoken_tsvector(TSVectorParseState state,
+ char **strval, int *lenval,
+ WordEntryPos **pos_ptr, int *poslen,
+ char **endptr)
+{
+ int oldstate = 0;
+ char *curpos = state->word;
+ int statecode = WAITWORD;
+
+ /* pos collects the comma-delimited list of positions that follows
+ * the actual token.
+ */
+ WordEntryPos *pos = NULL;
+ int npos = 0; /* elements of pos used */
+ int posalen = 0; /* allocated size of pos */
+
+ while (1)
+ {
+ if (statecode == WAITWORD)
+ {
+ if (*(state->prsbuf) == '\0')
+ return false;
+ else if (t_iseq(state->prsbuf, '\''))
+ statecode = WAITENDCMPLX;
+ else if (t_iseq(state->prsbuf, '\\'))
+ {
+ statecode = WAITNEXTCHAR;
+ oldstate = WAITENDWORD;
+ }
+ else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ else if (!t_isspace(state->prsbuf))
+ {
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ statecode = WAITENDWORD;
+ }
+ }
+ else if (statecode == WAITNEXTCHAR)
+ {
+ if (*(state->prsbuf) == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("there is no escaped character")));
+ else
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ Assert(oldstate != 0);
+ statecode = oldstate;
+ }
+ }
+ else if (statecode == WAITENDWORD)
+ {
+ if (t_iseq(state->prsbuf, '\\'))
+ {
+ statecode = WAITNEXTCHAR;
+ oldstate = WAITENDWORD;
+ }
+ else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
+ (state->oprisdelim && ISOPERATOR(state->prsbuf)))
+ {
+ RESIZEPRSBUF;
+ if (curpos == state->word)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ *(curpos) = '\0';
+ RETURN_TOKEN;
+ }
+ else if (t_iseq(state->prsbuf, ':'))
+ {
+ if (curpos == state->word)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ *(curpos) = '\0';
+ if (state->oprisdelim)
+ RETURN_TOKEN;
+ else
+ statecode = INPOSINFO;
+ }
+ else
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ }
+ }
+ else if (statecode == WAITENDCMPLX)
+ {
+ if (t_iseq(state->prsbuf, '\''))
+ {
+ statecode = WAITCHARCMPLX;
+ }
+ else if (t_iseq(state->prsbuf, '\\'))
+ {
+ statecode = WAITNEXTCHAR;
+ oldstate = WAITENDCMPLX;
+ }
+ else if (*(state->prsbuf) == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ else
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ }
+ }
+ else if (statecode == WAITCHARCMPLX)
+ {
+ if (t_iseq(state->prsbuf, '\''))
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ statecode = WAITENDCMPLX;
+ }
+ else
+ {
+ RESIZEPRSBUF;
+ *(curpos) = '\0';
+ if (curpos == state->word)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ if (state->oprisdelim)
+ {
+ /* state->prsbuf+=pg_mblen(state->prsbuf); */
+ RETURN_TOKEN;
+ }
+ else
+ statecode = WAITPOSINFO;
+ continue; /* recheck current character */
+ }
+ }
+ else if (statecode == WAITPOSINFO)
+ {
+ if (t_iseq(state->prsbuf, ':'))
+ statecode = INPOSINFO;
+ else
+ RETURN_TOKEN;
+ }
+ else if (statecode == INPOSINFO)
+ {
+ if (t_isdigit(state->prsbuf))
+ {
+ if (posalen == 0)
+ {
+ posalen = 4;
+ pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
+ npos = 0;
+ }
+ else if (npos + 1 >= posalen)
+ {
+ posalen *= 2;
+ pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
+ }
+ npos++;
+ WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
+ if (WEP_GETPOS(pos[npos - 1]) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("wrong position info in tsvector")));
+ WEP_SETWEIGHT(pos[npos - 1], 0);
+ statecode = WAITPOSDELIM;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ }
+ else if (statecode == WAITPOSDELIM)
+ {
+ if (t_iseq(state->prsbuf, ','))
+ statecode = INPOSINFO;
+ else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
+ {
+ if (WEP_GETWEIGHT(pos[npos - 1]))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ WEP_SETWEIGHT(pos[npos - 1], 3);
+ }
+ else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
+ {
+ if (WEP_GETWEIGHT(pos[npos - 1]))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ WEP_SETWEIGHT(pos[npos - 1], 2);
+ }
+ else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
+ {
+ if (WEP_GETWEIGHT(pos[npos - 1]))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ WEP_SETWEIGHT(pos[npos - 1], 1);
+ }
+ else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
+ {
+ if (WEP_GETWEIGHT(pos[npos - 1]))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ WEP_SETWEIGHT(pos[npos - 1], 0);
+ }
+ else if (t_isspace(state->prsbuf) ||
+ *(state->prsbuf) == '\0')
+ RETURN_TOKEN;
+ else if (!t_isdigit(state->prsbuf))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ }
+ else /* internal error */
+ elog(ERROR, "internal error in gettoken_tsvector");
+
+ /* get next char */
+ state->prsbuf += pg_mblen(state->prsbuf);
+ }
+
+ return false;
+}
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.4 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
type:8,
len:16;
char *word;
- QueryItem *item;
+ QueryOperand *item;
} HeadlineWordEntry;
typedef struct
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#define _PG_TSTYPE_H_
#include "fmgr.h"
+#include "utils/pg_crc.h"
+
/*
* TSVector type.
pos:20; /* MAX 1Mb */
} WordEntry;
-#define MAXSTRLEN ( 1<<11 )
-#define MAXSTRPOS ( 1<<20 )
+#define MAXSTRLEN ( (1<<11) - 1)
+#define MAXSTRPOS ( (1<<20) - 1)
/*
* Equivalent to
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
- int4 size;
+ uint32 size;
char data[1];
} TSVectorData;
/*
* TSQuery
+ *
+ *
*/
+typedef int8 QueryItemType;
+
+/* Valid values for QueryItemType: */
+#define QI_VAL 1
+#define QI_OPR 2
+#define QI_VALSTOP 3 /* This is only used in an intermediate stack representation in parse_tsquery. It's not a legal type elsewhere. */
+
/*
* QueryItem is one node in tsquery - operator or operand.
*/
-
-typedef struct QueryItem
+typedef struct
{
- int8 type; /* operand or kind of operator */
- int8 weight; /* weights of operand to search */
- int2 left; /* pointer to left operand Right operand is
- * item + 1, left operand is placed
- * item+item->left */
- int4 val; /* crc32 value of operand's value */
+ QueryItemType type; /* operand or kind of operator (ts_tokentype) */
+ int8 weight; /* weights of operand to search. It's a bitmask of allowed weights.
+ * If it is 0, then any weight is allowed. */
+ int32 valcrc; /* XXX: pg_crc32 would be a more appropriate data type,
+ * but we use comparisons to signed integers in the code.
+ * They would need to be changed as well. */
+
/* pointer to text value of operand, must correlate with WordEntry */
uint32
istrue:1, /* use for ranking in Cover */
length:11,
distance:20;
-} QueryItem;
+} QueryOperand;
+
+
+/* Legal values for QueryOperator.oper */
+#define OP_NOT 1
+#define OP_AND 2
+#define OP_OR 3
+
+typedef struct
+{
+ QueryItemType type;
+ int8 oper; /* see above */
+ int16 left; /* pointer to left operand. Right operand is
+ * item + 1, left operand is placed
+ * item+item->left */
+} QueryOperator;
/*
- * It's impossible to use offsetof(QueryItem, istrue)
+ * Note: TSQuery is 4-byte aligned, so make sure there are no fields
+ * inside QueryItem requiring 8-byte alignment, like int64.
*/
-#define HDRSIZEQI ( sizeof(int8) + sizeof(int8) + sizeof(int2) + sizeof(int4) )
+typedef union
+{
+ QueryItemType type;
+ QueryOperator operator;
+ QueryOperand operand;
+} QueryItem;
/*
* Storage:
- * (len)(size)(array of ITEM)(array of operand in text form)
- * operands are always finished by '\0'
+ * (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
*/
typedef struct
typedef TSQueryData *TSQuery;
#define HDRSIZETQ ( VARHDRSZ + sizeof(int4) )
-#define COMPUTESIZE(size,lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
-#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
-#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
-#define OPERANDSSIZE(x) ( (x)->len - HDRSIZETQ - (x)->size * sizeof(QueryItem) )
-#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+/* Computes the size of the header, all QueryItems and all operands. size is
+ * the number of QueryItems, and lenofoperand is the total length of all operands.
+ */
+#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
+/* Returns a pointer to the first QueryItem in a TSQuery */
+#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
+
+/* Returns a pointer to the beginning of operands in a TSQuery */
+#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
/*
* fmgr interface macros
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "tsearch/ts_type.h"
#include "tsearch/ts_public.h"
+#include "nodes/pg_list.h"
/*
* Common parse definitions for tsvector and tsquery
*/
-typedef struct
-{
- WordEntry entry; /* should be first ! */
- WordEntryPos *pos;
-} WordEntryIN;
-
-typedef struct
-{
- char *prsbuf;
- char *word;
- char *curpos;
- int4 len;
- int4 state;
- int4 alen;
- WordEntryPos *pos;
- bool oprisdelim;
-} TSVectorParseState;
-
-extern bool gettoken_tsvector(TSVectorParseState *state);
+/* tsvector parser support. */
-struct ParseQueryNode; /* private in backend/utils/adt/tsquery.c */
+struct TSVectorParseStateData;
+typedef struct TSVectorParseStateData *TSVectorParseState;
-typedef struct
-{
- char *buffer; /* entire string we are scanning */
- char *buf; /* current scan point */
- int4 state;
- int4 count;
+extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
+extern void reset_tsvector_parser(TSVectorParseState state, char *input);
+extern bool gettoken_tsvector(TSVectorParseState state,
+ char **token, int *len,
+ WordEntryPos **pos, int *poslen,
+ char **endptr);
+extern void close_tsvector_parser(TSVectorParseState state);
- /* reverse polish notation in list (for temporary usage) */
- struct ParseQueryNode *str;
+/* parse_tsquery */
- /* number in str */
- int4 num;
+struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
+typedef struct TSQueryParserStateData *TSQueryParserState;
- /* text-form operand */
- int4 lenop;
- int4 sumlen;
- char *op;
- char *curop;
-
- /* state for value's parser */
- TSVectorParseState valstate;
- /* tscfg */
- Oid cfg_id;
-} TSQueryParserState;
+typedef void (*PushFunction)(void *opaque, TSQueryParserState state, char *, int, int2);
extern TSQuery parse_tsquery(char *buf,
- void (*pushval) (TSQueryParserState *, int, char *, int, int2),
- Oid cfg_id, bool isplain);
-extern void pushval_asis(TSQueryParserState * state,
- int type, char *strval, int lenval, int2 weight);
-extern void pushquery(TSQueryParserState * state, int4 type, int4 val,
- int4 distance, int4 lenval, int2 weight);
+ PushFunction pushval,
+ void *opaque, bool isplain);
+
+/* Functions for use by PushFunction implementations */
+extern void pushValue(TSQueryParserState state,
+ char *strval, int lenval, int2 weight);
+extern void pushStop(TSQueryParserState state);
+extern void pushOperator(TSQueryParserState state, int8 operator);
/*
* parse plain text and lexize words
union
{
uint16 pos;
+ /*
+ * When apos array is used, apos[0] is the number of elements
+ * in the array (excluding apos[0]), and alen is the allocated
+ * size of the array.
+ */
uint16 *apos;
} pos;
char *word;
char *buf, int4 buflen);
extern text *generateHeadline(HeadlineParsedText * prs);
-/*
- * token/node types for parsing
- */
-#define END 0
-#define ERR 1
-#define VAL 2
-#define OPR 3
-#define OPEN 4
-#define CLOSE 5
-#define VALSTOP 6 /* for stop words */
-
/*
* Common check function for tsvector @@ tsquery
*/
extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
- bool (*chkcond) (void *checkval, QueryItem * val));
+ bool (*chkcond) (void *checkval, QueryOperand * val));
/*
* Useful conversion macros