]> granicus.if.org Git - postgresql/commitdiff
1. Fix loss of precision in rank() for queries with OR-ed lexemes
authorTeodor Sigaev <teodor@sigaev.ru>
Fri, 28 Oct 2005 13:05:06 +0000 (13:05 +0000)
committerTeodor Sigaev <teodor@sigaev.ru>
Fri, 28 Oct 2005 13:05:06 +0000 (13:05 +0000)
2. Allow tsquery_in to accept an empty tsquery: resolves a dump/restore problem with tsquery

contrib/tsearch2/expected/tsearch2.out
contrib/tsearch2/query.c
contrib/tsearch2/rank.c

index 6c266a29ac6c4191f3b573df1a1a345611869127..a7ac240ef9eb04e6076cf48153841554e193c3a9 100644 (file)
@@ -746,21 +746,21 @@ select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
 (1 row)
 
 select rank(' a:1 s:2C d g'::tsvector, 'a | s');
- rank 
-------
- 0.28
+   rank    
+-----------
+ 0.0911891
 (1 row)
 
 select rank(' a:1 s:2B d g'::tsvector, 'a | s');
- rank 
-------
- 0.46
+   rank   
+----------
+ 0.151982
 (1 row)
 
 select rank(' a:1 s:2 d g'::tsvector, 'a | s');
- rank 
-------
- 0.19
+   rank    
+-----------
+ 0.0607927
 (1 row)
 
 select rank(' a:1 s:2C d g'::tsvector, 'a & s');
index d8b8d4c80d55135e60d7bfbd98a758dba4f0f906..013f0031965e40014e77c4a900a90435559096db 100644 (file)
@@ -55,6 +55,7 @@ Datum         to_tsquery_current(PG_FUNCTION_ARGS);
 /* parser's states */
 #define WAITOPERAND 1
 #define WAITOPERATOR   2
+#define WAITFIRSTOPERAND 3
 
 /*
  * node of query tree, also used
@@ -137,6 +138,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
        {
                switch (state->state)
                {
+                       case WAITFIRSTOPERAND:
                        case WAITOPERAND:
                                if (*(state->buf) == '!')
                                {
@@ -159,14 +161,16 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
                                else if (*(state->buf) != ' ')
                                {
                                        state->valstate.prsbuf = state->buf;
-                                       state->state = WAITOPERATOR;
                                        if (gettoken_tsvector(&(state->valstate)))
                                        {
                                                *strval = state->valstate.word;
                                                *lenval = state->valstate.curpos - state->valstate.word;
                                                state->buf = get_weight(state->valstate.prsbuf, weight);
+                                               state->state = WAITOPERATOR;
                                                return VAL;
                                        }
+                                       else if ( state->state == WAITFIRSTOPERAND ) 
+                                               return END;
                                        else
                                                ereport(ERROR,
                                                                (errcode(ERRCODE_SYNTAX_ERROR),
@@ -596,7 +600,7 @@ static QUERYTYPE *
 
        /* init state */
        state.buf = buf;
-       state.state = WAITOPERAND;
+       state.state = WAITFIRSTOPERAND;
        state.count = 0;
        state.num = 0;
        state.str = NULL;
@@ -616,10 +620,13 @@ static QUERYTYPE *
        /* parse query & make polish notation (postfix, but in reverse order) */
        makepol(&state, pushval);
        pfree(state.valstate.word);
-       if (!state.num)
-               ereport(ERROR,
-                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                errmsg("empty query")));
+       if (!state.num) {
+               elog(NOTICE, "Query doesn't contain lexem(s)");
+               query = (QUERYTYPE*)palloc( HDRSIZEQT );
+               query->len = HDRSIZEQT;
+               query->size = 0;
+               return query; 
+       }
 
        /* make finish struct */
        commonlen = COMPUTESIZE(state.num, state.sumlen);
@@ -905,6 +912,10 @@ to_tsquery(PG_FUNCTION_ARGS)
        PG_FREE_IF_COPY(in, 1);
 
        query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+       
+       if ( query->size == 0 )
+               PG_RETURN_POINTER(query);
+
        res = clean_fakeval_v2(GETQUERY(query), &len);
        if (!res)
        {
index 081b0840875aa360270c963277f792bbb1868313..40bec1f48407be28356ef0c60313e144f755ed39 100644 (file)
@@ -257,7 +257,7 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
        int4            dimt,
                                j,
                                i;
-       float           res = -1.0;
+       float           res = 0.0;
        ITEM      **item;
        int                     size = q->size;
 
@@ -266,6 +266,8 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
 
        for (i = 0; i < size; i++)
        {
+               float resj,wjm;
+               int4  jm;
                entry = find_wordentry(t, q, item[i]);
                if (!entry)
                        continue;
@@ -281,14 +283,27 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
                        post = POSNULL + 1;
                }
 
-               for (j = 0; j < dimt; j++)
-               {
-                       if (res < 0)
-                               res = wpos(post[j]);
-                       else
-                               res = 1.0 - (1.0 - res) * (1.0 - wpos(post[j]));
-               }
+                resj = 0.0;
+                wjm = -1.0;
+                jm = 0;
+                for (j = 0; j < dimt; j++)
+                {
+                        resj = resj + wpos(post[j])/((j+1)*(j+1));
+                        if ( wpos(post[j]) > wjm ) {
+                                wjm = wpos(post[j]);
+                                jm  = j;
+                        }
+                }
+/* 
+        limit (sum(1/i^2),i->inf) = pi^2/6
+        resj = sum(wi/i^2),i=1,noccurrence,
+        wi - should be sorted desc, 
+        don't sort for now, just choose maximum weight. This should be corrected
+               Oleg Bartunov
+*/
+                res = res + ( wjm + resj - wjm/((jm+1)*(jm+1)))/1.64493406685; 
        }
+       res = res /size;
        pfree(item);
        return res;
 }