-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.33 2007/11/14 18:36:37 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.34 2007/11/14 23:43:27 tgl Exp $ -->
<chapter id="textsearch">
<title id="textsearch-title">Full Text Search</title>
<listitem>
<para>
4 divides the rank by the mean harmonic distance between extents
+ (this is implemented only by <function>ts_rank_cd</>)
</para>
</listitem>
<listitem>
of unique words in document
</para>
</listitem>
+ <listitem>
+ <para>
+ 32 divides the rank by itself + 1 (that is, <literal>rank/(rank+1)</literal>)
+ </para>
+ </listitem>
</itemizedlist>
+ If more than one flag bit is specified, the transformations are
+ applied in the order listed.
</para>
<para>
It is important to note that the ranking functions do not use any global
- information so it is impossible to produce a fair normalization to 1% or
- 100%, as sometimes desired. However, a simple technique like
- <literal>rank/(rank+1)</literal> can be applied. Of course, this is just
- a cosmetic change, i.e., the ordering of the search results will not
- change.
+ information, so it is impossible to produce a fair normalization to 1% or
+ 100% as sometimes desired. Normalization option 32
+ (<literal>rank/(rank+1)</literal>) can be applied to scale all ranks
+ into the range zero to one, but of course this is just a cosmetic change;
+ it will not affect the ordering of the search results.
</para>
<para>
This is the same example using normalized ranking:
<programlisting>
-SELECT title, ts_rank_cd(textsearch, query)/(ts_rank_cd(textsearch, query) + 1) AS rank
+SELECT title, ts_rank_cd(textsearch, query, 32 /* rank/(rank+1) */ ) AS rank
FROM apod, to_tsquery('neutrino|(dark & matter)') query
WHERE query @@ textsearch
ORDER BY rank DESC LIMIT 10;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.8 2007/09/20 18:10:57 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.9 2007/11/14 23:43:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
-#define RANK_NO_NORM 0x00
+#define RANK_NO_NORM 0x00
#define RANK_NORM_LOGLENGTH 0x01
-#define RANK_NORM_LENGTH 0x02
-#define RANK_NORM_EXTDIST 0x04
-#define RANK_NORM_UNIQ 0x08
-#define RANK_NORM_LOGUNIQ 0x10
-#define DEF_NORM_METHOD RANK_NO_NORM
+#define RANK_NORM_LENGTH 0x02
+#define RANK_NORM_EXTDIST 0x04
+#define RANK_NORM_UNIQ 0x08
+#define RANK_NORM_LOGUNIQ 0x10
+#define RANK_NORM_RDIVRPLUS1 0x20
+#define DEF_NORM_METHOD RANK_NO_NORM
static float calc_rank_or(float *w, TSVector t, TSQuery q);
static float calc_rank_and(float *w, TSVector t, TSQuery q);
res /= (float) len;
}
+ /* RANK_NORM_EXTDIST not applicable */
+
if ((method & RANK_NORM_UNIQ) && t->size > 0)
res /= (float) (t->size);
if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
res /= log((double) (t->size + 1)) / log(2.0);
+ if (method & RANK_NORM_RDIVRPLUS1)
+ res /= (res + 1);
+
return res;
}
Wdoc /= (double) len;
}
- if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
+ if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
Wdoc /= ((double) NExtent) / SumDist;
if ((method & RANK_NORM_UNIQ) && txt->size > 0)
if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
Wdoc /= log((double) (txt->size + 1)) / log(2.0);
+ if (method & RANK_NORM_RDIVRPLUS1)
+ Wdoc /= (Wdoc + 1);
+
pfree(doc);
pfree( qr.operandexist );