-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.52 2009/06/17 21:58:49 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.53 2009/08/14 14:53:20 teodor Exp $ -->
<chapter id="textsearch">
<title id="textsearch-title">Full Text Search</title>
asciiword | Word, all ASCII | Paris | {my_synonym,english_stem} | my_synonym | {paris}
</programlisting>
</para>
+
+ <para>
+ An asterisk (<literal>*</literal>) at the end of a definition word indicates
+ that the definition word is a prefix; the <function>to_tsquery()</function>
+ function will transform such a definition into the prefix search format (see
+ <xref linkend="textsearch-parsing-queries">).
+ Note that the asterisk is ignored by <function>to_tsvector()</function>.
+ </para>
+
+ <para>
+ Contents of <filename>$SHAREDIR/tsearch_data/synonym_sample.syn</>:
+ </para>
+<programlisting>
+postgres pgsql
+postgresql pgsql
+postgre pgsql
+gogle googl
+indices index*
+</programlisting>
+
+ <para>
+ Results:
+ </para>
+<programlisting>
+=# create text search dictionary syn( template=synonym,synonyms='synonym_sample');
+=# select ts_lexize('syn','indices');
+ ts_lexize
+-----------
+ {index}
+(1 row)
+
+=# create text search configuration tst ( copy=simple);
+=# alter text search configuration tst alter mapping for asciiword with syn;
+=# select to_tsquery('tst','indices');
+ to_tsquery
+------------
+ 'index':*
+(1 row)
+
+=# select 'indexes are very useful'::tsvector;
+ tsvector
+---------------------------------
+ 'are' 'indexes' 'useful' 'very'
+(1 row)
+
+=# select 'indexes are very useful'::tsvector @@ to_tsquery('tst','indices');
+ ?column?
+----------
+ t
+(1 row)
+
+=# select to_tsvector('tst','indices');
+ to_tsvector
+-------------
+ 'index':1
+(1 row)
+</programlisting>
<para>
The only parameter required by the <literal>synonym</> template is
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.10 2009/01/01 17:23:48 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.11 2009/08/14 14:53:20 teodor Exp $
*
*-------------------------------------------------------------------------
*/
{
char *in;
char *out;
+ int outlen;
+ uint16 flags;
} Syn;
typedef struct
* Finds the next whitespace-delimited word within the 'in' string.
* Returns a pointer to the first character of the word, and a pointer
* to the next byte after the last character in the word (in *end).
+ * Character '*' at the end of word will not be treated as word
+ * character if flags is not null.
*/
static char *
-findwrd(char *in, char **end)
+findwrd(char *in, char **end, uint16 *flags)
{
char *start;
+ char *lastchar;
/* Skip leading spaces */
while (*in && t_isspace(in))
return NULL;
}
- start = in;
+ lastchar = start = in;
/* Find end of word */
while (*in && !t_isspace(in))
+ {
+ lastchar = in;
in += pg_mblen(in);
+ }
+
+ if ( in - lastchar == 1 && t_iseq(lastchar, '*') && flags )
+ {
+ *flags = TSL_PREFIX;
+ *end = lastchar;
+ }
+ else
+ {
+ if (flags)
+ *flags = 0;
+ *end = in;
+ }
- *end = in;
return start;
}
*end = NULL;
int cur = 0;
char *line = NULL;
+ uint16 flags = 0;
foreach(l, dictoptions)
{
while ((line = tsearch_readline(&trst)) != NULL)
{
- starti = findwrd(line, &end);
+ starti = findwrd(line, &end, NULL);
if (!starti)
{
/* Empty line */
}
*end = '\0';
- starto = findwrd(end + 1, &end);
+ starto = findwrd(end + 1, &end, &flags);
if (!starto)
{
/* A line with only one word (+whitespace). Ignore silently. */
d->syn[cur].out = lowerstr(starto);
}
+ d->syn[cur].outlen = strlen(starto);
+ d->syn[cur].flags = flags;
+
cur++;
skipline:
PG_RETURN_POINTER(NULL);
res = palloc0(sizeof(TSLexeme) * 2);
- res[0].lexeme = pstrdup(found->out);
+ res[0].lexeme = pnstrdup(found->out, found->outlen);
+ res[0].flags = found->flags;
PG_RETURN_POINTER(res);
}
postgresql pgsql
postgre pgsql
gogle googl
+indices index*
{googl}
(1 row)
+SELECT ts_lexize('synonym', 'indices');
+ ts_lexize
+-----------
+ {index}
+(1 row)
+
-- Create and simple test thesaurus dictionary
-- More tests in configuration checks because ts_lexize()
-- cannot pass more than one word to thesaurus.
'common':2 'googl':7,10 'instead':8 'mistak':3 'write':6
(1 row)
+SELECT to_tsvector('synonym_tst', 'Indexes or indices - Which is right plural form of index?');
+ to_tsvector
+----------------------------------------------
+ 'form':8 'index':1,3,10 'plural':7 'right':6
+(1 row)
+
+SELECT to_tsquery('synonym_tst', 'Index & indices');
+ to_tsquery
+---------------------
+ 'index' & 'index':*
+(1 row)
+
-- test thesaurus in configuration
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
CREATE TEXT SEARCH CONFIGURATION thesaurus_tst (
SELECT ts_lexize('synonym', 'PoStGrEs');
SELECT ts_lexize('synonym', 'Gogle');
+SELECT ts_lexize('synonym', 'indices');
-- Create and simple test thesaurus dictionary
-- More tests in configuration checks because ts_lexize()
SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre');
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
+SELECT to_tsvector('synonym_tst', 'Indexes or indices - Which is right plural form of index?');
+SELECT to_tsquery('synonym_tst', 'Index & indices');
-- test thesaurus in configuration
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector