-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.41 2008/03/04 03:17:18 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.42 2008/03/10 03:01:28 tgl Exp $ -->
<chapter id="textsearch">
<title id="textsearch-title">Full Text Search</title>
dictionary can be used to overcome linguistic problems, for example, to
prevent an English stemmer dictionary from reducing the word 'Paris' to
'pari'. It is enough to have a <literal>Paris paris</literal> line in the
- synonym dictionary and put it before the <literal>english_stem</> dictionary:
+ synonym dictionary and put that dictionary before the <literal>english_stem</>
+ dictionary. For example:
<programlisting>
SELECT * FROM ts_debug('english', 'Paris');
<productname>PostgreSQL</> installation's shared-data directory).
The file format is just one line
per word to be substituted, with the word followed by its synonym,
- separated by white space. Blank lines and trailing spaces are ignored,
- and upper case is folded to lower case.
+ separated by white space. Blank lines and trailing spaces are ignored.
</para>
+ <para>
+ The <literal>synonym</> template also has an optional parameter
+ <literal>CaseSensitive</>, which defaults to <literal>false</>. When
+ <literal>CaseSensitive</> is <literal>false</>, words in the synonym file
+ are folded to lower case, as are input tokens. When it is
+ <literal>true</>, words and tokens are not folded to lower case,
+ but are compared as-is.
+ </para>
</sect2>
<sect2 id="textsearch-thesaurus">
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.8 2008/03/10 03:01:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
{
int len; /* length of syn array */
Syn *syn;
+ bool case_sensitive;
} DictSyn;
/*
DictSyn *d;
ListCell *l;
char *filename = NULL;
+ bool case_sensitive = false;
FILE *fin;
char *starti,
*starto,
if (pg_strcasecmp("Synonyms", defel->defname) == 0)
filename = defGetString(defel);
+ else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
+ case_sensitive = defGetBoolean(defel);
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
}
}
- d->syn[cur].in = lowerstr(starti);
- d->syn[cur].out = lowerstr(starto);
+ if (case_sensitive)
+ {
+ d->syn[cur].in = pstrdup(starti);
+ d->syn[cur].out = pstrdup(starto);
+ }
+ else
+ {
+ d->syn[cur].in = lowerstr(starti);
+ d->syn[cur].out = lowerstr(starto);
+ }
cur++;
d->len = cur;
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
+ d->case_sensitive = case_sensitive;
+
PG_RETURN_POINTER(d);
}
if (len <= 0 || d->len <= 0)
PG_RETURN_POINTER(NULL);
- key.in = lowerstr_with_len(in, len);
+ if (d->case_sensitive)
+ key.in = pnstrdup(in, len);
+ else
+ key.in = lowerstr_with_len(in, len);
+
key.out = NULL;
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);