Add a CaseSensitive parameter to synonym dictionaries.

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 10 Mar 2008 03:01:28 +0000 (03:01 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 10 Mar 2008 03:01:28 +0000 (03:01 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 10 Mar 2008 03:01:28 +0000 (03:01 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 10 Mar 2008 03:01:28 +0000 (03:01 +0000)
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml

index e484ddcbdcd3cd56600b3db403ce0c1a3814686b..1aec17efd97f0b4eea55ce5516651923d0cb9f9e 100644 (file)
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.41 2008/03/04 03:17:18 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.42 2008/03/10 03:01:28 tgl Exp $ -->
  
  <chapter id="textsearch">
   <title id="textsearch-title">Full Text Search</title>
@@ -2209,7 +2209,8 @@ SELECT ts_lexize('public.simple_dict','The');
      dictionary can be used to overcome linguistic problems, for example, to
      prevent an English stemmer dictionary from reducing the word 'Paris' to
      'pari'.  It is enough to have a <literal>Paris paris</literal> line in the
-    synonym dictionary and put it before the <literal>english_stem</> dictionary:
+    synonym dictionary and put it before the <literal>english_stem</>
+    dictionary.  For example:
  
  <programlisting>
  SELECT * FROM ts_debug('english', 'Paris');
@@ -2242,10 +2243,17 @@ SELECT * FROM ts_debug('english', 'Paris');
      <productname>PostgreSQL</> installation's shared-data directory).
      The file format is just one line
      per word to be substituted, with the word followed by its synonym,
-    separated by white space.  Blank lines and trailing spaces are ignored,
-    and upper case is folded to lower case.
+    separated by white space.  Blank lines and trailing spaces are ignored.
     </para>
  
+   <para>
+    The <literal>synonym</> template also has an optional parameter
+    <literal>CaseSensitive</>, which defaults to <literal>false</>.  When
+    <literal>CaseSensitive</> is <literal>false</>, words in the synonym file
+    are folded to lower case, as are input tokens.  When it is
+    <literal>true</>, words and tokens are not folded to lower case,
+    but are compared as-is.
+   </para>
    </sect2>
  
    <sect2 id="textsearch-thesaurus">
diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c

index 16eec98d8b2ba0847460996bfad5dfc7e7e3c7a6..6f263603d7a3900fb02acf3474ba58496fbbeaef 100644 (file)
--- a/src/backend/tsearch/dict_synonym.c
+++ b/src/backend/tsearch/dict_synonym.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.8 2008/03/10 03:01:28 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -30,6 +30,7 @@ typedef struct
  {
         int                     len;                    /* length of syn array */
         Syn                *syn;
+       bool            case_sensitive;
  } DictSyn;
  
  /*
@@ -77,6 +78,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
         DictSyn    *d;
         ListCell   *l;
         char       *filename = NULL;
+       bool            case_sensitive = false;
         FILE       *fin;
         char       *starti,
                            *starto,
@@ -90,6 +92,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
  
                 if (pg_strcasecmp("Synonyms", defel->defname) == 0)
                         filename = defGetString(defel);
+               else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
+                       case_sensitive = defGetBoolean(defel);
                 else
                         ereport(ERROR,
                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -154,8 +158,16 @@ dsynonym_init(PG_FUNCTION_ARGS)
                         }
                 }
  
-               d->syn[cur].in = lowerstr(starti);
-               d->syn[cur].out = lowerstr(starto);
+               if (case_sensitive)
+               {
+                       d->syn[cur].in = pstrdup(starti);
+                       d->syn[cur].out = pstrdup(starto);
+               }
+               else
+               {
+                       d->syn[cur].in = lowerstr(starti);
+                       d->syn[cur].out = lowerstr(starto);
+               }
  
                 cur++;
  
@@ -168,6 +180,8 @@ skipline:
         d->len = cur;
         qsort(d->syn, d->len, sizeof(Syn), compareSyn);
  
+       d->case_sensitive = case_sensitive;
+
         PG_RETURN_POINTER(d);
  }
  
@@ -185,7 +199,11 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
         if (len <= 0 || d->len <= 0)
                 PG_RETURN_POINTER(NULL);
  
-       key.in = lowerstr_with_len(in, len);
+       if (d->case_sensitive)
+               key.in = pnstrdup(in, len);
+       else
+               key.in = lowerstr_with_len(in, len);
+
         key.out = NULL;
  
         found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 10 Mar 2008 03:01:28 +0000 (03:01 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 10 Mar 2008 03:01:28 +0000 (03:01 +0000)
doc/src/sgml/textsearch.sgml		patch | blob | history
src/backend/tsearch/dict_synonym.c		patch | blob | history