Add SGML docs for contrib/dict_int and contrib/dict_xsyn.

author Tom Lane <tgl@sss.pgh.pa.us>

Sun, 2 Dec 2007 21:13:34 +0000 (21:13 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sun, 2 Dec 2007 21:13:34 +0000 (21:13 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 2 Dec 2007 21:13:34 +0000 (21:13 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 2 Dec 2007 21:13:34 +0000 (21:13 +0000)
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml

index 0ee4be87f099a5f1efd550cbdb020e934c5c3ea2..cf4495e3b8925c15fccd9d2224fe40f7c1cc0ed8 100644 (file)
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.4 2007/11/14 02:36:43 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.5 2007/12/02 21:13:34 tgl Exp $ -->
  
  <appendix id="contrib">
   <title>Additional Supplied Modules</title>
@@ -82,6 +82,8 @@ psql -d dbname -f <replaceable>SHAREDIR</>/contrib/<replaceable>module</>.sql
   &chkpass;
   &cube;
   &dblink;
+ &dict-int;
+ &dict-xsyn;
   &earthdistance;
   &fuzzystrmatch;
   &hstore;
diff --git a/doc/src/sgml/dict-int.sgml b/doc/src/sgml/dict-int.sgml

new file mode 100644 (file)

index 0000000..4da6271
--- /dev/null
+++ b/doc/src/sgml/dict-int.sgml
@@ -0,0 +1,78 @@
+<sect1 id="dict-int">
+ <title>dict_int</title>
+ 
+ <indexterm zone="dict-int">
+  <primary>dict_int</primary>
+ </indexterm>
+
+ <para>
+  The motivation for this example dictionary is to control the indexing of
+  integers (signed and unsigned), and, consequently, to minimize the number of
+  unique words, which greatly affects the performance of searching.
+ </para>
+
+ <sect2>
+  <title>Configuration</title>
+  <para>
+   The dictionary accepts two options: 
+  </para>
+
+  <itemizedlist>
+   <listitem>
+    <para>
+     The MAXLEN parameter specifies the maximum length (number of digits)
+     allowed in an integer word.  The default value is 6.
+    </para>
+   </listitem>
+   <listitem>
+    <para>
+     The REJECTLONG parameter specifies whether an overlength integer should be
+     truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns
+     the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the
+     dictionary treats an overlength integer as a stop word, so that it will
+     not be indexed.
+    </para>
+   </listitem>
+  </itemizedlist>
+ </sect2>
+
+ <sect2>
+  <title>Usage</title>
+
+  <para>
+   Running the installation script creates a text search template
+   <literal>intdict_template</> and a dictionary <literal>intdict</>
+   based on it, with the default parameters.  You can alter the
+   parameters, for example
+
+<programlisting>
+mydb=# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
+ALTER TEXT SEARCH DICTIONARY
+</programlisting>
+
+   or create new dictionaries based on the template.
+  </para>
+
+  <para>
+   To test the dictionary, you can try
+
+<programlisting>
+mydb=# select ts_lexize('intdict', '12345678');
+ ts_lexize
+-----------
+ {123456}
+</programlisting>
+
+   but real-world usage will involve including it in a text search
+   configuration as described in <xref linkend="textsearch">.
+   That might look like this:
+
+<programlisting>
+ALTER TEXT SEARCH CONFIGURATION english
+    ALTER MAPPING FOR int, uint WITH intdict;
+</programlisting>
+
+  </para>
+ </sect2>
+
+</sect1>
diff --git a/doc/src/sgml/dict-xsyn.sgml b/doc/src/sgml/dict-xsyn.sgml

new file mode 100644 (file)

index 0000000..8126075
--- /dev/null
+++ b/doc/src/sgml/dict-xsyn.sgml
@@ -0,0 +1,78 @@
+<sect1 id="dict-xsyn">
+ <title>dict_xsyn</title>
+ 
+ <indexterm zone="dict-xsyn">
+  <primary>dict_xsyn</primary>
+ </indexterm>
+
+ <para>
+  The Extended Synonym Dictionary module replaces words with groups of their
+  synonyms, and so makes it possible to search for a word using any of its
+  synonyms.
+ </para>
+
+ <sect2>
+  <title>Configuration</title>
+  <para>
+   A <literal>dict_xsyn</> dictionary accepts the following options:
+  </para>
+  <itemizedlist>
+   <listitem>
+    <para>
+     KEEPORIG controls whether the original word is included, or only its
+     synonyms. Default is 'true'.
+    </para>
+   </listitem>
+   <listitem>
+    <para>
+     RULES is the base name of the file containing the list of synonyms.
+     This file must be in $(prefix)/share/tsearch_data/, and its name must
+     end in ".rules" (which is not included in the RULES parameter).
+    </para>
+   </listitem>
+  </itemizedlist>
+  <para>
+   The rules file has the following format:
+  </para>
+  <itemizedlist>
+   <listitem>
+    <para>
+     Each line represents a group of synonyms for a single word, which is
+     given first on the line. Synonyms are separated by whitespace:
+    </para>
+    <programlisting>
+word syn1 syn2 syn3
+    </programlisting>
+   </listitem>
+   <listitem>
+    <para>
+     The sharp ('#') sign is a comment delimiter. It may appear at any
+     position in a line.  The rest of the line will be skipped.
+    </para>
+   </listitem>
+  </itemizedlist>
+
+  <para>
+   Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/,
+   for an example.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Usage</title>
+  <programlisting>
+mydb=# SELECT ts_lexize('xsyn','word');
+ts_lexize
+----------------
+{word,syn1,syn2,syn3}
+  </programlisting>
+  <para>
+   Change dictionary options:
+  </para>
+  <programlisting>
+mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false);
+ALTER TEXT SEARCH DICTIONARY
+  </programlisting>
+ </sect2>
+
+</sect1>
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml

index 6857e8dda7d1122c556373d9fb9a30e835bb0737..1a5064660bcf6da5d6c5c43d9f408e5d9410b3c0 100644 (file)
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.53 2007/11/14 01:09:50 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.54 2007/12/02 21:13:34 tgl Exp $ -->
  
  <!entity history    SYSTEM "history.sgml">
  <!entity info       SYSTEM "info.sgml">
@@ -96,6 +96,8 @@
  <!entity chkpass         SYSTEM "chkpass.sgml">
  <!entity cube            SYSTEM "cube.sgml">
  <!entity dblink          SYSTEM "dblink.sgml">
+<!entity dict-int        SYSTEM "dict-int.sgml">
+<!entity dict-xsyn       SYSTEM "dict-xsyn.sgml">
  <!entity earthdistance   SYSTEM "earthdistance.sgml">
  <!entity fuzzystrmatch   SYSTEM "fuzzystrmatch.sgml">
  <!entity hstore          SYSTEM "hstore.sgml">
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 2 Dec 2007 21:13:34 +0000 (21:13 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 2 Dec 2007 21:13:34 +0000 (21:13 +0000)
doc/src/sgml/contrib.sgml		patch \| blob \| history
doc/src/sgml/dict-int.sgml	[new file with mode: 0644]	patch \| blob
doc/src/sgml/dict-xsyn.sgml	[new file with mode: 0644]	patch \| blob
doc/src/sgml/filelist.sgml		patch \| blob \| history