-# $PostgreSQL: pgsql/contrib/Makefile,v 1.80 2007/10/13 22:59:43 tgl Exp $
+# $PostgreSQL: pgsql/contrib/Makefile,v 1.81 2007/10/15 21:36:49 tgl Exp $
subdir = contrib
top_builddir = ..
chkpass \
cube \
dblink \
+ dict_int \
+ dict_xsyn \
earthdistance \
fuzzystrmatch \
hstore \
seg \
spi \
tablefunc \
+ test_parser \
vacuumlo
ifeq ($(with_openssl),yes)
-
The PostgreSQL contrib tree
---------------------------
by Dave Page <dpage@vale-housing.co.uk>
btree_gist -
- Support for emulating BTREE indexing in GiST
- by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
+ Support for emulating BTREE indexing in GiST
+ by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
chkpass -
An auto-encrypted password datatype
Allows remote query execution
by Joe Conway <mail@joeconway.com>
+dict_int -
+ Text search dictionary template for integers
+ by Sergey Karpov <karpov@sao.ru>
+
+dict_xsyn -
+ Text search dictionary template for extended synonym processing
+ by Sergey Karpov <karpov@sao.ru>
+
earthdistance -
- Operator for computing earth distance for two points
+ Operator for computing earth distance between two points
by Hal Snyder <hal@vailsys.com>
fuzzystrmatch -
by Joe Conway <mail@joeconway.com>, Joel Burton <jburton@scw.org>
hstore -
- Hstore - module for storing (key,value) pairs
- by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
+ Module for storing (key, value) pairs
+ by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
intagg -
Integer aggregator
Displays the contents of the free space map (FSM)
by Mark Kirkwood <markir@paradise.net.nz>
+pg_standby -
+ Sample archive_command for warm standby operation
+ by Simon Riggs <simon@2ndquadrant.com>
+
pg_trgm -
Functions for determining the similarity of text based on trigram
matching.
by Tatsuo Ishii <ishii@sraoss.co.jp>
pgstattuple -
- A function to return statistics about "dead" tuples and free
+ Functions to return statistics about "dead" tuples and free
space within a table
by Tatsuo Ishii <ishii@sraoss.co.jp>
by Victor Wagner <vitus@cryptocom.ru>
start-scripts -
- Scripts for starting the server at boot time.
+ Scripts for starting the server at boot time on various platforms.
tablefunc -
Examples of functions returning tables
by Joe Conway <mail@joeconway.com>
+test_parser -
+ Sample text search parser
+ by Sergey Karpov <karpov@sao.ru>
+
tsearch2 -
Full-text-index support using GiST
by Teodor Sigaev <teodor@sigaev.ru> and Oleg Bartunov
--- /dev/null
+# $PostgreSQL: pgsql/contrib/dict_int/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+# Description of the dict_int module: these variables are consumed by the
+# makefile included at the bottom (PGXS or contrib-global.mk).
+MODULE_big = dict_int
+OBJS = dict_int.o
+DATA_built = dict_int.sql
+DATA = uninstall_dict_int.sql
+DOCS = README.dict_int
+REGRESS = dict_int
+
+# With USE_PGXS set, ask pg_config where the PGXS makefile lives and include
+# it; otherwise build as part of the source tree two directories up.
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/dict_int
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
--- /dev/null
+Dictionary for integers
+=======================
+
+The motivation for this example dictionary is to control the indexing of
+integers (signed and unsigned) and, consequently, to minimize the number of
+unique words, since a large number of them greatly affects search performance.
+
+* Configuration
+
+The dictionary accepts two options:
+
+ - The MAXLEN parameter specifies the maximum length (number of digits)
+ allowed in an integer word. The default value is 6.
+
+ - The REJECTLONG parameter specifies if an overlength integer should be
+ truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns
+ the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the
+ dictionary treats an overlength integer as a stop word, so that it will
+ not be indexed.
+
+* Usage
+
+1. Compile and install
+
+2. Load dictionary
+
+ psql mydb < dict_int.sql
+
+3. Test it
+
+ mydb# select ts_lexize('intdict', '12345678');
+ ts_lexize
+ -----------
+ {123456}
+
+4. Change its options as you wish
+
+ mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
+ ALTER TEXT SEARCH DICTIONARY
+
+That's all.
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * dict_int.c
+ * Text search dictionary for integers
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/contrib/dict_int/dict_int.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "commands/defrem.h"
+#include "fmgr.h"
+#include "tsearch/ts_public.h"
+
+PG_MODULE_MAGIC;
+
+
+/*
+ * Per-dictionary settings: filled in by dintdict_init() and read by
+ * dintdict_lexize().
+ */
+typedef struct
+{
+	int			maxlen;			/* MAXLEN option: digits kept per integer */
+	bool		rejectlong;		/* REJECTLONG option: drop overlength
+								 * integers instead of truncating them */
+} DictInt;
+
+
+PG_FUNCTION_INFO_V1(dintdict_init);
+Datum dintdict_init(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(dintdict_lexize);
+Datum dintdict_lexize(PG_FUNCTION_ARGS);
+
+/*
+ * dintdict_init
+ *		Build the DictInt state for a dictionary from its option list.
+ *
+ * Argument 0 is a List of DefElem options.  The recognized option names,
+ * matched case-insensitively, are MAXLEN and REJECTLONG; any other name
+ * raises an error.  Defaults are MAXLEN = 6 and REJECTLONG = false.
+ *
+ * MAXLEN must be at least 1.  dintdict_lexize() uses it as the index at
+ * which an overlength token is cut off, so a negative value would write
+ * outside the token buffer and zero would produce empty lexemes.
+ *
+ * Returns a pointer to the palloc'd DictInt.
+ */
+Datum
+dintdict_init(PG_FUNCTION_ARGS)
+{
+	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
+	DictInt    *d;
+	ListCell   *l;
+
+	d = (DictInt *) palloc0(sizeof(DictInt));
+	d->maxlen = 6;
+	d->rejectlong = false;
+
+	foreach(l, dictoptions)
+	{
+		DefElem    *defel = (DefElem *) lfirst(l);
+
+		if (pg_strcasecmp(defel->defname, "MAXLEN") == 0)
+		{
+			d->maxlen = atoi(defGetString(defel));
+
+			/* reject values that would make the truncation index invalid */
+			if (d->maxlen < 1)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("maxlen value has to be >= 1")));
+		}
+		else if (pg_strcasecmp(defel->defname, "REJECTLONG") == 0)
+		{
+			d->rejectlong = defGetBoolean(defel);
+		}
+		else
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized intdict parameter: \"%s\"",
+							defel->defname)));
+		}
+	}
+
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * dintdict_lexize
+ *		Normalize one integer token.
+ *
+ * Arguments: 0 = DictInt built by dintdict_init(), 1 = pointer to the token
+ * text, 2 = token length in bytes.
+ *
+ * Returns a two-element TSLexeme array whose second element is the
+ * terminator (lexeme == NULL).  A token no longer than maxlen is returned
+ * unchanged.  A longer one is either cut to its first maxlen characters or,
+ * when rejectlong is set, rejected by returning an array that holds only the
+ * terminator.
+ */
+Datum
+dintdict_lexize(PG_FUNCTION_ARGS)
+{
+	DictInt    *d = (DictInt *) PG_GETARG_POINTER(0);
+	char	   *in = (char *) PG_GETARG_POINTER(1);
+	int			len = PG_GETARG_INT32(2);
+	char	   *txt = pnstrdup(in, len);
+
+	/*
+	 * Zero the whole array: besides "lexeme", TSLexeme carries nvariant and
+	 * flags fields that the caller reads, so they must not be left as
+	 * uninitialized palloc garbage.
+	 */
+	TSLexeme   *res = palloc0(sizeof(TSLexeme) * 2);
+
+	res[1].lexeme = NULL;
+	if (len > d->maxlen)
+	{
+		if (d->rejectlong)
+		{
+			/* reject by returning void array */
+			pfree(txt);
+			res[0].lexeme = NULL;
+		}
+		else
+		{
+			/* trim integer */
+			txt[d->maxlen] = '\0';
+			res[0].lexeme = txt;
+		}
+	}
+	else
+	{
+		res[0].lexeme = txt;
+	}
+
+	PG_RETURN_POINTER(res);
+}
--- /dev/null
+-- $PostgreSQL: pgsql/contrib/dict_int/dict_int.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+-- Adjust this setting to control where the objects get created.
+SET search_path = public;
+
+-- Create all objects in a single transaction.
+BEGIN;
+
+-- C-language entry points implemented in dict_int.c.
+CREATE FUNCTION dintdict_init(internal)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+CREATE FUNCTION dintdict_lexize(internal, internal, internal, internal)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+-- Template that ties the init and lexize functions together.
+CREATE TEXT SEARCH TEMPLATE intdict_template (
+ LEXIZE = dintdict_lexize,
+ INIT = dintdict_init
+);
+
+-- Dictionary based on the template; no options are given here, so the
+-- defaults set in dintdict_init() apply.
+CREATE TEXT SEARCH DICTIONARY intdict (
+ TEMPLATE = intdict_template
+);
+
+COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'dictionary for integers';
+
+END;
--- /dev/null
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+RESET client_min_messages;
+--lexize
+select ts_lexize('intdict', '511673');
+ ts_lexize
+-----------
+ {511673}
+(1 row)
+
+select ts_lexize('intdict', '129');
+ ts_lexize
+-----------
+ {129}
+(1 row)
+
+select ts_lexize('intdict', '40865854');
+ ts_lexize
+-----------
+ {408658}
+(1 row)
+
+select ts_lexize('intdict', '952');
+ ts_lexize
+-----------
+ {952}
+(1 row)
+
+select ts_lexize('intdict', '654980341');
+ ts_lexize
+-----------
+ {654980}
+(1 row)
+
+select ts_lexize('intdict', '09810106');
+ ts_lexize
+-----------
+ {098101}
+(1 row)
+
+select ts_lexize('intdict', '14262713');
+ ts_lexize
+-----------
+ {142627}
+(1 row)
+
+select ts_lexize('intdict', '6532082986');
+ ts_lexize
+-----------
+ {653208}
+(1 row)
+
+select ts_lexize('intdict', '0150061');
+ ts_lexize
+-----------
+ {015006}
+(1 row)
+
+select ts_lexize('intdict', '7778');
+ ts_lexize
+-----------
+ {7778}
+(1 row)
+
+select ts_lexize('intdict', '9547');
+ ts_lexize
+-----------
+ {9547}
+(1 row)
+
+select ts_lexize('intdict', '753395478');
+ ts_lexize
+-----------
+ {753395}
+(1 row)
+
+select ts_lexize('intdict', '647652');
+ ts_lexize
+-----------
+ {647652}
+(1 row)
+
+select ts_lexize('intdict', '6988655574');
+ ts_lexize
+-----------
+ {698865}
+(1 row)
+
+select ts_lexize('intdict', '1279');
+ ts_lexize
+-----------
+ {1279}
+(1 row)
+
+select ts_lexize('intdict', '1266645909');
+ ts_lexize
+-----------
+ {126664}
+(1 row)
+
+select ts_lexize('intdict', '7594193969');
+ ts_lexize
+-----------
+ {759419}
+(1 row)
+
+select ts_lexize('intdict', '16928207');
+ ts_lexize
+-----------
+ {169282}
+(1 row)
+
+select ts_lexize('intdict', '196850350328');
+ ts_lexize
+-----------
+ {196850}
+(1 row)
+
+select ts_lexize('intdict', '22026985592');
+ ts_lexize
+-----------
+ {220269}
+(1 row)
+
+select ts_lexize('intdict', '2063765');
+ ts_lexize
+-----------
+ {206376}
+(1 row)
+
+select ts_lexize('intdict', '242387310');
+ ts_lexize
+-----------
+ {242387}
+(1 row)
+
+select ts_lexize('intdict', '93595');
+ ts_lexize
+-----------
+ {93595}
+(1 row)
+
+select ts_lexize('intdict', '9374');
+ ts_lexize
+-----------
+ {9374}
+(1 row)
+
+select ts_lexize('intdict', '996969');
+ ts_lexize
+-----------
+ {996969}
+(1 row)
+
+select ts_lexize('intdict', '353595982');
+ ts_lexize
+-----------
+ {353595}
+(1 row)
+
+select ts_lexize('intdict', '925860');
+ ts_lexize
+-----------
+ {925860}
+(1 row)
+
+select ts_lexize('intdict', '11848378337');
+ ts_lexize
+-----------
+ {118483}
+(1 row)
+
+select ts_lexize('intdict', '333');
+ ts_lexize
+-----------
+ {333}
+(1 row)
+
+select ts_lexize('intdict', '799287416765');
+ ts_lexize
+-----------
+ {799287}
+(1 row)
+
+select ts_lexize('intdict', '745939');
+ ts_lexize
+-----------
+ {745939}
+(1 row)
+
+select ts_lexize('intdict', '67601305734');
+ ts_lexize
+-----------
+ {676013}
+(1 row)
+
+select ts_lexize('intdict', '3361113');
+ ts_lexize
+-----------
+ {336111}
+(1 row)
+
+select ts_lexize('intdict', '9033778607');
+ ts_lexize
+-----------
+ {903377}
+(1 row)
+
+select ts_lexize('intdict', '7507648');
+ ts_lexize
+-----------
+ {750764}
+(1 row)
+
+select ts_lexize('intdict', '1166');
+ ts_lexize
+-----------
+ {1166}
+(1 row)
+
+select ts_lexize('intdict', '9360498');
+ ts_lexize
+-----------
+ {936049}
+(1 row)
+
+select ts_lexize('intdict', '917795');
+ ts_lexize
+-----------
+ {917795}
+(1 row)
+
+select ts_lexize('intdict', '9387894');
+ ts_lexize
+-----------
+ {938789}
+(1 row)
+
+select ts_lexize('intdict', '42764329');
+ ts_lexize
+-----------
+ {427643}
+(1 row)
+
+select ts_lexize('intdict', '564062');
+ ts_lexize
+-----------
+ {564062}
+(1 row)
+
+select ts_lexize('intdict', '5413377');
+ ts_lexize
+-----------
+ {541337}
+(1 row)
+
+select ts_lexize('intdict', '060965');
+ ts_lexize
+-----------
+ {060965}
+(1 row)
+
+select ts_lexize('intdict', '08273593');
+ ts_lexize
+-----------
+ {082735}
+(1 row)
+
+select ts_lexize('intdict', '593556010144');
+ ts_lexize
+-----------
+ {593556}
+(1 row)
+
+select ts_lexize('intdict', '17988843352');
+ ts_lexize
+-----------
+ {179888}
+(1 row)
+
+select ts_lexize('intdict', '252281774');
+ ts_lexize
+-----------
+ {252281}
+(1 row)
+
+select ts_lexize('intdict', '313425');
+ ts_lexize
+-----------
+ {313425}
+(1 row)
+
+select ts_lexize('intdict', '641439323669');
+ ts_lexize
+-----------
+ {641439}
+(1 row)
+
+select ts_lexize('intdict', '314532610153');
+ ts_lexize
+-----------
+ {314532}
+(1 row)
+
--- /dev/null
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+\i dict_int.sql
+\set ECHO all
+RESET client_min_messages;
+
+--lexize
+select ts_lexize('intdict', '511673');
+select ts_lexize('intdict', '129');
+select ts_lexize('intdict', '40865854');
+select ts_lexize('intdict', '952');
+select ts_lexize('intdict', '654980341');
+select ts_lexize('intdict', '09810106');
+select ts_lexize('intdict', '14262713');
+select ts_lexize('intdict', '6532082986');
+select ts_lexize('intdict', '0150061');
+select ts_lexize('intdict', '7778');
+select ts_lexize('intdict', '9547');
+select ts_lexize('intdict', '753395478');
+select ts_lexize('intdict', '647652');
+select ts_lexize('intdict', '6988655574');
+select ts_lexize('intdict', '1279');
+select ts_lexize('intdict', '1266645909');
+select ts_lexize('intdict', '7594193969');
+select ts_lexize('intdict', '16928207');
+select ts_lexize('intdict', '196850350328');
+select ts_lexize('intdict', '22026985592');
+select ts_lexize('intdict', '2063765');
+select ts_lexize('intdict', '242387310');
+select ts_lexize('intdict', '93595');
+select ts_lexize('intdict', '9374');
+select ts_lexize('intdict', '996969');
+select ts_lexize('intdict', '353595982');
+select ts_lexize('intdict', '925860');
+select ts_lexize('intdict', '11848378337');
+select ts_lexize('intdict', '333');
+select ts_lexize('intdict', '799287416765');
+select ts_lexize('intdict', '745939');
+select ts_lexize('intdict', '67601305734');
+select ts_lexize('intdict', '3361113');
+select ts_lexize('intdict', '9033778607');
+select ts_lexize('intdict', '7507648');
+select ts_lexize('intdict', '1166');
+select ts_lexize('intdict', '9360498');
+select ts_lexize('intdict', '917795');
+select ts_lexize('intdict', '9387894');
+select ts_lexize('intdict', '42764329');
+select ts_lexize('intdict', '564062');
+select ts_lexize('intdict', '5413377');
+select ts_lexize('intdict', '060965');
+select ts_lexize('intdict', '08273593');
+select ts_lexize('intdict', '593556010144');
+select ts_lexize('intdict', '17988843352');
+select ts_lexize('intdict', '252281774');
+select ts_lexize('intdict', '313425');
+select ts_lexize('intdict', '641439323669');
+select ts_lexize('intdict', '314532610153');
--- /dev/null
+-- Remove the dict_int objects, in the reverse of the order in which the
+-- install script creates them: dictionary, template, then the C functions.
+SET search_path = public;
+
+DROP TEXT SEARCH DICTIONARY intdict;
+
+DROP TEXT SEARCH TEMPLATE intdict_template;
+
+DROP FUNCTION dintdict_init(internal);
+
+DROP FUNCTION dintdict_lexize(internal,internal,internal,internal);
--- /dev/null
+# $PostgreSQL: pgsql/contrib/dict_xsyn/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+# Description of the dict_xsyn module: these variables are consumed by the
+# makefile included below (PGXS or contrib-global.mk).
+MODULE_big = dict_xsyn
+OBJS = dict_xsyn.o
+DATA_built = dict_xsyn.sql
+DATA = uninstall_dict_xsyn.sql
+DOCS = README.dict_xsyn
+REGRESS = dict_xsyn
+
+# Sample rules files and the subdirectory of $(datadir) they are installed
+# into; used by the install-data / uninstall-data rules in this makefile.
+DICTDIR = tsearch_data
+DICTFILES = xsyn_sample.rules
+
+# With USE_PGXS set, ask pg_config where the PGXS makefile lives and include
+# it; otherwise build as part of the source tree two directories up.
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/dict_xsyn
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+# Hook the sample rules files into the standard install target.
+install: install-data
+
+# Copy every file listed in DICTFILES from the source directory into
+# $(datadir)/$(DICTDIR) below DESTDIR.  "|| exit 1" stops the loop at the
+# first failed copy; without it only the status of the last file would be
+# reported to make.
+.PHONY: install-data
+install-data: $(DICTFILES)
+	for i in $(DICTFILES); \
+	  do $(INSTALL_DATA) $(srcdir)/$$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i || exit 1; \
+	done
+
+# Hook removal of the sample rules files into the standard uninstall target.
+uninstall: uninstall-data
+
+# Remove the installed files.  Plain "rm -f" is used because these are
+# regular files; "-r" is unnecessary and would delete a whole directory tree
+# if an entry ever named a directory.
+.PHONY: uninstall-data
+uninstall-data:
+	for i in $(DICTFILES); \
+	  do rm -f '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \
+	done
--- /dev/null
+Extended Synonym dictionary
+===========================
+
+This is a simple synonym dictionary. It replaces words with groups of their
+synonyms, and so makes it possible to search for a word using any of its
+synonyms.
+
+* Configuration
+
+It accepts the following options:
+
+ - KEEPORIG controls whether the original word is included, or only its
+ synonyms. Default is 'true'.
+
+ - RULES is the base name of the file containing the list of synonyms.
+ This file must be in $(prefix)/share/tsearch_data/, and its name must
+ end in ".rules" (which is not included in the RULES parameter).
+
+The rules file has the following format:
+
+ - Each line represents a group of synonyms for a single word, which is
+ given first on the line. Synonyms are separated by whitespace:
+
+ word syn1 syn2 syn3
+
+ - Sharp ('#') sign is a comment delimiter. It may appear at any position
+ inside the line. The rest of the line will be skipped.
+
+Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/,
+for an example.
+
+* Usage
+
+1. Compile and install
+
+2. Load dictionary
+
+ psql mydb < dict_xsyn.sql
+
+3. Test it
+
+ mydb=# SELECT ts_lexize('xsyn','word');
+ ts_lexize
+ ----------------
+ {word,syn1,syn2,syn3}
+
+4. Change the dictionary options as you wish
+
+ mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false);
+ ALTER TEXT SEARCH DICTIONARY
+
+That's all.
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * dict_xsyn.c
+ * Extended synonym dictionary
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "commands/defrem.h"
+#include "fmgr.h"
+#include "storage/fd.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+
+PG_MODULE_MAGIC;
+
+/* One rules-file entry: a word and the line of text it was read from. */
+typedef struct
+{
+	char	   *key;			/* the word that is looked up */
+	char	   *value;			/* unparsed synonym list, starting with the
+								 * word itself */
+} Syn;
+
+/* Dictionary state: the loaded rules plus the KEEPORIG option. */
+typedef struct
+{
+	int			len;			/* number of entries in syn[] */
+	Syn		   *syn;			/* entries, sorted by key after loading */
+
+	bool		keeporig;		/* include the looked-up word in the result? */
+} DictSyn;
+
+
+PG_FUNCTION_INFO_V1(dxsyn_init);
+Datum dxsyn_init(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(dxsyn_lexize);
+Datum dxsyn_lexize(PG_FUNCTION_ARGS);
+
+/*
+ * find_word
+ *		Locate the next whitespace-delimited word in "in".
+ *
+ * Returns a pointer to the first character of the word and sets *end to the
+ * position just past its last character.  Returns NULL, leaving *end set to
+ * NULL, when only whitespace remains or when the next word begins with '#'.
+ * The scan advances by pg_mblen() so that multibyte characters are stepped
+ * over as a whole.
+ */
+static char *
+find_word(char *in, char **end)
+{
+	char	   *cursor = in;
+	char	   *word;
+
+	*end = NULL;
+
+	/* skip leading whitespace */
+	for (; *cursor != '\0' && t_isspace(cursor); cursor += pg_mblen(cursor))
+		;
+
+	/* nothing left, or the remainder is a comment */
+	if (*cursor == '\0' || *cursor == '#')
+		return NULL;
+
+	word = cursor;
+
+	/* advance to the whitespace (or string terminator) ending the word */
+	for (; *cursor != '\0' && !t_isspace(cursor); cursor += pg_mblen(cursor))
+		;
+
+	*end = cursor;
+	return word;
+}
+
+/*
+ * compare_syn
+ *		Comparator over Syn entries that orders them by key using strcmp().
+ *		Used as the callback for both qsort() and bsearch().
+ */
+static int
+compare_syn(const void *a, const void *b)
+{
+	const Syn  *left = (const Syn *) a;
+	const Syn  *right = (const Syn *) b;
+
+	return strcmp(left->key, right->key);
+}
+
+/*
+ * read_dictionary
+ *		Load the synonym rules file named by "filename" into d->syn.
+ *
+ * The file name is resolved with get_tsearch_config_filename() using the
+ * "rules" extension; failure to open it raises an error.  Each line is
+ * lower-cased, its first word becomes the entry's key, and the whole
+ * lower-cased line is kept as the entry's unparsed value.  Empty lines and
+ * lines with no word (blank or comment-only) are skipped.  After loading,
+ * d->len holds the number of entries and the array is sorted by key so that
+ * it can be searched with bsearch().
+ */
+static void
+read_dictionary(DictSyn *d, char *filename)
+{
+	char	   *real_filename = get_tsearch_config_filename(filename, "rules");
+	FILE	   *fin;
+	char	   *line;
+	int			cur = 0;
+
+	if ((fin = AllocateFile(real_filename, "r")) == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open synonym file \"%s\": %m",
+						real_filename)));
+
+	while ((line = t_readline(fin)) != NULL)
+	{
+		char	   *value;
+		char	   *key;
+		char	   *end = NULL;
+
+		if (*line == '\0')
+		{
+			/* release the buffer; skipping without freeing would leak it */
+			pfree(line);
+			continue;
+		}
+
+		value = lowerstr(line);
+		pfree(line);
+
+		key = find_word(value, &end);
+		if (!key)
+		{
+			/* blank or comment-only line */
+			pfree(value);
+			continue;
+		}
+
+		/* grow the array geometrically, starting at 16 entries */
+		if (cur == d->len)
+		{
+			d->len = (d->len > 0) ? 2 * d->len : 16;
+			if (d->syn)
+				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
+			else
+				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
+		}
+
+		d->syn[cur].key = pnstrdup(key, end - key);
+		d->syn[cur].value = value;
+
+		cur++;
+	}
+
+	FreeFile(fin);
+
+	/* from here on d->len is the entry count, not the allocated size */
+	d->len = cur;
+	if (cur > 1)
+		qsort(d->syn, d->len, sizeof(Syn), compare_syn);
+
+	pfree(real_filename);
+}
+
+/*
+ * dxsyn_init
+ *		Build the DictSyn state for a dictionary from its option list.
+ *
+ * Argument 0 is a List of DefElem options.  KEEPORIG (boolean, default
+ * true) and RULES (base name of the rules file, loaded immediately) are
+ * accepted, matched case-insensitively; any other option raises an error.
+ * Returns a pointer to the palloc'd DictSyn.
+ */
+Datum
+dxsyn_init(PG_FUNCTION_ARGS)
+{
+	List	   *options = (List *) PG_GETARG_POINTER(0);
+	DictSyn    *dict = (DictSyn *) palloc0(sizeof(DictSyn));
+	ListCell   *cell;
+
+	/* defaults: no rules loaded, original word kept */
+	dict->len = 0;
+	dict->syn = NULL;
+	dict->keeporig = true;
+
+	foreach(cell, options)
+	{
+		DefElem    *opt = (DefElem *) lfirst(cell);
+		const char *name = opt->defname;
+
+		if (pg_strcasecmp(name, "KEEPORIG") == 0)
+			dict->keeporig = defGetBoolean(opt);
+		else if (pg_strcasecmp(name, "RULES") == 0)
+			read_dictionary(dict, defGetString(opt));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized xsyn parameter: \"%s\"",
+							opt->defname)));
+	}
+
+	PG_RETURN_POINTER(dict);
+}
+
+/*
+ * dxsyn_lexize
+ *		Replace a word by the synonyms listed for it in the rules file.
+ *
+ * Arguments: 0 = DictSyn built by dxsyn_init(), 1 = pointer to the token
+ * text, 2 = token length in bytes.
+ *
+ * Returns NULL when the token is empty, no rules are loaded, or the
+ * lower-cased token is not a key in the rules.  Otherwise returns a
+ * TSLexeme array terminated by an entry whose lexeme is NULL.  It holds
+ * every word of the matching rule line, except that the first word (the key
+ * itself) is included only when KEEPORIG is set.
+ */
+Datum
+dxsyn_lexize(PG_FUNCTION_ARGS)
+{
+	DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
+	char	   *in = (char *) PG_GETARG_POINTER(1);
+	int			length = PG_GETARG_INT32(2);
+	Syn			word;
+	Syn		   *found;
+	TSLexeme   *res = NULL;
+
+	if (!length || d->len == 0)
+		PG_RETURN_POINTER(NULL);
+
+	/* Create search pattern */
+	{
+		char	   *temp = pnstrdup(in, length);
+
+		word.key = lowerstr(temp);
+		pfree(temp);
+		word.value = NULL;
+	}
+
+	/* Look for matching syn */
+	found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
+	pfree(word.key);
+
+	if (!found)
+		PG_RETURN_POINTER(NULL);
+
+	/* Parse string of synonyms and return array of words */
+	{
+		/* work on a copy: words are terminated in place below */
+		char	   *value = pstrdup(found->value);
+		int			value_length = strlen(value);
+		char	   *pos = value;
+		int			nsyns = 0;
+		bool		is_first = true;
+
+		res = palloc(0);
+
+		while (pos < value + value_length)
+		{
+			char	   *end;
+			char	   *syn = find_word(pos, &end);
+
+			if (!syn)
+				break;
+			*end = '\0';
+
+			res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
+
+			/*
+			 * Zero the slot about to be filled and the terminator after it.
+			 * repalloc does not clear new memory, and besides "lexeme" a
+			 * TSLexeme carries nvariant and flags fields that the caller
+			 * reads; they must not be left as garbage.
+			 */
+			memset(&res[nsyns], 0, sizeof(TSLexeme) * 2);
+			res[nsyns].lexeme = NULL;
+
+			/* first word is added to result only if KEEPORIG flag is set */
+			if (d->keeporig || !is_first)
+			{
+				res[nsyns].lexeme = pstrdup(syn);
+				res[nsyns + 1].lexeme = NULL;
+
+				nsyns++;
+			}
+
+			is_first = false;
+
+			pos = end + 1;
+		}
+
+		pfree(value);
+	}
+
+	PG_RETURN_POINTER(res);
+}
--- /dev/null
+-- $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+-- Adjust this setting to control where the objects get created.
+SET search_path = public;
+
+-- Create all objects in a single transaction.
+BEGIN;
+
+-- C-language entry points implemented in dict_xsyn.c.
+CREATE FUNCTION dxsyn_init(internal)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+CREATE FUNCTION dxsyn_lexize(internal, internal, internal, internal)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+-- Template that ties the init and lexize functions together.
+CREATE TEXT SEARCH TEMPLATE xsyn_template (
+ LEXIZE = dxsyn_lexize,
+ INIT = dxsyn_init
+);
+
+-- Dictionary based on the template.  No RULES option is given here, so no
+-- synonym file is loaded until the dictionary is altered to name one.
+CREATE TEXT SEARCH DICTIONARY xsyn (
+ TEMPLATE = xsyn_template
+);
+
+COMMENT ON TEXT SEARCH DICTIONARY xsyn IS 'eXtended synonym dictionary';
+
+END;
--- /dev/null
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+RESET client_min_messages;
+--configuration
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
+--lexize
+SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+----------------
+ {sn,sne,1987a}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+-----------
+
+(1 row)
+
--- /dev/null
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+\i dict_xsyn.sql
+\set ECHO all
+RESET client_min_messages;
+
+--configuration
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
+
+--lexize
+SELECT ts_lexize('xsyn', 'supernova');
+SELECT ts_lexize('xsyn', 'grb');
--- /dev/null
+-- Remove the dict_xsyn objects, in the reverse of the order in which the
+-- install script creates them: dictionary, template, then the C functions.
+SET search_path = public;
+
+DROP TEXT SEARCH DICTIONARY xsyn;
+
+DROP TEXT SEARCH TEMPLATE xsyn_template;
+
+DROP FUNCTION dxsyn_init(internal);
+
+DROP FUNCTION dxsyn_lexize(internal,internal,internal,internal);
--- /dev/null
+# Sample rules file for eXtended Synonym (xsyn) dictionary
+# format is as follows:
+#
+# word synonym1 synonym2 ...
+#
+supernova sn sne 1987a
--- /dev/null
+# $PostgreSQL: pgsql/contrib/test_parser/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+# Description of the test_parser module: these variables are consumed by the
+# makefile included at the bottom (PGXS or contrib-global.mk).
+MODULE_big = test_parser
+OBJS = test_parser.o
+DATA_built = test_parser.sql
+DATA = uninstall_test_parser.sql
+DOCS = README.test_parser
+REGRESS = test_parser
+
+# With USE_PGXS set, ask pg_config where the PGXS makefile lives and include
+# it; otherwise build as part of the source tree two directories up.
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/test_parser
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
--- /dev/null
+Example parser
+==============
+
+This is an example of a custom parser for full text search.
+
+It recognizes space-delimited words and returns only two token types:
+
+ - 3, word, Word
+
+ - 12, blank, Space symbols
+
+The token numbers have been chosen to keep compatibility with the default
+ts_headline() function, since we do not want to implement our own version.
+
+* Configuration
+
+The parser has no user-configurable parameters.
+
+* Usage
+
+1. Compile and install
+
+2. Load parser
+
+ psql mydb < test_parser.sql
+
+3. Test it
+
+ mydb# SELECT * FROM ts_parse('testparser','That''s my first own parser');
+ tokid | token
+ -------+--------
+ 3 | That's
+ 12 |
+ 3 | my
+ 12 |
+ 3 | first
+ 12 |
+ 3 | own
+ 12 |
+ 3 | parser
+
+ mydb# SELECT to_tsvector('testcfg','That''s my first own parser');
+ to_tsvector
+ -------------------------------------------------
+ 'my':2 'own':4 'first':3 'parser':5 'that''s':1
+
+ mydb# SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'stars'));
+                           ts_headline
+ -----------------------------------------------------------------
+ Supernovae <b>stars</b> are the brightest phenomena in galaxies
+
+That's all.
--- /dev/null
+--
+-- first, define the parser. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+RESET client_min_messages;
+-- make test configuration using parser
+CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
+ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
+-- ts_parse
+SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
+ tokid | token
+-------+-----------------------
+ 3 | That's
+ 12 |
+ 3 | simple
+ 12 |
+ 3 | parser
+ 12 |
+ 3 | can't
+ 12 |
+ 3 | parse
+ 12 |
+ 3 | urls
+ 12 |
+ 3 | like
+ 12 |
+ 3 | http://some.url/here/
+(15 rows)
+
+SELECT to_tsvector('testcfg','That''s my first own parser');
+ to_tsvector
+-------------------------------------------------
+ 'my':2 'own':4 'first':3 'parser':5 'that''s':1
+(1 row)
+
+SELECT to_tsquery('testcfg', 'star');
+ to_tsquery
+------------
+ 'star'
+(1 row)
+
+SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
+ to_tsquery('testcfg', 'stars'));
+ ts_headline
+-----------------------------------------------------------------
+ Supernovae <b>stars</b> are the brightest phenomena in galaxies
+(1 row)
+
--- /dev/null
+--
+-- first, define the parser. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+\i test_parser.sql
+\set ECHO all
+RESET client_min_messages;
+
+-- make test configuration using parser
+
+CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
+
+ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
+
+-- ts_parse
+
+SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
+
+SELECT to_tsvector('testcfg','That''s my first own parser');
+
+SELECT to_tsquery('testcfg', 'star');
+
+SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
+ to_tsquery('testcfg', 'stars'));
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * test_parser.c
+ * Simple example of a text search parser
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/contrib/test_parser/test_parser.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+
+PG_MODULE_MAGIC;
+
+
+/*
+ * types
+ */
+
+/*
+ * Parser state (self-defined type): created by testprs_start(), advanced by
+ * testprs_getlexeme() and released by testprs_end().
+ */
+typedef struct
+{
+	char	   *buffer;			/* text to parse */
+	int			len;			/* length of the text in buffer */
+	int			pos;			/* position of the parser */
+} ParserState;
+
+/*
+ * Description of one token type, as returned by testprs_lextype().
+ * (copy-paste from wparser.h of tsearch2)
+ */
+typedef struct
+{
+	int			lexid;			/* token type number */
+	char	   *alias;			/* short name of the token type */
+	char	   *descr;			/* human-readable description */
+} LexDescr;
+
+/*
+ * prototypes
+ */
+PG_FUNCTION_INFO_V1(testprs_start);
+Datum testprs_start(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(testprs_getlexeme);
+Datum testprs_getlexeme(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(testprs_end);
+Datum testprs_end(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(testprs_lextype);
+Datum testprs_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * functions
+ */
+/*
+ * testprs_start
+ *		Begin parsing: argument 0 is the text, argument 1 its length.
+ *		Returns a freshly allocated ParserState positioned at offset 0.
+ *		The text is referenced, not copied.
+ */
+Datum
+testprs_start(PG_FUNCTION_ARGS)
+{
+	char	   *text = (char *) PG_GETARG_POINTER(0);
+	int			textlen = PG_GETARG_INT32(1);
+	ParserState *state = (ParserState *) palloc0(sizeof(ParserState));
+
+	state->buffer = text;
+	state->len = textlen;
+	state->pos = 0;
+
+	PG_RETURN_POINTER(state);
+}
+
+/*
+ * testprs_getlexeme
+ *		Return the next token from the text being parsed.
+ *
+ * Arguments: 0 = ParserState, 1 = output pointer that receives the start of
+ * the token, 2 = output pointer that receives the token length.
+ *
+ * Returns the token type: 12 for a run of spaces, 3 for a run of non-space
+ * characters, or 0 when the end of the text has been reached (in which case
+ * the reported length is 0).
+ *
+ * The text is addressed by pointer and length and is not assumed to be
+ * NUL-terminated, so pos is always compared against len before the byte at
+ * that position is examined; otherwise the byte just past the end of the
+ * buffer would be read.
+ */
+Datum
+testprs_getlexeme(PG_FUNCTION_ARGS)
+{
+	ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
+	char	  **t = (char **) PG_GETARG_POINTER(1);
+	int		   *tlen = (int *) PG_GETARG_POINTER(2);
+	int			startpos = pst->pos;
+	int			type;
+
+	*t = pst->buffer + pst->pos;
+
+	if (pst->pos < pst->len &&
+		(pst->buffer)[pst->pos] == ' ')
+	{
+		/* blank type */
+		type = 12;
+		/* go to the next non-white-space character */
+		while (pst->pos < pst->len &&
+			   (pst->buffer)[pst->pos] == ' ')
+			(pst->pos)++;
+	}
+	else
+	{
+		/* word type */
+		type = 3;
+		/* go to the next white-space character */
+		while (pst->pos < pst->len &&
+			   (pst->buffer)[pst->pos] != ' ')
+			(pst->pos)++;
+	}
+
+	*tlen = pst->pos - startpos;
+
+	/* we are finished if (*tlen == 0) */
+	if (*tlen == 0)
+		type = 0;
+
+	PG_RETURN_INT32(type);
+}
+
+/*
+ * testprs_end
+ *		Finish parsing: free the ParserState passed as argument 0.
+ */
+Datum
+testprs_end(PG_FUNCTION_ARGS)
+{
+	ParserState *state = (ParserState *) PG_GETARG_POINTER(0);
+
+	pfree(state);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * testprs_lextype
+ *		Describe the token types this parser produces.
+ *
+ * Returns a palloc'd LexDescr array with one entry per token type, followed
+ * by a terminating entry whose lexid is 0.
+ *
+ * Remarks:
+ * - blanks have to be reported as a token type for the sake of headline
+ *   generation
+ * - the lexids are the same as those of the default word parser, so that
+ *   its headline function can be reused
+ */
+Datum
+testprs_lextype(PG_FUNCTION_ARGS)
+{
+	/* two real token types plus the terminator */
+	LexDescr   *types = (LexDescr *) palloc(sizeof(LexDescr) * (2 + 1));
+	LexDescr   *entry = types;
+
+	entry->lexid = 3;
+	entry->alias = pstrdup("word");
+	entry->descr = pstrdup("Word");
+	entry++;
+
+	entry->lexid = 12;
+	entry->alias = pstrdup("blank");
+	entry->descr = pstrdup("Space symbols");
+	entry++;
+
+	/* terminator */
+	entry->lexid = 0;
+
+	PG_RETURN_POINTER(types);
+}
--- /dev/null
+-- $PostgreSQL: pgsql/contrib/test_parser/test_parser.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+-- Adjust this setting to control where the objects get created.
+SET search_path = public;
+
+-- Create all objects in a single transaction.
+BEGIN;
+
+-- C-language entry points implemented in test_parser.c.
+CREATE FUNCTION testprs_start(internal, int4)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+CREATE FUNCTION testprs_getlexeme(internal, internal, internal)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+CREATE FUNCTION testprs_end(internal)
+ RETURNS void
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+CREATE FUNCTION testprs_lextype(internal)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+-- The parser itself.  HEADLINE is not implemented by this module; it points
+-- at pg_catalog.prsd_headline instead.
+CREATE TEXT SEARCH PARSER testparser (
+ START = testprs_start,
+ GETTOKEN = testprs_getlexeme,
+ END = testprs_end,
+ HEADLINE = pg_catalog.prsd_headline,
+ LEXTYPES = testprs_lextype
+);
+
+END;
--- /dev/null
+-- Remove the test_parser objects: the parser first, then the C functions
+-- it refers to.
+SET search_path = public;
+
+DROP TEXT SEARCH PARSER testparser;
+
+DROP FUNCTION testprs_start(internal, int4);
+
+DROP FUNCTION testprs_getlexeme(internal, internal, internal);
+
+DROP FUNCTION testprs_end(internal);
+
+DROP FUNCTION testprs_lextype(internal);