granicus.if.org Git - postgresql/blob - contrib/dict_xsyn/dict_xsyn.c
Fix bogus code in contrib/ tsearch dictionary examples.
[postgresql] / contrib / dict_xsyn / dict_xsyn.c
1 /*-------------------------------------------------------------------------
2  *
3  * dict_xsyn.c
4  *        Extended synonym dictionary
5  *
6  * Copyright (c) 2007-2011, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *        contrib/dict_xsyn/dict_xsyn.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14
15 #include <ctype.h>
16
17 #include "commands/defrem.h"
18 #include "tsearch/ts_locale.h"
19 #include "tsearch/ts_utils.h"
20
21 PG_MODULE_MAGIC;
22
/*
 * One dictionary entry: a key word paired with the rule text it maps to.
 */
typedef struct
{
	/* Word that is looked up */
	char	   *key;

	/* Unparsed list of synonyms; the list includes the word itself */
	char	   *value;
} Syn;
29
30 typedef struct
31 {
32         int                     len;
33         Syn                *syn;
34
35         bool            matchorig;
36         bool            keeporig;
37         bool            matchsynonyms;
38         bool            keepsynonyms;
39 } DictSyn;
40
41
42 PG_FUNCTION_INFO_V1(dxsyn_init);
43 Datum           dxsyn_init(PG_FUNCTION_ARGS);
44
45 PG_FUNCTION_INFO_V1(dxsyn_lexize);
46 Datum           dxsyn_lexize(PG_FUNCTION_ARGS);
47
48 static char *
49 find_word(char *in, char **end)
50 {
51         char       *start;
52
53         *end = NULL;
54         while (*in && t_isspace(in))
55                 in += pg_mblen(in);
56
57         if (!*in || *in == '#')
58                 return NULL;
59         start = in;
60
61         while (*in && !t_isspace(in))
62                 in += pg_mblen(in);
63
64         *end = in;
65
66         return start;
67 }
68
69 static int
70 compare_syn(const void *a, const void *b)
71 {
72         return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
73 }
74
75 static void
76 read_dictionary(DictSyn *d, char *filename)
77 {
78         char       *real_filename = get_tsearch_config_filename(filename, "rules");
79         tsearch_readline_state trst;
80         char       *line;
81         int                     cur = 0;
82
83         if (!tsearch_readline_begin(&trst, real_filename))
84                 ereport(ERROR,
85                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
86                                  errmsg("could not open synonym file \"%s\": %m",
87                                                 real_filename)));
88
89         while ((line = tsearch_readline(&trst)) != NULL)
90         {
91                 char       *value;
92                 char       *key;
93                 char       *pos;
94                 char       *end;
95
96                 if (*line == '\0')
97                         continue;
98
99                 value = lowerstr(line);
100                 pfree(line);
101
102                 pos = value;
103                 while ((key = find_word(pos, &end)) != NULL)
104                 {
105                         /* Enlarge syn structure if full */
106                         if (cur == d->len)
107                         {
108                                 d->len = (d->len > 0) ? 2 * d->len : 16;
109                                 if (d->syn)
110                                         d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
111                                 else
112                                         d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
113                         }
114
115                         /* Save first word only if we will match it */
116                         if (pos != value || d->matchorig)
117                         {
118                                 d->syn[cur].key = pnstrdup(key, end - key);
119                                 d->syn[cur].value = pstrdup(value);
120
121                                 cur++;
122                         }
123
124                         pos = end;
125
126                         /* Don't bother scanning synonyms if we will not match them */
127                         if (!d->matchsynonyms)
128                                 break;
129                 }
130
131                 pfree(value);
132         }
133
134         tsearch_readline_end(&trst);
135
136         d->len = cur;
137         if (cur > 1)
138                 qsort(d->syn, d->len, sizeof(Syn), compare_syn);
139
140         pfree(real_filename);
141 }
142
143 Datum
144 dxsyn_init(PG_FUNCTION_ARGS)
145 {
146         List       *dictoptions = (List *) PG_GETARG_POINTER(0);
147         DictSyn    *d;
148         ListCell   *l;
149         char       *filename = NULL;
150
151         d = (DictSyn *) palloc0(sizeof(DictSyn));
152         d->len = 0;
153         d->syn = NULL;
154         d->matchorig = true;
155         d->keeporig = true;
156         d->matchsynonyms = false;
157         d->keepsynonyms = true;
158
159         foreach(l, dictoptions)
160         {
161                 DefElem    *defel = (DefElem *) lfirst(l);
162
163                 if (pg_strcasecmp(defel->defname, "MATCHORIG") == 0)
164                 {
165                         d->matchorig = defGetBoolean(defel);
166                 }
167                 else if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
168                 {
169                         d->keeporig = defGetBoolean(defel);
170                 }
171                 else if (pg_strcasecmp(defel->defname, "MATCHSYNONYMS") == 0)
172                 {
173                         d->matchsynonyms = defGetBoolean(defel);
174                 }
175                 else if (pg_strcasecmp(defel->defname, "KEEPSYNONYMS") == 0)
176                 {
177                         d->keepsynonyms = defGetBoolean(defel);
178                 }
179                 else if (pg_strcasecmp(defel->defname, "RULES") == 0)
180                 {
181                         /* we can't read the rules before parsing all options! */
182                         filename = defGetString(defel);
183                 }
184                 else
185                 {
186                         ereport(ERROR,
187                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
188                                          errmsg("unrecognized xsyn parameter: \"%s\"",
189                                                         defel->defname)));
190                 }
191         }
192
193         if (filename)
194                 read_dictionary(d, filename);
195
196         PG_RETURN_POINTER(d);
197 }
198
199 Datum
200 dxsyn_lexize(PG_FUNCTION_ARGS)
201 {
202         DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
203         char       *in = (char *) PG_GETARG_POINTER(1);
204         int                     length = PG_GETARG_INT32(2);
205         Syn                     word;
206         Syn                *found;
207         TSLexeme   *res = NULL;
208
209         if (!length || d->len == 0)
210                 PG_RETURN_POINTER(NULL);
211
212         /* Create search pattern */
213         {
214                 char       *temp = pnstrdup(in, length);
215
216                 word.key = lowerstr(temp);
217                 pfree(temp);
218                 word.value = NULL;
219         }
220
221         /* Look for matching syn */
222         found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
223         pfree(word.key);
224
225         if (!found)
226                 PG_RETURN_POINTER(NULL);
227
228         /* Parse string of synonyms and return array of words */
229         {
230                 char       *value = found->value;
231                 char       *syn;
232                 char       *pos;
233                 char       *end;
234                 int                     nsyns = 0;
235
236                 res = palloc(sizeof(TSLexeme));
237
238                 pos = value;
239                 while ((syn = find_word(pos, &end)) != NULL)
240                 {
241                         res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
242
243                         /* The first word is output only if keeporig=true */
244                         if (pos != value || d->keeporig)
245                         {
246                                 res[nsyns].lexeme = pnstrdup(syn, end - syn);
247                                 res[nsyns].nvariant = 0;
248                                 res[nsyns].flags = 0;
249                                 nsyns++;
250                         }
251
252                         pos = end;
253
254                         /* Stop if we are not to output the synonyms */
255                         if (!d->keepsynonyms)
256                                 break;
257                 }
258                 res[nsyns].lexeme = NULL;
259         }
260
261         PG_RETURN_POINTER(res);
262 }