]> granicus.if.org Git - postgresql/blob - src/backend/tsearch/dict_synonym.c
Update copyright for the year 2010.
[postgresql] / src / backend / tsearch / dict_synonym.c
1 /*-------------------------------------------------------------------------
2  *
3  * dict_synonym.c
4  *              Synonym dictionary: replace word by its synonym
5  *
6  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.12 2010/01/02 16:57:53 momjian Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "commands/defrem.h"
17 #include "tsearch/ts_locale.h"
18 #include "tsearch/ts_public.h"
19 #include "tsearch/ts_utils.h"
20 #include "utils/builtins.h"
21
22 typedef struct
23 {
24         char       *in;
25         char       *out;
26         int                     outlen;
27         uint16          flags;
28 } Syn;
29
30 typedef struct
31 {
32         int                     len;                    /* length of syn array */
33         Syn                *syn;
34         bool            case_sensitive;
35 } DictSyn;
36
37 /*
38  * Finds the next whitespace-delimited word within the 'in' string.
39  * Returns a pointer to the first character of the word, and a pointer
40  * to the next byte after the last character in the word (in *end).
41  * Character '*' at the end of word will not be threated as word
42  * charater if flags is not null.
43  */
44 static char *
45 findwrd(char *in, char **end, uint16 *flags)
46 {
47         char       *start;
48         char       *lastchar;
49
50         /* Skip leading spaces */
51         while (*in && t_isspace(in))
52                 in += pg_mblen(in);
53
54         /* Return NULL on empty lines */
55         if (*in == '\0')
56         {
57                 *end = NULL;
58                 return NULL;
59         }
60
61         lastchar = start = in;
62
63         /* Find end of word */
64         while (*in && !t_isspace(in))
65         {
66                 lastchar = in;
67                 in += pg_mblen(in);
68         }
69
70         if ( in - lastchar == 1 && t_iseq(lastchar, '*') && flags )
71         {
72                 *flags = TSL_PREFIX;
73                 *end = lastchar;
74         }
75         else
76         {
77                 if (flags)
78                                 *flags = 0;
79                 *end = in;
80         }
81
82         return start;
83 }
84
85 static int
86 compareSyn(const void *a, const void *b)
87 {
88         return strcmp(((Syn *) a)->in, ((Syn *) b)->in);
89 }
90
91
92 Datum
93 dsynonym_init(PG_FUNCTION_ARGS)
94 {
95         List       *dictoptions = (List *) PG_GETARG_POINTER(0);
96         DictSyn    *d;
97         ListCell   *l;
98         char       *filename = NULL;
99         bool            case_sensitive = false;
100         tsearch_readline_state trst;
101         char       *starti,
102                            *starto,
103                            *end = NULL;
104         int                     cur = 0;
105         char       *line = NULL;
106         uint16          flags = 0;
107
108         foreach(l, dictoptions)
109         {
110                 DefElem    *defel = (DefElem *) lfirst(l);
111
112                 if (pg_strcasecmp("Synonyms", defel->defname) == 0)
113                         filename = defGetString(defel);
114                 else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
115                         case_sensitive = defGetBoolean(defel);
116                 else
117                         ereport(ERROR,
118                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
119                                          errmsg("unrecognized synonym parameter: \"%s\"",
120                                                         defel->defname)));
121         }
122
123         if (!filename)
124                 ereport(ERROR,
125                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
126                                  errmsg("missing Synonyms parameter")));
127
128         filename = get_tsearch_config_filename(filename, "syn");
129
130         if (!tsearch_readline_begin(&trst, filename))
131                 ereport(ERROR,
132                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
133                                  errmsg("could not open synonym file \"%s\": %m",
134                                                 filename)));
135
136         d = (DictSyn *) palloc0(sizeof(DictSyn));
137
138         while ((line = tsearch_readline(&trst)) != NULL)
139         {
140                 starti = findwrd(line, &end, NULL);
141                 if (!starti)
142                 {
143                         /* Empty line */
144                         goto skipline;
145                 }
146                 if (*end == '\0')
147                 {
148                         /* A line with only one word. Ignore silently. */
149                         goto skipline;
150                 }
151                 *end = '\0';
152
153                 starto = findwrd(end + 1, &end, &flags);
154                 if (!starto)
155                 {
156                         /* A line with only one word (+whitespace). Ignore silently. */
157                         goto skipline;
158                 }
159                 *end = '\0';
160
161                 /*
162                  * starti now points to the first word, and starto to the second word
163                  * on the line, with a \0 terminator at the end of both words.
164                  */
165
166                 if (cur >= d->len)
167                 {
168                         if (d->len == 0)
169                         {
170                                 d->len = 64;
171                                 d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
172                         }
173                         else
174                         {
175                                 d->len *= 2;
176                                 d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
177                         }
178                 }
179
180                 if (case_sensitive)
181                 {
182                         d->syn[cur].in = pstrdup(starti);
183                         d->syn[cur].out = pstrdup(starto);
184                 }
185                 else
186                 {
187                         d->syn[cur].in = lowerstr(starti);
188                         d->syn[cur].out = lowerstr(starto);
189                 }
190
191                 d->syn[cur].outlen = strlen(starto);
192                 d->syn[cur].flags = flags; 
193
194                 cur++;
195
196 skipline:
197                 pfree(line);
198         }
199
200         tsearch_readline_end(&trst);
201
202         d->len = cur;
203         qsort(d->syn, d->len, sizeof(Syn), compareSyn);
204
205         d->case_sensitive = case_sensitive;
206
207         PG_RETURN_POINTER(d);
208 }
209
210 Datum
211 dsynonym_lexize(PG_FUNCTION_ARGS)
212 {
213         DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
214         char       *in = (char *) PG_GETARG_POINTER(1);
215         int32           len = PG_GETARG_INT32(2);
216         Syn                     key,
217                            *found;
218         TSLexeme   *res;
219
220         /* note: d->len test protects against Solaris bsearch-of-no-items bug */
221         if (len <= 0 || d->len <= 0)
222                 PG_RETURN_POINTER(NULL);
223
224         if (d->case_sensitive)
225                 key.in = pnstrdup(in, len);
226         else
227                 key.in = lowerstr_with_len(in, len);
228
229         key.out = NULL;
230
231         found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
232         pfree(key.in);
233
234         if (!found)
235                 PG_RETURN_POINTER(NULL);
236
237         res = palloc0(sizeof(TSLexeme) * 2);
238         res[0].lexeme = pnstrdup(found->out, found->outlen);
239         res[0].flags = found->flags;
240
241         PG_RETURN_POINTER(res);
242 }