]> granicus.if.org Git - postgresql/blob - src/backend/tsearch/dict_synonym.c
8ca65f3deda3644ac5d71e8e9865331bbb3c21ef
[postgresql] / src / backend / tsearch / dict_synonym.c
1 /*-------------------------------------------------------------------------
2  *
3  * dict_synonym.c
4  *              Synonym dictionary: replace word by its synonym
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        src/backend/tsearch/dict_synonym.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "commands/defrem.h"
17 #include "tsearch/ts_locale.h"
18 #include "tsearch/ts_utils.h"
19 #include "utils/builtins.h"
20
21 typedef struct
22 {
23         char       *in;
24         char       *out;
25         int                     outlen;
26         uint16          flags;
27 } Syn;
28
29 typedef struct
30 {
31         int                     len;                    /* length of syn array */
32         Syn                *syn;
33         bool            case_sensitive;
34 } DictSyn;
35
36 /*
37  * Finds the next whitespace-delimited word within the 'in' string.
38  * Returns a pointer to the first character of the word, and a pointer
39  * to the next byte after the last character in the word (in *end).
40  * Character '*' at the end of word will not be threated as word
41  * character if flags is not null.
42  */
43 static char *
44 findwrd(char *in, char **end, uint16 *flags)
45 {
46         char       *start;
47         char       *lastchar;
48
49         /* Skip leading spaces */
50         while (*in && t_isspace(in))
51                 in += pg_mblen(in);
52
53         /* Return NULL on empty lines */
54         if (*in == '\0')
55         {
56                 *end = NULL;
57                 return NULL;
58         }
59
60         lastchar = start = in;
61
62         /* Find end of word */
63         while (*in && !t_isspace(in))
64         {
65                 lastchar = in;
66                 in += pg_mblen(in);
67         }
68
69         if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
70         {
71                 *flags = TSL_PREFIX;
72                 *end = lastchar;
73         }
74         else
75         {
76                 if (flags)
77                         *flags = 0;
78                 *end = in;
79         }
80
81         return start;
82 }
83
84 static int
85 compareSyn(const void *a, const void *b)
86 {
87         return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in);
88 }
89
90
91 Datum
92 dsynonym_init(PG_FUNCTION_ARGS)
93 {
94         List       *dictoptions = (List *) PG_GETARG_POINTER(0);
95         DictSyn    *d;
96         ListCell   *l;
97         char       *filename = NULL;
98         bool            case_sensitive = false;
99         tsearch_readline_state trst;
100         char       *starti,
101                            *starto,
102                            *end = NULL;
103         int                     cur = 0;
104         char       *line = NULL;
105         uint16          flags = 0;
106
107         foreach(l, dictoptions)
108         {
109                 DefElem    *defel = (DefElem *) lfirst(l);
110
111                 if (pg_strcasecmp("Synonyms", defel->defname) == 0)
112                         filename = defGetString(defel);
113                 else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
114                         case_sensitive = defGetBoolean(defel);
115                 else
116                         ereport(ERROR,
117                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
118                                          errmsg("unrecognized synonym parameter: \"%s\"",
119                                                         defel->defname)));
120         }
121
122         if (!filename)
123                 ereport(ERROR,
124                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
125                                  errmsg("missing Synonyms parameter")));
126
127         filename = get_tsearch_config_filename(filename, "syn");
128
129         if (!tsearch_readline_begin(&trst, filename))
130                 ereport(ERROR,
131                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
132                                  errmsg("could not open synonym file \"%s\": %m",
133                                                 filename)));
134
135         d = (DictSyn *) palloc0(sizeof(DictSyn));
136
137         while ((line = tsearch_readline(&trst)) != NULL)
138         {
139                 starti = findwrd(line, &end, NULL);
140                 if (!starti)
141                 {
142                         /* Empty line */
143                         goto skipline;
144                 }
145                 if (*end == '\0')
146                 {
147                         /* A line with only one word. Ignore silently. */
148                         goto skipline;
149                 }
150                 *end = '\0';
151
152                 starto = findwrd(end + 1, &end, &flags);
153                 if (!starto)
154                 {
155                         /* A line with only one word (+whitespace). Ignore silently. */
156                         goto skipline;
157                 }
158                 *end = '\0';
159
160                 /*
161                  * starti now points to the first word, and starto to the second word
162                  * on the line, with a \0 terminator at the end of both words.
163                  */
164
165                 if (cur >= d->len)
166                 {
167                         if (d->len == 0)
168                         {
169                                 d->len = 64;
170                                 d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
171                         }
172                         else
173                         {
174                                 d->len *= 2;
175                                 d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
176                         }
177                 }
178
179                 if (case_sensitive)
180                 {
181                         d->syn[cur].in = pstrdup(starti);
182                         d->syn[cur].out = pstrdup(starto);
183                 }
184                 else
185                 {
186                         d->syn[cur].in = lowerstr(starti);
187                         d->syn[cur].out = lowerstr(starto);
188                 }
189
190                 d->syn[cur].outlen = strlen(starto);
191                 d->syn[cur].flags = flags;
192
193                 cur++;
194
195 skipline:
196                 pfree(line);
197         }
198
199         tsearch_readline_end(&trst);
200
201         d->len = cur;
202         qsort(d->syn, d->len, sizeof(Syn), compareSyn);
203
204         d->case_sensitive = case_sensitive;
205
206         PG_RETURN_POINTER(d);
207 }
208
209 Datum
210 dsynonym_lexize(PG_FUNCTION_ARGS)
211 {
212         DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
213         char       *in = (char *) PG_GETARG_POINTER(1);
214         int32           len = PG_GETARG_INT32(2);
215         Syn                     key,
216                            *found;
217         TSLexeme   *res;
218
219         /* note: d->len test protects against Solaris bsearch-of-no-items bug */
220         if (len <= 0 || d->len <= 0)
221                 PG_RETURN_POINTER(NULL);
222
223         if (d->case_sensitive)
224                 key.in = pnstrdup(in, len);
225         else
226                 key.in = lowerstr_with_len(in, len);
227
228         key.out = NULL;
229
230         found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
231         pfree(key.in);
232
233         if (!found)
234                 PG_RETURN_POINTER(NULL);
235
236         res = palloc0(sizeof(TSLexeme) * 2);
237         res[0].lexeme = pnstrdup(found->out, found->outlen);
238         res[0].flags = found->flags;
239
240         PG_RETURN_POINTER(res);
241 }