]> granicus.if.org Git - postgresql/blob - contrib/tsearch2/dict_syn.c
Reduce WAL activity for page splits:
[postgresql] / contrib / tsearch2 / dict_syn.c
1 /* $PostgreSQL: pgsql/contrib/tsearch2/dict_syn.c,v 1.11 2006/12/04 09:26:57 teodor Exp $ */
2
3 /*
4  * Synonym dictionary interface
5  * Teodor Sigaev <teodor@sigaev.ru>
6  */
7 #include "postgres.h"
8
9 #include <ctype.h>
10
11 #include "dict.h"
12 #include "common.h"
13 #include "ts_locale.h"
14
/* Size in bytes of the line buffer syn_init() hands to fgets() when reading the synonym file. */
15 #define SYNBUFLEN       4096
/*
 * One synonym rule.  syn_init() fills both members with malloc'ed
 * (strdup) copies of the lower-cased words read from the synonym file.
 */
16 typedef struct
17 {
18         char       *in;		/* word to look up; sort/search key used by compareSyn() */
19         char       *out;		/* replacement word that syn_lexize() returns as the lexeme */
20 }       Syn;
21
/*
 * A loaded synonym dictionary, built by syn_init() and searched by
 * syn_lexize().
 */
22 typedef struct
23 {
24         int                     len;	/* number of entries in syn; while syn_init() is still loading it holds the allocated capacity */
25         Syn                *syn;	/* malloc'ed array of rules, sorted by "in" with compareSyn() so bsearch() can be used */
26 }       DictSyn;
27
/* syn_init(): loads a synonym file and returns a pointer to a DictSyn. */
28 PG_FUNCTION_INFO_V1(syn_init);
29 Datum           syn_init(PG_FUNCTION_ARGS);
30
/* syn_lexize(): looks one word up in a DictSyn and returns its synonym, if any. */
31 PG_FUNCTION_INFO_V1(syn_lexize);
32 Datum           syn_lexize(PG_FUNCTION_ARGS);
33
/*
 * Find the next whitespace-delimited word in a NUL-terminated string.
 *
 * in:	string to scan; it is not modified.
 * end: output.  Set to the first character after the word (either a
 *		whitespace character or the terminating NUL), or to NULL when no
 *		word was found.
 *
 * Returns a pointer to the first character of the word, or NULL if the
 * string is empty or consists only of whitespace.
 */
static char *
findwrd(char *in, char **end)
{
	char	   *start;

	*end = NULL;

	/* skip leading whitespace */
	while (*in && isspace((unsigned char) *in))
		in++;

	/*
	 * Nothing left but the terminator: there is no word.  The character
	 * must be tested here, not the pointer, which can never be NULL at
	 * this point.
	 */
	if (*in == '\0')
		return NULL;
	start = in;

	/* advance to the end of the word */
	while (*in && !isspace((unsigned char) *in))
		in++;

	*end = in;
	return start;
}
53
54 static int
55 compareSyn(const void *a, const void *b)
56 {
57         return strcmp(((Syn *) a)->in, ((Syn *) b)->in);
58 }
59
60
61 Datum
62 syn_init(PG_FUNCTION_ARGS)
63 {
64         text       *in;
65         DictSyn    *d;
66         int                     cur = 0;
67         FILE       *fin;
68         char       *filename;
69         char            buf[SYNBUFLEN];
70         char       *starti,
71                            *starto,
72                            *end = NULL;
73         int                     slen;
74
75         if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
76                 ereport(ERROR,
77                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
78                                  errmsg("NULL config")));
79
80         in = PG_GETARG_TEXT_P(0);
81         if (VARSIZE(in) - VARHDRSZ == 0)
82                 ereport(ERROR,
83                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
84                                  errmsg("VOID config")));
85
86         filename = text2char(in);
87         PG_FREE_IF_COPY(in, 0);
88         if ((fin = fopen(filename, "r")) == NULL)
89                 ereport(ERROR,
90                                 (errcode_for_file_access(),
91                                  errmsg("could not open file \"%s\": %m",
92                                                 filename)));
93
94         d = (DictSyn *) malloc(sizeof(DictSyn));
95         if (!d)
96         {
97                 fclose(fin);
98                 ereport(ERROR,
99                                 (errcode(ERRCODE_OUT_OF_MEMORY),
100                                  errmsg("out of memory")));
101         }
102         memset(d, 0, sizeof(DictSyn));
103
104         while (fgets(buf, SYNBUFLEN, fin))
105         {
106                 slen = strlen(buf) - 1;
107                 buf[slen] = '\0';
108                 if (*buf == '\0')
109                         continue;
110                 if (cur == d->len)
111                 {
112                         d->len = (d->len) ? 2 * d->len : 16;
113                         d->syn = (Syn *) realloc(d->syn, sizeof(Syn) * d->len);
114                         if (!d->syn)
115                         {
116                                 fclose(fin);
117                                 ereport(ERROR,
118                                                 (errcode(ERRCODE_OUT_OF_MEMORY),
119                                                  errmsg("out of memory")));
120                         }
121                 }
122
123                 starti = findwrd(buf, &end);
124                 if (!starti)
125                         continue;
126                 *end = '\0';
127                 if (end >= buf + slen)
128                         continue;
129
130                 starto = findwrd(end + 1, &end);
131                 if (!starto)
132                         continue;
133                 *end = '\0';
134
135                 d->syn[cur].in = strdup(lowerstr(starti));
136                 d->syn[cur].out = strdup(lowerstr(starto));
137                 if (!(d->syn[cur].in && d->syn[cur].out))
138                 {
139                         fclose(fin);
140                         ereport(ERROR,
141                                         (errcode(ERRCODE_OUT_OF_MEMORY),
142                                          errmsg("out of memory")));
143                 }
144
145                 cur++;
146         }
147
148         fclose(fin);
149
150         d->len = cur;
151         if (cur > 1)
152                 qsort(d->syn, d->len, sizeof(Syn), compareSyn);
153
154         pfree(filename);
155         PG_RETURN_POINTER(d);
156 }
157
158 Datum
159 syn_lexize(PG_FUNCTION_ARGS)
160 {
161         DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
162         char       *in = (char *) PG_GETARG_POINTER(1);
163         Syn                     key,
164                            *found;
165         TSLexeme   *res = NULL;
166         char       *wrd;
167
168         if (!PG_GETARG_INT32(2))
169                 PG_RETURN_POINTER(NULL);
170
171         key.out = NULL;
172         wrd = pnstrdup(in, PG_GETARG_INT32(2));
173         key.in = lowerstr(wrd);
174         pfree(wrd);
175
176         found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
177         pfree(key.in);
178
179         if (!found)
180                 PG_RETURN_POINTER(NULL);
181
182         res = palloc(sizeof(TSLexeme) * 2);
183         memset(res, 0, sizeof(TSLexeme) * 2);
184         res[0].lexeme = pstrdup(found->out);
185
186         PG_RETURN_POINTER(res);
187 }