]> granicus.if.org Git - postgresql/blob - src/backend/tsearch/regis.c
f6befcea8a9e0983bddfde5adf9fae8e5008054d
[postgresql] / src / backend / tsearch / regis.c
1 /*-------------------------------------------------------------------------
2  *
3  * regis.c
4  *              Fast regex subset
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        $PostgreSQL: pgsql/src/backend/tsearch/regis.c,v 1.5 2008/06/17 16:09:06 momjian Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include "tsearch/dicts/regis.h"
18 #include "tsearch/ts_locale.h"
19
20 #define RS_IN_ONEOF 1
21 #define RS_IN_ONEOF_IN  2
22 #define RS_IN_NONEOF    3
23 #define RS_IN_WAIT      4
24
25
26 /*
27  * Test whether a regex is of the subset supported here.
28  * Keep this in sync with RS_compile!
29  */
30 bool
31 RS_isRegis(const char *str)
32 {
33         int                     state = RS_IN_WAIT;
34         const char *c = str;
35
36         while (*c)
37         {
38                 if (state == RS_IN_WAIT)
39                 {
40                         if (t_isalpha(c))
41                                 /* okay */ ;
42                         else if (t_iseq(c, '['))
43                                 state = RS_IN_ONEOF;
44                         else
45                                 return false;
46                 }
47                 else if (state == RS_IN_ONEOF)
48                 {
49                         if (t_iseq(c, '^'))
50                                 state = RS_IN_NONEOF;
51                         else if (t_isalpha(c))
52                                 state = RS_IN_ONEOF_IN;
53                         else
54                                 return false;
55                 }
56                 else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
57                 {
58                         if (t_isalpha(c))
59                                 /* okay */ ;
60                         else if (t_iseq(c, ']'))
61                                 state = RS_IN_WAIT;
62                         else
63                                 return false;
64                 }
65                 else
66                         elog(ERROR, "internal error in RS_isRegis: state %d", state);
67                 c += pg_mblen(c);
68         }
69
70         return (state == RS_IN_WAIT);
71 }
72
73 static RegisNode *
74 newRegisNode(RegisNode *prev, int len)
75 {
76         RegisNode  *ptr;
77
78         ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1);
79         if (prev)
80                 prev->next = ptr;
81         return ptr;
82 }
83
84 void
85 RS_compile(Regis *r, bool issuffix, const char *str)
86 {
87         int                     len = strlen(str);
88         int                     state = RS_IN_WAIT;
89         const char *c = str;
90         RegisNode  *ptr = NULL;
91
92         memset(r, 0, sizeof(Regis));
93         r->issuffix = (issuffix) ? 1 : 0;
94
95         while (*c)
96         {
97                 if (state == RS_IN_WAIT)
98                 {
99                         if (t_isalpha(c))
100                         {
101                                 if (ptr)
102                                         ptr = newRegisNode(ptr, len);
103                                 else
104                                         ptr = r->node = newRegisNode(NULL, len);
105                                 COPYCHAR(ptr->data, c);
106                                 ptr->type = RSF_ONEOF;
107                                 ptr->len = pg_mblen(c);
108                         }
109                         else if (t_iseq(c, '['))
110                         {
111                                 if (ptr)
112                                         ptr = newRegisNode(ptr, len);
113                                 else
114                                         ptr = r->node = newRegisNode(NULL, len);
115                                 ptr->type = RSF_ONEOF;
116                                 state = RS_IN_ONEOF;
117                         }
118                         else                            /* shouldn't get here */
119                                 elog(ERROR, "invalid regis pattern: \"%s\"", str);
120                 }
121                 else if (state == RS_IN_ONEOF)
122                 {
123                         if (t_iseq(c, '^'))
124                         {
125                                 ptr->type = RSF_NONEOF;
126                                 state = RS_IN_NONEOF;
127                         }
128                         else if (t_isalpha(c))
129                         {
130                                 COPYCHAR(ptr->data, c);
131                                 ptr->len = pg_mblen(c);
132                                 state = RS_IN_ONEOF_IN;
133                         }
134                         else                            /* shouldn't get here */
135                                 elog(ERROR, "invalid regis pattern: \"%s\"", str);
136                 }
137                 else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
138                 {
139                         if (t_isalpha(c))
140                         {
141                                 COPYCHAR(ptr->data + ptr->len, c);
142                                 ptr->len += pg_mblen(c);
143                         }
144                         else if (t_iseq(c, ']'))
145                                 state = RS_IN_WAIT;
146                         else                            /* shouldn't get here */
147                                 elog(ERROR, "invalid regis pattern: \"%s\"", str);
148                 }
149                 else
150                         elog(ERROR, "internal error in RS_compile: state %d", state);
151                 c += pg_mblen(c);
152         }
153
154         if (state != RS_IN_WAIT)                /* shouldn't get here */
155                 elog(ERROR, "invalid regis pattern: \"%s\"", str);
156
157         ptr = r->node;
158         while (ptr)
159         {
160                 r->nchar++;
161                 ptr = ptr->next;
162         }
163 }
164
165 void
166 RS_free(Regis *r)
167 {
168         RegisNode  *ptr = r->node,
169                            *tmp;
170
171         while (ptr)
172         {
173                 tmp = ptr->next;
174                 pfree(ptr);
175                 ptr = tmp;
176         }
177
178         r->node = NULL;
179 }
180
181 #ifdef USE_WIDE_UPPER_LOWER
182 static bool
183 mb_strchr(char *str, char *c)
184 {
185         int                     clen = pg_mblen(c),
186                                 plen,
187                                 i;
188         char       *ptr = str;
189         bool            res = false;
190
191         clen = pg_mblen(c);
192         while (*ptr && !res)
193         {
194                 plen = pg_mblen(ptr);
195                 if (plen == clen)
196                 {
197                         i = plen;
198                         res = true;
199                         while (i--)
200                                 if (*(ptr + i) != *(c + i))
201                                 {
202                                         res = false;
203                                         break;
204                                 }
205                 }
206
207                 ptr += plen;
208         }
209
210         return res;
211 }
212 #else
213 #define mb_strchr(s,c)  ( (strchr((s),*(c)) == NULL) ? false : true )
214 #endif
215
216
217 bool
218 RS_execute(Regis *r, char *str)
219 {
220         RegisNode  *ptr = r->node;
221         char       *c = str;
222         int                     len = 0;
223
224         while (*c)
225         {
226                 len++;
227                 c += pg_mblen(c);
228         }
229
230         if (len < r->nchar)
231                 return 0;
232
233         c = str;
234         if (r->issuffix)
235         {
236                 len -= r->nchar;
237                 while (len-- > 0)
238                         c += pg_mblen(c);
239         }
240
241
242         while (ptr)
243         {
244                 switch (ptr->type)
245                 {
246                         case RSF_ONEOF:
247                                 if (mb_strchr((char *) ptr->data, c) != true)
248                                         return false;
249                                 break;
250                         case RSF_NONEOF:
251                                 if (mb_strchr((char *) ptr->data, c) == true)
252                                         return false;
253                                 break;
254                         default:
255                                 elog(ERROR, "unrecognized regis node type: %d", ptr->type);
256                 }
257                 ptr = ptr->next;
258                 c += pg_mblen(c);
259         }
260
261         return true;
262 }