]> granicus.if.org Git - postgresql/blob - contrib/tsearch2/wparser.c
For some reason access/tupmacs.h has been #including utils/memutils.h,
[postgresql] / contrib / tsearch2 / wparser.c
1 /*
2  * interface functions to parser
3  * Teodor Sigaev <teodor@sigaev.ru>
4  */
5 #include "postgres.h"
6
7 #include <ctype.h>
8
9 #include "catalog/pg_type.h"
10 #include "executor/spi.h"
11 #include "fmgr.h"
12 #include "funcapi.h"
13 #include "utils/array.h"
14 #include "utils/memutils.h"
15
16 #include "wparser.h"
17 #include "ts_cfg.h"
18 #include "snmap.h"
19 #include "common.h"
20
21 /*********top interface**********/
22
23 static Oid      current_parser_id = InvalidOid;
24
25 void
26 init_prs(Oid id, WParserInfo * prs)
27 {
28         Oid                     arg[1];
29         bool            isnull;
30         Datum           pars[1];
31         int                     stat;
32         void       *plan;
33         char            buf[1024],
34                            *nsp;
35
36         arg[0] = OIDOID;
37         pars[0] = ObjectIdGetDatum(id);
38
39         memset(prs, 0, sizeof(WParserInfo));
40         SPI_connect();
41         nsp = get_namespace(TSNSP_FunctionOid);
42         sprintf(buf, "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from %s.pg_ts_parser where oid = $1", nsp);
43         pfree(nsp);
44         plan = SPI_prepare(buf, 1, arg);
45         if (!plan)
46                 ts_error(ERROR, "SPI_prepare() failed");
47
48         stat = SPI_execp(plan, pars, " ", 1);
49         if (stat < 0)
50                 ts_error(ERROR, "SPI_execp return %d", stat);
51         if (SPI_processed > 0)
52         {
53                 Oid                     oid = InvalidOid;
54
55                 oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
56                 fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
57                 oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull));
58                 fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
59                 oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
60                 fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
61                 prs->lextype = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull));
62                 oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull));
63                 fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
64                 prs->prs_id = id;
65         }
66         else
67                 ts_error(ERROR, "No parser with id %d", id);
68         SPI_freeplan(plan);
69         SPI_finish();
70 }
71
72 typedef struct
73 {
74         WParserInfo *last_prs;
75         int                     len;
76         int                     reallen;
77         WParserInfo *list;
78         SNMap           name2id_map;
79 }       PrsList;
80
81 static PrsList PList = {NULL, 0, 0, NULL, {0, 0, NULL}};
82
83 void
84 reset_prs(void)
85 {
86         freeSNMap(&(PList.name2id_map));
87         if (PList.list)
88                 free(PList.list);
89         memset(&PList, 0, sizeof(PrsList));
90 }
91
92 static int
93 compareprs(const void *a, const void *b)
94 {
95         if ( ((WParserInfo *) a)->prs_id == ((WParserInfo *) b)->prs_id )
96                 return 0;
97         return ( ((WParserInfo *) a)->prs_id < ((WParserInfo *) b)->prs_id ) ? -1 : 1;
98 }
99
100 WParserInfo *
101 findprs(Oid id)
102 {
103         /* last used prs */
104         if (PList.last_prs && PList.last_prs->prs_id == id)
105                 return PList.last_prs;
106
107         /* already used prs */
108         if (PList.len != 0)
109         {
110                 WParserInfo key;
111
112                 key.prs_id = id;
113                 PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
114                 if (PList.last_prs != NULL)
115                         return PList.last_prs;
116         }
117
118         /* last chance */
119         if (PList.len == PList.reallen)
120         {
121                 WParserInfo *tmp;
122                 int                     reallen = (PList.reallen) ? 2 * PList.reallen : 16;
123
124                 tmp = (WParserInfo *) realloc(PList.list, sizeof(WParserInfo) * reallen);
125                 if (!tmp)
126                         ts_error(ERROR, "No memory");
127                 PList.reallen = reallen;
128                 PList.list = tmp;
129         }
130         PList.last_prs = &(PList.list[PList.len]);
131         init_prs(id, PList.last_prs);
132         PList.len++;
133         qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
134         return findprs(id); /* qsort changed order!! */ ;
135 }
136
137 Oid
138 name2id_prs(text *name)
139 {
140         Oid                     arg[1];
141         bool            isnull;
142         Datum           pars[1];
143         int                     stat;
144         Oid                     id = findSNMap_t(&(PList.name2id_map), name);
145         char            buf[1024],
146                            *nsp;
147         void       *plan;
148
149         arg[0] = TEXTOID;
150         pars[0] = PointerGetDatum(name);
151
152         if (id)
153                 return id;
154
155         SPI_connect();
156         nsp = get_namespace(TSNSP_FunctionOid);
157         sprintf(buf, "select oid from %s.pg_ts_parser where prs_name = $1", nsp);
158         pfree(nsp);
159         plan = SPI_prepare(buf, 1, arg);
160         if (!plan)
161                 ts_error(ERROR, "SPI_prepare() failed");
162
163         stat = SPI_execp(plan, pars, " ", 1);
164         if (stat < 0)
165                 ts_error(ERROR, "SPI_execp return %d", stat);
166         if (SPI_processed > 0)
167                 id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
168         else
169                 ts_error(ERROR, "No parser '%s'", text2char(name));
170         SPI_freeplan(plan);
171         SPI_finish();
172         addSNMap_t(&(PList.name2id_map), name, id);
173         return id;
174 }
175
176
177 /******sql-level interface******/
178 typedef struct
179 {
180         int                     cur;
181         LexDescr   *list;
182 }       TypeStorage;
183
184 static void
185 setup_firstcall(FuncCallContext *funcctx, Oid prsid)
186 {
187         TupleDesc       tupdesc;
188         MemoryContext oldcontext;
189         TypeStorage *st;
190         WParserInfo *prs = findprs(prsid);
191
192         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
193
194         st = (TypeStorage *) palloc(sizeof(TypeStorage));
195         st->cur = 0;
196         st->list = (LexDescr *) DatumGetPointer(
197                                 OidFunctionCall1(prs->lextype, PointerGetDatum(prs->prs))
198                 );
199         funcctx->user_fctx = (void *) st;
200         tupdesc = RelationNameGetTupleDesc("tokentype");
201         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
202         MemoryContextSwitchTo(oldcontext);
203 }
204
205 static Datum
206 process_call(FuncCallContext *funcctx)
207 {
208         TypeStorage *st;
209
210         st = (TypeStorage *) funcctx->user_fctx;
211         if (st->list && st->list[st->cur].lexid)
212         {
213                 Datum           result;
214                 char       *values[3];
215                 char            txtid[16];
216                 HeapTuple       tuple;
217
218                 values[0] = txtid;
219                 sprintf(txtid, "%d", st->list[st->cur].lexid);
220                 values[1] = st->list[st->cur].alias;
221                 values[2] = st->list[st->cur].descr;
222
223                 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
224                 result = HeapTupleGetDatum(tuple);
225
226                 pfree(values[1]);
227                 pfree(values[2]);
228                 st->cur++;
229                 return result;
230         }
231         else
232         {
233                 if (st->list)
234                         pfree(st->list);
235                 pfree(st);
236         }
237         return (Datum) 0;
238 }
239
240 PG_FUNCTION_INFO_V1(token_type);
241 Datum           token_type(PG_FUNCTION_ARGS);
242
243 Datum
244 token_type(PG_FUNCTION_ARGS)
245 {
246         FuncCallContext *funcctx;
247         Datum           result;
248
249         SET_FUNCOID();
250         if (SRF_IS_FIRSTCALL())
251         {
252                 funcctx = SRF_FIRSTCALL_INIT();
253                 setup_firstcall(funcctx, PG_GETARG_OID(0));
254         }
255
256         funcctx = SRF_PERCALL_SETUP();
257
258         if ((result = process_call(funcctx)) != (Datum) 0)
259                 SRF_RETURN_NEXT(funcctx, result);
260         SRF_RETURN_DONE(funcctx);
261 }
262
263 PG_FUNCTION_INFO_V1(token_type_byname);
264 Datum           token_type_byname(PG_FUNCTION_ARGS);
265 Datum
266 token_type_byname(PG_FUNCTION_ARGS)
267 {
268         FuncCallContext *funcctx;
269         Datum           result;
270
271         SET_FUNCOID();
272         if (SRF_IS_FIRSTCALL())
273         {
274                 text       *name = PG_GETARG_TEXT_P(0);
275
276                 funcctx = SRF_FIRSTCALL_INIT();
277                 setup_firstcall(funcctx, name2id_prs(name));
278                 PG_FREE_IF_COPY(name, 0);
279         }
280
281         funcctx = SRF_PERCALL_SETUP();
282
283         if ((result = process_call(funcctx)) != (Datum) 0)
284                 SRF_RETURN_NEXT(funcctx, result);
285         SRF_RETURN_DONE(funcctx);
286 }
287
288 PG_FUNCTION_INFO_V1(token_type_current);
289 Datum           token_type_current(PG_FUNCTION_ARGS);
290 Datum
291 token_type_current(PG_FUNCTION_ARGS)
292 {
293         FuncCallContext *funcctx;
294         Datum           result;
295
296         SET_FUNCOID();
297         if (SRF_IS_FIRSTCALL())
298         {
299                 funcctx = SRF_FIRSTCALL_INIT();
300                 if (current_parser_id == InvalidOid)
301                         current_parser_id = name2id_prs(char2text("default"));
302                 setup_firstcall(funcctx, current_parser_id);
303         }
304
305         funcctx = SRF_PERCALL_SETUP();
306
307         if ((result = process_call(funcctx)) != (Datum) 0)
308                 SRF_RETURN_NEXT(funcctx, result);
309         SRF_RETURN_DONE(funcctx);
310 }
311
312
313 PG_FUNCTION_INFO_V1(set_curprs);
314 Datum           set_curprs(PG_FUNCTION_ARGS);
315 Datum
316 set_curprs(PG_FUNCTION_ARGS)
317 {
318         SET_FUNCOID();
319         findprs(PG_GETARG_OID(0));
320         current_parser_id = PG_GETARG_OID(0);
321         PG_RETURN_VOID();
322 }
323
324 PG_FUNCTION_INFO_V1(set_curprs_byname);
325 Datum           set_curprs_byname(PG_FUNCTION_ARGS);
326 Datum
327 set_curprs_byname(PG_FUNCTION_ARGS)
328 {
329         text       *name = PG_GETARG_TEXT_P(0);
330
331         SET_FUNCOID();
332         DirectFunctionCall1(
333                                                 set_curprs,
334                                                 ObjectIdGetDatum(name2id_prs(name))
335                 );
336         PG_FREE_IF_COPY(name, 0);
337         PG_RETURN_VOID();
338 }
339
/*
 * One token produced by the parse* functions.
 */
typedef struct
{
	int			type;			/* value returned by the parser's
								 * get-lexeme function for this token */
	char	   *lexem;			/* palloc'ed, NUL-terminated copy of the
								 * token text */
} LexemEntry;
345
346 typedef struct
347 {
348         int                     cur;
349         int                     len;
350         LexemEntry *list;
351 }       PrsStorage;
352
353
354 static void
355 prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt)
356 {
357         TupleDesc       tupdesc;
358         MemoryContext oldcontext;
359         PrsStorage *st;
360         WParserInfo *prs = findprs(prsid);
361         char       *lex = NULL;
362         int                     llen = 0,
363                                 type = 0;
364
365         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
366
367         st = (PrsStorage *) palloc(sizeof(PrsStorage));
368         st->cur = 0;
369         st->len = 16;
370         st->list = (LexemEntry *) palloc(sizeof(LexemEntry) * st->len);
371
372         prs->prs = (void *) DatumGetPointer(
373                                                                                 FunctionCall2(
374                                                                                                           &(prs->start_info),
375                                                                                    PointerGetDatum(VARDATA(txt)),
376                                                                    Int32GetDatum(VARSIZE(txt) - VARHDRSZ)
377                                                                                                           )
378                 );
379
380         while ((type = DatumGetInt32(FunctionCall3(
381                                                                                            &(prs->getlexeme_info),
382                                                                                            PointerGetDatum(prs->prs),
383                                                                                            PointerGetDatum(&lex),
384                                                                                   PointerGetDatum(&llen)))) != 0)
385         {
386
387                 if (st->cur >= st->len)
388                 {
389                         st->len = 2 * st->len;
390                         st->list = (LexemEntry *) repalloc(st->list, sizeof(LexemEntry) * st->len);
391                 }
392                 st->list[st->cur].lexem = palloc(llen + 1);
393                 memcpy(st->list[st->cur].lexem, lex, llen);
394                 st->list[st->cur].lexem[llen] = '\0';
395                 st->list[st->cur].type = type;
396                 st->cur++;
397         }
398
399         FunctionCall1(
400                                   &(prs->end_info),
401                                   PointerGetDatum(prs->prs)
402                 );
403
404         st->len = st->cur;
405         st->cur = 0;
406
407         funcctx->user_fctx = (void *) st;
408         tupdesc = RelationNameGetTupleDesc("tokenout");
409         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
410         MemoryContextSwitchTo(oldcontext);
411 }
412
413 static Datum
414 prs_process_call(FuncCallContext *funcctx)
415 {
416         PrsStorage *st;
417
418         st = (PrsStorage *) funcctx->user_fctx;
419         if (st->cur < st->len)
420         {
421                 Datum           result;
422                 char       *values[2];
423                 char            tid[16];
424                 HeapTuple       tuple;
425
426                 values[0] = tid;
427                 sprintf(tid, "%d", st->list[st->cur].type);
428                 values[1] = st->list[st->cur].lexem;
429                 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
430                 result = HeapTupleGetDatum(tuple);
431
432                 pfree(values[1]);
433                 st->cur++;
434                 return result;
435         }
436         else
437         {
438                 if (st->list)
439                         pfree(st->list);
440                 pfree(st);
441         }
442         return (Datum) 0;
443 }
444
445
446
447 PG_FUNCTION_INFO_V1(parse);
448 Datum           parse(PG_FUNCTION_ARGS);
449 Datum
450 parse(PG_FUNCTION_ARGS)
451 {
452         FuncCallContext *funcctx;
453         Datum           result;
454
455         SET_FUNCOID();
456         if (SRF_IS_FIRSTCALL())
457         {
458                 text       *txt = PG_GETARG_TEXT_P(1);
459
460                 funcctx = SRF_FIRSTCALL_INIT();
461                 prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
462                 PG_FREE_IF_COPY(txt, 1);
463         }
464
465         funcctx = SRF_PERCALL_SETUP();
466
467         if ((result = prs_process_call(funcctx)) != (Datum) 0)
468                 SRF_RETURN_NEXT(funcctx, result);
469         SRF_RETURN_DONE(funcctx);
470 }
471
472 PG_FUNCTION_INFO_V1(parse_byname);
473 Datum           parse_byname(PG_FUNCTION_ARGS);
474 Datum
475 parse_byname(PG_FUNCTION_ARGS)
476 {
477         FuncCallContext *funcctx;
478         Datum           result;
479
480         SET_FUNCOID();
481         if (SRF_IS_FIRSTCALL())
482         {
483                 text       *name = PG_GETARG_TEXT_P(0);
484                 text       *txt = PG_GETARG_TEXT_P(1);
485
486                 funcctx = SRF_FIRSTCALL_INIT();
487                 prs_setup_firstcall(funcctx, name2id_prs(name), txt);
488                 PG_FREE_IF_COPY(name, 0);
489                 PG_FREE_IF_COPY(txt, 1);
490         }
491
492         funcctx = SRF_PERCALL_SETUP();
493
494         if ((result = prs_process_call(funcctx)) != (Datum) 0)
495                 SRF_RETURN_NEXT(funcctx, result);
496         SRF_RETURN_DONE(funcctx);
497 }
498
499
500 PG_FUNCTION_INFO_V1(parse_current);
501 Datum           parse_current(PG_FUNCTION_ARGS);
502 Datum
503 parse_current(PG_FUNCTION_ARGS)
504 {
505         FuncCallContext *funcctx;
506         Datum           result;
507
508         SET_FUNCOID();
509         if (SRF_IS_FIRSTCALL())
510         {
511                 text       *txt = PG_GETARG_TEXT_P(0);
512
513                 funcctx = SRF_FIRSTCALL_INIT();
514                 if (current_parser_id == InvalidOid)
515                         current_parser_id = name2id_prs(char2text("default"));
516                 prs_setup_firstcall(funcctx, current_parser_id, txt);
517                 PG_FREE_IF_COPY(txt, 0);
518         }
519
520         funcctx = SRF_PERCALL_SETUP();
521
522         if ((result = prs_process_call(funcctx)) != (Datum) 0)
523                 SRF_RETURN_NEXT(funcctx, result);
524         SRF_RETURN_DONE(funcctx);
525 }
526
527 PG_FUNCTION_INFO_V1(headline);
528 Datum           headline(PG_FUNCTION_ARGS);
529 Datum
530 headline(PG_FUNCTION_ARGS)
531 {
532         text       *in = PG_GETARG_TEXT_P(1);
533         QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
534         text       *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
535         HLPRSTEXT       prs;
536         text       *out;
537         TSCfgInfo  *cfg;
538         WParserInfo *prsobj;
539
540         SET_FUNCOID();
541         cfg = findcfg(PG_GETARG_OID(0));
542         prsobj = findprs(cfg->prs_id);
543
544         memset(&prs, 0, sizeof(HLPRSTEXT));
545         prs.lenwords = 32;
546         prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
547         hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
548
549
550         FunctionCall3(
551                                   &(prsobj->headline_info),
552                                   PointerGetDatum(&prs),
553                                   PointerGetDatum(opt),
554                                   PointerGetDatum(query)
555                 );
556
557         out = genhl(&prs);
558
559         PG_FREE_IF_COPY(in, 1);
560         PG_FREE_IF_COPY(query, 2);
561         if (opt)
562                 PG_FREE_IF_COPY(opt, 3);
563         pfree(prs.words);
564         pfree(prs.startsel);
565         pfree(prs.stopsel);
566
567         PG_RETURN_POINTER(out);
568 }
569
570
571 PG_FUNCTION_INFO_V1(headline_byname);
572 Datum           headline_byname(PG_FUNCTION_ARGS);
573 Datum
574 headline_byname(PG_FUNCTION_ARGS)
575 {
576         text       *cfg = PG_GETARG_TEXT_P(0);
577
578         Datum           out;
579
580         SET_FUNCOID();
581         out = DirectFunctionCall4(
582                                                           headline,
583                                                           ObjectIdGetDatum(name2id_cfg(cfg)),
584                                                           PG_GETARG_DATUM(1),
585                                                           PG_GETARG_DATUM(2),
586                         (PG_NARGS() > 3) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
587                 );
588
589         PG_FREE_IF_COPY(cfg, 0);
590         PG_RETURN_DATUM(out);
591 }
592
593 PG_FUNCTION_INFO_V1(headline_current);
594 Datum           headline_current(PG_FUNCTION_ARGS);
595 Datum
596 headline_current(PG_FUNCTION_ARGS)
597 {
598         SET_FUNCOID();
599         PG_RETURN_DATUM(DirectFunctionCall4(
600                                                                                 headline,
601                                                                                 ObjectIdGetDatum(get_currcfg()),
602                                                                                 PG_GETARG_DATUM(0),
603                                                                                 PG_GETARG_DATUM(1),
604                         (PG_NARGS() > 2) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
605                                                                                 ));
606 }