]> granicus.if.org Git - postgresql/blob - contrib/tsearch2/ts_cfg.c
Reduce WAL activity for page splits:
[postgresql] / contrib / tsearch2 / ts_cfg.c
1 /*
2  * interface functions to tscfg
3  * Teodor Sigaev <teodor@sigaev.ru>
4  */
5 #include "postgres.h"
6
7 #include <ctype.h>
8 #include <locale.h>
9
10 #include "catalog/pg_type.h"
11 #include "executor/spi.h"
12 #include "fmgr.h"
13 #include "utils/array.h"
14 #include "utils/memutils.h"
15
16 #include "ts_cfg.h"
17 #include "dict.h"
18 #include "wparser.h"
19 #include "snmap.h"
20 #include "common.h"
21 #include "tsvector.h"
22
23 PG_MODULE_MAGIC;
24
25 #define IGNORE_LONGLEXEME       1
26
27 /*********top interface**********/
28
29 static Oid      current_cfg_id = 0;
30
31 void
32 init_cfg(Oid id, TSCfgInfo * cfg)
33 {
34         Oid                     arg[2];
35         bool            isnull;
36         Datum           pars[2];
37         int                     stat,
38                                 i,
39                                 j;
40         text       *ptr;
41         text       *prsname = NULL;
42         char       *nsp = get_namespace(TSNSP_FunctionOid);
43         char            buf[1024];
44         MemoryContext oldcontext;
45         void       *plan;
46
47         arg[0] = OIDOID;
48         arg[1] = OIDOID;
49         pars[0] = ObjectIdGetDatum(id);
50         pars[1] = ObjectIdGetDatum(id);
51
52         memset(cfg, 0, sizeof(TSCfgInfo));
53         SPI_connect();
54
55         sprintf(buf, "select prs_name from %s.pg_ts_cfg where oid = $1", nsp);
56         plan = SPI_prepare(buf, 1, arg);
57         if (!plan)
58                 ts_error(ERROR, "SPI_prepare() failed");
59
60         stat = SPI_execp(plan, pars, " ", 1);
61         if (stat < 0)
62                 ts_error(ERROR, "SPI_execp return %d", stat);
63         if (SPI_processed > 0)
64         {
65                 prsname = (text *) DatumGetPointer(
66                                                                                    SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
67                         );
68                 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
69                 prsname = ptextdup(prsname);
70                 MemoryContextSwitchTo(oldcontext);
71
72                 cfg->id = id;
73         }
74         else
75                 ts_error(ERROR, "No tsearch cfg with id %d", id);
76
77         SPI_freeplan(plan);
78
79         arg[0] = TEXTOID;
80         sprintf(buf, "select lt.tokid, map.dict_name from %s.pg_ts_cfgmap as map, %s.pg_ts_cfg as cfg, %s.token_type( $1 ) as lt where lt.alias =  map.tok_alias and map.ts_name = cfg.ts_name and cfg.oid= $2 order by lt.tokid desc;", nsp, nsp, nsp);
81         plan = SPI_prepare(buf, 2, arg);
82         if (!plan)
83                 ts_error(ERROR, "SPI_prepare() failed");
84
85         pars[0] = PointerGetDatum(prsname);
86         stat = SPI_execp(plan, pars, " ", 0);
87         if (stat < 0)
88                 ts_error(ERROR, "SPI_execp return %d", stat);
89         if (SPI_processed <= 0)
90                 ts_error(ERROR, "No parser with id %d", id);
91
92         for (i = 0; i < SPI_processed; i++)
93         {
94                 int                     lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
95                 ArrayType  *toasted_a = (ArrayType *) PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
96                 ArrayType  *a;
97
98                 if (!cfg->map)
99                 {
100                         cfg->len = lexid + 1;
101                         cfg->map = (ListDictionary *) malloc(sizeof(ListDictionary) * cfg->len);
102                         if (!cfg->map)
103                                 ereport(ERROR,
104                                                 (errcode(ERRCODE_OUT_OF_MEMORY),
105                                                  errmsg("out of memory")));
106                         memset(cfg->map, 0, sizeof(ListDictionary) * cfg->len);
107                 }
108
109                 if (isnull)
110                         continue;
111
112                 a = (ArrayType *) PointerGetDatum(PG_DETOAST_DATUM(DatumGetPointer(toasted_a)));
113
114                 if (ARR_NDIM(a) != 1)
115                         ts_error(ERROR, "Wrong dimension");
116                 if (ARRNELEMS(a) < 1)
117                         continue;
118                 if (ARR_HASNULL(a))
119                         ts_error(ERROR, "Array must not contain nulls");
120
121                 cfg->map[lexid].len = ARRNELEMS(a);
122                 cfg->map[lexid].dict_id = (Datum *) malloc(sizeof(Datum) * cfg->map[lexid].len);
123                 if (!cfg->map[lexid].dict_id)
124                         ts_error(ERROR, "No memory");
125
126                 memset(cfg->map[lexid].dict_id, 0, sizeof(Datum) * cfg->map[lexid].len);
127                 ptr = (text *) ARR_DATA_PTR(a);
128                 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
129                 for (j = 0; j < cfg->map[lexid].len; j++)
130                 {
131                         cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
132                         ptr = NEXTVAL(ptr);
133                 }
134                 MemoryContextSwitchTo(oldcontext);
135
136                 if (a != toasted_a)
137                         pfree(a);
138         }
139
140         SPI_freeplan(plan);
141         SPI_finish();
142         cfg->prs_id = name2id_prs(prsname);
143         pfree(prsname);
144         pfree(nsp);
145         for (i = 0; i < cfg->len; i++)
146         {
147                 for (j = 0; j < cfg->map[i].len; j++)
148                 {
149                         ptr = (text *) DatumGetPointer(cfg->map[i].dict_id[j]);
150                         cfg->map[i].dict_id[j] = ObjectIdGetDatum(name2id_dict(ptr));
151                         pfree(ptr);
152                 }
153         }
154 }
155
156 typedef struct
157 {
158         TSCfgInfo  *last_cfg;
159         int                     len;
160         int                     reallen;
161         TSCfgInfo  *list;
162         SNMap           name2id_map;
163 }       CFGList;
164
165 static CFGList CList = {NULL, 0, 0, NULL, {0, 0, NULL}};
166
167 void
168 reset_cfg(void)
169 {
170         freeSNMap(&(CList.name2id_map));
171         if (CList.list)
172         {
173                 int                     i,
174                                         j;
175
176                 for (i = 0; i < CList.len; i++)
177                         if (CList.list[i].map)
178                         {
179                                 for (j = 0; j < CList.list[i].len; j++)
180                                         if (CList.list[i].map[j].dict_id)
181                                                 free(CList.list[i].map[j].dict_id);
182                                 free(CList.list[i].map);
183                         }
184                 free(CList.list);
185         }
186         memset(&CList, 0, sizeof(CFGList));
187 }
188
189 static int
190 comparecfg(const void *a, const void *b)
191 {
192         if (((TSCfgInfo *) a)->id == ((TSCfgInfo *) b)->id)
193                 return 0;
194         return (((TSCfgInfo *) a)->id < ((TSCfgInfo *) b)->id) ? -1 : 1;
195 }
196
197 TSCfgInfo *
198 findcfg(Oid id)
199 {
200         /* last used cfg */
201         if (CList.last_cfg && CList.last_cfg->id == id)
202                 return CList.last_cfg;
203
204         /* already used cfg */
205         if (CList.len != 0)
206         {
207                 TSCfgInfo       key;
208
209                 key.id = id;
210                 CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
211                 if (CList.last_cfg != NULL)
212                         return CList.last_cfg;
213         }
214
215         /* last chance */
216         if (CList.len == CList.reallen)
217         {
218                 TSCfgInfo  *tmp;
219                 int                     reallen = (CList.reallen) ? 2 * CList.reallen : 16;
220
221                 tmp = (TSCfgInfo *) realloc(CList.list, sizeof(TSCfgInfo) * reallen);
222                 if (!tmp)
223                         ts_error(ERROR, "No memory");
224                 CList.reallen = reallen;
225                 CList.list = tmp;
226         }
227         CList.last_cfg = &(CList.list[CList.len]);
228         init_cfg(id, CList.last_cfg);
229         CList.len++;
230         qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
231         return findcfg(id); /* qsort changed order!! */ ;
232 }
233
234
235 Oid
236 name2id_cfg(text *name)
237 {
238         Oid                     arg[1];
239         bool            isnull;
240         Datum           pars[1];
241         int                     stat;
242         Oid                     id = findSNMap_t(&(CList.name2id_map), name);
243         void       *plan;
244         char       *nsp;
245         char            buf[1024];
246
247         arg[0] = TEXTOID;
248         pars[0] = PointerGetDatum(name);
249
250         if (id)
251                 return id;
252
253         nsp = get_namespace(TSNSP_FunctionOid);
254         SPI_connect();
255         sprintf(buf, "select oid from %s.pg_ts_cfg where ts_name = $1", nsp);
256         plan = SPI_prepare(buf, 1, arg);
257         if (!plan)
258                 /* internal error */
259                 elog(ERROR, "SPI_prepare() failed");
260
261         stat = SPI_execp(plan, pars, " ", 1);
262         if (stat < 0)
263                 /* internal error */
264                 elog(ERROR, "SPI_execp return %d", stat);
265         if (SPI_processed > 0)
266         {
267                 id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
268                 if (isnull)
269                         ereport(ERROR,
270                                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
271                                          errmsg("null id for tsearch config")));
272         }
273         else
274                 ereport(ERROR,
275                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
276                                  errmsg("no tsearch config")));
277
278         SPI_freeplan(plan);
279         SPI_finish();
280         addSNMap_t(&(CList.name2id_map), name, id);
281         return id;
282 }
283
284 void
285 parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
286 {
287         int                     type,
288                                 lenlemm;
289         char       *lemm = NULL;
290         WParserInfo *prsobj = findprs(cfg->prs_id);
291         LexizeData      ldata;
292         TSLexeme   *norms;
293
294         prsobj->prs = (void *) DatumGetPointer(
295                                                                                    FunctionCall2(
296                                                                                                            &(prsobj->start_info),
297                                                                                                                  PointerGetDatum(buf),
298                                                                                                                  Int32GetDatum(buflen)
299                                                                                                                  )
300                 );
301
302         LexizeInit(&ldata, cfg);
303
304         do
305         {
306                 type = DatumGetInt32(FunctionCall3(
307                                                                                    &(prsobj->getlexeme_info),
308                                                                                    PointerGetDatum(prsobj->prs),
309                                                                                    PointerGetDatum(&lemm),
310                                                                                    PointerGetDatum(&lenlemm)));
311
312                 if (type > 0 && lenlemm >= MAXSTRLEN)
313                 {
314 #ifdef IGNORE_LONGLEXEME
315                         ereport(NOTICE,
316                                         (errcode(ERRCODE_SYNTAX_ERROR),
317                                          errmsg("A word you are indexing is too long. It will be ignored.")));
318                         continue;
319 #else
320                         ereport(ERROR,
321                                         (errcode(ERRCODE_SYNTAX_ERROR),
322                                          errmsg("A word you are indexing is too long")));
323 #endif
324                 }
325
326                 LexizeAddLemm(&ldata, type, lemm, lenlemm);
327
328                 while ((norms = LexizeExec(&ldata, NULL)) != NULL)
329                 {
330                         TSLexeme   *ptr = norms;
331
332                         prs->pos++;                     /* set pos */
333
334                         while (ptr->lexeme)
335                         {
336                                 if (prs->curwords == prs->lenwords)
337                                 {
338                                         prs->lenwords *= 2;
339                                         prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD));
340                                 }
341
342                                 if (ptr->flags & TSL_ADDPOS)
343                                         prs->pos++;
344                                 prs->words[prs->curwords].len = strlen(ptr->lexeme);
345                                 prs->words[prs->curwords].word = ptr->lexeme;
346                                 prs->words[prs->curwords].nvariant = ptr->nvariant;
347                                 prs->words[prs->curwords].alen = 0;
348                                 prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
349                                 ptr++;
350                                 prs->curwords++;
351                         }
352                         pfree(norms);
353                 }
354         } while (type > 0);
355
356         FunctionCall1(
357                                   &(prsobj->end_info),
358                                   PointerGetDatum(prsobj->prs)
359                 );
360 }
361
362 static void
363 hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type)
364 {
365         while (prs->curwords >= prs->lenwords)
366         {
367                 prs->lenwords *= 2;
368                 prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
369         }
370         memset(&(prs->words[prs->curwords]), 0, sizeof(HLWORD));
371         prs->words[prs->curwords].type = (uint8) type;
372         prs->words[prs->curwords].len = buflen;
373         prs->words[prs->curwords].word = palloc(buflen);
374         memcpy(prs->words[prs->curwords].word, buf, buflen);
375         prs->curwords++;
376 }
377
378 static void
379 hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen)
380 {
381         int                     i;
382         ITEM       *item = GETQUERY(query);
383         HLWORD     *word;
384
385         while (prs->curwords + query->size >= prs->lenwords)
386         {
387                 prs->lenwords *= 2;
388                 prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
389         }
390
391         word = &(prs->words[prs->curwords - 1]);
392         for (i = 0; i < query->size; i++)
393         {
394                 if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
395                 {
396                         if (word->item)
397                         {
398                                 memcpy(&(prs->words[prs->curwords]), word, sizeof(HLWORD));
399                                 prs->words[prs->curwords].item = item;
400                                 prs->words[prs->curwords].repeated = 1;
401                                 prs->curwords++;
402                         }
403                         else
404                                 word->item = item;
405                 }
406                 item++;
407         }
408 }
409
410 static void
411 addHLParsedLex(HLPRSTEXT * prs, QUERYTYPE * query, ParsedLex * lexs, TSLexeme * norms)
412 {
413         ParsedLex  *tmplexs;
414         TSLexeme   *ptr;
415
416         while (lexs)
417         {
418
419                 if (lexs->type > 0)
420                         hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
421
422                 ptr = norms;
423                 while (ptr && ptr->lexeme)
424                 {
425                         hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
426                         ptr++;
427                 }
428
429                 tmplexs = lexs->next;
430                 pfree(lexs);
431                 lexs = tmplexs;
432         }
433
434         if (norms)
435         {
436                 ptr = norms;
437                 while (ptr->lexeme)
438                 {
439                         pfree(ptr->lexeme);
440                         ptr++;
441                 }
442                 pfree(norms);
443         }
444 }
445
446 void
447 hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen)
448 {
449         int                     type,
450                                 lenlemm;
451         char       *lemm = NULL;
452         WParserInfo *prsobj = findprs(cfg->prs_id);
453         LexizeData      ldata;
454         TSLexeme   *norms;
455         ParsedLex  *lexs;
456
457         prsobj->prs = (void *) DatumGetPointer(
458                                                                                    FunctionCall2(
459                                                                                                            &(prsobj->start_info),
460                                                                                                                  PointerGetDatum(buf),
461                                                                                                                  Int32GetDatum(buflen)
462                                                                                                                  )
463                 );
464
465         LexizeInit(&ldata, cfg);
466
467         do
468         {
469                 type = DatumGetInt32(FunctionCall3(
470                                                                                    &(prsobj->getlexeme_info),
471                                                                                    PointerGetDatum(prsobj->prs),
472                                                                                    PointerGetDatum(&lemm),
473                                                                                    PointerGetDatum(&lenlemm)));
474
475                 if (type > 0 && lenlemm >= MAXSTRLEN)
476                 {
477 #ifdef IGNORE_LONGLEXEME
478                         ereport(NOTICE,
479                                         (errcode(ERRCODE_SYNTAX_ERROR),
480                                          errmsg("A word you are indexing is too long. It will be ignored.")));
481                         continue;
482 #else
483                         ereport(ERROR,
484                                         (errcode(ERRCODE_SYNTAX_ERROR),
485                                          errmsg("A word you are indexing is too long")));
486 #endif
487                 }
488
489                 LexizeAddLemm(&ldata, type, lemm, lenlemm);
490
491                 do
492                 {
493                         if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
494                                 addHLParsedLex(prs, query, lexs, norms);
495                         else
496                                 addHLParsedLex(prs, query, lexs, NULL);
497                 } while (norms);
498
499         } while (type > 0);
500
501         FunctionCall1(
502                                   &(prsobj->end_info),
503                                   PointerGetDatum(prsobj->prs)
504                 );
505 }
506
507 text *
508 genhl(HLPRSTEXT * prs)
509 {
510         text       *out;
511         int                     len = 128;
512         char       *ptr;
513         HLWORD     *wrd = prs->words;
514
515         out = (text *) palloc(len);
516         ptr = ((char *) out) + VARHDRSZ;
517
518         while (wrd - prs->words < prs->curwords)
519         {
520                 while (wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char *) out)) >= len)
521                 {
522                         int                     dist = ptr - ((char *) out);
523
524                         len *= 2;
525                         out = (text *) repalloc(out, len);
526                         ptr = ((char *) out) + dist;
527                 }
528
529                 if (wrd->in && !wrd->repeated)
530                 {
531                         if (wrd->replace)
532                         {
533                                 *ptr = ' ';
534                                 ptr++;
535                         }
536                         else
537                         {
538                                 if (wrd->selected)
539                                 {
540                                         memcpy(ptr, prs->startsel, prs->startsellen);
541                                         ptr += prs->startsellen;
542                                 }
543                                 memcpy(ptr, wrd->word, wrd->len);
544                                 ptr += wrd->len;
545                                 if (wrd->selected)
546                                 {
547                                         memcpy(ptr, prs->stopsel, prs->stopsellen);
548                                         ptr += prs->stopsellen;
549                                 }
550                         }
551                 }
552                 else if (!wrd->repeated)
553                         pfree(wrd->word);
554
555                 wrd++;
556         }
557
558         VARATT_SIZEP(out) = ptr - ((char *) out);
559         return out;
560 }
561
562 int
563 get_currcfg(void)
564 {
565         Oid                     arg[1] = {TEXTOID};
566         const char *curlocale;
567         Datum           pars[1];
568         bool            isnull;
569         int                     stat;
570         char            buf[1024];
571         char       *nsp;
572         void       *plan;
573
574         if (current_cfg_id > 0)
575                 return current_cfg_id;
576
577         nsp = get_namespace(TSNSP_FunctionOid);
578         SPI_connect();
579         sprintf(buf, "select oid from %s.pg_ts_cfg where locale = $1 ", nsp);
580         pfree(nsp);
581         plan = SPI_prepare(buf, 1, arg);
582         if (!plan)
583                 /* internal error */
584                 elog(ERROR, "SPI_prepare() failed");
585
586         curlocale = setlocale(LC_CTYPE, NULL);
587         pars[0] = PointerGetDatum(char2text((char *) curlocale));
588         stat = SPI_execp(plan, pars, " ", 1);
589
590         if (stat < 0)
591                 /* internal error */
592                 elog(ERROR, "SPI_execp return %d", stat);
593         if (SPI_processed > 0)
594                 current_cfg_id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
595         else
596                 ereport(ERROR,
597                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
598                                  errmsg("could not find tsearch config by locale")));
599
600         pfree(DatumGetPointer(pars[0]));
601         SPI_freeplan(plan);
602         SPI_finish();
603         return current_cfg_id;
604 }
605
606 PG_FUNCTION_INFO_V1(set_curcfg);
607 Datum           set_curcfg(PG_FUNCTION_ARGS);
608 Datum
609 set_curcfg(PG_FUNCTION_ARGS)
610 {
611         SET_FUNCOID();
612         findcfg(PG_GETARG_OID(0));
613         current_cfg_id = PG_GETARG_OID(0);
614         PG_RETURN_VOID();
615 }
616
617 PG_FUNCTION_INFO_V1(set_curcfg_byname);
618 Datum           set_curcfg_byname(PG_FUNCTION_ARGS);
619 Datum
620 set_curcfg_byname(PG_FUNCTION_ARGS)
621 {
622         text       *name = PG_GETARG_TEXT_P(0);
623
624         SET_FUNCOID();
625         DirectFunctionCall1(
626                                                 set_curcfg,
627                                                 ObjectIdGetDatum(name2id_cfg(name))
628                 );
629         PG_FREE_IF_COPY(name, 0);
630         PG_RETURN_VOID();
631 }
632
633 PG_FUNCTION_INFO_V1(show_curcfg);
634 Datum           show_curcfg(PG_FUNCTION_ARGS);
635 Datum
636 show_curcfg(PG_FUNCTION_ARGS)
637 {
638         SET_FUNCOID();
639         PG_RETURN_OID(get_currcfg());
640 }
641
642 PG_FUNCTION_INFO_V1(reset_tsearch);
643 Datum           reset_tsearch(PG_FUNCTION_ARGS);
644 Datum
645 reset_tsearch(PG_FUNCTION_ARGS)
646 {
647         SET_FUNCOID();
648         ts_error(NOTICE, "TSearch cache cleaned");
649         PG_RETURN_VOID();
650 }