]> granicus.if.org Git - postgresql/blob - src/backend/tsearch/ts_utils.c
Implement SEMI and ANTI joins in the planner and executor. (Semijoins replace
[postgresql] / src / backend / tsearch / ts_utils.c
1 /*-------------------------------------------------------------------------
2  *
3  * ts_utils.c
4  *              various support functions
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.12 2008/06/19 16:52:24 tgl Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include <ctype.h>
18
19 #include "miscadmin.h"
20 #include "tsearch/ts_locale.h"
21 #include "tsearch/ts_public.h"
22 #include "tsearch/ts_utils.h"
23 #include "utils/builtins.h"
24
25
26 /*
27  * Given the base name and extension of a tsearch config file, return
28  * its full path name.  The base name is assumed to be user-supplied,
29  * and is checked to prevent pathname attacks.  The extension is assumed
30  * to be safe.
31  *
32  * The result is a palloc'd string.
33  */
34 char *
35 get_tsearch_config_filename(const char *basename,
36                                                         const char *extension)
37 {
38         char            sharepath[MAXPGPATH];
39         char       *result;
40
41         /*
42          * We limit the basename to contain a-z, 0-9, and underscores.  This may
43          * be overly restrictive, but we don't want to allow access to anything
44          * outside the tsearch_data directory, so for instance '/' *must* be
45          * rejected, and on some platforms '\' and ':' are risky as well. Allowing
46          * uppercase might result in incompatible behavior between case-sensitive
47          * and case-insensitive filesystems, and non-ASCII characters create other
48          * interesting risks, so on the whole a tight policy seems best.
49          */
50         if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
51                 ereport(ERROR,
52                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53                                  errmsg("invalid text search configuration file name \"%s\"",
54                                                 basename)));
55
56         get_share_path(my_exec_path, sharepath);
57         result = palloc(MAXPGPATH);
58         snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
59                          sharepath, basename, extension);
60
61         return result;
62 }
63
/*
 * qsort/bsearch comparator for arrays of C-string pointers.
 *
 * a and b each point at an array element (a "char *"), not at the
 * characters themselves; the referenced strings are ordered with strcmp().
 */
static int
comparestr(const void *a, const void *b)
{
	const char *left = *(const char *const *) a;
	const char *right = *(const char *const *) b;

	return strcmp(left, right);
}
69
70 /*
71  * Reads a stop-word file. Each word is run through 'wordop'
72  * function, if given.  wordop may either modify the input in-place,
73  * or palloc a new version.
74  */
75 void
76 readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
77 {
78         char      **stop = NULL;
79
80         s->len = 0;
81         if (fname && *fname)
82         {
83                 char       *filename = get_tsearch_config_filename(fname, "stop");
84                 tsearch_readline_state trst;
85                 char       *line;
86                 int                     reallen = 0;
87
88                 if (!tsearch_readline_begin(&trst, filename))
89                         ereport(ERROR,
90                                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
91                                          errmsg("could not open stop-word file \"%s\": %m",
92                                                         filename)));
93
94                 while ((line = tsearch_readline(&trst)) != NULL)
95                 {
96                         char       *pbuf = line;
97
98                         /* Trim trailing space */
99                         while (*pbuf && !t_isspace(pbuf))
100                                 pbuf += pg_mblen(pbuf);
101                         *pbuf = '\0';
102
103                         /* Skip empty lines */
104                         if (*line == '\0')
105                         {
106                                 pfree(line);
107                                 continue;
108                         }
109
110                         if (s->len >= reallen)
111                         {
112                                 if (reallen == 0)
113                                 {
114                                         reallen = 64;
115                                         stop = (char **) palloc(sizeof(char *) * reallen);
116                                 }
117                                 else
118                                 {
119                                         reallen *= 2;
120                                         stop = (char **) repalloc((void *) stop,
121                                                                                           sizeof(char *) * reallen);
122                                 }
123                         }
124
125                         if (wordop)
126                         {
127                                 stop[s->len] = wordop(line);
128                                 if (stop[s->len] != line)
129                                         pfree(line);
130                         }
131                         else
132                                 stop[s->len] = line;
133
134                         (s->len)++;
135                 }
136
137                 tsearch_readline_end(&trst);
138                 pfree(filename);
139         }
140
141         s->stop = stop;
142
143         /* Sort to allow binary searching */
144         if (s->stop && s->len > 0)
145                 qsort(s->stop, s->len, sizeof(char *), comparestr);
146 }
147
148 bool
149 searchstoplist(StopList *s, char *key)
150 {
151         return (s->stop && s->len > 0 &&
152                         bsearch(&key, s->stop, s->len,
153                                         sizeof(char *), comparestr)) ? true : false;
154 }