/*-------------------------------------------------------------------------
 *
 * ts_utils.c
 *		various support functions
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.12 2008/06/19 16:52:24 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <ctype.h>

#include "miscadmin.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"


/*
 * Given the base name and extension of a tsearch config file, return
 * its full path name.  The base name is assumed to be user-supplied,
 * and is checked to prevent pathname attacks.  The extension is assumed
 * to be safe.
 *
 * The path has the form "<sharepath>/tsearch_data/<basename>.<extension>",
 * where <sharepath> is obtained from get_share_path(my_exec_path, ...).
 *
 * An error is reported if the base name contains any character other than
 * a-z, 0-9 or underscore, or if the complete path would not fit into
 * MAXPGPATH bytes.
 *
 * The result is a palloc'd string.
 */
char *
get_tsearch_config_filename(const char *basename,
							const char *extension)
{
	char		sharepath[MAXPGPATH];
	char	   *result;

	/*
	 * We limit the basename to contain a-z, 0-9, and underscores.  This may
	 * be overly restrictive, but we don't want to allow access to anything
	 * outside the tsearch_data directory, so for instance '/' *must* be
	 * rejected, and on some platforms '\' and ':' are risky as well. Allowing
	 * uppercase might result in incompatible behavior between case-sensitive
	 * and case-insensitive filesystems, and non-ASCII characters create other
	 * interesting risks, so on the whole a tight policy seems best.
	 */
	if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid text search configuration file name \"%s\"",
						basename)));

	get_share_path(my_exec_path, sharepath);

	/*
	 * Refuse names that would make the path overflow the result buffer;
	 * otherwise snprintf would silently truncate it, possibly chopping off
	 * the extension.  The lengths are checked up front rather than via
	 * snprintf's return value, which not every implementation reports in
	 * the same way on truncation.  sizeof() of the literal accounts for the
	 * fixed "/tsearch_data/" and "." pieces plus the terminating NUL.
	 */
	if (strlen(sharepath) + strlen(basename) + strlen(extension) +
		sizeof("/tsearch_data/.") > MAXPGPATH)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("text search configuration file name \"%s\" is too long",
						basename)));

	result = palloc(MAXPGPATH);
	snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
			 sharepath, basename, extension);

	return result;
}

/*
 * qsort/bsearch comparator for arrays of C strings: each argument points
 * to an array element (a char pointer), and the pointed-to strings are
 * ordered with strcmp.
 */
static int
comparestr(const void *a, const void *b)
{
	return strcmp(*(const char *const *) a, *(const char *const *) b);
}

/*
 * Reads a stop-word file.  Each word is run through 'wordop'
 * function, if given.  wordop may either modify the input in-place,
 * or palloc a new version.
 *
 * fname is the base name of the file; the full path is built by
 * get_tsearch_config_filename() with the extension "stop".  If fname is
 * NULL or empty, no file is read and the resulting list is empty.
 *
 * On return, s->len holds the number of words and s->stop the array of
 * words (NULL if none were read), sorted with comparestr so that
 * searchstoplist() can binary-search it.
 */
void
readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
{
	char	  **stop = NULL;

	s->len = 0;
	if (fname && *fname)
	{
		char	   *filename = get_tsearch_config_filename(fname, "stop");
		tsearch_readline_state trst;
		char	   *line;
		int			reallen = 0;	/* allocated size of stop[], in entries */

		if (!tsearch_readline_begin(&trst, filename))
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("could not open stop-word file \"%s\": %m",
							filename)));

		while ((line = tsearch_readline(&trst)) != NULL)
		{
			char	   *pbuf = line;

			/*
			 * Cut the line at its first whitespace character, stepping over
			 * (possibly multibyte) characters.  Only the leading word of a
			 * line is kept; a line starting with whitespace becomes empty.
			 */
			while (*pbuf && !t_isspace(pbuf))
				pbuf += pg_mblen(pbuf);
			*pbuf = '\0';

			/* Skip empty lines */
			if (*line == '\0')
			{
				pfree(line);
				continue;
			}

			/* Make room for one more entry: start at 64, then double */
			if (s->len >= reallen)
			{
				if (reallen == 0)
				{
					reallen = 64;
					stop = (char **) palloc(sizeof(char *) * reallen);
				}
				else
				{
					reallen *= 2;
					stop = (char **) repalloc((void *) stop,
											  sizeof(char *) * reallen);
				}
			}

			if (wordop)
			{
				stop[s->len] = wordop(line);
				/* wordop returned a new string, so the input is not needed */
				if (stop[s->len] != line)
					pfree(line);
			}
			else
				stop[s->len] = line;

			(s->len)++;
		}

		tsearch_readline_end(&trst);
		pfree(filename);
	}

	s->stop = stop;

	/* Sort to allow binary searching */
	if (s->stop && s->len > 0)
		qsort(s->stop, s->len, sizeof(char *), comparestr);
}

/*
 * Returns true if 'key' is present in the stop list 's', false otherwise
 * (including when the list is empty).  The list must already be sorted
 * with comparestr, as readstoplist() leaves it.
 */
bool
searchstoplist(StopList *s, char *key)
{
	return s->stop != NULL &&
		s->len > 0 &&
		bsearch(&key, s->stop, s->len, sizeof(char *), comparestr) != NULL;
}