From c7faf451608a08e1aa65951bb02fac7f524f1d7c Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Fri, 2 Jun 2006 15:35:42 +0000 Subject: [PATCH] Add more strict check of stop and non-recognized words, allow only recognized words in thesaurus configuration file. --- contrib/tsearch2/dict_thesaurus.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/contrib/tsearch2/dict_thesaurus.c b/contrib/tsearch2/dict_thesaurus.c index 8e543a4db7..a584aa15b7 100644 --- a/contrib/tsearch2/dict_thesaurus.c +++ b/contrib/tsearch2/dict_thesaurus.c @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.1 2006/05/31 14:05:31 teodor Exp $ */ +/* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.2 2006/06/02 15:35:42 teodor Exp $ */ /* * thesaurus @@ -330,8 +330,12 @@ compileTheLexeme(DictThesaurus *d) { ); if ( !(ptr && ptr->lexeme) ) { + if ( !ptr ) + elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary", d->wrds[i].lexeme); + else + elog(ERROR,"Thesaurus: word '%s' is recognized as stop-word, assign any stop-word", d->wrds[i].lexeme); + newwrds = addCompiledLexeme( newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0); - elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary or it's a stop-word, assign any non-recognized word", d->wrds[i].lexeme); } else { while( ptr->lexeme ) { TSLexeme *remptr = ptr+1; @@ -420,7 +424,7 @@ compileTheSubstitute(DictThesaurus *d) { ); reml = lexized; - if ( lexized ) { + if ( lexized && lexized->lexeme ) { int toset = (lexized->lexeme && outptr != d->subst[i].res ) ? 
(outptr - d->subst[i].res) : -1; while( lexized->lexeme ) { @@ -443,6 +447,8 @@ compileTheSubstitute(DictThesaurus *d) { if ( toset > 0) d->subst[i].res[toset].flags |= TSL_ADDPOS; + } else { + elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary or it's a stop-word, ignored", inptr->lexeme); } if ( inptr->lexeme ) @@ -450,6 +456,9 @@ compileTheSubstitute(DictThesaurus *d) { inptr++; } + if ( outptr == d->subst[i].res ) + elog(ERROR,"Thesaurus: all words in subsitution aren't recognized by subdictionary"); + d->subst[i].reslen = outptr - d->subst[i].res; free(rem); -- 2.40.0