From 8bf358c18ec930ddfb998873369e2fc38608d3e1 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 13 Apr 2018 13:49:52 -0400
Subject: [PATCH] Improve regression test coverage for
 src/backend/tsearch/spell.c.

In passing, throw an error if the AF count is too small, rather than
just silently discarding extra affix entries.

Note that the new regression test cases require installing the
updated src/backend/tsearch/dicts files.

Arthur Zakirov

Discussion: https://postgr.es/m/20180413113447.GA32474@zakirov.localdomain
---
 .../tsearch/dicts/hunspell_sample_long.affix  | 25 ++++++++++++++--
 .../tsearch/dicts/hunspell_sample_long.dict   |  3 ++
 .../tsearch/dicts/hunspell_sample_num.affix   |  8 +++++
 .../tsearch/dicts/hunspell_sample_num.dict    |  3 +-
 src/backend/tsearch/spell.c                   |  9 ++++--
 src/test/regress/expected/tsdicts.out         | 30 +++++++++++++++++++
 src/test/regress/sql/tsdicts.sql              |  5 ++++
 7 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/src/backend/tsearch/dicts/hunspell_sample_long.affix b/src/backend/tsearch/dicts/hunspell_sample_long.affix
index fc0d485a4d..d5df7a31f3 100644
--- a/src/backend/tsearch/dicts/hunspell_sample_long.affix
+++ b/src/backend/tsearch/dicts/hunspell_sample_long.affix
@@ -1,15 +1,23 @@
 FLAG long
 
-AF 7
+AF 11
 AF cZ		#1
 AF cL		#2
 AF sGsJpUsS	#3
 AF sSpB		#4
 AF cZsS		#5
-AF sScZs\	#6
+AF sScZs\sE	#6
 AF sA		#7
+AF CaCp		#8
+AF CcCp		#9
+AF sD		#10
+AF sB		#11
 
 COMPOUNDFLAG cZ
+COMPOUNDBEGIN Ca
+COMPOUNDMIDDLE Cb
+COMPOUNDEND Cc
+COMPOUNDPERMITFLAG Cp
 ONLYINCOMPOUND cL
 
 PFX pB Y 1
@@ -28,7 +36,18 @@ SFX sS Y 1
 SFX sS   0	S	[^SXZHY]
 
 SFX sA Y 1
-SFX sA   Y	IES	[^AEIOU]Y
+SFX sA   Y	IES	[^AEIOU]Y{1}
 
+SFX sB Y 1
+SFX sB   0	ED	K{1}
+
+# Affixes with compound flags
 SFX s\ N 1
 SFX s\   0	Y/2	[^Y]
+
+SFX sE N 1
+SFX sE   0	S/2	[^S]
+
+# Check duplicate affixes
+SFX sD N 1
+SFX sD   0	S/2	[^S]
diff --git a/src/backend/tsearch/dicts/hunspell_sample_long.dict b/src/backend/tsearch/dicts/hunspell_sample_long.dict
index 96ecbf007a..5f60a07ce2 100644
--- a/src/backend/tsearch/dicts/hunspell_sample_long.dict
+++ b/src/backend/tsearch/dicts/hunspell_sample_long.dict
@@ -1,4 +1,5 @@
 book/3
+book/11
 booking/4
 footballklubber
 foot/5
@@ -6,3 +7,5 @@ football/1
 ball/6
 klubber/1
 sky/7
+ex-/8
+machina/9
\ No newline at end of file
diff --git a/src/backend/tsearch/dicts/hunspell_sample_num.affix b/src/backend/tsearch/dicts/hunspell_sample_num.affix
index 5f2f8e9bca..0c4766a191 100644
--- a/src/backend/tsearch/dicts/hunspell_sample_num.affix
+++ b/src/backend/tsearch/dicts/hunspell_sample_num.affix
@@ -18,6 +18,14 @@ SFX 302   0	ING		[^E]
 SFX 303 Y 1
 SFX 303   0	S	[^SXZHY]
 
+# Strip the ED suffix to recover the lexeme, for base words ending in K
+SFX 306 Y 1
+SFX 306   0	ED	K{1}
+
+# Restore the trailing Y to recover the lexeme, for base words ending in Y
+SFX 307 Y 1
+SFX 307   Y	0	Y*
+
 SFX 304 Y 1
 SFX 304   Y	IES	[^AEIOU]Y
 
diff --git a/src/backend/tsearch/dicts/hunspell_sample_num.dict b/src/backend/tsearch/dicts/hunspell_sample_num.dict
index 9db29dc780..fbc321d5b1 100644
--- a/src/backend/tsearch/dicts/hunspell_sample_num.dict
+++ b/src/backend/tsearch/dicts/hunspell_sample_num.dict
@@ -1,8 +1,9 @@
 book/302,301,202,303
+book/306
 booking/303,201
 footballklubber
 foot/101,303
 football/101
 ball/303,101,305
 klubber/101
-sky/304
+sky/304,307
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
index 65d99da292..6f5b635413 100644
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -1303,7 +1303,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
 			{
 				Conf->useFlagAliases = true;
 				naffix = atoi(sflag);
-				if (naffix == 0)
+				if (naffix <= 0)
 					ereport(ERROR,
 							(errcode(ERRCODE_CONFIG_FILE_ERROR),
 							 errmsg("invalid number of flag vector aliases")));
@@ -1318,7 +1318,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
 				Conf->AffixData[curaffix] = VoidString;
 				curaffix++;
 			}
-			/* Other lines is aliases */
+			/* Other lines are aliases */
 			else
 			{
 				if (curaffix < naffix)
@@ -1326,6 +1326,11 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
 					Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
 					curaffix++;
 				}
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+							 errmsg("number of aliases exceeds specified number %d",
+									naffix - 1)));
 			}
 			goto nextline;
 		}
diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out
index 0c1d7c7675..2524ec2768 100644
--- a/src/test/regress/expected/tsdicts.out
+++ b/src/test/regress/expected/tsdicts.out
@@ -263,6 +263,12 @@ SELECT ts_lexize('hunspell_long', 'unbook');
  {book}
 (1 row)
 
+SELECT ts_lexize('hunspell_long', 'booked');
+ ts_lexize 
+-----------
+ {book}
+(1 row)
+
 SELECT ts_lexize('hunspell_long', 'footklubber');
    ts_lexize    
 ----------------
@@ -281,12 +287,24 @@ SELECT ts_lexize('hunspell_long', 'ballyklubber');
  {ball,klubber}
 (1 row)
 
+SELECT ts_lexize('hunspell_long', 'ballsklubber');
+   ts_lexize    
+----------------
+ {ball,klubber}
+(1 row)
+
 SELECT ts_lexize('hunspell_long', 'footballyklubber');
       ts_lexize      
 ---------------------
  {foot,ball,klubber}
 (1 row)
 
+SELECT ts_lexize('hunspell_long', 'ex-machina');
+   ts_lexize   
+---------------
+ {ex-,machina}
+(1 row)
+
 -- Test ISpell dictionary with hunspell affix file with FLAG num parameter
 CREATE TEXT SEARCH DICTIONARY hunspell_num (
                         Template=ispell,
@@ -299,6 +317,12 @@ SELECT ts_lexize('hunspell_num', 'skies');
  {sky}
 (1 row)
 
+SELECT ts_lexize('hunspell_num', 'sk');
+ ts_lexize 
+-----------
+ {sky}
+(1 row)
+
 SELECT ts_lexize('hunspell_num', 'bookings');
    ts_lexize    
 ----------------
@@ -359,6 +383,12 @@ SELECT ts_lexize('hunspell_num', 'unbook');
  {book}
 (1 row)
 
+SELECT ts_lexize('hunspell_num', 'booked');
+ ts_lexize 
+-----------
+ {book}
+(1 row)
+
 SELECT ts_lexize('hunspell_num', 'footklubber');
    ts_lexize    
 ----------------
diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql
index 1633c0d066..60906f6549 100644
--- a/src/test/regress/sql/tsdicts.sql
+++ b/src/test/regress/sql/tsdicts.sql
@@ -66,11 +66,14 @@ SELECT ts_lexize('hunspell_long', 'rebook');
 SELECT ts_lexize('hunspell_long', 'unbookings');
 SELECT ts_lexize('hunspell_long', 'unbooking');
 SELECT ts_lexize('hunspell_long', 'unbook');
+SELECT ts_lexize('hunspell_long', 'booked');
 
 SELECT ts_lexize('hunspell_long', 'footklubber');
 SELECT ts_lexize('hunspell_long', 'footballklubber');
 SELECT ts_lexize('hunspell_long', 'ballyklubber');
+SELECT ts_lexize('hunspell_long', 'ballsklubber');
 SELECT ts_lexize('hunspell_long', 'footballyklubber');
+SELECT ts_lexize('hunspell_long', 'ex-machina');
 
 -- Test ISpell dictionary with hunspell affix file with FLAG num parameter
 CREATE TEXT SEARCH DICTIONARY hunspell_num (
@@ -80,6 +83,7 @@ CREATE TEXT SEARCH DICTIONARY hunspell_num (
 );
 
 SELECT ts_lexize('hunspell_num', 'skies');
+SELECT ts_lexize('hunspell_num', 'sk');
 SELECT ts_lexize('hunspell_num', 'bookings');
 SELECT ts_lexize('hunspell_num', 'booking');
 SELECT ts_lexize('hunspell_num', 'foot');
@@ -90,6 +94,7 @@ SELECT ts_lexize('hunspell_num', 'rebook');
 SELECT ts_lexize('hunspell_num', 'unbookings');
 SELECT ts_lexize('hunspell_num', 'unbooking');
 SELECT ts_lexize('hunspell_num', 'unbook');
+SELECT ts_lexize('hunspell_num', 'booked');
 
 SELECT ts_lexize('hunspell_num', 'footklubber');
 SELECT ts_lexize('hunspell_num', 'footballklubber');
-- 
2.40.0