granicus.if.org Git - postgresql/commitdiff
Add strict_word_similarity to pg_trgm module
author: Teodor Sigaev <teodor@sigaev.ru>
Wed, 21 Mar 2018 11:57:42 +0000 (14:57 +0300)
committer: Teodor Sigaev <teodor@sigaev.ru>
Wed, 21 Mar 2018 11:57:42 +0000 (14:57 +0300)
strict_word_similarity is similar to the existing word_similarity function, but
it takes word boundaries into account when computing similarity.

Author: Alexander Korotkov
Review by: David Steele, Liudmila Mantrova, me
Discussion: https://www.postgresql.org/message-id/flat/CY4PR17MB13207ED8310F847CF117EED0D85A0@CY4PR17MB1320.namprd17.prod.outlook.com

contrib/pg_trgm/Makefile
contrib/pg_trgm/expected/pg_strict_word_trgm.out [new file with mode: 0644]
contrib/pg_trgm/pg_trgm--1.3--1.4.sql [new file with mode: 0644]
contrib/pg_trgm/pg_trgm.control
contrib/pg_trgm/sql/pg_strict_word_trgm.sql [new file with mode: 0644]
contrib/pg_trgm/trgm.h
contrib/pg_trgm/trgm_gin.c
contrib/pg_trgm/trgm_gist.c
contrib/pg_trgm/trgm_op.c
doc/src/sgml/pgtrgm.sgml

index 212a89039a6cd98f6aca8557aa4c247f331b5473..dfecc2a37fc0aad8aaa2a487d7dc6179fba00f92 100644 (file)
@@ -4,11 +4,12 @@ MODULE_big = pg_trgm
 OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES)
 
 EXTENSION = pg_trgm
-DATA = pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
+DATA = pg_trgm--1.3--1.4.sql \
+       pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
        pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql
 PGFILEDESC = "pg_trgm - trigram matching"
 
-REGRESS = pg_trgm pg_word_trgm
+REGRESS = pg_trgm pg_word_trgm pg_strict_word_trgm
 
 ifdef USE_PGXS
 PG_CONFIG = pg_config
diff --git a/contrib/pg_trgm/expected/pg_strict_word_trgm.out b/contrib/pg_trgm/expected/pg_strict_word_trgm.out
new file mode 100644 (file)
index 0000000..43898a3
--- /dev/null
@@ -0,0 +1,1025 @@
+DROP INDEX trgm_idx2;
+\copy test_trgm3 from 'data/trgm2.data'
+ERROR:  relation "test_trgm3" does not exist
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+                  t                  |   sml    
+-------------------------------------+----------
+ Baykal                              |        1
+ Boloto Baykal                       |        1
+ Boloto Malyy Baykal                 |        1
+ Kolkhoz Krasnyy Baykal              |        1
+ Ozero Baykal                        |        1
+ Polevoy Stan Baykal                 |        1
+ Port Baykal                         |        1
+ Prud Novyy Baykal                   |        1
+ Sanatoriy Baykal                    |        1
+ Stantsiya Baykal                    |        1
+ Zaliv Baykal                        |        1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo                           | 0.545455
+ Baykalsko                           | 0.545455
+ Maloye Baykalovo                    | 0.545455
+ Baykalikha                          |      0.5
+ Baykalovsk                          |      0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+              t               |   sml    
+------------------------------+----------
+ Kabankala                    |        1
+ Kabankalan City Public Plaza |     0.75
+ Abankala                     | 0.583333
+ Kabakala                     | 0.583333
+(4 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+                  t                  |   sml    
+-------------------------------------+----------
+ Baykal                              |        1
+ Boloto Baykal                       |        1
+ Boloto Malyy Baykal                 |        1
+ Kolkhoz Krasnyy Baykal              |        1
+ Ozero Baykal                        |        1
+ Polevoy Stan Baykal                 |        1
+ Port Baykal                         |        1
+ Prud Novyy Baykal                   |        1
+ Sanatoriy Baykal                    |        1
+ Stantsiya Baykal                    |        1
+ Zaliv Baykal                        |        1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo                           | 0.545455
+ Baykalsko                           | 0.545455
+ Maloye Baykalovo                    | 0.545455
+ Baykalikha                          |      0.5
+ Baykalovsk                          |      0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+              t               |   sml    
+------------------------------+----------
+ Kabankala                    |        1
+ Kabankalan City Public Plaza |     0.75
+ Abankala                     | 0.583333
+ Kabakala                     | 0.583333
+(4 rows)
+
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+ ?column? |            t             
+----------+--------------------------
+        0 | Alaikallupoddakulam
+     0.25 | Alaikallupodda Alankulam
+     0.32 | Alaikalluppodda Kulam
+ 0.615385 | Mulaikallu Kulam
+ 0.724138 | Koraikalapu Kulam
+     0.75 | Vaikaliththevakulam
+ 0.766667 | Karaivaikal Kulam
+(7 rows)
+
+create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops);
+set enable_seqscan=off;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+                  t                  |   sml    
+-------------------------------------+----------
+ Baykal                              |        1
+ Boloto Baykal                       |        1
+ Boloto Malyy Baykal                 |        1
+ Kolkhoz Krasnyy Baykal              |        1
+ Ozero Baykal                        |        1
+ Polevoy Stan Baykal                 |        1
+ Port Baykal                         |        1
+ Prud Novyy Baykal                   |        1
+ Sanatoriy Baykal                    |        1
+ Stantsiya Baykal                    |        1
+ Zaliv Baykal                        |        1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo                           | 0.545455
+ Baykalsko                           | 0.545455
+ Maloye Baykalovo                    | 0.545455
+ Baykalikha                          |      0.5
+ Baykalovsk                          |      0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+              t               |   sml    
+------------------------------+----------
+ Kabankala                    |        1
+ Kabankalan City Public Plaza |     0.75
+ Abankala                     | 0.583333
+ Kabakala                     | 0.583333
+(4 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+                  t                  |   sml    
+-------------------------------------+----------
+ Baykal                              |        1
+ Boloto Baykal                       |        1
+ Boloto Malyy Baykal                 |        1
+ Kolkhoz Krasnyy Baykal              |        1
+ Ozero Baykal                        |        1
+ Polevoy Stan Baykal                 |        1
+ Port Baykal                         |        1
+ Prud Novyy Baykal                   |        1
+ Sanatoriy Baykal                    |        1
+ Stantsiya Baykal                    |        1
+ Zaliv Baykal                        |        1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo                           | 0.545455
+ Baykalsko                           | 0.545455
+ Maloye Baykalovo                    | 0.545455
+ Baykalikha                          |      0.5
+ Baykalovsk                          |      0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+              t               |   sml    
+------------------------------+----------
+ Kabankala                    |        1
+ Kabankalan City Public Plaza |     0.75
+ Abankala                     | 0.583333
+ Kabakala                     | 0.583333
+(4 rows)
+
+explain (costs off)
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Limit
+   ->  Index Scan using trgm_idx2 on test_trgm2
+         Order By: (t <->>> 'Alaikallupoddakulam'::text)
+(3 rows)
+
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+ ?column? |            t             
+----------+--------------------------
+        0 | Alaikallupoddakulam
+     0.25 | Alaikallupodda Alankulam
+     0.32 | Alaikalluppodda Kulam
+ 0.615385 | Mulaikallu Kulam
+ 0.724138 | Koraikalapu Kulam
+     0.75 | Vaikaliththevakulam
+ 0.766667 | Karaivaikal Kulam
+(7 rows)
+
+drop index trgm_idx2;
+create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops);
+set enable_seqscan=off;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+                  t                  |   sml    
+-------------------------------------+----------
+ Baykal                              |        1
+ Boloto Baykal                       |        1
+ Boloto Malyy Baykal                 |        1
+ Kolkhoz Krasnyy Baykal              |        1
+ Ozero Baykal                        |        1
+ Polevoy Stan Baykal                 |        1
+ Port Baykal                         |        1
+ Prud Novyy Baykal                   |        1
+ Sanatoriy Baykal                    |        1
+ Stantsiya Baykal                    |        1
+ Zaliv Baykal                        |        1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo                           | 0.545455
+ Baykalsko                           | 0.545455
+ Maloye Baykalovo                    | 0.545455
+ Baykalikha                          |      0.5
+ Baykalovsk                          |      0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+              t               |   sml    
+------------------------------+----------
+ Kabankala                    |        1
+ Kabankalan City Public Plaza |     0.75
+ Abankala                     | 0.583333
+ Kabakala                     | 0.583333
+(4 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+                  t                  |   sml    
+-------------------------------------+----------
+ Baykal                              |        1
+ Boloto Baykal                       |        1
+ Boloto Malyy Baykal                 |        1
+ Kolkhoz Krasnyy Baykal              |        1
+ Ozero Baykal                        |        1
+ Polevoy Stan Baykal                 |        1
+ Port Baykal                         |        1
+ Prud Novyy Baykal                   |        1
+ Sanatoriy Baykal                    |        1
+ Stantsiya Baykal                    |        1
+ Zaliv Baykal                        |        1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo                           | 0.545455
+ Baykalsko                           | 0.545455
+ Maloye Baykalovo                    | 0.545455
+ Baykalikha                          |      0.5
+ Baykalovsk                          |      0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+              t               |   sml    
+------------------------------+----------
+ Kabankala                    |        1
+ Kabankalan City Public Plaza |     0.75
+ Abankala                     | 0.583333
+ Kabakala                     | 0.583333
+(4 rows)
+
+set "pg_trgm.strict_word_similarity_threshold" to 0.4;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+                  t                  |   sml    
+-------------------------------------+----------
+ Baykal                              |        1
+ Boloto Baykal                       |        1
+ Boloto Malyy Baykal                 |        1
+ Kolkhoz Krasnyy Baykal              |        1
+ Ozero Baykal                        |        1
+ Polevoy Stan Baykal                 |        1
+ Port Baykal                         |        1
+ Prud Novyy Baykal                   |        1
+ Sanatoriy Baykal                    |        1
+ Stantsiya Baykal                    |        1
+ Zaliv Baykal                        |        1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo                           | 0.545455
+ Baykalsko                           | 0.545455
+ Maloye Baykalovo                    | 0.545455
+ Baykalikha                          |      0.5
+ Baykalovsk                          |      0.5
+ Zabaykal                            | 0.454545
+ Air Bakal-kecil                     | 0.444444
+ Bakal                               | 0.444444
+ Bakal Batu                          | 0.444444
+ Bakal Dos                           | 0.444444
+ Bakal Julu                          | 0.444444
+ Bakal Khel                          | 0.444444
+ Bakal Lama                          | 0.444444
+ Bakal Tres                          | 0.444444
+ Bakal Uno                           | 0.444444
+ Daang Bakal                         | 0.444444
+ Desa Bakal                          | 0.444444
+ Eat Bakal                           | 0.444444
+ Gunung Bakal                        | 0.444444
+ Sidi Bakal                          | 0.444444
+ Stantsiya Bakal                     | 0.444444
+ Sungai Bakal                        | 0.444444
+ Talang Bakal                        | 0.444444
+ Uruk Bakal                          | 0.444444
+ Zaouia Oulad Bakal                  | 0.444444
+ Baykalovskiy                        | 0.428571
+ Baykalovskiy Rayon                  | 0.428571
+ Baikal                              |      0.4
+ Baikal Airfield                     |      0.4
+ Baikal Business Centre              |      0.4
+ Baikal Hotel Moscow                 |      0.4
+ Baikal Listvyanka Hotel             |      0.4
+ Baikal Mountains                    |      0.4
+ Baikal Plaza                        |      0.4
+ Bajkal                              |      0.4
+ Bankal                              |      0.4
+ Bankal School                       |      0.4
+ Barkal                              |      0.4
+ Jabal Barkal                        |      0.4
+ Lake Baikal                         |      0.4
+ Oulad el Bakkal                     |      0.4
+ Sidi Mohammed Bakkal                |      0.4
+(54 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+              t               |   sml    
+------------------------------+----------
+ Kabankala                    |        1
+ Kabankalan City Public Plaza |     0.75
+ Abankala                     | 0.583333
+ Kabakala                     | 0.583333
+ Kabikala                     | 0.461538
+(5 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+                  t                  |   sml    
+-------------------------------------+----------
+ Baykal                              |        1
+ Boloto Baykal                       |        1
+ Boloto Malyy Baykal                 |        1
+ Kolkhoz Krasnyy Baykal              |        1
+ Ozero Baykal                        |        1
+ Polevoy Stan Baykal                 |        1
+ Port Baykal                         |        1
+ Prud Novyy Baykal                   |        1
+ Sanatoriy Baykal                    |        1
+ Stantsiya Baykal                    |        1
+ Zaliv Baykal                        |        1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo                           | 0.545455
+ Baykalsko                           | 0.545455
+ Maloye Baykalovo                    | 0.545455
+ Baykalikha                          |      0.5
+ Baykalovsk                          |      0.5
+ Zabaykal                            | 0.454545
+ Air Bakal-kecil                     | 0.444444
+ Bakal                               | 0.444444
+ Bakal Batu                          | 0.444444
+ Bakal Dos                           | 0.444444
+ Bakal Julu                          | 0.444444
+ Bakal Khel                          | 0.444444
+ Bakal Lama                          | 0.444444
+ Bakal Tres                          | 0.444444
+ Bakal Uno                           | 0.444444
+ Daang Bakal                         | 0.444444
+ Desa Bakal                          | 0.444444
+ Eat Bakal                           | 0.444444
+ Gunung Bakal                        | 0.444444
+ Sidi Bakal                          | 0.444444
+ Stantsiya Bakal                     | 0.444444
+ Sungai Bakal                        | 0.444444
+ Talang Bakal                        | 0.444444
+ Uruk Bakal                          | 0.444444
+ Zaouia Oulad Bakal                  | 0.444444
+ Baykalovskiy                        | 0.428571
+ Baykalovskiy Rayon                  | 0.428571
+ Baikal                              |      0.4
+ Baikal Airfield                     |      0.4
+ Baikal Business Centre              |      0.4
+ Baikal Hotel Moscow                 |      0.4
+ Baikal Listvyanka Hotel             |      0.4
+ Baikal Mountains                    |      0.4
+ Baikal Plaza                        |      0.4
+ Bajkal                              |      0.4
+ Bankal                              |      0.4
+ Bankal School                       |      0.4
+ Barkal                              |      0.4
+ Jabal Barkal                        |      0.4
+ Lake Baikal                         |      0.4
+ Oulad el Bakkal                     |      0.4
+ Sidi Mohammed Bakkal                |      0.4
+(54 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+              t               |   sml    
+------------------------------+----------
+ Kabankala                    |        1
+ Kabankalan City Public Plaza |     0.75
+ Abankala                     | 0.583333
+ Kabakala                     | 0.583333
+ Kabikala                     | 0.461538
+(5 rows)
+
+set "pg_trgm.strict_word_similarity_threshold" to 0.2;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+                             t                             |   sml    
+-----------------------------------------------------------+----------
+ Baykal                                                    |        1
+ Boloto Baykal                                             |        1
+ Boloto Malyy Baykal                                       |        1
+ Kolkhoz Krasnyy Baykal                                    |        1
+ Ozero Baykal                                              |        1
+ Polevoy Stan Baykal                                       |        1
+ Port Baykal                                               |        1
+ Prud Novyy Baykal                                         |        1
+ Sanatoriy Baykal                                          |        1
+ Stantsiya Baykal                                          |        1
+ Zaliv Baykal                                              |        1
+ Baykalo-Amurskaya Zheleznaya Doroga                       | 0.666667
+ Baykalovo                                                 | 0.545455
+ Baykalsko                                                 | 0.545455
+ Maloye Baykalovo                                          | 0.545455
+ Baykalikha                                                |      0.5
+ Baykalovsk                                                |      0.5
+ Zabaykal                                                  | 0.454545
+ Air Bakal-kecil                                           | 0.444444
+ Bakal                                                     | 0.444444
+ Bakal Batu                                                | 0.444444
+ Bakal Dos                                                 | 0.444444
+ Bakal Julu                                                | 0.444444
+ Bakal Khel                                                | 0.444444
+ Bakal Lama                                                | 0.444444
+ Bakal Tres                                                | 0.444444
+ Bakal Uno                                                 | 0.444444
+ Daang Bakal                                               | 0.444444
+ Desa Bakal                                                | 0.444444
+ Eat Bakal                                                 | 0.444444
+ Gunung Bakal                                              | 0.444444
+ Sidi Bakal                                                | 0.444444
+ Stantsiya Bakal                                           | 0.444444
+ Sungai Bakal                                              | 0.444444
+ Talang Bakal                                              | 0.444444
+ Uruk Bakal                                                | 0.444444
+ Zaouia Oulad Bakal                                        | 0.444444
+ Baykalovskiy                                              | 0.428571
+ Baykalovskiy Rayon                                        | 0.428571
+ Baikal                                                    |      0.4
+ Baikal Airfield                                           |      0.4
+ Baikal Business Centre                                    |      0.4
+ Baikal Hotel Moscow                                       |      0.4
+ Baikal Listvyanka Hotel                                   |      0.4
+ Baikal Mountains                                          |      0.4
+ Baikal Plaza                                              |      0.4
+ Bajkal                                                    |      0.4
+ Bankal                                                    |      0.4
+ Bankal School                                             |      0.4
+ Barkal                                                    |      0.4
+ Jabal Barkal                                              |      0.4
+ Lake Baikal                                               |      0.4
+ Oulad el Bakkal                                           |      0.4
+ Sidi Mohammed Bakkal                                      |      0.4
+ Bay of Backaland                                          |    0.375
+ Boikalakalawa Bay                                         |    0.375
+ Waikalabubu Bay                                           |    0.375
+ Bairkal                                                   | 0.363636
+ Bairkal Dhora                                             | 0.363636
+ Bairkal Jabal                                             | 0.363636
+ Batikal                                                   | 0.363636
+ Bakaleyka                                                 | 0.307692
+ Bakkalmal                                                 | 0.307692
+ Bikal                                                     |      0.3
+ Al Barkali                                                | 0.285714
+ Zabaykalka                                                | 0.285714
+ Baidal                                                    | 0.272727
+ Baihal                                                    | 0.272727
+ Baipal                                                    | 0.272727
+ Bakala                                                    | 0.272727
+ Bakala Koupi                                              | 0.272727
+ Bakale                                                    | 0.272727
+ Bakali                                                    | 0.272727
+ Bakall                                                    | 0.272727
+ Bakaly                                                    | 0.272727
+ Bakaly TV Mast                                            | 0.272727
+ Buur Bakale                                               | 0.272727
+ Gory Bakaly                                               | 0.272727
+ Kusu-Bakali                                               | 0.272727
+ Kwala Bakala                                              | 0.272727
+ Mbay Bakala                                               | 0.272727
+ Ngao Bakala                                               | 0.272727
+ Sidi Mohammed el Bakali                                   | 0.272727
+ Sopka Bakaly                                              | 0.272727
+ Sungai Bakala                                             | 0.272727
+ Urochishche Bakaly                                        | 0.272727
+ Alue Bakkala                                              |     0.25
+ Azib el Bakkali                                           |     0.25
+ Ba Kaliin                                                 |     0.25
+ Baikaluobbal                                              |     0.25
+ Bakalam                                                   |     0.25
+ Bakalan                                                   |     0.25
+ Bakalan Barat                                             |     0.25
+ Bakalan Dua                                               |     0.25
+ Bakalan Kidul                                             |     0.25
+ Bakalan Kulon                                             |     0.25
+ Bakalan Lor                                               |     0.25
+ Bakalan River                                             |     0.25
+ Bakalan Tengah                                            |     0.25
+ Bakalan Wetan                                             |     0.25
+ Bakalao Asibi Point                                       |     0.25
+ Bakalao Point                                             |     0.25
+ Bakalar Air Force Base (historical)                       |     0.25
+ Bakalar Lake                                              |     0.25
+ Bakalar Library                                           |     0.25
+ Bakalda                                                   |     0.25
+ Bakaldy                                                   |     0.25
+ Bakaley                                                   |     0.25
+ Bakalha                                                   |     0.25
+ Bakalia Char                                              |     0.25
+ Bakalka                                                   |     0.25
+ Bakalod Island                                            |     0.25
+ Bakalou                                                   |     0.25
+ Bakalua                                                   |     0.25
+ Bakalum                                                   |     0.25
+ Bakkala Cemetery                                          |     0.25
+ Bankali                                                   |     0.25
+ Barkala                                                   |     0.25
+ Barkala Park                                              |     0.25
+ Barkala Rao                                               |     0.25
+ Barkala Reserved Forest                                   |     0.25
+ Barkald                                                   |     0.25
+ Barkald stasjon                                           |     0.25
+ Barkale                                                   |     0.25
+ Barkali                                                   |     0.25
+ Baukala                                                   |     0.25
+ Buur Bakaley                                              |     0.25
+ Columbus Bakalar Municipal Airport                        |     0.25
+ Dakshin Bakalia                                           |     0.25
+ Danau Bakalan                                             |     0.25
+ Desa Bakalan                                              |     0.25
+ Gunung Bakalan                                            |     0.25
+ Kali Bakalan                                              |     0.25
+ Khrebet Batkali                                           |     0.25
+ Kordon Barkalo                                            |     0.25
+ Krajan Bakalan                                            |     0.25
+ Ovrag Bakalda                                             |     0.25
+ Pulau Bakalan                                             |     0.25
+ Selat Bakalan                                             |     0.25
+ Teluk Bakalan                                             |     0.25
+ Tukad Bakalan                                             |     0.25
+ Urochishche Batkali                                       |     0.25
+ Babakale                                                  | 0.230769
+ Babakalo                                                  | 0.230769
+ Bagkalen                                                  | 0.230769
+ Bakalalan Airport                                         | 0.230769
+ Bakalang                                                  | 0.230769
+ Bakalarr                                                  | 0.230769
+ Bakalawa                                                  | 0.230769
+ Bakaldum                                                  | 0.230769
+ Bakaleko                                                  | 0.230769
+ Bakalica                                                  | 0.230769
+ Bakalino                                                  | 0.230769
+ Bakalite                                                  | 0.230769
+ Bakalovo                                                  | 0.230769
+ Bakalsen                                                  | 0.230769
+ Bakaltua Bank                                             | 0.230769
+ Bakalukalu                                                | 0.230769
+ Bakalukalu Shan                                           | 0.230769
+ Bakkalia                                                  | 0.230769
+ Bankalol                                                  | 0.230769
+ Barkaleh                                                  | 0.230769
+ Barkalne                                                  | 0.230769
+ Barkalow Hollow                                           | 0.230769
+ Bawkalut                                                  | 0.230769
+ Bawkalut Chaung                                           | 0.230769
+ Clifton T Barkalow Elementary School                      | 0.230769
+ Efrejtor Bakalovo                                         | 0.230769
+ Efreytor-Bakalovo                                         | 0.230769
+ Gora Barkalyu                                             | 0.230769
+ Ile Bakalibu                                              | 0.230769
+ Khor Bakallii                                             | 0.230769
+ Nehalla Bankalah Reserved Forest                          | 0.230769
+ Ragha Bakalzai                                            | 0.230769
+ Tanjung Batikala                                          | 0.230769
+ Teluk Bakalang                                            | 0.230769
+ Urochishche Bakalovo                                      | 0.230769
+ Banjar Kubakal                                            | 0.222222
+ Darreh Pumba Kal                                          | 0.222222
+ Zabaykalovskiy                                            | 0.222222
+ Aparthotel Adagio Premium Dubai Al Barsha                 | 0.214286
+ Babakalia                                                 | 0.214286
+ Bahkalleh                                                 | 0.214286
+ Baikalovo                                                 | 0.214286
+ Bakalaale                                                 | 0.214286
+ Bakalabwa Pans                                            | 0.214286
+ Bakalaeng                                                 | 0.214286
+ Bakalauri                                                 | 0.214286
+ Bakalbhar                                                 | 0.214286
+ Bakalbuah                                                 | 0.214286
+ Bakalerek                                                 | 0.214286
+ Bakalinga                                                 | 0.214286
+ Bakalipur                                                 | 0.214286
+ Bakaljaya                                                 | 0.214286
+ Bakalnica                                                 | 0.214286
+ Bakalongo                                                 | 0.214286
+ Bakalovka                                                 | 0.214286
+ Bakalrejo                                                 | 0.214286
+ Bakkalale                                                 | 0.214286
+ Bambakala                                                 | 0.214286
+ Bambakalo                                                 | 0.214286
+ Barkalare                                                 | 0.214286
+ Barkalden                                                 | 0.214286
+ Barkallou                                                 | 0.214286
+ Barkalova                                                 | 0.214286
+ Baskalino                                                 | 0.214286
+ Baskaltsi                                                 | 0.214286
+ Desa Bakalrejo                                            | 0.214286
+ Doubletree By Hilton Dubai Al Barsha Hotel and Res        | 0.214286
+ Doubletree By Hilton Hotel and Apartments Dubai Al Barsha | 0.214286
+ Doubletree Res.Dubai-Al Barsha                            | 0.214286
+ Gora Barkalova                                            | 0.214286
+ Holiday Inn Dubai Al Barsha                               | 0.214286
+ Novotel Dubai Al Barsha                                   | 0.214286
+ Park Inn By Radisson Dubai Al Barsha                      | 0.214286
+ Ramee Rose Hotel Dubai Al Barsha                          | 0.214286
+ Ras Barkallah                                             | 0.214286
+ Salu Bakalaeng                                            | 0.214286
+ Tanjung Bakalinga                                         | 0.214286
+ Tubu Bakalekuk                                            | 0.214286
+ Baikalakko                                                |      0.2
+ Bakalauri1                                                |      0.2
+ Bakalauri2                                                |      0.2
+ Bakalauri3                                                |      0.2
+ Bakalauri4                                                |      0.2
+ Bakalauri5                                                |      0.2
+ Bakalauri6                                                |      0.2
+ Bakalauri7                                                |      0.2
+ Bakalauri8                                                |      0.2
+ Bakalauri9                                                |      0.2
+ Bakaldalam                                                |      0.2
+ Bakaldukuh                                                |      0.2
+ Bakaloolay                                                |      0.2
+ Bakalovina                                                |      0.2
+ Bakalpokok                                                |      0.2
+ Bakalshile                                                |      0.2
+ Bakalukudu                                                |      0.2
+ Bambakalia                                                |      0.2
+ Barkaladja Pool                                           |      0.2
+ Barkalovka                                                |      0.2
+ Bavkalasis                                                |      0.2
+ Gora Bakalyadyr                                           |      0.2
+ Kampong Bakaladong                                        |      0.2
+ Urochishche Bakalarnyn-Ayasy                              |      0.2
+ Urochishche Bakaldikha                                    |      0.2
+(245 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+                t                 |   sml    
+----------------------------------+----------
+ Kabankala                        |        1
+ Kabankalan City Public Plaza     |     0.75
+ Abankala                         | 0.583333
+ Kabakala                         | 0.583333
+ Kabikala                         | 0.461538
+ Ntombankala School               |    0.375
+ Nehalla Bankalah Reserved Forest | 0.357143
+ Jabba Kalai                      | 0.333333
+ Kambakala                        | 0.333333
+ Ker Samba Kalla                  | 0.333333
+ Bankal                           | 0.307692
+ Bankal School                    | 0.307692
+ Kanampumba-Kalawa                | 0.307692
+ Bankali                          | 0.285714
+ Mwalaba-Kalamba                  | 0.285714
+ Tumba-Kalamba                    | 0.285714
+ Darreh Pumba Kal                 | 0.272727
+ Bankalol                         | 0.266667
+ Dabakala                         | 0.266667
+ Purba Kalaujan                   | 0.266667
+ Kali Purbakala                   | 0.263158
+ Dalabakala                       |     0.25
+ Demba Kali                       |     0.25
+ Gagaba Kalo                      |     0.25
+ Golba Kalo                       |     0.25
+ Habakkala                        |     0.25
+ Kali Bakalan                     |     0.25
+ Kimbakala                        |     0.25
+ Kombakala                        |     0.25
+ Jaba Kalle                       | 0.235294
+ Kaikalahun Indian Reserve 25     | 0.235294
+ Kwala Bakala                     | 0.235294
+ Gereba Kaler                     | 0.230769
+ Goth Soba Kaloi                  | 0.230769
+ Guba Kaldo                       | 0.230769
+ Gulba Kalle                      | 0.230769
+ Guba Kalgalaksha                 | 0.222222
+ Kalibakalako                     | 0.222222
+ Ba Kaliin                        | 0.214286
+ Bakala                           | 0.214286
+ Bakala Koupi                     | 0.214286
+ Bikala                           | 0.214286
+ Bikala Madila                    | 0.214286
+ Bugor Arba-Kalgan                | 0.214286
+ Bumba-Kaloki                     | 0.214286
+ Guba Kalita                      | 0.214286
+ Kamba-Kalele                     | 0.214286
+ Mbay Bakala                      | 0.214286
+ Ngao Bakala                      | 0.214286
+ Sungai Bakala                    | 0.214286
+ Fayzabadkala                     | 0.210526
+ Gora Fayzabadkala                | 0.210526
+ Alue Bakkala                     |      0.2
+ Bakkala Cemetery                 |      0.2
+ Barkala                          |      0.2
+ Barkala Park                     |      0.2
+ Barkala Rao                      |      0.2
+ Barkala Reserved Forest          |      0.2
+ Baukala                          |      0.2
+ Beikala                          |      0.2
+ Bomba-Kalende                    |      0.2
+ Bumba-Kalumba                    |      0.2
+ Haikala                          |      0.2
+ Kahambikalela                    |      0.2
+ Kaikalapettai                    |      0.2
+ Kaikale                          |      0.2
+ Laikala                          |      0.2
+ Maikala Range                    |      0.2
+ Matamba-Kalenga                  |      0.2
+ Matamba-Kalenge                  |      0.2
+ Naikala                          |      0.2
+ Tumba-Kalumba                    |      0.2
+ Tumba-Kalunga                    |      0.2
+ Waikala                          |      0.2
+(74 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+                             t                             |   sml    
+-----------------------------------------------------------+----------
+ Baykal                                                    |        1
+ Boloto Baykal                                             |        1
+ Boloto Malyy Baykal                                       |        1
+ Kolkhoz Krasnyy Baykal                                    |        1
+ Ozero Baykal                                              |        1
+ Polevoy Stan Baykal                                       |        1
+ Port Baykal                                               |        1
+ Prud Novyy Baykal                                         |        1
+ Sanatoriy Baykal                                          |        1
+ Stantsiya Baykal                                          |        1
+ Zaliv Baykal                                              |        1
+ Baykalo-Amurskaya Zheleznaya Doroga                       | 0.666667
+ Baykalovo                                                 | 0.545455
+ Baykalsko                                                 | 0.545455
+ Maloye Baykalovo                                          | 0.545455
+ Baykalikha                                                |      0.5
+ Baykalovsk                                                |      0.5
+ Zabaykal                                                  | 0.454545
+ Air Bakal-kecil                                           | 0.444444
+ Bakal                                                     | 0.444444
+ Bakal Batu                                                | 0.444444
+ Bakal Dos                                                 | 0.444444
+ Bakal Julu                                                | 0.444444
+ Bakal Khel                                                | 0.444444
+ Bakal Lama                                                | 0.444444
+ Bakal Tres                                                | 0.444444
+ Bakal Uno                                                 | 0.444444
+ Daang Bakal                                               | 0.444444
+ Desa Bakal                                                | 0.444444
+ Eat Bakal                                                 | 0.444444
+ Gunung Bakal                                              | 0.444444
+ Sidi Bakal                                                | 0.444444
+ Stantsiya Bakal                                           | 0.444444
+ Sungai Bakal                                              | 0.444444
+ Talang Bakal                                              | 0.444444
+ Uruk Bakal                                                | 0.444444
+ Zaouia Oulad Bakal                                        | 0.444444
+ Baykalovskiy                                              | 0.428571
+ Baykalovskiy Rayon                                        | 0.428571
+ Baikal                                                    |      0.4
+ Baikal Airfield                                           |      0.4
+ Baikal Business Centre                                    |      0.4
+ Baikal Hotel Moscow                                       |      0.4
+ Baikal Listvyanka Hotel                                   |      0.4
+ Baikal Mountains                                          |      0.4
+ Baikal Plaza                                              |      0.4
+ Bajkal                                                    |      0.4
+ Bankal                                                    |      0.4
+ Bankal School                                             |      0.4
+ Barkal                                                    |      0.4
+ Jabal Barkal                                              |      0.4
+ Lake Baikal                                               |      0.4
+ Oulad el Bakkal                                           |      0.4
+ Sidi Mohammed Bakkal                                      |      0.4
+ Bay of Backaland                                          |    0.375
+ Boikalakalawa Bay                                         |    0.375
+ Waikalabubu Bay                                           |    0.375
+ Bairkal                                                   | 0.363636
+ Bairkal Dhora                                             | 0.363636
+ Bairkal Jabal                                             | 0.363636
+ Batikal                                                   | 0.363636
+ Bakaleyka                                                 | 0.307692
+ Bakkalmal                                                 | 0.307692
+ Bikal                                                     |      0.3
+ Al Barkali                                                | 0.285714
+ Zabaykalka                                                | 0.285714
+ Baidal                                                    | 0.272727
+ Baihal                                                    | 0.272727
+ Baipal                                                    | 0.272727
+ Bakala                                                    | 0.272727
+ Bakala Koupi                                              | 0.272727
+ Bakale                                                    | 0.272727
+ Bakali                                                    | 0.272727
+ Bakall                                                    | 0.272727
+ Bakaly                                                    | 0.272727
+ Bakaly TV Mast                                            | 0.272727
+ Buur Bakale                                               | 0.272727
+ Gory Bakaly                                               | 0.272727
+ Kusu-Bakali                                               | 0.272727
+ Kwala Bakala                                              | 0.272727
+ Mbay Bakala                                               | 0.272727
+ Ngao Bakala                                               | 0.272727
+ Sidi Mohammed el Bakali                                   | 0.272727
+ Sopka Bakaly                                              | 0.272727
+ Sungai Bakala                                             | 0.272727
+ Urochishche Bakaly                                        | 0.272727
+ Alue Bakkala                                              |     0.25
+ Azib el Bakkali                                           |     0.25
+ Ba Kaliin                                                 |     0.25
+ Baikaluobbal                                              |     0.25
+ Bakalam                                                   |     0.25
+ Bakalan                                                   |     0.25
+ Bakalan Barat                                             |     0.25
+ Bakalan Dua                                               |     0.25
+ Bakalan Kidul                                             |     0.25
+ Bakalan Kulon                                             |     0.25
+ Bakalan Lor                                               |     0.25
+ Bakalan River                                             |     0.25
+ Bakalan Tengah                                            |     0.25
+ Bakalan Wetan                                             |     0.25
+ Bakalao Asibi Point                                       |     0.25
+ Bakalao Point                                             |     0.25
+ Bakalar Air Force Base (historical)                       |     0.25
+ Bakalar Lake                                              |     0.25
+ Bakalar Library                                           |     0.25
+ Bakalda                                                   |     0.25
+ Bakaldy                                                   |     0.25
+ Bakaley                                                   |     0.25
+ Bakalha                                                   |     0.25
+ Bakalia Char                                              |     0.25
+ Bakalka                                                   |     0.25
+ Bakalod Island                                            |     0.25
+ Bakalou                                                   |     0.25
+ Bakalua                                                   |     0.25
+ Bakalum                                                   |     0.25
+ Bakkala Cemetery                                          |     0.25
+ Bankali                                                   |     0.25
+ Barkala                                                   |     0.25
+ Barkala Park                                              |     0.25
+ Barkala Rao                                               |     0.25
+ Barkala Reserved Forest                                   |     0.25
+ Barkald                                                   |     0.25
+ Barkald stasjon                                           |     0.25
+ Barkale                                                   |     0.25
+ Barkali                                                   |     0.25
+ Baukala                                                   |     0.25
+ Buur Bakaley                                              |     0.25
+ Columbus Bakalar Municipal Airport                        |     0.25
+ Dakshin Bakalia                                           |     0.25
+ Danau Bakalan                                             |     0.25
+ Desa Bakalan                                              |     0.25
+ Gunung Bakalan                                            |     0.25
+ Kali Bakalan                                              |     0.25
+ Khrebet Batkali                                           |     0.25
+ Kordon Barkalo                                            |     0.25
+ Krajan Bakalan                                            |     0.25
+ Ovrag Bakalda                                             |     0.25
+ Pulau Bakalan                                             |     0.25
+ Selat Bakalan                                             |     0.25
+ Teluk Bakalan                                             |     0.25
+ Tukad Bakalan                                             |     0.25
+ Urochishche Batkali                                       |     0.25
+ Babakale                                                  | 0.230769
+ Babakalo                                                  | 0.230769
+ Bagkalen                                                  | 0.230769
+ Bakalalan Airport                                         | 0.230769
+ Bakalang                                                  | 0.230769
+ Bakalarr                                                  | 0.230769
+ Bakalawa                                                  | 0.230769
+ Bakaldum                                                  | 0.230769
+ Bakaleko                                                  | 0.230769
+ Bakalica                                                  | 0.230769
+ Bakalino                                                  | 0.230769
+ Bakalite                                                  | 0.230769
+ Bakalovo                                                  | 0.230769
+ Bakalsen                                                  | 0.230769
+ Bakaltua Bank                                             | 0.230769
+ Bakalukalu                                                | 0.230769
+ Bakalukalu Shan                                           | 0.230769
+ Bakkalia                                                  | 0.230769
+ Bankalol                                                  | 0.230769
+ Barkaleh                                                  | 0.230769
+ Barkalne                                                  | 0.230769
+ Barkalow Hollow                                           | 0.230769
+ Bawkalut                                                  | 0.230769
+ Bawkalut Chaung                                           | 0.230769
+ Clifton T Barkalow Elementary School                      | 0.230769
+ Efrejtor Bakalovo                                         | 0.230769
+ Efreytor-Bakalovo                                         | 0.230769
+ Gora Barkalyu                                             | 0.230769
+ Ile Bakalibu                                              | 0.230769
+ Khor Bakallii                                             | 0.230769
+ Nehalla Bankalah Reserved Forest                          | 0.230769
+ Ragha Bakalzai                                            | 0.230769
+ Tanjung Batikala                                          | 0.230769
+ Teluk Bakalang                                            | 0.230769
+ Urochishche Bakalovo                                      | 0.230769
+ Banjar Kubakal                                            | 0.222222
+ Darreh Pumba Kal                                          | 0.222222
+ Zabaykalovskiy                                            | 0.222222
+ Aparthotel Adagio Premium Dubai Al Barsha                 | 0.214286
+ Babakalia                                                 | 0.214286
+ Bahkalleh                                                 | 0.214286
+ Baikalovo                                                 | 0.214286
+ Bakalaale                                                 | 0.214286
+ Bakalabwa Pans                                            | 0.214286
+ Bakalaeng                                                 | 0.214286
+ Bakalauri                                                 | 0.214286
+ Bakalbhar                                                 | 0.214286
+ Bakalbuah                                                 | 0.214286
+ Bakalerek                                                 | 0.214286
+ Bakalinga                                                 | 0.214286
+ Bakalipur                                                 | 0.214286
+ Bakaljaya                                                 | 0.214286
+ Bakalnica                                                 | 0.214286
+ Bakalongo                                                 | 0.214286
+ Bakalovka                                                 | 0.214286
+ Bakalrejo                                                 | 0.214286
+ Bakkalale                                                 | 0.214286
+ Bambakala                                                 | 0.214286
+ Bambakalo                                                 | 0.214286
+ Barkalare                                                 | 0.214286
+ Barkalden                                                 | 0.214286
+ Barkallou                                                 | 0.214286
+ Barkalova                                                 | 0.214286
+ Baskalino                                                 | 0.214286
+ Baskaltsi                                                 | 0.214286
+ Desa Bakalrejo                                            | 0.214286
+ Doubletree By Hilton Dubai Al Barsha Hotel and Res        | 0.214286
+ Doubletree By Hilton Hotel and Apartments Dubai Al Barsha | 0.214286
+ Doubletree Res.Dubai-Al Barsha                            | 0.214286
+ Gora Barkalova                                            | 0.214286
+ Holiday Inn Dubai Al Barsha                               | 0.214286
+ Novotel Dubai Al Barsha                                   | 0.214286
+ Park Inn By Radisson Dubai Al Barsha                      | 0.214286
+ Ramee Rose Hotel Dubai Al Barsha                          | 0.214286
+ Ras Barkallah                                             | 0.214286
+ Salu Bakalaeng                                            | 0.214286
+ Tanjung Bakalinga                                         | 0.214286
+ Tubu Bakalekuk                                            | 0.214286
+ Baikalakko                                                |      0.2
+ Bakalauri1                                                |      0.2
+ Bakalauri2                                                |      0.2
+ Bakalauri3                                                |      0.2
+ Bakalauri4                                                |      0.2
+ Bakalauri5                                                |      0.2
+ Bakalauri6                                                |      0.2
+ Bakalauri7                                                |      0.2
+ Bakalauri8                                                |      0.2
+ Bakalauri9                                                |      0.2
+ Bakaldalam                                                |      0.2
+ Bakaldukuh                                                |      0.2
+ Bakaloolay                                                |      0.2
+ Bakalovina                                                |      0.2
+ Bakalpokok                                                |      0.2
+ Bakalshile                                                |      0.2
+ Bakalukudu                                                |      0.2
+ Bambakalia                                                |      0.2
+ Barkaladja Pool                                           |      0.2
+ Barkalovka                                                |      0.2
+ Bavkalasis                                                |      0.2
+ Gora Bakalyadyr                                           |      0.2
+ Kampong Bakaladong                                        |      0.2
+ Urochishche Bakalarnyn-Ayasy                              |      0.2
+ Urochishche Bakaldikha                                    |      0.2
+(245 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+                t                 |   sml    
+----------------------------------+----------
+ Kabankala                        |        1
+ Kabankalan City Public Plaza     |     0.75
+ Abankala                         | 0.583333
+ Kabakala                         | 0.583333
+ Kabikala                         | 0.461538
+ Ntombankala School               |    0.375
+ Nehalla Bankalah Reserved Forest | 0.357143
+ Jabba Kalai                      | 0.333333
+ Kambakala                        | 0.333333
+ Ker Samba Kalla                  | 0.333333
+ Bankal                           | 0.307692
+ Bankal School                    | 0.307692
+ Kanampumba-Kalawa                | 0.307692
+ Bankali                          | 0.285714
+ Mwalaba-Kalamba                  | 0.285714
+ Tumba-Kalamba                    | 0.285714
+ Darreh Pumba Kal                 | 0.272727
+ Bankalol                         | 0.266667
+ Dabakala                         | 0.266667
+ Purba Kalaujan                   | 0.266667
+ Kali Purbakala                   | 0.263158
+ Dalabakala                       |     0.25
+ Demba Kali                       |     0.25
+ Gagaba Kalo                      |     0.25
+ Golba Kalo                       |     0.25
+ Habakkala                        |     0.25
+ Kali Bakalan                     |     0.25
+ Kimbakala                        |     0.25
+ Kombakala                        |     0.25
+ Jaba Kalle                       | 0.235294
+ Kaikalahun Indian Reserve 25     | 0.235294
+ Kwala Bakala                     | 0.235294
+ Gereba Kaler                     | 0.230769
+ Goth Soba Kaloi                  | 0.230769
+ Guba Kaldo                       | 0.230769
+ Gulba Kalle                      | 0.230769
+ Guba Kalgalaksha                 | 0.222222
+ Kalibakalako                     | 0.222222
+ Ba Kaliin                        | 0.214286
+ Bakala                           | 0.214286
+ Bakala Koupi                     | 0.214286
+ Bikala                           | 0.214286
+ Bikala Madila                    | 0.214286
+ Bugor Arba-Kalgan                | 0.214286
+ Bumba-Kaloki                     | 0.214286
+ Guba Kalita                      | 0.214286
+ Kamba-Kalele                     | 0.214286
+ Mbay Bakala                      | 0.214286
+ Ngao Bakala                      | 0.214286
+ Sungai Bakala                    | 0.214286
+ Fayzabadkala                     | 0.210526
+ Gora Fayzabadkala                | 0.210526
+ Alue Bakkala                     |      0.2
+ Bakkala Cemetery                 |      0.2
+ Barkala                          |      0.2
+ Barkala Park                     |      0.2
+ Barkala Rao                      |      0.2
+ Barkala Reserved Forest          |      0.2
+ Baukala                          |      0.2
+ Beikala                          |      0.2
+ Bomba-Kalende                    |      0.2
+ Bumba-Kalumba                    |      0.2
+ Haikala                          |      0.2
+ Kahambikalela                    |      0.2
+ Kaikalapettai                    |      0.2
+ Kaikale                          |      0.2
+ Laikala                          |      0.2
+ Maikala Range                    |      0.2
+ Matamba-Kalenga                  |      0.2
+ Matamba-Kalenge                  |      0.2
+ Naikala                          |      0.2
+ Tumba-Kalumba                    |      0.2
+ Tumba-Kalunga                    |      0.2
+ Waikala                          |      0.2
+(74 rows)
+
diff --git a/contrib/pg_trgm/pg_trgm--1.3--1.4.sql b/contrib/pg_trgm/pg_trgm--1.3--1.4.sql
new file mode 100644 (file)
index 0000000..64a0c21
--- /dev/null
@@ -0,0 +1,68 @@
+/* contrib/pg_trgm/pg_trgm--1.3--1.4.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.4'" to load this file. \quit
+
+CREATE FUNCTION strict_word_similarity(text,text)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION strict_word_similarity_op(text,text)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE PARALLEL SAFE;  -- stable because depends on pg_trgm.strict_word_similarity_threshold
+
+CREATE FUNCTION strict_word_similarity_commutator_op(text,text)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE PARALLEL SAFE;  -- stable because depends on pg_trgm.strict_word_similarity_threshold
+
+CREATE OPERATOR <<% (
+        LEFTARG = text,
+        RIGHTARG = text,
+        PROCEDURE = strict_word_similarity_op,
+        COMMUTATOR = '%>>',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+CREATE OPERATOR %>> (
+        LEFTARG = text,
+        RIGHTARG = text,
+        PROCEDURE = strict_word_similarity_commutator_op,
+        COMMUTATOR = '<<%',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+CREATE FUNCTION strict_word_similarity_dist_op(text,text)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION strict_word_similarity_dist_commutator_op(text,text)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE OPERATOR <<<-> (
+        LEFTARG = text,
+        RIGHTARG = text,
+        PROCEDURE = strict_word_similarity_dist_op,
+        COMMUTATOR = '<->>>'
+);
+
+CREATE OPERATOR <->>> (
+        LEFTARG = text,
+        RIGHTARG = text,
+        PROCEDURE = strict_word_similarity_dist_commutator_op,
+        COMMUTATOR = '<<<->'
+);
+
+ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
+        OPERATOR        9       %>> (text, text),
+        OPERATOR        10       <->>> (text, text) FOR ORDER BY pg_catalog.float_ops;
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+        OPERATOR        9       %>> (text, text);
index 06f274f01ac02d38c26a55b57f9d942b95d48df1..3e325dde0035cf1d471cdb2ba18953e49e998c15 100644 (file)
@@ -1,5 +1,5 @@
 # pg_trgm extension
 comment = 'text similarity measurement and index searching based on trigrams'
-default_version = '1.3'
+default_version = '1.4'
 module_pathname = '$libdir/pg_trgm'
 relocatable = true
diff --git a/contrib/pg_trgm/sql/pg_strict_word_trgm.sql b/contrib/pg_trgm/sql/pg_strict_word_trgm.sql
new file mode 100644 (file)
index 0000000..98e0d37
--- /dev/null
@@ -0,0 +1,42 @@
+DROP INDEX trgm_idx2;
+
+\copy test_trgm3 from 'data/trgm2.data'
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+
+create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops);
+set enable_seqscan=off;
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+
+explain (costs off)
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+
+drop index trgm_idx2;
+create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops);
+set enable_seqscan=off;
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+
+set "pg_trgm.strict_word_similarity_threshold" to 0.4;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+
+set "pg_trgm.strict_word_similarity_threshold" to 0.2;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
index 45df91875ab1e87772443f7589a7b89dff3bae13..f0ab50dd05ca485831d0f88b19e680e2b2906b9a 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "access/gist.h"
 #include "access/itup.h"
+#include "access/stratnum.h"
 #include "storage/bufpage.h"
 
 /*
 #define DIVUNION
 
 /* operator strategy numbers */
-#define SimilarityStrategyNumber               1
-#define DistanceStrategyNumber                 2
-#define LikeStrategyNumber                             3
-#define ILikeStrategyNumber                            4
-#define RegExpStrategyNumber                   5
-#define RegExpICaseStrategyNumber              6
-#define WordSimilarityStrategyNumber   7
-#define WordDistanceStrategyNumber             8
+#define SimilarityStrategyNumber                       1
+#define DistanceStrategyNumber                         2
+#define LikeStrategyNumber                                     3
+#define ILikeStrategyNumber                                    4
+#define RegExpStrategyNumber                           5
+#define RegExpICaseStrategyNumber                      6
+#define WordSimilarityStrategyNumber           7
+#define WordDistanceStrategyNumber                     8
+#define StrictWordSimilarityStrategyNumber     9
+#define StrictWordDistanceStrategyNumber       10
 
 typedef char trgm[3];
 
@@ -120,7 +123,9 @@ typedef struct TrgmPackedGraph TrgmPackedGraph;
 
 extern double similarity_threshold;
 extern double word_similarity_threshold;
+extern double strict_word_similarity_threshold;
 
+extern double index_strategy_get_limit(StrategyNumber strategy);
 extern uint32 trgm2int(trgm *ptr);
 extern void compact_trigram(trgm *tptr, char *str, int bytelen);
 extern TRGM *generate_trgm(char *str, int slen);
index e4b3daea446b4177a34dfaf7c052af1dde8c00b6..1b9809b565a3481c0b100278766de74a0057afb8 100644 (file)
@@ -90,6 +90,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
        {
                case SimilarityStrategyNumber:
                case WordSimilarityStrategyNumber:
+               case StrictWordSimilarityStrategyNumber:
                        trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
                        break;
                case ILikeStrategyNumber:
@@ -187,8 +188,8 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
        {
                case SimilarityStrategyNumber:
                case WordSimilarityStrategyNumber:
-                       nlimit = (strategy == SimilarityStrategyNumber) ?
-                               similarity_threshold : word_similarity_threshold;
+               case StrictWordSimilarityStrategyNumber:
+                       nlimit = index_strategy_get_limit(strategy);
 
                        /* Count the matches */
                        ntrue = 0;
@@ -282,8 +283,8 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
        {
                case SimilarityStrategyNumber:
                case WordSimilarityStrategyNumber:
-                       nlimit = (strategy == SimilarityStrategyNumber) ?
-                               similarity_threshold : word_similarity_threshold;
+               case StrictWordSimilarityStrategyNumber:
+                       nlimit = index_strategy_get_limit(strategy);
 
                        /* Count the matches */
                        ntrue = 0;
index e55dc19a65c715e178a9d0490e7f6d2c1060657a..53e6830ab1bd0b41725f3000caed2f25dd366ae9 100644 (file)
@@ -221,6 +221,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
                {
                        case SimilarityStrategyNumber:
                        case WordSimilarityStrategyNumber:
+                       case StrictWordSimilarityStrategyNumber:
                                qtrg = generate_trgm(VARDATA(query),
                                                                         querysize - VARHDRSZ);
                                break;
@@ -290,10 +291,11 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
        {
                case SimilarityStrategyNumber:
                case WordSimilarityStrategyNumber:
-                       /* Similarity search is exact. Word similarity search is inexact */
-                       *recheck = (strategy == WordSimilarityStrategyNumber);
-                       nlimit = (strategy == SimilarityStrategyNumber) ?
-                               similarity_threshold : word_similarity_threshold;
+               case StrictWordSimilarityStrategyNumber:
+                       /* Similarity search is exact. (Strict) word similarity search is inexact */
+                       *recheck = (strategy != SimilarityStrategyNumber);
+
+                       nlimit = index_strategy_get_limit(strategy);
 
                        if (GIST_LEAF(entry))
                        {                                       /* all leafs contains orig trgm */
@@ -468,7 +470,9 @@ gtrgm_distance(PG_FUNCTION_ARGS)
        {
                case DistanceStrategyNumber:
                case WordDistanceStrategyNumber:
-                       *recheck = strategy == WordDistanceStrategyNumber;
+               case StrictWordDistanceStrategyNumber:
+                       /* Only plain trigram distance is exact */
+                       *recheck = (strategy != DistanceStrategyNumber);
                        if (GIST_LEAF(entry))
                        {                                       /* all leafs contains orig trgm */
 
index 306d60bd3bb5470b96a45027ee68d8a8be12a687..b572d087d83769a8de8317585afe7c3d9764b417 100644 (file)
@@ -18,6 +18,7 @@ PG_MODULE_MAGIC;
 /* GUC variables */
 double         similarity_threshold = 0.3f;
 double         word_similarity_threshold = 0.6f;
+double         strict_word_similarity_threshold = 0.5f;
 
 void           _PG_init(void);
 
@@ -26,12 +27,17 @@ PG_FUNCTION_INFO_V1(show_limit);
 PG_FUNCTION_INFO_V1(show_trgm);
 PG_FUNCTION_INFO_V1(similarity);
 PG_FUNCTION_INFO_V1(word_similarity);
+PG_FUNCTION_INFO_V1(strict_word_similarity);
 PG_FUNCTION_INFO_V1(similarity_dist);
 PG_FUNCTION_INFO_V1(similarity_op);
 PG_FUNCTION_INFO_V1(word_similarity_op);
 PG_FUNCTION_INFO_V1(word_similarity_commutator_op);
 PG_FUNCTION_INFO_V1(word_similarity_dist_op);
 PG_FUNCTION_INFO_V1(word_similarity_dist_commutator_op);
+PG_FUNCTION_INFO_V1(strict_word_similarity_op);
+PG_FUNCTION_INFO_V1(strict_word_similarity_commutator_op);
+PG_FUNCTION_INFO_V1(strict_word_similarity_dist_op);
+PG_FUNCTION_INFO_V1(strict_word_similarity_dist_commutator_op);
 
 /* Trigram with position */
 typedef struct
@@ -40,6 +46,17 @@ typedef struct
        int                     index;
 } pos_trgm;
 
+/*
+ * Trigram bound type: a per-trigram bitmask telling whether the trigram is
+ * the first (left) and/or the last (right) trigram produced for a word.
+ * Both bits may be set on the same trigram.
+ */
+typedef uint8 TrgmBound;
+#define TRGM_BOUND_LEFT                                (0x01) /* trigram is left bound of word */
+#define TRGM_BOUND_RIGHT                       (0x02) /* trigram is right bound of word */
+
+/*
+ * Word similarity flags: bits that may be OR-ed together and passed as the
+ * "flags" argument of the word similarity calculation.
+ */
+#define WORD_SIMILARITY_CHECK_ONLY     (0x01) /* if set then only check existence
+                                                                                       * of similar search pattern in text */
+#define WORD_SIMILARITY_STRICT         (0x02) /* force bounds of extent to match
+                                                                                       * word bounds */
+
 /*
  * Module load callback
  */
@@ -71,6 +88,18 @@ _PG_init(void)
                                                         NULL,
                                                         NULL,
                                                         NULL);
+       /*
+        * Threshold compared against by the strict word similarity match
+        * operators.  The description strings are plain text rather than
+        * printf-style formats, so the percent sign in the operator name is
+        * written once; a doubled "%%" would be shown to users as is.
+        */
+       DefineCustomRealVariable("pg_trgm.strict_word_similarity_threshold",
+                                                        "Sets the threshold used by the <<% operator.",
+                                                        "Valid range is 0.0 .. 1.0.",
+                                                        &strict_word_similarity_threshold,
+                                                        0.5,
+                                                        0.0,
+                                                        1.0,
+                                                        PGC_USERSET,
+                                                        0,
+                                                        NULL,
+                                                        NULL,
+                                                        NULL);
 }
 
 /*
@@ -95,6 +124,29 @@ set_limit(PG_FUNCTION_ARGS)
        PG_RETURN_FLOAT4(similarity_threshold);
 }
 
+
+/*
+ * Return the similarity threshold that applies to an index scan strategy.
+ *
+ * strategy: one of the boolean similarity strategy numbers (plain, word or
+ *                      strict word similarity).
+ *
+ * Any other strategy number is reported with elog(ERROR).
+ */
+double
+index_strategy_get_limit(StrategyNumber strategy)
+{
+       if (strategy == SimilarityStrategyNumber)
+               return similarity_threshold;
+
+       if (strategy == WordSimilarityStrategyNumber)
+               return word_similarity_threshold;
+
+       if (strategy == StrictWordSimilarityStrategyNumber)
+               return strict_word_similarity_threshold;
+
+       /* Not a strategy that carries a threshold */
+       elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+       return 0.0;     /* keep compiler quiet */
+}
+
 /*
  * Deprecated function.
  * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
@@ -235,11 +287,12 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
  *
  * trg: where to return the array of trigrams.
  * str: source string, of length slen bytes.
+ * bounds: where to return bounds of trigrams (if needed).
  *
  * Returns length of the generated array.
  */
 static int
-generate_trgm_only(trgm *trg, char *str, int slen)
+generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
 {
        trgm       *tptr;
        char       *buf;
@@ -282,11 +335,13 @@ generate_trgm_only(trgm *trg, char *str, int slen)
                buf[LPADDING + bytelen] = ' ';
                buf[LPADDING + bytelen + 1] = ' ';
 
-               /*
-                * count trigrams
-                */
+               /* Calculate trigrams marking their bounds if needed */
+               if (bounds)
+                       bounds[tptr - trg] |= TRGM_BOUND_LEFT;
                tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
                                                         charlen + LPADDING + RPADDING);
+               if (bounds)
+                       bounds[tptr - trg - 1] |= TRGM_BOUND_RIGHT;
        }
 
        pfree(buf);
@@ -328,7 +383,7 @@ generate_trgm(char *str, int slen)
        trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
        trg->flag = ARRKEY;
 
-       len = generate_trgm_only(GETARR(trg), str, slen);
+       len = generate_trgm_only(GETARR(trg), str, slen, NULL);
        SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
 
        if (len == 0)
@@ -413,8 +468,8 @@ comp_ptrgm(const void *v1, const void *v2)
  * ulen1: count of unique trigrams of array "trg1".
  * len2: length of array "trg2" and array "trg2indexes".
  * len: length of the array "found".
- * check_only: if true then only check existence of similar search pattern in
- *                        text.
+ * flags: set of boolean flags parametrizing similarity calculation.
+ * bounds: whether each trigram is left/right bound of word.
  *
  * Returns word similarity.
  */
@@ -424,16 +479,32 @@ iterate_word_similarity(int *trg2indexes,
                                                int ulen1,
                                                int len2,
                                                int len,
-                                               bool check_only)
+                                               uint8 flags,
+                                               TrgmBound *bounds)
 {
        int                *lastpos,
                                i,
                                ulen2 = 0,
                                count = 0,
                                upper = -1,
-                               lower = -1;
+                               lower;
        float4          smlr_cur,
                                smlr_max = 0.0f;
+       double          threshold;
+
+       Assert(bounds || !(flags & WORD_SIMILARITY_STRICT));
+
+       /* Select appropriate threshold */
+       threshold = (flags & WORD_SIMILARITY_STRICT) ?
+                                strict_word_similarity_threshold :
+                                word_similarity_threshold;
+
+       /*
+        * Consider first trigram as initial lower bound for strict word similarity,
+        * or initialize it later with first trigram present for plain word
+        * similarity.
+        */
+       lower = (flags & WORD_SIMILARITY_STRICT) ? 0 : -1;
 
        /* Memorise last position of each trigram */
        lastpos = (int *) palloc(sizeof(int) * len);
@@ -456,8 +527,13 @@ iterate_word_similarity(int *trg2indexes,
                        lastpos[trgindex] = i;
                }
 
-               /* Adjust upper bound if this trigram is present in required substring */
-               if (found[trgindex])
+               /*
+                * Adjust upper bound if trigram is upper bound of word for strict
+                * word similarity, or if trigram is present in required substring for
+                * plain word similarity
+                */
+               if ((flags & WORD_SIMILARITY_STRICT) ? (bounds[i] & TRGM_BOUND_RIGHT)
+                                                                                        : found[trgindex])
                {
                        int                     prev_lower,
                                                tmp_ulen2,
@@ -479,24 +555,35 @@ iterate_word_similarity(int *trg2indexes,
                        prev_lower = lower;
                        for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++)
                        {
-                               float           smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
+                               float           smlr_tmp;
                                int                     tmp_trgindex;
 
-                               if (smlr_tmp > smlr_cur)
-                               {
-                                       smlr_cur = smlr_tmp;
-                                       ulen2 = tmp_ulen2;
-                                       lower = tmp_lower;
-                                       count = tmp_count;
-                               }
-
                                /*
-                                * if we only check that word similarity is greater than
-                                * pg_trgm.word_similarity_threshold we do not need to
-                                * calculate a maximum similarity.
+                                * Adjust lower bound only if trigram is lower bound of word
+                                * for strict word similarity, or consider every trigram as
+                                * lower bound for plain word similarity.
                                 */
-                               if (check_only && smlr_cur >= word_similarity_threshold)
-                                       break;
+                               if (!(flags & WORD_SIMILARITY_STRICT)
+                                       || (bounds[tmp_lower] & TRGM_BOUND_LEFT))
+                               {
+                                       smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
+                                       if (smlr_tmp > smlr_cur)
+                                       {
+                                               smlr_cur = smlr_tmp;
+                                               ulen2 = tmp_ulen2;
+                                               lower = tmp_lower;
+                                               count = tmp_count;
+                                       }
+
+                                       /*
+                                        * If we only check that word similarity is greater than
+                                        * threshold we do not need to calculate a maximum
+                                        * similarity.
+                                        */
+                                       if ((flags & WORD_SIMILARITY_CHECK_ONLY)
+                                               && smlr_cur >= threshold)
+                                               break;
+                               }
 
                                tmp_trgindex = trg2indexes[tmp_lower];
                                if (lastpos[tmp_trgindex] == tmp_lower)
@@ -511,10 +598,9 @@ iterate_word_similarity(int *trg2indexes,
 
                        /*
                         * if we only check that word similarity is greater than
-                        * pg_trgm.word_similarity_threshold we do not need to calculate a
-                        * maximum similarity
+                        * threshold we do not need to calculate a maximum similarity.
                         */
-                       if (check_only && smlr_max >= word_similarity_threshold)
+                       if ((flags & WORD_SIMILARITY_CHECK_ONLY) && smlr_max >= threshold)
                                break;
 
                        for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++)
@@ -547,14 +633,13 @@ iterate_word_similarity(int *trg2indexes,
  *
  * str1: search pattern string, of length slen1 bytes.
  * str2: text in which we are looking for a word, of length slen2 bytes.
- * check_only: if true then only check existence of similar search pattern in
- *                        text.
+ * flags: set of boolean flags parametrizing similarity calculation.
  *
  * Returns word similarity.
  */
 static float4
 calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
-                                        bool check_only)
+                                        uint8 flags)
 {
        bool       *found;
        pos_trgm   *ptrg;
@@ -568,15 +653,20 @@ calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
                                ulen1;
        int                *trg2indexes;
        float4          result;
+       TrgmBound          *bounds;
 
        protect_out_of_mem(slen1 + slen2);
 
        /* Make positional trigrams */
        trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3);
        trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3);
+       if (flags & WORD_SIMILARITY_STRICT)
+               bounds = (TrgmBound *) palloc0(sizeof(TrgmBound) * (slen2 / 2 + 1) * 3);
+       else
+               bounds = NULL;
 
-       len1 = generate_trgm_only(trg1, str1, slen1);
-       len2 = generate_trgm_only(trg2, str2, slen2);
+       len1 = generate_trgm_only(trg1, str1, slen1, NULL);
+       len2 = generate_trgm_only(trg2, str2, slen2, bounds);
 
        ptrg = make_positional_trgm(trg1, len1, trg2, len2);
        len = len1 + len2;
@@ -622,7 +712,7 @@ calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
 
        /* Run iterative procedure to find maximum similarity with word */
        result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len,
-                                                                        check_only);
+                                                                        flags, bounds);
 
        pfree(trg2indexes);
        pfree(found);
@@ -1081,7 +1171,23 @@ word_similarity(PG_FUNCTION_ARGS)
 
        res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                                           VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
-                                                          false);
+                                                          0);
+
+       PG_FREE_IF_COPY(in1, 0);
+       PG_FREE_IF_COPY(in2, 1);
+       PG_RETURN_FLOAT4(res);
+}
+
+Datum
+strict_word_similarity(PG_FUNCTION_ARGS)
+{
+       text       *in1 = PG_GETARG_TEXT_PP(0);
+       text       *in2 = PG_GETARG_TEXT_PP(1);
+       float4          res;
+
+       res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
+                                                          VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
+                                                          WORD_SIMILARITY_STRICT);
 
        PG_FREE_IF_COPY(in1, 0);
        PG_FREE_IF_COPY(in2, 1);
@@ -1117,7 +1223,7 @@ word_similarity_op(PG_FUNCTION_ARGS)
 
        res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                                           VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
-                                                          true);
+                                                          WORD_SIMILARITY_CHECK_ONLY);
 
        PG_FREE_IF_COPY(in1, 0);
        PG_FREE_IF_COPY(in2, 1);
@@ -1133,7 +1239,7 @@ word_similarity_commutator_op(PG_FUNCTION_ARGS)
 
        res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                                           VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
-                                                          true);
+                                                          WORD_SIMILARITY_CHECK_ONLY);
 
        PG_FREE_IF_COPY(in1, 0);
        PG_FREE_IF_COPY(in2, 1);
@@ -1149,7 +1255,7 @@ word_similarity_dist_op(PG_FUNCTION_ARGS)
 
        res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                                           VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
-                                                          false);
+                                                          0);
 
        PG_FREE_IF_COPY(in1, 0);
        PG_FREE_IF_COPY(in2, 1);
@@ -1165,7 +1271,71 @@ word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
 
        res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                                           VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
-                                                          false);
+                                                          0);
+
+       PG_FREE_IF_COPY(in1, 0);
+       PG_FREE_IF_COPY(in2, 1);
+       PG_RETURN_FLOAT4(1.0 - res);
+}
+
+/*
+ * strict_word_similarity_op: implements "pattern <<% text".
+ *
+ * The similarity is computed in check-only mode, so the value obtained is
+ * only good for comparing against the threshold, not for reporting.
+ */
+Datum
+strict_word_similarity_op(PG_FUNCTION_ARGS)
+{
+       text       *pattern = PG_GETARG_TEXT_PP(0);
+       text       *target = PG_GETARG_TEXT_PP(1);
+       uint8           flags = WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT;
+       float4          sml;
+
+       sml = calc_word_similarity(VARDATA_ANY(pattern),
+                                                          VARSIZE_ANY_EXHDR(pattern),
+                                                          VARDATA_ANY(target),
+                                                          VARSIZE_ANY_EXHDR(target),
+                                                          flags);
+
+       PG_FREE_IF_COPY(pattern, 0);
+       PG_FREE_IF_COPY(target, 1);
+       PG_RETURN_BOOL(sml >= strict_word_similarity_threshold);
+}
+
+/*
+ * strict_word_similarity_commutator_op: implements "text %>> pattern".
+ *
+ * Same test as strict_word_similarity_op, but the search pattern arrives as
+ * the second SQL argument, so the arguments are swapped when calling
+ * calc_word_similarity().
+ */
+Datum
+strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
+{
+       text       *target = PG_GETARG_TEXT_PP(0);
+       text       *pattern = PG_GETARG_TEXT_PP(1);
+       uint8           flags = WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT;
+       float4          sml;
+
+       sml = calc_word_similarity(VARDATA_ANY(pattern),
+                                                          VARSIZE_ANY_EXHDR(pattern),
+                                                          VARDATA_ANY(target),
+                                                          VARSIZE_ANY_EXHDR(target),
+                                                          flags);
+
+       PG_FREE_IF_COPY(target, 0);
+       PG_FREE_IF_COPY(pattern, 1);
+       PG_RETURN_BOOL(sml >= strict_word_similarity_threshold);
+}
+
+/*
+ * strict_word_similarity_dist_op: implements "pattern <<<-> text".
+ *
+ * Returns the distance, i.e. one minus the strict word similarity of the
+ * pattern (first argument) to the text (second argument).
+ */
+Datum
+strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
+{
+       text       *pattern = PG_GETARG_TEXT_PP(0);
+       text       *target = PG_GETARG_TEXT_PP(1);
+       float4          sml;
+
+       sml = calc_word_similarity(VARDATA_ANY(pattern),
+                                                          VARSIZE_ANY_EXHDR(pattern),
+                                                          VARDATA_ANY(target),
+                                                          VARSIZE_ANY_EXHDR(target),
+                                                          WORD_SIMILARITY_STRICT);
+
+       PG_FREE_IF_COPY(pattern, 0);
+       PG_FREE_IF_COPY(target, 1);
+       PG_RETURN_FLOAT4(1.0 - sml);
+}
+
+Datum
+strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
+{
+       text       *in1 = PG_GETARG_TEXT_PP(0);
+       text       *in2 = PG_GETARG_TEXT_PP(1);
+       float4          res;
+
+       res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
+                                                          VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
+                                                          WORD_SIMILARITY_STRICT);
 
        PG_FREE_IF_COPY(in1, 0);
        PG_FREE_IF_COPY(in2, 1);
index b5d893c9fbb070ef6b6d261dc7306ac386bf30dd..8f395296d8f58f7a6965634cc63a9a9cc6b45c02 100644 (file)
        the explanation below.
       </entry>
      </row>
+     <row>
+      <entry>
+       <function>strict_word_similarity(text, text)</function>
+       <indexterm><primary>strict_word_similarity</primary></indexterm>
+      </entry>
+      <entry><type>real</type></entry>
+      <entry>
+       Same as <function>word_similarity(text, text)</function>, but forces
+       extent boundaries to match word boundaries.
+      </entry>
+     </row>
      <row>
       <entry><function>show_limit()</function><indexterm><primary>show_limit</primary></indexterm></entry>
       <entry><type>real</type></entry>
    a part of the word.
   </para>
 
+  <para>
+   At the same time, <function>strict_word_similarity(text, text)</function>
+   has to select an extent that matches word boundaries.  In the example above,
+   <function>strict_word_similarity(text, text)</function> would select the
+   extent <literal>{"  w"," wo","wor","ord","rds","ds "}</literal>, which
+   corresponds to the whole word <literal>'words'</literal>.
+
+<programlisting>
+# SELECT strict_word_similarity('word', 'two words'), similarity('word', 'words');
+ strict_word_similarity | similarity
+------------------------+------------
+               0.571429 |   0.571429
+(1 row)
+</programlisting>
+  </para>
+
+  <para>
+   Thus, the <function>strict_word_similarity(text, text)</function> function
+   is useful for finding the similarity to whole words, while
+   <function>word_similarity(text, text)</function> is more suitable for
+   finding the similarity to parts of words.
+  </para>
+
   <table id="pgtrgm-op-table">
    <title><filename>pg_trgm</filename> Operators</title>
    <tgroup cols="3">
        Commutator of the <literal>&lt;%</literal> operator.
       </entry>
      </row>
+     <row>
+      <entry><type>text</type> <literal>&lt;&lt;%</literal> <type>text</type></entry>
+      <entry><type>boolean</type></entry>
+      <entry>
+       Returns <literal>true</literal> if its second argument has a continuous
+       extent of an ordered trigram set that matches word boundaries,
+       and its similarity to the trigram set of the first argument is greater
+       than or equal to the current strict word similarity threshold set by the
+       <varname>pg_trgm.strict_word_similarity_threshold</varname> parameter.
+      </entry>
+     </row>
+     <row>
+      <entry><type>text</type> <literal>%&gt;&gt;</literal> <type>text</type></entry>
+      <entry><type>boolean</type></entry>
+      <entry>
+       Commutator of the <literal>&lt;&lt;%</literal> operator.
+      </entry>
+     </row>
      <row>
       <entry><type>text</type> <literal>&lt;-&gt;</literal> <type>text</type></entry>
       <entry><type>real</type></entry>
        Commutator of the <literal>&lt;&lt;-&gt;</literal> operator.
       </entry>
      </row>
+     <row>
+      <entry>
+       <type>text</type> <literal>&lt;&lt;&lt;-&gt;</literal> <type>text</type>
+      </entry>
+      <entry><type>real</type></entry>
+      <entry>
+       Returns the <quote>distance</quote> between the arguments, that is
+       one minus the <function>strict_word_similarity()</function> value.
+      </entry>
+     </row>
+     <row>
+      <entry>
+       <type>text</type> <literal>&lt;-&gt;&gt;&gt;</literal> <type>text</type>
+      </entry>
+      <entry><type>real</type></entry>
+      <entry>
+       Commutator of the <literal>&lt;&lt;&lt;-&gt;</literal> operator.
+      </entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
@@ -322,12 +393,19 @@ SELECT t, t &lt;-&gt; '<replaceable>word</replaceable>' AS dist
 
   <para>
    Also you can use an index on the <structfield>t</structfield> column for word
-   similarity.  For example:
+   similarity or strict word similarity.  Typical queries are:
 <programlisting>
 SELECT t, word_similarity('<replaceable>word</replaceable>', t) AS sml
   FROM test_trgm
   WHERE '<replaceable>word</replaceable>' &lt;% t
   ORDER BY sml DESC, t;
+</programlisting>
+   and
+<programlisting>
+SELECT t, strict_word_similarity('<replaceable>word</replaceable>', t) AS sml
+  FROM test_trgm
+  WHERE '<replaceable>word</replaceable>' &lt;&lt;% t
+  ORDER BY sml DESC, t;
 </programlisting>
    This will return all values in the text column for which there is a
    continuous extent in the corresponding ordered trigram set that is
@@ -337,11 +415,17 @@ SELECT t, word_similarity('<replaceable>word</replaceable>', t) AS sml
   </para>
 
   <para>
-   A variant of the above query is
+   Possible variants of the above queries are:
 <programlisting>
 SELECT t, '<replaceable>word</replaceable>' &lt;&lt;-&gt; t AS dist
   FROM test_trgm
   ORDER BY dist LIMIT 10;
+</programlisting>
+   and
+<programlisting>
+SELECT t, '<replaceable>word</replaceable>' &lt;&lt;&lt;-&gt; t AS dist
+  FROM test_trgm
+  ORDER BY dist LIMIT 10;
 </programlisting>
    This can be implemented quite efficiently by GiST indexes, but not
    by GIN indexes.