OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES)
EXTENSION = pg_trgm
-DATA = pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
+DATA = pg_trgm--1.3--1.4.sql \
+ pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql
PGFILEDESC = "pg_trgm - trigram matching"
-REGRESS = pg_trgm pg_word_trgm
+REGRESS = pg_trgm pg_word_trgm pg_strict_word_trgm
ifdef USE_PGXS
PG_CONFIG = pg_config
--- /dev/null
+DROP INDEX trgm_idx2;
+\copy test_trgm3 from 'data/trgm2.data'
+ERROR: relation "test_trgm3" does not exist
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+ t | sml
+-------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+ t | sml
+------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+(4 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+ t | sml
+-------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+ t | sml
+------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+(4 rows)
+
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+ ?column? | t
+----------+--------------------------
+ 0 | Alaikallupoddakulam
+ 0.25 | Alaikallupodda Alankulam
+ 0.32 | Alaikalluppodda Kulam
+ 0.615385 | Mulaikallu Kulam
+ 0.724138 | Koraikalapu Kulam
+ 0.75 | Vaikaliththevakulam
+ 0.766667 | Karaivaikal Kulam
+(7 rows)
+
+create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops);
+set enable_seqscan=off;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+ t | sml
+-------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+ t | sml
+------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+(4 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+ t | sml
+-------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+ t | sml
+------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+(4 rows)
+
+explain (costs off)
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+ QUERY PLAN
+---------------------------------------------------------
+ Limit
+ -> Index Scan using trgm_idx2 on test_trgm2
+ Order By: (t <->>> 'Alaikallupoddakulam'::text)
+(3 rows)
+
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+ ?column? | t
+----------+--------------------------
+ 0 | Alaikallupoddakulam
+ 0.25 | Alaikallupodda Alankulam
+ 0.32 | Alaikalluppodda Kulam
+ 0.615385 | Mulaikallu Kulam
+ 0.724138 | Koraikalapu Kulam
+ 0.75 | Vaikaliththevakulam
+ 0.766667 | Karaivaikal Kulam
+(7 rows)
+
+drop index trgm_idx2;
+create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops);
+set enable_seqscan=off;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+ t | sml
+-------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+ t | sml
+------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+(4 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+ t | sml
+-------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+(17 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+ t | sml
+------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+(4 rows)
+
+set "pg_trgm.strict_word_similarity_threshold" to 0.4;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+ t | sml
+-------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+ Zabaykal | 0.454545
+ Air Bakal-kecil | 0.444444
+ Bakal | 0.444444
+ Bakal Batu | 0.444444
+ Bakal Dos | 0.444444
+ Bakal Julu | 0.444444
+ Bakal Khel | 0.444444
+ Bakal Lama | 0.444444
+ Bakal Tres | 0.444444
+ Bakal Uno | 0.444444
+ Daang Bakal | 0.444444
+ Desa Bakal | 0.444444
+ Eat Bakal | 0.444444
+ Gunung Bakal | 0.444444
+ Sidi Bakal | 0.444444
+ Stantsiya Bakal | 0.444444
+ Sungai Bakal | 0.444444
+ Talang Bakal | 0.444444
+ Uruk Bakal | 0.444444
+ Zaouia Oulad Bakal | 0.444444
+ Baykalovskiy | 0.428571
+ Baykalovskiy Rayon | 0.428571
+ Baikal | 0.4
+ Baikal Airfield | 0.4
+ Baikal Business Centre | 0.4
+ Baikal Hotel Moscow | 0.4
+ Baikal Listvyanka Hotel | 0.4
+ Baikal Mountains | 0.4
+ Baikal Plaza | 0.4
+ Bajkal | 0.4
+ Bankal | 0.4
+ Bankal School | 0.4
+ Barkal | 0.4
+ Jabal Barkal | 0.4
+ Lake Baikal | 0.4
+ Oulad el Bakkal | 0.4
+ Sidi Mohammed Bakkal | 0.4
+(54 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+ t | sml
+------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+ Kabikala | 0.461538
+(5 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+ t | sml
+-------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+ Zabaykal | 0.454545
+ Air Bakal-kecil | 0.444444
+ Bakal | 0.444444
+ Bakal Batu | 0.444444
+ Bakal Dos | 0.444444
+ Bakal Julu | 0.444444
+ Bakal Khel | 0.444444
+ Bakal Lama | 0.444444
+ Bakal Tres | 0.444444
+ Bakal Uno | 0.444444
+ Daang Bakal | 0.444444
+ Desa Bakal | 0.444444
+ Eat Bakal | 0.444444
+ Gunung Bakal | 0.444444
+ Sidi Bakal | 0.444444
+ Stantsiya Bakal | 0.444444
+ Sungai Bakal | 0.444444
+ Talang Bakal | 0.444444
+ Uruk Bakal | 0.444444
+ Zaouia Oulad Bakal | 0.444444
+ Baykalovskiy | 0.428571
+ Baykalovskiy Rayon | 0.428571
+ Baikal | 0.4
+ Baikal Airfield | 0.4
+ Baikal Business Centre | 0.4
+ Baikal Hotel Moscow | 0.4
+ Baikal Listvyanka Hotel | 0.4
+ Baikal Mountains | 0.4
+ Baikal Plaza | 0.4
+ Bajkal | 0.4
+ Bankal | 0.4
+ Bankal School | 0.4
+ Barkal | 0.4
+ Jabal Barkal | 0.4
+ Lake Baikal | 0.4
+ Oulad el Bakkal | 0.4
+ Sidi Mohammed Bakkal | 0.4
+(54 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+ t | sml
+------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+ Kabikala | 0.461538
+(5 rows)
+
+set "pg_trgm.strict_word_similarity_threshold" to 0.2;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+ t | sml
+-----------------------------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+ Zabaykal | 0.454545
+ Air Bakal-kecil | 0.444444
+ Bakal | 0.444444
+ Bakal Batu | 0.444444
+ Bakal Dos | 0.444444
+ Bakal Julu | 0.444444
+ Bakal Khel | 0.444444
+ Bakal Lama | 0.444444
+ Bakal Tres | 0.444444
+ Bakal Uno | 0.444444
+ Daang Bakal | 0.444444
+ Desa Bakal | 0.444444
+ Eat Bakal | 0.444444
+ Gunung Bakal | 0.444444
+ Sidi Bakal | 0.444444
+ Stantsiya Bakal | 0.444444
+ Sungai Bakal | 0.444444
+ Talang Bakal | 0.444444
+ Uruk Bakal | 0.444444
+ Zaouia Oulad Bakal | 0.444444
+ Baykalovskiy | 0.428571
+ Baykalovskiy Rayon | 0.428571
+ Baikal | 0.4
+ Baikal Airfield | 0.4
+ Baikal Business Centre | 0.4
+ Baikal Hotel Moscow | 0.4
+ Baikal Listvyanka Hotel | 0.4
+ Baikal Mountains | 0.4
+ Baikal Plaza | 0.4
+ Bajkal | 0.4
+ Bankal | 0.4
+ Bankal School | 0.4
+ Barkal | 0.4
+ Jabal Barkal | 0.4
+ Lake Baikal | 0.4
+ Oulad el Bakkal | 0.4
+ Sidi Mohammed Bakkal | 0.4
+ Bay of Backaland | 0.375
+ Boikalakalawa Bay | 0.375
+ Waikalabubu Bay | 0.375
+ Bairkal | 0.363636
+ Bairkal Dhora | 0.363636
+ Bairkal Jabal | 0.363636
+ Batikal | 0.363636
+ Bakaleyka | 0.307692
+ Bakkalmal | 0.307692
+ Bikal | 0.3
+ Al Barkali | 0.285714
+ Zabaykalka | 0.285714
+ Baidal | 0.272727
+ Baihal | 0.272727
+ Baipal | 0.272727
+ Bakala | 0.272727
+ Bakala Koupi | 0.272727
+ Bakale | 0.272727
+ Bakali | 0.272727
+ Bakall | 0.272727
+ Bakaly | 0.272727
+ Bakaly TV Mast | 0.272727
+ Buur Bakale | 0.272727
+ Gory Bakaly | 0.272727
+ Kusu-Bakali | 0.272727
+ Kwala Bakala | 0.272727
+ Mbay Bakala | 0.272727
+ Ngao Bakala | 0.272727
+ Sidi Mohammed el Bakali | 0.272727
+ Sopka Bakaly | 0.272727
+ Sungai Bakala | 0.272727
+ Urochishche Bakaly | 0.272727
+ Alue Bakkala | 0.25
+ Azib el Bakkali | 0.25
+ Ba Kaliin | 0.25
+ Baikaluobbal | 0.25
+ Bakalam | 0.25
+ Bakalan | 0.25
+ Bakalan Barat | 0.25
+ Bakalan Dua | 0.25
+ Bakalan Kidul | 0.25
+ Bakalan Kulon | 0.25
+ Bakalan Lor | 0.25
+ Bakalan River | 0.25
+ Bakalan Tengah | 0.25
+ Bakalan Wetan | 0.25
+ Bakalao Asibi Point | 0.25
+ Bakalao Point | 0.25
+ Bakalar Air Force Base (historical) | 0.25
+ Bakalar Lake | 0.25
+ Bakalar Library | 0.25
+ Bakalda | 0.25
+ Bakaldy | 0.25
+ Bakaley | 0.25
+ Bakalha | 0.25
+ Bakalia Char | 0.25
+ Bakalka | 0.25
+ Bakalod Island | 0.25
+ Bakalou | 0.25
+ Bakalua | 0.25
+ Bakalum | 0.25
+ Bakkala Cemetery | 0.25
+ Bankali | 0.25
+ Barkala | 0.25
+ Barkala Park | 0.25
+ Barkala Rao | 0.25
+ Barkala Reserved Forest | 0.25
+ Barkald | 0.25
+ Barkald stasjon | 0.25
+ Barkale | 0.25
+ Barkali | 0.25
+ Baukala | 0.25
+ Buur Bakaley | 0.25
+ Columbus Bakalar Municipal Airport | 0.25
+ Dakshin Bakalia | 0.25
+ Danau Bakalan | 0.25
+ Desa Bakalan | 0.25
+ Gunung Bakalan | 0.25
+ Kali Bakalan | 0.25
+ Khrebet Batkali | 0.25
+ Kordon Barkalo | 0.25
+ Krajan Bakalan | 0.25
+ Ovrag Bakalda | 0.25
+ Pulau Bakalan | 0.25
+ Selat Bakalan | 0.25
+ Teluk Bakalan | 0.25
+ Tukad Bakalan | 0.25
+ Urochishche Batkali | 0.25
+ Babakale | 0.230769
+ Babakalo | 0.230769
+ Bagkalen | 0.230769
+ Bakalalan Airport | 0.230769
+ Bakalang | 0.230769
+ Bakalarr | 0.230769
+ Bakalawa | 0.230769
+ Bakaldum | 0.230769
+ Bakaleko | 0.230769
+ Bakalica | 0.230769
+ Bakalino | 0.230769
+ Bakalite | 0.230769
+ Bakalovo | 0.230769
+ Bakalsen | 0.230769
+ Bakaltua Bank | 0.230769
+ Bakalukalu | 0.230769
+ Bakalukalu Shan | 0.230769
+ Bakkalia | 0.230769
+ Bankalol | 0.230769
+ Barkaleh | 0.230769
+ Barkalne | 0.230769
+ Barkalow Hollow | 0.230769
+ Bawkalut | 0.230769
+ Bawkalut Chaung | 0.230769
+ Clifton T Barkalow Elementary School | 0.230769
+ Efrejtor Bakalovo | 0.230769
+ Efreytor-Bakalovo | 0.230769
+ Gora Barkalyu | 0.230769
+ Ile Bakalibu | 0.230769
+ Khor Bakallii | 0.230769
+ Nehalla Bankalah Reserved Forest | 0.230769
+ Ragha Bakalzai | 0.230769
+ Tanjung Batikala | 0.230769
+ Teluk Bakalang | 0.230769
+ Urochishche Bakalovo | 0.230769
+ Banjar Kubakal | 0.222222
+ Darreh Pumba Kal | 0.222222
+ Zabaykalovskiy | 0.222222
+ Aparthotel Adagio Premium Dubai Al Barsha | 0.214286
+ Babakalia | 0.214286
+ Bahkalleh | 0.214286
+ Baikalovo | 0.214286
+ Bakalaale | 0.214286
+ Bakalabwa Pans | 0.214286
+ Bakalaeng | 0.214286
+ Bakalauri | 0.214286
+ Bakalbhar | 0.214286
+ Bakalbuah | 0.214286
+ Bakalerek | 0.214286
+ Bakalinga | 0.214286
+ Bakalipur | 0.214286
+ Bakaljaya | 0.214286
+ Bakalnica | 0.214286
+ Bakalongo | 0.214286
+ Bakalovka | 0.214286
+ Bakalrejo | 0.214286
+ Bakkalale | 0.214286
+ Bambakala | 0.214286
+ Bambakalo | 0.214286
+ Barkalare | 0.214286
+ Barkalden | 0.214286
+ Barkallou | 0.214286
+ Barkalova | 0.214286
+ Baskalino | 0.214286
+ Baskaltsi | 0.214286
+ Desa Bakalrejo | 0.214286
+ Doubletree By Hilton Dubai Al Barsha Hotel and Res | 0.214286
+ Doubletree By Hilton Hotel and Apartments Dubai Al Barsha | 0.214286
+ Doubletree Res.Dubai-Al Barsha | 0.214286
+ Gora Barkalova | 0.214286
+ Holiday Inn Dubai Al Barsha | 0.214286
+ Novotel Dubai Al Barsha | 0.214286
+ Park Inn By Radisson Dubai Al Barsha | 0.214286
+ Ramee Rose Hotel Dubai Al Barsha | 0.214286
+ Ras Barkallah | 0.214286
+ Salu Bakalaeng | 0.214286
+ Tanjung Bakalinga | 0.214286
+ Tubu Bakalekuk | 0.214286
+ Baikalakko | 0.2
+ Bakalauri1 | 0.2
+ Bakalauri2 | 0.2
+ Bakalauri3 | 0.2
+ Bakalauri4 | 0.2
+ Bakalauri5 | 0.2
+ Bakalauri6 | 0.2
+ Bakalauri7 | 0.2
+ Bakalauri8 | 0.2
+ Bakalauri9 | 0.2
+ Bakaldalam | 0.2
+ Bakaldukuh | 0.2
+ Bakaloolay | 0.2
+ Bakalovina | 0.2
+ Bakalpokok | 0.2
+ Bakalshile | 0.2
+ Bakalukudu | 0.2
+ Bambakalia | 0.2
+ Barkaladja Pool | 0.2
+ Barkalovka | 0.2
+ Bavkalasis | 0.2
+ Gora Bakalyadyr | 0.2
+ Kampong Bakaladong | 0.2
+ Urochishche Bakalarnyn-Ayasy | 0.2
+ Urochishche Bakaldikha | 0.2
+(245 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+ t | sml
+----------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+ Kabikala | 0.461538
+ Ntombankala School | 0.375
+ Nehalla Bankalah Reserved Forest | 0.357143
+ Jabba Kalai | 0.333333
+ Kambakala | 0.333333
+ Ker Samba Kalla | 0.333333
+ Bankal | 0.307692
+ Bankal School | 0.307692
+ Kanampumba-Kalawa | 0.307692
+ Bankali | 0.285714
+ Mwalaba-Kalamba | 0.285714
+ Tumba-Kalamba | 0.285714
+ Darreh Pumba Kal | 0.272727
+ Bankalol | 0.266667
+ Dabakala | 0.266667
+ Purba Kalaujan | 0.266667
+ Kali Purbakala | 0.263158
+ Dalabakala | 0.25
+ Demba Kali | 0.25
+ Gagaba Kalo | 0.25
+ Golba Kalo | 0.25
+ Habakkala | 0.25
+ Kali Bakalan | 0.25
+ Kimbakala | 0.25
+ Kombakala | 0.25
+ Jaba Kalle | 0.235294
+ Kaikalahun Indian Reserve 25 | 0.235294
+ Kwala Bakala | 0.235294
+ Gereba Kaler | 0.230769
+ Goth Soba Kaloi | 0.230769
+ Guba Kaldo | 0.230769
+ Gulba Kalle | 0.230769
+ Guba Kalgalaksha | 0.222222
+ Kalibakalako | 0.222222
+ Ba Kaliin | 0.214286
+ Bakala | 0.214286
+ Bakala Koupi | 0.214286
+ Bikala | 0.214286
+ Bikala Madila | 0.214286
+ Bugor Arba-Kalgan | 0.214286
+ Bumba-Kaloki | 0.214286
+ Guba Kalita | 0.214286
+ Kamba-Kalele | 0.214286
+ Mbay Bakala | 0.214286
+ Ngao Bakala | 0.214286
+ Sungai Bakala | 0.214286
+ Fayzabadkala | 0.210526
+ Gora Fayzabadkala | 0.210526
+ Alue Bakkala | 0.2
+ Bakkala Cemetery | 0.2
+ Barkala | 0.2
+ Barkala Park | 0.2
+ Barkala Rao | 0.2
+ Barkala Reserved Forest | 0.2
+ Baukala | 0.2
+ Beikala | 0.2
+ Bomba-Kalende | 0.2
+ Bumba-Kalumba | 0.2
+ Haikala | 0.2
+ Kahambikalela | 0.2
+ Kaikalapettai | 0.2
+ Kaikale | 0.2
+ Laikala | 0.2
+ Maikala Range | 0.2
+ Matamba-Kalenga | 0.2
+ Matamba-Kalenge | 0.2
+ Naikala | 0.2
+ Tumba-Kalumba | 0.2
+ Tumba-Kalunga | 0.2
+ Waikala | 0.2
+(74 rows)
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+ t | sml
+-----------------------------------------------------------+----------
+ Baykal | 1
+ Boloto Baykal | 1
+ Boloto Malyy Baykal | 1
+ Kolkhoz Krasnyy Baykal | 1
+ Ozero Baykal | 1
+ Polevoy Stan Baykal | 1
+ Port Baykal | 1
+ Prud Novyy Baykal | 1
+ Sanatoriy Baykal | 1
+ Stantsiya Baykal | 1
+ Zaliv Baykal | 1
+ Baykalo-Amurskaya Zheleznaya Doroga | 0.666667
+ Baykalovo | 0.545455
+ Baykalsko | 0.545455
+ Maloye Baykalovo | 0.545455
+ Baykalikha | 0.5
+ Baykalovsk | 0.5
+ Zabaykal | 0.454545
+ Air Bakal-kecil | 0.444444
+ Bakal | 0.444444
+ Bakal Batu | 0.444444
+ Bakal Dos | 0.444444
+ Bakal Julu | 0.444444
+ Bakal Khel | 0.444444
+ Bakal Lama | 0.444444
+ Bakal Tres | 0.444444
+ Bakal Uno | 0.444444
+ Daang Bakal | 0.444444
+ Desa Bakal | 0.444444
+ Eat Bakal | 0.444444
+ Gunung Bakal | 0.444444
+ Sidi Bakal | 0.444444
+ Stantsiya Bakal | 0.444444
+ Sungai Bakal | 0.444444
+ Talang Bakal | 0.444444
+ Uruk Bakal | 0.444444
+ Zaouia Oulad Bakal | 0.444444
+ Baykalovskiy | 0.428571
+ Baykalovskiy Rayon | 0.428571
+ Baikal | 0.4
+ Baikal Airfield | 0.4
+ Baikal Business Centre | 0.4
+ Baikal Hotel Moscow | 0.4
+ Baikal Listvyanka Hotel | 0.4
+ Baikal Mountains | 0.4
+ Baikal Plaza | 0.4
+ Bajkal | 0.4
+ Bankal | 0.4
+ Bankal School | 0.4
+ Barkal | 0.4
+ Jabal Barkal | 0.4
+ Lake Baikal | 0.4
+ Oulad el Bakkal | 0.4
+ Sidi Mohammed Bakkal | 0.4
+ Bay of Backaland | 0.375
+ Boikalakalawa Bay | 0.375
+ Waikalabubu Bay | 0.375
+ Bairkal | 0.363636
+ Bairkal Dhora | 0.363636
+ Bairkal Jabal | 0.363636
+ Batikal | 0.363636
+ Bakaleyka | 0.307692
+ Bakkalmal | 0.307692
+ Bikal | 0.3
+ Al Barkali | 0.285714
+ Zabaykalka | 0.285714
+ Baidal | 0.272727
+ Baihal | 0.272727
+ Baipal | 0.272727
+ Bakala | 0.272727
+ Bakala Koupi | 0.272727
+ Bakale | 0.272727
+ Bakali | 0.272727
+ Bakall | 0.272727
+ Bakaly | 0.272727
+ Bakaly TV Mast | 0.272727
+ Buur Bakale | 0.272727
+ Gory Bakaly | 0.272727
+ Kusu-Bakali | 0.272727
+ Kwala Bakala | 0.272727
+ Mbay Bakala | 0.272727
+ Ngao Bakala | 0.272727
+ Sidi Mohammed el Bakali | 0.272727
+ Sopka Bakaly | 0.272727
+ Sungai Bakala | 0.272727
+ Urochishche Bakaly | 0.272727
+ Alue Bakkala | 0.25
+ Azib el Bakkali | 0.25
+ Ba Kaliin | 0.25
+ Baikaluobbal | 0.25
+ Bakalam | 0.25
+ Bakalan | 0.25
+ Bakalan Barat | 0.25
+ Bakalan Dua | 0.25
+ Bakalan Kidul | 0.25
+ Bakalan Kulon | 0.25
+ Bakalan Lor | 0.25
+ Bakalan River | 0.25
+ Bakalan Tengah | 0.25
+ Bakalan Wetan | 0.25
+ Bakalao Asibi Point | 0.25
+ Bakalao Point | 0.25
+ Bakalar Air Force Base (historical) | 0.25
+ Bakalar Lake | 0.25
+ Bakalar Library | 0.25
+ Bakalda | 0.25
+ Bakaldy | 0.25
+ Bakaley | 0.25
+ Bakalha | 0.25
+ Bakalia Char | 0.25
+ Bakalka | 0.25
+ Bakalod Island | 0.25
+ Bakalou | 0.25
+ Bakalua | 0.25
+ Bakalum | 0.25
+ Bakkala Cemetery | 0.25
+ Bankali | 0.25
+ Barkala | 0.25
+ Barkala Park | 0.25
+ Barkala Rao | 0.25
+ Barkala Reserved Forest | 0.25
+ Barkald | 0.25
+ Barkald stasjon | 0.25
+ Barkale | 0.25
+ Barkali | 0.25
+ Baukala | 0.25
+ Buur Bakaley | 0.25
+ Columbus Bakalar Municipal Airport | 0.25
+ Dakshin Bakalia | 0.25
+ Danau Bakalan | 0.25
+ Desa Bakalan | 0.25
+ Gunung Bakalan | 0.25
+ Kali Bakalan | 0.25
+ Khrebet Batkali | 0.25
+ Kordon Barkalo | 0.25
+ Krajan Bakalan | 0.25
+ Ovrag Bakalda | 0.25
+ Pulau Bakalan | 0.25
+ Selat Bakalan | 0.25
+ Teluk Bakalan | 0.25
+ Tukad Bakalan | 0.25
+ Urochishche Batkali | 0.25
+ Babakale | 0.230769
+ Babakalo | 0.230769
+ Bagkalen | 0.230769
+ Bakalalan Airport | 0.230769
+ Bakalang | 0.230769
+ Bakalarr | 0.230769
+ Bakalawa | 0.230769
+ Bakaldum | 0.230769
+ Bakaleko | 0.230769
+ Bakalica | 0.230769
+ Bakalino | 0.230769
+ Bakalite | 0.230769
+ Bakalovo | 0.230769
+ Bakalsen | 0.230769
+ Bakaltua Bank | 0.230769
+ Bakalukalu | 0.230769
+ Bakalukalu Shan | 0.230769
+ Bakkalia | 0.230769
+ Bankalol | 0.230769
+ Barkaleh | 0.230769
+ Barkalne | 0.230769
+ Barkalow Hollow | 0.230769
+ Bawkalut | 0.230769
+ Bawkalut Chaung | 0.230769
+ Clifton T Barkalow Elementary School | 0.230769
+ Efrejtor Bakalovo | 0.230769
+ Efreytor-Bakalovo | 0.230769
+ Gora Barkalyu | 0.230769
+ Ile Bakalibu | 0.230769
+ Khor Bakallii | 0.230769
+ Nehalla Bankalah Reserved Forest | 0.230769
+ Ragha Bakalzai | 0.230769
+ Tanjung Batikala | 0.230769
+ Teluk Bakalang | 0.230769
+ Urochishche Bakalovo | 0.230769
+ Banjar Kubakal | 0.222222
+ Darreh Pumba Kal | 0.222222
+ Zabaykalovskiy | 0.222222
+ Aparthotel Adagio Premium Dubai Al Barsha | 0.214286
+ Babakalia | 0.214286
+ Bahkalleh | 0.214286
+ Baikalovo | 0.214286
+ Bakalaale | 0.214286
+ Bakalabwa Pans | 0.214286
+ Bakalaeng | 0.214286
+ Bakalauri | 0.214286
+ Bakalbhar | 0.214286
+ Bakalbuah | 0.214286
+ Bakalerek | 0.214286
+ Bakalinga | 0.214286
+ Bakalipur | 0.214286
+ Bakaljaya | 0.214286
+ Bakalnica | 0.214286
+ Bakalongo | 0.214286
+ Bakalovka | 0.214286
+ Bakalrejo | 0.214286
+ Bakkalale | 0.214286
+ Bambakala | 0.214286
+ Bambakalo | 0.214286
+ Barkalare | 0.214286
+ Barkalden | 0.214286
+ Barkallou | 0.214286
+ Barkalova | 0.214286
+ Baskalino | 0.214286
+ Baskaltsi | 0.214286
+ Desa Bakalrejo | 0.214286
+ Doubletree By Hilton Dubai Al Barsha Hotel and Res | 0.214286
+ Doubletree By Hilton Hotel and Apartments Dubai Al Barsha | 0.214286
+ Doubletree Res.Dubai-Al Barsha | 0.214286
+ Gora Barkalova | 0.214286
+ Holiday Inn Dubai Al Barsha | 0.214286
+ Novotel Dubai Al Barsha | 0.214286
+ Park Inn By Radisson Dubai Al Barsha | 0.214286
+ Ramee Rose Hotel Dubai Al Barsha | 0.214286
+ Ras Barkallah | 0.214286
+ Salu Bakalaeng | 0.214286
+ Tanjung Bakalinga | 0.214286
+ Tubu Bakalekuk | 0.214286
+ Baikalakko | 0.2
+ Bakalauri1 | 0.2
+ Bakalauri2 | 0.2
+ Bakalauri3 | 0.2
+ Bakalauri4 | 0.2
+ Bakalauri5 | 0.2
+ Bakalauri6 | 0.2
+ Bakalauri7 | 0.2
+ Bakalauri8 | 0.2
+ Bakalauri9 | 0.2
+ Bakaldalam | 0.2
+ Bakaldukuh | 0.2
+ Bakaloolay | 0.2
+ Bakalovina | 0.2
+ Bakalpokok | 0.2
+ Bakalshile | 0.2
+ Bakalukudu | 0.2
+ Bambakalia | 0.2
+ Barkaladja Pool | 0.2
+ Barkalovka | 0.2
+ Bavkalasis | 0.2
+ Gora Bakalyadyr | 0.2
+ Kampong Bakaladong | 0.2
+ Urochishche Bakalarnyn-Ayasy | 0.2
+ Urochishche Bakaldikha | 0.2
+(245 rows)
+
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+ t | sml
+----------------------------------+----------
+ Kabankala | 1
+ Kabankalan City Public Plaza | 0.75
+ Abankala | 0.583333
+ Kabakala | 0.583333
+ Kabikala | 0.461538
+ Ntombankala School | 0.375
+ Nehalla Bankalah Reserved Forest | 0.357143
+ Jabba Kalai | 0.333333
+ Kambakala | 0.333333
+ Ker Samba Kalla | 0.333333
+ Bankal | 0.307692
+ Bankal School | 0.307692
+ Kanampumba-Kalawa | 0.307692
+ Bankali | 0.285714
+ Mwalaba-Kalamba | 0.285714
+ Tumba-Kalamba | 0.285714
+ Darreh Pumba Kal | 0.272727
+ Bankalol | 0.266667
+ Dabakala | 0.266667
+ Purba Kalaujan | 0.266667
+ Kali Purbakala | 0.263158
+ Dalabakala | 0.25
+ Demba Kali | 0.25
+ Gagaba Kalo | 0.25
+ Golba Kalo | 0.25
+ Habakkala | 0.25
+ Kali Bakalan | 0.25
+ Kimbakala | 0.25
+ Kombakala | 0.25
+ Jaba Kalle | 0.235294
+ Kaikalahun Indian Reserve 25 | 0.235294
+ Kwala Bakala | 0.235294
+ Gereba Kaler | 0.230769
+ Goth Soba Kaloi | 0.230769
+ Guba Kaldo | 0.230769
+ Gulba Kalle | 0.230769
+ Guba Kalgalaksha | 0.222222
+ Kalibakalako | 0.222222
+ Ba Kaliin | 0.214286
+ Bakala | 0.214286
+ Bakala Koupi | 0.214286
+ Bikala | 0.214286
+ Bikala Madila | 0.214286
+ Bugor Arba-Kalgan | 0.214286
+ Bumba-Kaloki | 0.214286
+ Guba Kalita | 0.214286
+ Kamba-Kalele | 0.214286
+ Mbay Bakala | 0.214286
+ Ngao Bakala | 0.214286
+ Sungai Bakala | 0.214286
+ Fayzabadkala | 0.210526
+ Gora Fayzabadkala | 0.210526
+ Alue Bakkala | 0.2
+ Bakkala Cemetery | 0.2
+ Barkala | 0.2
+ Barkala Park | 0.2
+ Barkala Rao | 0.2
+ Barkala Reserved Forest | 0.2
+ Baukala | 0.2
+ Beikala | 0.2
+ Bomba-Kalende | 0.2
+ Bumba-Kalumba | 0.2
+ Haikala | 0.2
+ Kahambikalela | 0.2
+ Kaikalapettai | 0.2
+ Kaikale | 0.2
+ Laikala | 0.2
+ Maikala Range | 0.2
+ Matamba-Kalenga | 0.2
+ Matamba-Kalenge | 0.2
+ Naikala | 0.2
+ Tumba-Kalumba | 0.2
+ Tumba-Kalunga | 0.2
+ Waikala | 0.2
+(74 rows)
+
--- /dev/null
+/* contrib/pg_trgm/pg_trgm--1.3--1.4.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.4'" to load this file. \quit
+
+CREATE FUNCTION strict_word_similarity(text,text) -- no link symbol is given, so the C entry point is looked up under this same name
+RETURNS float4 -- similarity score; the regression output in this change reports 1 for an exact word match
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; -- declared IMMUTABLE, unlike the STABLE boolean *_op functions in this script
+
+CREATE FUNCTION strict_word_similarity_op(text,text) -- procedure behind the <<% operator
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE PARALLEL SAFE; -- stable because depends on pg_trgm.strict_word_similarity_threshold
+
+CREATE FUNCTION strict_word_similarity_commutator_op(text,text) -- procedure behind the %>> operator
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE PARALLEL SAFE; -- stable because depends on pg_trgm.strict_word_similarity_threshold
+
+CREATE OPERATOR <<% ( -- boolean strict-word-similarity match, text <<% text
+ LEFTARG = text,
+ RIGHTARG = text,
+ PROCEDURE = strict_word_similarity_op,
+ COMMUTATOR = '%>>', -- x <<% y is declared equivalent to y %>> x
+ RESTRICT = contsel, -- restriction selectivity estimator
+ JOIN = contjoinsel -- join selectivity estimator
+);
+
+CREATE OPERATOR %>> ( -- commutated form of <<%; this is the form added to the index operator families in this script
+ LEFTARG = text,
+ RIGHTARG = text,
+ PROCEDURE = strict_word_similarity_commutator_op,
+ COMMUTATOR = '<<%', -- x %>> y is declared equivalent to y <<% x
+ RESTRICT = contsel, -- restriction selectivity estimator
+ JOIN = contjoinsel -- join selectivity estimator
+);
+
+CREATE FUNCTION strict_word_similarity_dist_op(text,text) -- procedure behind the <<<-> operator
+RETURNS float4 -- a distance value rather than a boolean match
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; -- declared IMMUTABLE, unlike the STABLE boolean *_op functions in this script
+
+CREATE FUNCTION strict_word_similarity_dist_commutator_op(text,text) -- procedure behind the <->>> operator
+RETURNS float4 -- a distance value; the regression output in this change reports 0 for an exact match
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; -- declared IMMUTABLE, unlike the STABLE boolean *_op functions in this script
+
+CREATE OPERATOR <<<-> ( -- distance operator; its procedure returns float4, and no RESTRICT/JOIN estimators are specified
+ LEFTARG = text,
+ RIGHTARG = text,
+ PROCEDURE = strict_word_similarity_dist_op,
+ COMMUTATOR = '<->>>' -- x <<<-> y is declared equivalent to y <->>> x
+);
+
+CREATE OPERATOR <->>> ( -- commutated distance operator; registered below as the GiST ordering operator
+ LEFTARG = text,
+ RIGHTARG = text,
+ PROCEDURE = strict_word_similarity_dist_commutator_op,
+ COMMUTATOR = '<<<->' -- x <->>> y is declared equivalent to y <<<-> x
+);
+
+ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD -- strategy numbers must agree with the C macros in the module's header
+ OPERATOR 9 %>> (text, text), -- search operator; 9 is StrictWordSimilarityStrategyNumber
+ OPERATOR 10 <->>> (text, text) FOR ORDER BY pg_catalog.float_ops; -- ordering operator; 10 is StrictWordDistanceStrategyNumber
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD -- only the boolean search operator is added for GIN; no ordering operator here
+ OPERATOR 9 %>> (text, text); -- 9 is StrictWordSimilarityStrategyNumber
# pg_trgm extension
comment = 'text similarity measurement and index searching based on trigrams'
-default_version = '1.3'
+default_version = '1.4'
module_pathname = '$libdir/pg_trgm'
relocatable = true
--- /dev/null
+DROP INDEX trgm_idx2;
+
+\copy test_trgm3 from 'data/trgm2.data'
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+
+create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops);
+set enable_seqscan=off;
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+
+explain (costs off)
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7;
+
+drop index trgm_idx2;
+create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops);
+set enable_seqscan=off;
+
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+
+set "pg_trgm.strict_word_similarity_threshold" to 0.4;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
+
+set "pg_trgm.strict_word_similarity_threshold" to 0.2;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t;
+select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t;
+select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t;
#include "access/gist.h"
#include "access/itup.h"
+#include "access/stratnum.h"
#include "storage/bufpage.h"
/*
#define DIVUNION
/* operator strategy numbers */
-#define SimilarityStrategyNumber 1
-#define DistanceStrategyNumber 2
-#define LikeStrategyNumber 3
-#define ILikeStrategyNumber 4
-#define RegExpStrategyNumber 5
-#define RegExpICaseStrategyNumber 6
-#define WordSimilarityStrategyNumber 7
-#define WordDistanceStrategyNumber 8
+#define SimilarityStrategyNumber 1
+#define DistanceStrategyNumber 2
+#define LikeStrategyNumber 3
+#define ILikeStrategyNumber 4
+#define RegExpStrategyNumber 5
+#define RegExpICaseStrategyNumber 6
+#define WordSimilarityStrategyNumber 7
+#define WordDistanceStrategyNumber 8
+#define StrictWordSimilarityStrategyNumber 9
+#define StrictWordDistanceStrategyNumber 10
typedef char trgm[3];
extern double similarity_threshold;
extern double word_similarity_threshold;
+extern double strict_word_similarity_threshold;
+extern double index_strategy_get_limit(StrategyNumber strategy);
extern uint32 trgm2int(trgm *ptr);
extern void compact_trigram(trgm *tptr, char *str, int bytelen);
extern TRGM *generate_trgm(char *str, int slen);
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
+ case StrictWordSimilarityStrategyNumber:
trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
break;
case ILikeStrategyNumber:
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
- nlimit = (strategy == SimilarityStrategyNumber) ?
- similarity_threshold : word_similarity_threshold;
+ case StrictWordSimilarityStrategyNumber:
+ nlimit = index_strategy_get_limit(strategy);
/* Count the matches */
ntrue = 0;
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
- nlimit = (strategy == SimilarityStrategyNumber) ?
- similarity_threshold : word_similarity_threshold;
+ case StrictWordSimilarityStrategyNumber:
+ nlimit = index_strategy_get_limit(strategy);
/* Count the matches */
ntrue = 0;
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
+ case StrictWordSimilarityStrategyNumber:
qtrg = generate_trgm(VARDATA(query),
querysize - VARHDRSZ);
break;
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
- /* Similarity search is exact. Word similarity search is inexact */
- *recheck = (strategy == WordSimilarityStrategyNumber);
- nlimit = (strategy == SimilarityStrategyNumber) ?
- similarity_threshold : word_similarity_threshold;
+ case StrictWordSimilarityStrategyNumber:
+ /* Similarity search is exact. (Strict) word similarity search is inexact */
+ *recheck = (strategy != SimilarityStrategyNumber);
+
+ nlimit = index_strategy_get_limit(strategy);
if (GIST_LEAF(entry))
{ /* all leafs contains orig trgm */
{
case DistanceStrategyNumber:
case WordDistanceStrategyNumber:
- *recheck = strategy == WordDistanceStrategyNumber;
+ case StrictWordDistanceStrategyNumber:
+ /* Only plain trigram distance is exact */
+ *recheck = (strategy != DistanceStrategyNumber);
if (GIST_LEAF(entry))
{ /* all leafs contains orig trgm */
/* GUC variables */
double similarity_threshold = 0.3f;
double word_similarity_threshold = 0.6f;
+double strict_word_similarity_threshold = 0.5f;
void _PG_init(void);
PG_FUNCTION_INFO_V1(show_trgm);
PG_FUNCTION_INFO_V1(similarity);
PG_FUNCTION_INFO_V1(word_similarity);
+PG_FUNCTION_INFO_V1(strict_word_similarity);
PG_FUNCTION_INFO_V1(similarity_dist);
PG_FUNCTION_INFO_V1(similarity_op);
PG_FUNCTION_INFO_V1(word_similarity_op);
PG_FUNCTION_INFO_V1(word_similarity_commutator_op);
PG_FUNCTION_INFO_V1(word_similarity_dist_op);
PG_FUNCTION_INFO_V1(word_similarity_dist_commutator_op);
+PG_FUNCTION_INFO_V1(strict_word_similarity_op);
+PG_FUNCTION_INFO_V1(strict_word_similarity_commutator_op);
+PG_FUNCTION_INFO_V1(strict_word_similarity_dist_op);
+PG_FUNCTION_INFO_V1(strict_word_similarity_dist_commutator_op);
/* Trigram with position */
typedef struct
int index;
} pos_trgm;
+/* Trigram bound type */
+typedef uint8 TrgmBound;
+#define TRGM_BOUND_LEFT (0x01) /* trigram is left bound of word */
+#define TRGM_BOUND_RIGHT (0x02) /* trigram is right bound of word */
+
+/* Word similarity flags */
+#define WORD_SIMILARITY_CHECK_ONLY (0x01) /* if set then only check existence
+ * of similar search pattern in text */
+#define WORD_SIMILARITY_STRICT (0x02) /* force bounds of extent to match
+ * word bounds */
+
/*
* Module load callback
*/
NULL,
NULL,
NULL);
+ DefineCustomRealVariable("pg_trgm.strict_word_similarity_threshold",
+ "Sets the threshold used by the <<%% operator.",
+ "Valid range is 0.0 .. 1.0.",
+ &strict_word_similarity_threshold,
+ 0.5,
+ 0.0,
+ 1.0,
+ PGC_USERSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
}
/*
PG_RETURN_FLOAT4(similarity_threshold);
}
+
+/*
+ * Map an index scan strategy number to the similarity threshold that applies
+ * to it.  Only the plain, word and strict word similarity strategies have a
+ * threshold; any other strategy number is reported via elog(ERROR).
+ */
+double
+index_strategy_get_limit(StrategyNumber strategy)
+{
+	double		limit = 0.0;	/* initialized only to keep compiler quiet */
+
+	if (strategy == SimilarityStrategyNumber)
+		limit = similarity_threshold;
+	else if (strategy == WordSimilarityStrategyNumber)
+		limit = word_similarity_threshold;
+	else if (strategy == StrictWordSimilarityStrategyNumber)
+		limit = strict_word_similarity_threshold;
+	else
+		elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+	return limit;
+}
+
/*
* Deprecated function.
* Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
*
* trg: where to return the array of trigrams.
* str: source string, of length slen bytes.
+ * bounds: where to return bounds of trigrams (if needed).
*
* Returns length of the generated array.
*/
static int
-generate_trgm_only(trgm *trg, char *str, int slen)
+generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
{
trgm *tptr;
char *buf;
buf[LPADDING + bytelen] = ' ';
buf[LPADDING + bytelen + 1] = ' ';
- /*
- * count trigrams
- */
+ /* Calculate trigrams marking their bounds if needed */
+ if (bounds)
+ bounds[tptr - trg] |= TRGM_BOUND_LEFT;
tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
charlen + LPADDING + RPADDING);
+ if (bounds)
+ bounds[tptr - trg - 1] |= TRGM_BOUND_RIGHT;
}
pfree(buf);
trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
trg->flag = ARRKEY;
- len = generate_trgm_only(GETARR(trg), str, slen);
+ len = generate_trgm_only(GETARR(trg), str, slen, NULL);
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
if (len == 0)
* ulen1: count of unique trigrams of array "trg1".
* len2: length of array "trg2" and array "trg2indexes".
* len: length of the array "found".
- * check_only: if true then only check existence of similar search pattern in
- * text.
+ * flags: set of boolean flags parametrizing similarity calculation.
+ * bounds: whether each trigram is left/right bound of word.
*
* Returns word similarity.
*/
int ulen1,
int len2,
int len,
- bool check_only)
+ uint8 flags,
+ TrgmBound *bounds)
{
int *lastpos,
i,
ulen2 = 0,
count = 0,
upper = -1,
- lower = -1;
+ lower;
float4 smlr_cur,
smlr_max = 0.0f;
+ double threshold;
+
+ Assert(bounds || !(flags & WORD_SIMILARITY_STRICT));
+
+ /* Select appropriate threshold */
+ threshold = (flags & WORD_SIMILARITY_STRICT) ?
+ strict_word_similarity_threshold :
+ word_similarity_threshold;
+
+ /*
+ * Consider first trigram as initial lower bound for strict word similarity,
+ * or initialize it later with first trigram present for plain word
+ * similarity.
+ */
+ lower = (flags & WORD_SIMILARITY_STRICT) ? 0 : -1;
/* Memorise last position of each trigram */
lastpos = (int *) palloc(sizeof(int) * len);
lastpos[trgindex] = i;
}
- /* Adjust upper bound if this trigram is present in required substring */
- if (found[trgindex])
+ /*
+ * Adjust upper bound if trigram is upper bound of word for strict
+ * word similarity, or if trigram is present in required substring for
+ * plain word similarity
+ */
+ if ((flags & WORD_SIMILARITY_STRICT) ? (bounds[i] & TRGM_BOUND_RIGHT)
+ : found[trgindex])
{
int prev_lower,
tmp_ulen2,
prev_lower = lower;
for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++)
{
- float smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
+ float smlr_tmp;
int tmp_trgindex;
- if (smlr_tmp > smlr_cur)
- {
- smlr_cur = smlr_tmp;
- ulen2 = tmp_ulen2;
- lower = tmp_lower;
- count = tmp_count;
- }
-
/*
- * if we only check that word similarity is greater than
- * pg_trgm.word_similarity_threshold we do not need to
- * calculate a maximum similarity.
+ * Adjust lower bound only if trigram is lower bound of word
+ * for strict word similarity, or consider every trigram as
+ * lower bound for plain word similarity.
*/
- if (check_only && smlr_cur >= word_similarity_threshold)
- break;
+ if (!(flags & WORD_SIMILARITY_STRICT)
+ || (bounds[tmp_lower] & TRGM_BOUND_LEFT))
+ {
+ smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
+ if (smlr_tmp > smlr_cur)
+ {
+ smlr_cur = smlr_tmp;
+ ulen2 = tmp_ulen2;
+ lower = tmp_lower;
+ count = tmp_count;
+ }
+
+ /*
+ * If we only check that word similarity is greater than
+ * threshold we do not need to calculate a maximum
+ * similarity.
+ */
+ if ((flags & WORD_SIMILARITY_CHECK_ONLY)
+ && smlr_cur >= threshold)
+ break;
+ }
tmp_trgindex = trg2indexes[tmp_lower];
if (lastpos[tmp_trgindex] == tmp_lower)
/*
* if we only check that word similarity is greater than
- * pg_trgm.word_similarity_threshold we do not need to calculate a
- * maximum similarity
+ * threshold we do not need to calculate a maximum similarity.
*/
- if (check_only && smlr_max >= word_similarity_threshold)
+ if ((flags & WORD_SIMILARITY_CHECK_ONLY) && smlr_max >= threshold)
break;
for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++)
*
* str1: search pattern string, of length slen1 bytes.
* str2: text in which we are looking for a word, of length slen2 bytes.
- * check_only: if true then only check existence of similar search pattern in
- * text.
+ * flags: set of boolean flags parametrizing similarity calculation.
*
* Returns word similarity.
*/
static float4
calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
- bool check_only)
+ uint8 flags)
{
bool *found;
pos_trgm *ptrg;
ulen1;
int *trg2indexes;
float4 result;
+ TrgmBound *bounds;
protect_out_of_mem(slen1 + slen2);
/* Make positional trigrams */
trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3);
trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3);
+ if (flags & WORD_SIMILARITY_STRICT)
+ bounds = (TrgmBound *) palloc0(sizeof(TrgmBound) * (slen2 / 2 + 1) * 3);
+ else
+ bounds = NULL;
- len1 = generate_trgm_only(trg1, str1, slen1);
- len2 = generate_trgm_only(trg2, str2, slen2);
+ len1 = generate_trgm_only(trg1, str1, slen1, NULL);
+ len2 = generate_trgm_only(trg2, str2, slen2, bounds);
ptrg = make_positional_trgm(trg1, len1, trg2, len2);
len = len1 + len2;
/* Run iterative procedure to find maximum similarity with word */
result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len,
- check_only);
+ flags, bounds);
pfree(trg2indexes);
pfree(found);
res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
- false);
+ 0);
+
+ PG_FREE_IF_COPY(in1, 0);
+ PG_FREE_IF_COPY(in2, 1);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+strict_word_similarity(PG_FUNCTION_ARGS)
+{
+ text *in1 = PG_GETARG_TEXT_PP(0);
+ text *in2 = PG_GETARG_TEXT_PP(1);
+ float4 res;
+
+ res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
+ VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
+ WORD_SIMILARITY_STRICT);
PG_FREE_IF_COPY(in1, 0);
PG_FREE_IF_COPY(in2, 1);
res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
- true);
+ WORD_SIMILARITY_CHECK_ONLY);
PG_FREE_IF_COPY(in1, 0);
PG_FREE_IF_COPY(in2, 1);
res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
- true);
+ WORD_SIMILARITY_CHECK_ONLY);
PG_FREE_IF_COPY(in1, 0);
PG_FREE_IF_COPY(in2, 1);
res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
- false);
+ 0);
PG_FREE_IF_COPY(in1, 0);
PG_FREE_IF_COPY(in2, 1);
res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
- false);
+ 0);
+
+ PG_FREE_IF_COPY(in1, 0);
+ PG_FREE_IF_COPY(in2, 1);
+ PG_RETURN_FLOAT4(1.0 - res);
+}
+
+/*
+ * Boolean test: does the strict word similarity of the first argument
+ * (search pattern) within the second argument (text) reach
+ * strict_word_similarity_threshold?
+ */
+Datum
+strict_word_similarity_op(PG_FUNCTION_ARGS)
+{
+	text	   *pattern = PG_GETARG_TEXT_PP(0);
+	text	   *target = PG_GETARG_TEXT_PP(1);
+	uint8		flags = WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT;
+	float4		sml;
+	bool		matched;
+
+	/* Only existence of a good enough extent matters, not the maximum */
+	sml = calc_word_similarity(VARDATA_ANY(pattern), VARSIZE_ANY_EXHDR(pattern),
+							   VARDATA_ANY(target), VARSIZE_ANY_EXHDR(target),
+							   flags);
+
+	PG_FREE_IF_COPY(pattern, 0);
+	PG_FREE_IF_COPY(target, 1);
+
+	matched = (sml >= strict_word_similarity_threshold);
+	PG_RETURN_BOOL(matched);
+}
+
+/*
+ * Same test as strict_word_similarity_op, but with the argument roles
+ * swapped: the second argument is the search pattern and the first argument
+ * is the text it is looked for in.
+ */
+Datum
+strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
+{
+	text	   *target = PG_GETARG_TEXT_PP(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	uint8		flags = WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT;
+	float4		sml;
+	bool		matched;
+
+	/* Pattern comes first in the call even though it is argument 1 here */
+	sml = calc_word_similarity(VARDATA_ANY(pattern), VARSIZE_ANY_EXHDR(pattern),
+							   VARDATA_ANY(target), VARSIZE_ANY_EXHDR(target),
+							   flags);
+
+	PG_FREE_IF_COPY(target, 0);
+	PG_FREE_IF_COPY(pattern, 1);
+
+	matched = (sml >= strict_word_similarity_threshold);
+	PG_RETURN_BOOL(matched);
+}
+
+/*
+ * Distance form of strict word similarity: one minus the strict word
+ * similarity of the first argument (search pattern) within the second
+ * argument (text).
+ */
+Datum
+strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
+{
+	text	   *pattern = PG_GETARG_TEXT_PP(0);
+	text	   *target = PG_GETARG_TEXT_PP(1);
+	float4		sml;
+
+	/* No CHECK_ONLY flag here: the similarity value itself is returned */
+	sml = calc_word_similarity(VARDATA_ANY(pattern), VARSIZE_ANY_EXHDR(pattern),
+							   VARDATA_ANY(target), VARSIZE_ANY_EXHDR(target),
+							   WORD_SIMILARITY_STRICT);
+
+	PG_FREE_IF_COPY(pattern, 0);
+	PG_FREE_IF_COPY(target, 1);
+
+	PG_RETURN_FLOAT4(1.0 - sml);
+}
+
+Datum
+strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
+{
+ text *in1 = PG_GETARG_TEXT_PP(0);
+ text *in2 = PG_GETARG_TEXT_PP(1);
+ float4 res;
+
+ res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
+ VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
+ WORD_SIMILARITY_STRICT);
PG_FREE_IF_COPY(in1, 0);
PG_FREE_IF_COPY(in2, 1);
the explanation below.
</entry>
</row>
+ <row>
+ <entry>
+ <function>strict_word_similarity(text, text)</function>
+ <indexterm><primary>strict_word_similarity</primary></indexterm>
+ </entry>
+ <entry><type>real</type></entry>
+ <entry>
+ Same as <function>word_similarity(text, text)</function>, but forces
+ extent boundaries to match word boundaries.
+ </entry>
+ </row>
<row>
<entry><function>show_limit()</function><indexterm><primary>show_limit</primary></indexterm></entry>
<entry><type>real</type></entry>
a part of the word.
</para>
+ <para>
+ At the same time, <function>strict_word_similarity(text, text)</function>
+ has to select an extent that matches word boundaries. In the example above,
+ <function>strict_word_similarity(text, text)</function> would select the
+ extent <literal>{" w"," wo","wor","ord","rds","ds "}</literal>, which
+ corresponds to the whole word <literal>'words'</literal>.
+
+<programlisting>
+# SELECT strict_word_similarity('word', 'two words'), similarity('word', 'words');
+ strict_word_similarity | similarity
+------------------------+------------
+ 0.571429 | 0.571429
+(1 row)
+</programlisting>
+ </para>
+
+ <para>
+ Thus, the <function>strict_word_similarity(text, text)</function> function
+ is useful for finding the similarity to whole words, while
+ <function>word_similarity(text, text)</function> is more suitable for
+ finding the similarity for parts of words.
+ </para>
+
<table id="pgtrgm-op-table">
<title><filename>pg_trgm</filename> Operators</title>
<tgroup cols="3">
Commutator of the <literal><%</literal> operator.
</entry>
</row>
+ <row>
+ <entry><type>text</type> <literal><<%</literal> <type>text</type></entry>
+ <entry><type>boolean</type></entry>
+ <entry>
+ Returns <literal>true</literal> if its second argument has a continuous
+ extent of an ordered trigram set that matches word boundaries,
+ and its similarity to the trigram set of the first argument is greater
+ than or equal to the current strict word similarity threshold set by the
+ <varname>pg_trgm.strict_word_similarity_threshold</varname> parameter.
+ </entry>
+ </row>
+ <row>
+ <entry><type>text</type> <literal>%>></literal> <type>text</type></entry>
+ <entry><type>boolean</type></entry>
+ <entry>
+ Commutator of the <literal><<%</literal> operator.
+ </entry>
+ </row>
<row>
<entry><type>text</type> <literal><-></literal> <type>text</type></entry>
<entry><type>real</type></entry>
Commutator of the <literal><<-></literal> operator.
</entry>
</row>
+ <row>
+ <entry>
+ <type>text</type> <literal><<<-></literal> <type>text</type>
+ </entry>
+ <entry><type>real</type></entry>
+ <entry>
+ Returns the <quote>distance</quote> between the arguments, that is
+ one minus the <function>strict_word_similarity()</function> value.
+ </entry>
+ </row>
+ <row>
+ <entry>
+ <type>text</type> <literal><->>></literal> <type>text</type>
+ </entry>
+ <entry><type>real</type></entry>
+ <entry>
+ Commutator of the <literal><<<-></literal> operator.
+ </entry>
+ </row>
</tbody>
</tgroup>
</table>
<para>
Also you can use an index on the <structfield>t</structfield> column for word
- similarity. For example:
+ similarity or strict word similarity. Typical queries are:
<programlisting>
SELECT t, word_similarity('<replaceable>word</replaceable>', t) AS sml
FROM test_trgm
WHERE '<replaceable>word</replaceable>' <% t
ORDER BY sml DESC, t;
+</programlisting>
+ and
+<programlisting>
+SELECT t, strict_word_similarity('<replaceable>word</replaceable>', t) AS sml
+ FROM test_trgm
+ WHERE '<replaceable>word</replaceable>' <<% t
+ ORDER BY sml DESC, t;
</programlisting>
This will return all values in the text column for which there is a
continuous extent in the corresponding ordered trigram set that is
</para>
<para>
- A variant of the above query is
+ Possible variants of the above queries are:
<programlisting>
SELECT t, '<replaceable>word</replaceable>' <<-> t AS dist
FROM test_trgm
ORDER BY dist LIMIT 10;
+</programlisting>
+ and
+<programlisting>
+SELECT t, '<replaceable>word</replaceable>' <<<-> t AS dist
+ FROM test_trgm
+ ORDER BY dist LIMIT 10;
</programlisting>
This can be implemented quite efficiently by GiST indexes, but not
by GIN indexes.