From: Regina Obe Date: Wed, 29 Jun 2011 10:35:21 +0000 (+0000) Subject: #1069: Support for specifying max_results to return. Regress already committed X-Git-Tag: 2.0.0alpha1~1323 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c561b95e22c1fc6419ab0f55368a20799b466b1f;p=postgis #1069: Support for specifying max_results to return. Regress already committed git-svn-id: http://svn.osgeo.org/postgis/trunk@7518 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/doc/extras_tigergeocoder.xml b/doc/extras_tigergeocoder.xml index c2d6f989a..02bc7603c 100644 --- a/doc/extras_tigergeocoder.xml +++ b/doc/extras_tigergeocoder.xml @@ -43,7 +43,8 @@ setof record geocode - address varchar + varchar address + integer max_results=10 norm_addy OUT addy geometry OUT geomout integer OUT rating @@ -51,6 +52,7 @@ setof record geocode norm_addy in_addy + integer max_results=10 norm_addy OUT addy geometry OUT geomout integer OUT rating @@ -64,9 +66,8 @@ Takes in an address as a string (or already normalized address) and outputs a set of possible locations which include a point geometry in NAD 83 long lat, a normalized_address (addy) for each, and the rating. The lower the rating the more likely the match. Results are sorted by lowest rating first. Uses Tiger data (edges,faces,addr), PostgreSQL fuzzy string matching (soundex,levenshtein) and PostGIS line interpolation functions to interpolate address along the Tiger edges. The higher the rating the less likely the geocode is right. - Enhanced: 2.0.0 to support Tiger 2010 structured data and revised some logic to improve speed. - - + Enhanced: 2.0.0 to support Tiger 2010 structured data and revised some logic to improve speed and accuracy of geocoding. New parameter max_results useful for specifying ot just return the best result. + @@ -111,7 +112,7 @@ 70 | POINT(-71.0646 42.35105) | 31 | Stuart | St | Boston | MA | 02116 (11 rows) -Using to do a batch geocode of addresses. USE DISTINCT ON to return the best address match (lowest rating), when more than one match is returned. Only process those not yet geocoded (have no rating). +Using to do a batch geocode of addresses. Easiest is to set max_results=1. Only process those not yet geocoded (have no rating). CREATE TABLE addresses_to_geocode(addid serial PRIMARY KEY, address text, lon numeric, lat numeric, new_address text, rating integer); @@ -122,30 +123,30 @@ VALUES ('529 Main Street, Boston MA, 02129'), ('124 Mount Auburn St, Cambridge, Massachusetts 02138'), ('950 Main Street, Worcester, MA 01610'); --- only update the first two addresses (850 ms) -- +-- only update the first two addresses (323-704 ms - there are caching and shared memory effects so first geocode you do is always slower) -- -- for large numbers of addresses you don't want to update all at once -- since the whole geocode must commit at once UPDATE addresses_to_geocode SET (rating, new_address, lon, lat) - = (g.rating, pprint_addy(g.addy), - ST_X(g.geomout)::numeric(8,5), ST_Y(g.geomout)::numeric(8,5) ) -FROM (SELECT DISTINCT ON (addid) addid, (g1.geo).* - FROM (SELECT addid, (geocode(address)) As geo -FROM addresses_to_geocode As ag - WHERE ag.rating IS NULL ) As g1 -ORDER BY addid, rating LIMIT 2) As g + = ( (g.geo).rating, pprint_addy((g.geo).addy), + ST_X((g.geo).geomout)::numeric(8,5), ST_Y((g.geo).geomout)::numeric(8,5) ) +FROM (SELECT addid, (geocode(address,1)) As geo + FROM addresses_to_geocode As ag + WHERE ag.rating IS NULL ORDER BY addid LIMIT 5) As g WHERE g.addid = addresses_to_geocode.addid; result ----- -2 rows affected, 850 ms execution time. +5 rows affected, 345 ms execution time. SELECT * FROM addresses_to_geocode WHERE rating is not null; - - addid | address | lon | lat | new_address | rating - ------+----------------------------------------------+-----------+----------+-------------------------------------------+-------- - 1 | 529 Main Street, Boston MA, 02129 | -71.07187 | 42.38351 | 529 Main St, Boston, MA 02129 | 0 - 2 | 77 Massachusetts Avenue, Cambridge, MA 02139 | -71.09436 | 42.35981 | 77 Massachusetts Ave, Cambridge, MA 02139 | 0 + addid | address | lon | lat | new_address | rating +-------+-----------------------------------------------------+-----------+----------+-------------------------------------------+-------- + 1 | 529 Main Street, Boston MA, 02129 | -71.07187 | 42.38351 | 529 Main St, Boston, MA 02129 | 0 + 2 | 77 Massachusetts Avenue, Cambridge, MA 02139 | -71.09436 | 42.35981 | 77 Massachusetts Ave, Cambridge, MA 02139 | 0 + 3 | 28 Capen Street, Medford, MA | -71.12370 | 42.41108 | 28 Capen St, Medford, MA 02155 | 0 + 4 | 124 Mount Auburn St, Cambridge, Massachusetts 02138 | -71.12298 | 42.37336 | 124 Mount Auburn St, Cambridge, MA 02138 | 0 + 5 | 950 Main Street, Worcester, MA 01610 | -71.82361 | 42.24948 | 950 Main St, Worcester, MA 01610 | 0 diff --git a/extras/tiger_geocoder/tiger_2010/geocode/geocode.sql b/extras/tiger_geocoder/tiger_2010/geocode/geocode.sql index e09170a79..34b0bcf6b 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/geocode.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/geocode.sql @@ -1,6 +1,6 @@ --$Id$ CREATE OR REPLACE FUNCTION geocode( - input VARCHAR, + input VARCHAR, max_results integer DEFAULT 10, OUT ADDY NORM_ADDY, OUT GEOMOUT GEOMETRY, OUT RATING INTEGER @@ -20,7 +20,7 @@ BEGIN RETURN; END IF; - FOR rec IN SELECT * FROM geocode(ADDY) +/* FOR rec IN SELECT * FROM geocode(ADDY) LOOP ADDY := rec.addy; @@ -28,16 +28,16 @@ BEGIN RATING := rec.rating; RETURN NEXT; - END LOOP; - - RETURN; + END LOOP;*/ + + RETURN QUERY SELECT g.addy, g.geomout, g.rating FROM geocode(ADDY, max_results) As g ORDER BY g.rating; END; $_$ LANGUAGE plpgsql STABLE; CREATE OR REPLACE FUNCTION geocode( - IN_ADDY NORM_ADDY, + IN_ADDY NORM_ADDY, max_results integer DEFAULT 10, OUT ADDY NORM_ADDY, OUT GEOMOUT GEOMETRY, OUT RATING INTEGER @@ -85,7 +85,7 @@ BEGIN (a.addy).zip, a.rating ) as b - ORDER BY b.rating + ORDER BY b.rating LIMIT max_results LOOP ADDY := rec.addy; diff --git a/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql b/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql index fc03a30df..9be9edcb1 100644 --- a/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql +++ b/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql @@ -24,6 +24,8 @@ CREATE INDEX tiger_place_the_geom_gist ON place USING gist(the_geom); CREATE INDEX tiger_edges_the_geom_gist ON edges USING gist(the_geom); CREATE INDEX tiger_state_the_geom_gist ON faces USING gist(the_geom); DROP FUNCTION IF EXISTS reverse_geocode(geometry); /** changed to use default parameters **/ +DROP FUNCTION IF EXISTS geocode(varchar); /** changed to include default parameter for max_results **/ +DROP FUNCTION IF EXISTS geocode(norm_addy); /** changed to include default parameter for max_results **/ -- TODO: Put in logic to update lookup tables as they change. street_type_lookup has changed since initial release -- CREATE TABLE zcta5