From: Regina Obe Date: Wed, 11 May 2011 12:55:13 +0000 (+0000) Subject: more usability and performance enhancements. Change geocoder/reverse_geocode to... X-Git-Tag: 2.0.0alpha1~1681 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f6e2365ce2d6b7b1a7e2fb3b3eecd59f868805d6;p=postgis more usability and performance enhancements. Change geocoder/reverse_geocode to use tiger.state_lookup instead of each schema state table (to achieve this had to add statefp to lookup table). Also put in upgrade scripts and notes on upgrading in README. (current script will only upgrade an alpha tiger 2010 (PostGIS 2.0.0 install) ). git-svn-id: http://svn.osgeo.org/postgis/trunk@7127 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/extras/tiger_geocoder/tiger_2010/README b/extras/tiger_geocoder/tiger_2010/README index 17939ec3e..078acbde2 100644 --- a/extras/tiger_geocoder/tiger_2010/README +++ b/extras/tiger_geocoder/tiger_2010/README @@ -50,6 +50,6 @@ SELECT g.rating, FROM geocode('1731 New Hampshire Avenue Northwest, Washington, DC 20010') As g; Steps to upgrade your install: -If you need to upgrade the geocoder -- run the create_geocode.sql. -CAUTION: The upgrade script will drop any table columns that have a norm_addy type for a column type. This is rarely if ever done so you should be fine. -We plan to fix this later. \ No newline at end of file +If you need to upgrade the geocoder/tiger loader from a pre-release 2.0.0 install -- run the upgrade_geocoder.sh or upgrade_geocoder.bat script. +CAUTION: The upgrade script will drop any table columns that have a norm_addy type for a column type. This is rare if ever done so you should be fine. +We plan to fix this later. It will also drop any customizations you have made to the tiger_loader configuration tables. \ No newline at end of file diff --git a/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql b/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql index e55bc96c1..56e99d5b1 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql @@ -16,7 +16,7 @@ BEGIN ADDY.internal := parsed.internal; - in_statefp := statefp FROM state WHERE state.stusps = parsed.stateAbbrev; + in_statefp := statefp FROM state_lookup As s WHERE s.abbrev = parsed.stateAbbrev; -- There are a couple of different things to try, from the highest preference and falling back -- to lower-preference options. diff --git a/extras/tiger_geocoder/tiger_2010/geocode/reverse_geocode.sql b/extras/tiger_geocoder/tiger_2010/geocode/reverse_geocode.sql index 1b0638c19..2bfa399cc 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/reverse_geocode.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/reverse_geocode.sql @@ -24,71 +24,90 @@ DECLARE var_primary_line geometry := NULL; var_primary_dist numeric(10,2) ; var_pt geometry; + var_debug boolean = true; BEGIN - IF pt IS NULL THEN - RETURN; - ELSE - IF ST_SRID(pt) = 4269 THEN - var_pt := pt; - ELSE - var_pt := ST_Transform(pt, 4269); - END IF; - END IF; - -- Determine state tables to check - -- this is needed to take advantage of constraint exclusion - var_states := ARRAY(SELECT statefp FROM state WHERE ST_Intersects(the_geom, var_pt) ); - IF array_upper(var_states, 1) IS NULL THEN - -- We don't have any data for this state - RETURN; - END IF; + IF pt IS NULL THEN + RETURN; + ELSE + IF ST_SRID(pt) = 4269 THEN + var_pt := pt; + ELSIF ST_SRID(pt) > 0 THEN + var_pt := ST_Transform(pt, 4269); + ELSE --If srid is unknown, assume its 4269 + var_pt := ST_SetSRID(pt, 4269); + END IF; + END IF; + -- Determine state tables to check + -- this is needed to take advantage of constraint exclusion + IF var_debug THEN + RAISE NOTICE 'Get matching states start: %', clock_timestamp(); + END IF; + var_states := ARRAY(SELECT statefp FROM state WHERE ST_Intersects(the_geom, var_pt) ); + IF var_debug THEN + RAISE NOTICE 'Get matching states end: %', clock_timestamp(); + END IF; + IF array_upper(var_states, 1) IS NULL THEN + -- We don't have any data for this state + RETURN; + END IF; + + -- Find the street edges that this point is closest to with tolerance of 0.005 but only consider the edge if the point is contained in the right or left face + -- Then order addresses by proximity to road + IF var_debug THEN + RAISE NOTICE 'Get matching edges start: %', clock_timestamp(); + END IF; + FOR var_redge IN + SELECT * + FROM (SELECT DISTINCT ON(fullname) foo.fullname, foo.stusps, foo.zip, + (SELECT z.place FROM zip_state_loc AS z WHERE z.zip = foo.zip and z.statefp = foo.statefp LIMIT 1) As place, foo.center_pt, + side, to_number(fromhn, '999999') As fromhn, to_number(tohn, '999999') As tohn, ST_GeometryN(ST_Multi(line),1) As line, foo.dist + FROM + (SELECT e.the_geom As line, e.fullname, a.zip, s.abbrev As stusps, ST_ClosestPoint(e.the_geom, var_pt) As center_pt, e.statefp, a.side, a.fromhn, a.tohn, ST_Distance_Sphere(e.the_geom, var_pt) As dist + FROM (SELECT * FROM edges WHERE statefp = ANY(var_states) ) AS e INNER JOIN (SELECT * FROM state_lookup WHERE statefp = ANY(var_states) ) As s ON (e.statefp = s.statefp ) + INNER JOIN (SELECT * FROM faces WHERE statefp = ANY(var_states) ) As fl ON (e.tfidl = fl.tfid AND e.statefp = fl.statefp) + INNER JOIN (SELECT * FROM faces WHERE statefp = ANY(var_states) ) As fr ON (e.tfidr = fr.tfid AND e.statefp = fr.statefp) + INNER JOIN (SELECT * FROM addr WHERE statefp = ANY(var_states) ) As a ON ( e.tlid = a.tlid AND e.statefp = a.statefp AND + ( ( ST_Covers(fl.the_geom, var_pt) AND a.side = 'L') OR ( ST_Covers(fr.the_geom, var_pt) AND a.side = 'R' ) ) ) + -- INNER JOIN zip_state_loc As z ON (a.statefp = z.statefp AND a.zip = z.zip) /** really slow with this join **/ + WHERE ST_DWithin(e.the_geom, var_pt, 0.005) + ORDER BY ST_Distance_Sphere(e.the_geom, var_pt) LIMIT 4) As foo + WHERE dist < 150 --less than 150 m + ORDER BY foo.fullname, foo.dist) As f ORDER BY f.dist LOOP + IF var_debug THEN + RAISE NOTICE 'Get matching edges loop: %', clock_timestamp(); + END IF; + IF var_primary_line IS NULL THEN --this is the first time in the loop and our primary guess + var_primary_line := var_redge.line; + var_primary_dist := var_redge.dist; + END IF; + -- We only consider other edges as matches if they intersect our primary edge -- that would mean we are at a corner place + IF ST_Intersects(var_redge.line, var_primary_line) THEN + intpt := array_append(intpt,var_redge.center_pt); + IF var_redge.fullname IS NOT NULL THEN + street := array_append(street, (CASE WHEN include_strnum_range THEN COALESCE(var_redge.fromhn::varchar, '')::varchar || ' - ' || COALESCE(var_redge.tohn::varchar,'')::varchar || ' '::varchar ELSE '' END::varchar || var_redge.fullname::varchar)::varchar); + --interploate the number -- note that if fromhn > tohn we will be subtracting which is what we want + -- We only consider differential distances are reeally close from our primary pt + IF var_redge.dist < var_primary_dist*1.1 THEN + var_nstrnum := (var_redge.fromhn + ST_Line_Locate_Point(var_redge.line, var_pt)*(var_redge.tohn - var_redge.fromhn))::numeric(10); + -- The odd even street number side of street rule + IF (var_nstrnum % 2) != (var_redge.tohn % 2) THEN + var_nstrnum := CASE WHEN var_nstrnum + 1 NOT BETWEEN var_redge.fromhn AND var_redge.tohn THEN var_nstrnum - 1 ELSE var_nstrnum + 1 END; + END IF; + var_strnum := var_nstrnum::varchar; + var_addy := normalize_address( COALESCE(var_strnum::varchar || ' ', '') || var_redge.fullname || ', ' || var_redge.place || ', ' || var_redge.stusps || ' ' || var_redge.zip); + addy := array_append(addy, var_addy); + END IF; + END IF; + END IF; + END LOOP; - -- Find the street edges that this point is closest to with tolerance of 0.005 but only consider the edge if the point is contained in the right or left face - -- Then order addresses by proximity to road - FOR var_redge IN - SELECT * - FROM (SELECT DISTINCT ON(fullname) foo.fullname, foo.stusps, foo.zip, - (SELECT z.place FROM zip_state_loc AS z WHERE z.zip = foo.zip and z.statefp = foo.statefp LIMIT 1) As place, foo.center_pt, - side, to_number(fromhn, '999999') As fromhn, to_number(tohn, '999999') As tohn, ST_GeometryN(ST_Multi(line),1) As line, foo.dist - FROM - (SELECT e.the_geom As line, e.fullname, a.zip, s.stusps, ST_ClosestPoint(e.the_geom, var_pt) As center_pt, e.statefp, a.side, a.fromhn, a.tohn, ST_Distance_Sphere(e.the_geom, var_pt) As dist - FROM edges AS e INNER JOIN state As s ON (e.statefp = s.statefp AND s.statefp = ANY(var_states) ) - INNER JOIN faces As fl ON (e.tfidl = fl.tfid AND e.statefp = fl.statefp) - INNER JOIN faces As fr ON (e.tfidr = fr.tfid AND e.statefp = fr.statefp) - INNER JOIN addr As a ON ( e.tlid = a.tlid AND e.statefp = a.statefp AND - ( ( ST_Covers(fl.the_geom, var_pt) AND a.side = 'L') OR ( ST_Covers(fr.the_geom, var_pt) AND a.side = 'R' ) ) ) - -- INNER JOIN zip_state_loc As z ON (a.statefp = z.statefp AND a.zip = z.zip) /** really slow with this join **/ - WHERE e.statefp = ANY(var_states) AND a.statefp = ANY(var_states) AND ST_DWithin(e.the_geom, var_pt, 0.005) - ORDER BY ST_Distance_Sphere(e.the_geom, var_pt) LIMIT 4) As foo - WHERE dist < 150 --less than 150 m - ORDER BY foo.fullname, foo.dist) As f ORDER BY f.dist LOOP - IF var_primary_line IS NULL THEN --this is the first time in the loop and our primary guess - var_primary_line := var_redge.line; - var_primary_dist := var_redge.dist; - END IF; - -- We only consider other edges as matches if they intersect our primary edge -- that would mean we are at a corner place - IF ST_Intersects(var_redge.line, var_primary_line) THEN - intpt := array_append(intpt,var_redge.center_pt); - IF var_redge.fullname IS NOT NULL THEN - street := array_append(street, (CASE WHEN include_strnum_range THEN COALESCE(var_redge.fromhn::varchar, '')::varchar || ' - ' || COALESCE(var_redge.tohn::varchar,'')::varchar || ' '::varchar ELSE '' END::varchar || var_redge.fullname::varchar)::varchar); - --interploate the number -- note that if fromhn > tohn we will be subtracting which is what we want - -- We only consider differential distances are reeally close from our primary pt - IF var_redge.dist < var_primary_dist*1.1 THEN - var_nstrnum := (var_redge.fromhn + ST_Line_Locate_Point(var_redge.line, var_pt)*(var_redge.tohn - var_redge.fromhn))::numeric(10); - -- The odd even street number side of street rule - IF (var_nstrnum % 2) != (var_redge.tohn % 2) THEN - var_nstrnum := CASE WHEN var_nstrnum + 1 NOT BETWEEN var_redge.fromhn AND var_redge.tohn THEN var_nstrnum - 1 ELSE var_nstrnum + 1 END; - END IF; - var_strnum := var_nstrnum::varchar; - var_addy := normalize_address( COALESCE(var_strnum::varchar || ' ', '') || var_redge.fullname || ', ' || var_redge.place || ', ' || var_redge.stusps || ' ' || var_redge.zip); - addy := array_append(addy, var_addy); - END IF; - END IF; - END IF; - END LOOP; + IF var_debug THEN + RAISE NOTICE 'Get matching edges loop: %', clock_timestamp(); + END IF; - RETURN; + RETURN; END; -$_$ LANGUAGE plpgsql STABLE; +$_$ LANGUAGE plpgsql STABLE COST 1000; CREATE OR REPLACE FUNCTION reverse_geocode(IN pt geometry, OUT intpt geometry[], OUT addy NORM_ADDY[], @@ -98,4 +117,4 @@ $$ -- default to not include street range in cross streets SELECT reverse_geocode($1,false); $$ -language sql STABLE COST 1000; \ No newline at end of file +language sql STABLE; \ No newline at end of file diff --git a/extras/tiger_geocoder/tiger_2010/tables/lookup_tables_2010.sql b/extras/tiger_geocoder/tiger_2010/tables/lookup_tables_2010.sql index a47db7fcb..1f1c8d653 100644 --- a/extras/tiger_geocoder/tiger_2010/tables/lookup_tables_2010.sql +++ b/extras/tiger_geocoder/tiger_2010/tables/lookup_tables_2010.sql @@ -83,7 +83,7 @@ CREATE INDEX secondary_unit_lookup_abbrev_idx ON secondary_unit_lookup (abbrev); -- Create state lookup table DROP TABLE IF EXISTS state_lookup; -CREATE TABLE state_lookup (st_code INTEGER PRIMARY KEY, name VARCHAR(40) UNIQUE, abbrev VARCHAR(3) UNIQUE); +CREATE TABLE state_lookup (st_code INTEGER PRIMARY KEY, name VARCHAR(40) UNIQUE, abbrev VARCHAR(3) UNIQUE, statefp char(2) UNIQUE); INSERT INTO state_lookup (name, abbrev, st_code) VALUES ('Alabama', 'AL', '01'); INSERT INTO state_lookup (name, abbrev, st_code) VALUES ('Alaska', 'AK', '02'); INSERT INTO state_lookup (name, abbrev, st_code) VALUES ('American Samoa', 'AS', -1); @@ -143,6 +143,8 @@ INSERT INTO state_lookup (name, abbrev, st_code) VALUES ('Washington', 'WA', '53 INSERT INTO state_lookup (name, abbrev, st_code) VALUES ('West Virginia', 'WV', '54'); INSERT INTO state_lookup (name, abbrev, st_code) VALUES ('Wisconsin', 'WI', '55'); INSERT INTO state_lookup (name, abbrev, st_code) VALUES ('Wyoming', 'WY', '56'); +-- NOTE: fix later -- this is wrong for those - state code ones +UPDATE state_lookup SET statefp = lpad(st_code::text,2,'0'); -- Create street type lookup table diff --git a/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql b/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql new file mode 100644 index 000000000..06a5e4798 --- /dev/null +++ b/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql @@ -0,0 +1,63 @@ +--$Id: create_geocode.sql 7111 2011-05-08 16:36:55Z robe $ +-- Tiger is where we're going to create the functions, but we need +-- the PostGIS functions/types which are in public. +SET search_path TO tiger,public; +--this will fail if the column already exists which is fine +ALTER TABLE state_lookup ADD COLUMN statefp char(2); +UPDATE state_lookup SET statefp = lpad(st_code::text,2,'0') WHERE statefp IS NULL; +ALTER TABLE state_lookup ADD CONSTRAINT state_lookup_statefp_key UNIQUE(statefp); + +BEGIN; +-- Type used to pass around a normalized address between functions +-- This is s bit dangerous since it could potentially drop peoples tables +-- TODO: put in logic to check if any tables have norm_addy and don't drop if they do +DROP TYPE IF EXISTS norm_addy CASCADE; +CREATE TYPE norm_addy AS ( + address INTEGER, + preDirAbbrev VARCHAR, + streetName VARCHAR, + streetTypeAbbrev VARCHAR, + postDirAbbrev VARCHAR, + internal VARCHAR, + location VARCHAR, + stateAbbrev VARCHAR, + zip VARCHAR, + parsed BOOLEAN); + +-- System/General helper functions +\i utility/utmzone.sql +\i utility/cull_null.sql +\i utility/nullable_levenshtein.sql +\i utility/levenshtein_ignore_case.sql + +---- Address normalizer +-- General helpers +\i normalize/end_soundex.sql +\i normalize/count_words.sql +\i normalize/state_extract.sql +\i normalize/get_last_words.sql +-- Location extraction/normalization helpers +\i normalize/location_extract_countysub_exact.sql +\i normalize/location_extract_countysub_fuzzy.sql +\i normalize/location_extract_place_exact.sql +\i normalize/location_extract_place_fuzzy.sql +\i normalize/location_extract.sql +-- Normalization API, called by geocode mainly. +\i normalize/normalize_address.sql +\i normalize/pprint_addy.sql + +---- Geocoder functions +-- General helpers +\i geocode/other_helper_functions.sql +\i geocode/rate_attributes.sql +\i geocode/includes_address.sql +\i geocode/interpolate_from_address.sql +-- Actual lookups/geocoder helpers +\i geocode/geocode_address.sql +\i geocode/geocode_location.sql +-- Geocode API, called by user +\i geocode/geocode.sql + +-- Reverse Geocode API, called by user +\i geocode/reverse_geocode.sql +COMMIT; \ No newline at end of file diff --git a/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.bat b/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.bat new file mode 100644 index 000000000..e813e0234 --- /dev/null +++ b/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.bat @@ -0,0 +1,12 @@ +REM $Id: create_geocode.bat 6774 2011-02-01 13:55:02Z robe $ +set PGPORT=5432 +set PGHOST=localhost +set PGUSER=postgres +set PGPASSWORD=yourpasswordhere +set THEDB=geocoder +set PGBIN=C:\Program Files\PostgreSQL\8.4\bin +set PGCONTRIB=C:\Program Files\PostgreSQL\8.4\share\contrib +"%PGBIN%\psql" -d "%THEDB%" -f "tiger_loader.sql" +"%PGBIN%\psql" -d "%THEDB%" -f "upgrade_geocode.sql" +pause + diff --git a/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.sh b/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.sh new file mode 100644 index 000000000..826bf4146 --- /dev/null +++ b/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# $Id: create_geocode.sh 7035 2011-04-15 11:15:59Z robe $ +PGPORT=5432 +PGHOST=localhost +PGUSER=postgres +PGPASSWORD=yourpasswordhere +THEDB=geocoder +PSQL_CMD=/usr/bin/psql +PGCONTRIB=/usr/share/postgresql/contrib +${PSQL_CMD} -d "${THEDB}" -f "tiger_loader.sql" +${PSQL_CMD} -d "${THEDB}" -f "upgrade_geocode.sql" \ No newline at end of file