From: Regina Obe Date: Wed, 3 Aug 2011 02:06:31 +0000 (+0000) Subject: Major rework to improve speed, selectivity, and additional regress. Also added some... X-Git-Tag: 2.0.0alpha1~1158 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=09241cd2f9379198e7b6381e5ff7c7bf44fcc140;p=postgis Major rework to improve speed, selectivity, and additional regress. Also added some indexes and removed some -- should solve #1145, #1148, and #1131 (should be about 60% faster for most cases and in some as much as 20 times faster). Better performance if you set max results = 1. Some other minor cleanup. documentation of new management helper functions coming next. git-svn-id: http://svn.osgeo.org/postgis/trunk@7689 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql b/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql index d05857cc0..62148bc12 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql @@ -1,4 +1,5 @@ --$Id$ +--DROP FUNCTION IF EXISTS geocode_address(norm_addy, integer , geometry); CREATE OR REPLACE FUNCTION geocode_address(IN parsed norm_addy, max_results integer DEFAULT 10, restrict_geom geometry DEFAULT NULL, OUT addy norm_addy, OUT geomout geometry, OUT rating integer) RETURNS SETOF record AS $$ @@ -13,6 +14,7 @@ DECLARE var_n integer := 0; var_restrict_geom geometry := NULL; var_bfilter text := null; + var_bestrating integer := NULL; BEGIN IF parsed.streetName IS NULL THEN -- A street name must be given. Think about it. @@ -36,7 +38,208 @@ BEGIN WHERE zc.statefp = ' || quote_nullable(in_statefp) || ' AND ST_Intersects(zc.the_geom, ' || quote_literal(var_restrict_geom::text) || '::geometry) ' ; - -- There are a couple of different things to try, from the highest preference and falling back + SELECT NULL::varchar[] As zip INTO zip_info; + + IF parsed.zip IS NOT NULL THEN + -- Create an array of 5 zips containing 2 before and 2 after our target if our streetName is longer + IF length(parsed.streetName) > 7 THEN + SELECT zip_range(parsed.zip, -2, 2) As zip INTO zip_info; + ELSE + -- If our street name is short, we'll run into many false positives so reduce our zip window a bit + SELECT zip_range(parsed.zip, -1, 1) As zip INTO zip_info; + END IF; + --This signals bad zip input, only use the range if it falls in the place zip range + IF length(parsed.zip) != 5 AND parsed.location IS NOT NULL THEN + stmt := 'SELECT ARRAY(SELECT DISTINCT zip + FROM zip_lookup_base AS z + WHERE z.statefp = $1 + AND z.zip = ANY($3) AND lower(z.city) LIKE lower($2) || ''%''::text ' || COALESCE(' AND z.zip IN(' || var_bfilter || ')', '') || ')::varchar[] AS zip ORDER BY zip' ; + EXECUTE stmt INTO zip_info USING in_statefp, parsed.location, zip_info.zip; + IF var_debug THEN + RAISE NOTICE 'Bad zip newzip range: %', quote_nullable(zip_info.zip); + END IF; + IF array_upper(zip_info.zip,1) = 0 OR array_upper(zip_info.zip,1) IS NULL THEN + -- zips do not fall in city ignore them + IF var_debug THEN + RAISE NOTICE 'Ignore new zip range that is bad too: %', quote_nullable(zip_info.zip); + END IF; + zip_info.zip = NULL::varchar[]; + END IF; + END IF; + END IF; + IF zip_info.zip IS NULL THEN + -- If no good zips just include all for the location + -- We do a like instead of absolute check since tiger sometimes tacks things like Town at end of places + stmt := 'SELECT ARRAY(SELECT DISTINCT zip + FROM zip_lookup_base AS z + WHERE z.statefp = $1 + AND lower(z.city) LIKE lower($2) || ''%''::text ' || COALESCE(' AND z.zip IN(' || var_bfilter || ')', '') || ')::varchar[] AS zip ORDER BY zip' ; + EXECUTE stmt INTO zip_info USING in_statefp, parsed.location; + IF var_debug THEN + RAISE NOTICE 'Zip range based on only considering city: %', quote_nullable(zip_info.zip); + END IF; + END IF; + -- Brute force -- try to find perfect matches and exit if we have one + -- we first pull all the names in zip and rank by if zip matches input zip and streetname matches street + stmt := 'WITH a AS + ( SELECT * + FROM (SELECT f.*, ad.side, ad.zip, ad.fromhn, ad.tohn, + RANK() OVER(ORDER BY ' || CASE WHEN parsed.zip > '' THEN ' diff_zip(ad.zip,$7) + ' ELSE '' END + ||' CASE WHEN lower(f.name) = lower($2) THEN 0 ELSE levenshtein_ignore_case(f.name, lower($2) ) END + + levenshtein_ignore_case(f.fullname, lower($2 || '' '' || COALESCE($4,'''')) ) + + CASE WHEN (greatest_hn(ad.fromhn,ad.tohn) % 2)::integer = ($1 % 2)::integer THEN 0 ELSE 1 END + + CASE WHEN $1::integer BETWEEN least_hn(ad.fromhn,ad.tohn) AND greatest_hn(ad.fromhn, ad.tohn) + THEN 0 ELSE 4 END + + CASE WHEN lower($4) = lower(f.suftypabrv) OR lower($4) = lower(f.pretypabrv) THEN 0 ELSE 1 END + + rate_attributes($5, f.predirabrv,' + || ' $2, f.name , $4,' + || ' suftypabrv , $6,' + || ' sufdirabrv, prequalabr) + ) + As rank + FROM featnames As f INNER JOIN addr As ad ON (f.tlid = ad.tlid) + WHERE $10 = f.statefp AND $10 = ad.statefp + ' + || CASE WHEN length(parsed.streetName) > 5 THEN ' AND (lower(f.fullname) LIKE (COALESCE($5 || '' '','''') || lower($2) || ''%'')::text OR lower(f.name) = lower($2) OR soundex(f.name) = soundex($2) ) ' ELSE ' AND lower(f.name) = lower($2) ' END + || CASE WHEN zip_info.zip IS NOT NULL THEN ' AND ( ad.zip = ANY($9::varchar[]) ) ' ELSE '' END + || ' ) AS foo ORDER BY rank LIMIT ' || max_results*3 || ' ) + SELECT * FROM ( + SELECT DISTINCT ON (sub.predirabrv,sub.fename,COALESCE(sub.suftypabrv, sub.pretypabrv) ,sub.sufdirabrv,sub.place,s.stusps,sub.zip)' + || ' sub.predirabrv as fedirp,' + || ' sub.fename,' + || ' COALESCE(sub.suftypabrv, sub.pretypabrv) as fetype,' + || ' sub.sufdirabrv as fedirs,' + || ' sub.place ,' + || ' s.stusps as state,' + || ' sub.zip as zip,' + || ' interpolate_from_address($1, to_number(sub.fromhn,''99999999'')::integer,' + || ' to_number(sub.tohn,''99999999'')::integer, sub.the_geom) as address_geom,' + || ' sub.sub_rating + ' + || CASE WHEN parsed.zip > '' THEN ' least(coalesce(diff_zip($7 , sub.zip),0), 10)::integer ' + ELSE '1' END::text + || ' + coalesce(levenshtein_ignore_case($3, sub.place),5)' + || ' as sub_rating,' + || ' sub.exact_address as exact_address, sub.tohn, sub.fromhn ' + || ' FROM (' + || ' SELECT tlid, predirabrv, COALESCE(b.prequalabr || '' '','''' ) || b.name As fename, suftypabrv, sufdirabrv, fromhn, tohn, + side, zip, rate_attributes($5, predirabrv,' + || ' $2, b.name , $4,' + || ' suftypabrv , $6,' + || ' sufdirabrv, prequalabr) + ' + || ' CASE ' + || ' WHEN $1::integer IS NULL OR b.fromhn IS NULL THEN 20' + || ' WHEN $1::integer >= least_hn(b.fromhn, b.tohn) ' + || ' AND $1::integer <= greatest_hn(b.fromhn,b.tohn)' + || ' AND ($1::integer % 2) = (to_number(b.fromhn,''99999999'') % 2)::integer' + || ' THEN 0' + || ' WHEN $1::integer >= least_hn(b.fromhn,b.tohn)' + || ' AND $1::integer <= greatest_hn(b.fromhn,b.tohn)' + || ' THEN 2' + || ' ELSE' + || ' ((1.0 - ' + || '(least_hn($1::text,least_hn(b.fromhn,b.tohn)::text)::numeric /' + || ' (greatest(1,greatest_hn($1::text,greatest_hn(b.fromhn,b.tohn)::text))) )' + || ') * 5)::integer + 5' + || ' END' + || ' as sub_rating,$1::integer >= least_hn(b.fromhn,b.tohn) ' + || ' AND $1::integer <= greatest_hn(b.fromhn,b.tohn) ' + || ' AND ($1 % 2)::numeric::integer = (to_number(b.fromhn,''99999999'') % 2)' + || ' as exact_address, b.name, b.prequalabr, b.pretypabrv, b.tfidr, b.tfidl, b.the_geom, b.place ' + || ' FROM + (SELECT a.tlid, a.fullname, a.name, a.predirabrv, a.suftypabrv, a.sufdirabrv, a.prequalabr, a.pretypabrv, + b.the_geom, tfidr, tfidl, + a.side , + a.fromhn, + a.tohn, + a.zip, + p.name as place + + FROM a INNER JOIN edges As b ON (a.statefp = b.statefp AND a.tlid = b.tlid ' + || ') + INNER JOIN faces AS f ON ($10 = f.statefp AND ( (b.tfidl = f.tfid AND a.side = ''L'') OR (b.tfidr = f.tfid AND a.side = ''R'' ) )) + INNER JOIN place p ON ($10 = p.statefp AND f.placefp = p.placefp ' + || CASE WHEN parsed.location > '' AND zip_info.zip IS NULL THEN ' AND ( lower(p.name) LIKE (lower($3::text) || ''%'') ) ' ELSE '' END + || ') + WHERE a.statefp = $10 AND b.statefp = $10 ' + || CASE WHEN var_restrict_geom IS NOT NULL THEN ' AND ST_Intersects(b.the_geom, $8::geometry) ' ELSE '' END + || ' + + ) As b + ORDER BY 10 , 11 DESC + LIMIT 20 + ) AS sub + JOIN state s ON ($10 = s.statefp) + ORDER BY 1,2,3,4,5,6,7,9 + LIMIT 20) As foo ORDER BY sub_rating, exact_address DESC LIMIT ' || max_results ; + + IF var_debug THEN + RAISE NOTICE 'stmt: %', + replace(replace( replace( + replace( + replace(replace( replace(replace(replace(replace(stmt, '$10', quote_nullable(in_statefp) ), '$2',quote_nullable(parsed.streetName)),'$3', + quote_nullable(parsed.location)), '$4', quote_nullable(parsed.streetTypeAbbrev) ), + '$5', quote_nullable(parsed.preDirAbbrev) ), + '$6', quote_nullable(parsed.postDirAbbrev) ), + '$7', quote_nullable(parsed.zip) ), + '$8', quote_nullable(var_restrict_geom::text) ), + '$9', quote_nullable(zip_info.zip) ), '$1', quote_nullable(parsed.address) ); + --RAISE NOTICE 'PREPARE query_base_geo(integer, varchar,varchar,varchar,varchar,varchar,varchar,geometry,varchar[]) As %', stmt; + --RAISE NOTICE 'EXECUTE query_base_geo(%,%,%,%,%,%,%,%,%); ', parsed.address,quote_nullable(parsed.streetName), quote_nullable(parsed.location), quote_nullable(parsed.streetTypeAbbrev), quote_nullable(parsed.preDirAbbrev), quote_nullable(parsed.postDirAbbrev), quote_nullable(parsed.zip), quote_nullable(var_restrict_geom::text), quote_nullable(zip_info.zip); + --RAISE NOTICE 'DEALLOCATE query_base_geo;'; + END IF; + FOR results IN EXECUTE stmt USING parsed.address,parsed.streetName, parsed.location, parsed.streetTypeAbbrev, parsed.preDirAbbrev, parsed.postDirAbbrev, parsed.zip, var_restrict_geom, zip_info.zip, in_statefp LOOP + + -- If we found a match with an exact street, then don't bother + -- trying to do non-exact matches + + exact_street := true; + + IF results.exact_address THEN + ADDY.address := parsed.address; + ELSE + ADDY.address := CASE WHEN parsed.address > to_number(results.tohn,'99999999') AND parsed.address > to_number(results.fromhn, '99999999') THEN greatest_hn(results.fromhn, results.tohn)::integer + ELSE least_hn(results.fromhn, results.tohn)::integer END ; + END IF; + + ADDY.preDirAbbrev := results.fedirp; + ADDY.streetName := results.fename; + ADDY.streetTypeAbbrev := results.fetype; + ADDY.postDirAbbrev := results.fedirs; + ADDY.location := results.place; + ADDY.stateAbbrev := results.state; + ADDY.zip := results.zip; + ADDY.parsed := TRUE; + + GEOMOUT := results.address_geom; + RATING := results.sub_rating; + var_n := var_n + 1; + + IF var_bestrating IS NULL THEN + var_bestrating := RATING; /** the first record to come is our best rating we will ever get **/ + END IF; + + -- Only consider matches with decent ratings + IF RATING < 90 THEN + RETURN NEXT; + END IF; + + -- If we get an exact match, then just return that + IF RATING = 0 THEN + RETURN; + END IF; + + IF var_n >= max_results THEN --we have exceeded our desired limit + RETURN; + END IF; + + END LOOP; + + IF var_bestrating < 30 THEN --if we already have a match with a rating of 30 or less, its unlikely we can do any better + RETURN; + END IF; + + +-- There are a couple of different things to try, from the highest preference and falling back -- to lower-preference options. -- We start out with zip-code matching, where the zip code could possibly be in more than one -- state. We loop through each state its in. @@ -223,5 +426,6 @@ BEGIN END; $$ LANGUAGE 'plpgsql' STABLE COST 1000 ROWS 50; +--ALTER FUNCTION geocode_address(IN norm_addy, IN integer , IN geometry) SET enable_mergejoin='off'; diff --git a/extras/tiger_geocoder/tiger_2010/geocode/interpolate_from_address.sql b/extras/tiger_geocoder/tiger_2010/geocode/interpolate_from_address.sql index 9537cb444..b331786c5 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/interpolate_from_address.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/interpolate_from_address.sql @@ -42,6 +42,9 @@ BEGIN END IF; addrwidth := greatest(addr1,addr2) - least(addr1,addr2); + IF addrwidth = 0 or addrwidth IS NULL THEN + addrwidth = 1; + END IF; part := (given_address - least(addr1,addr2)) / trunc(addrwidth, 1); IF addr1 > addr2 THEN diff --git a/extras/tiger_geocoder/tiger_2010/geocode/other_helper_functions.sql b/extras/tiger_geocoder/tiger_2010/geocode/other_helper_functions.sql index 1bf5cfc27..ef7a882f5 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/other_helper_functions.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/other_helper_functions.sql @@ -3,7 +3,7 @@ * * Copyright (C) 2011 Regina Obe and Leo Hsu (Paragon Corporation) **/ --- Note we are wrapping this in a function so we can make it immutable and those useable in an index +-- Note we are wrapping this in a function so we can make it immutable and thus useable in an index -- It also allows us to shorten and possibly better cache the repetitive pattern in the code -- greatest(to_number(b.fromhn,''99999999''),to_number(b.tohn,''99999999'')) -- and least(to_number(b.fromhn,''99999999''),to_number(b.tohn,''99999999'')) @@ -47,6 +47,15 @@ $$ LANGUAGE sql IMMUTABLE COST 5; + +-- Generate script to drop all non-primary unique indexes on tiger and tiger_data tables +CREATE OR REPLACE FUNCTION drop_indexes_generate_script(tiger_data_schema text DEFAULT 'tiger_data') +RETURNS text AS +$$ +SELECT array_to_string(ARRAY(SELECT 'DROP INDEX ' || schemaname || '.' || indexname || ';' +FROM pg_catalog.pg_indexes where schemaname IN('tiger',$1) AND indexname NOT LIKE 'uidx%' AND indexname NOT LIKE 'pk_%' AND indexname NOT LIKE '%key'), E'\n'); +$$ +LANGUAGE sql STABLE; -- Generate script to create missing indexes in tiger tables. -- This will generate sql you can run to index commonly used join columns in geocoder for tiger and tiger_data schemas -- CREATE OR REPLACE FUNCTION missing_indexes_generate_script() @@ -54,10 +63,10 @@ RETURNS text AS $$ SELECT array_to_string(ARRAY( -- basic btree regular indexes -SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_' || c.column_name || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(' || c.column_name || ')' As index +SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_' || c.column_name || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(' || c.column_name || ');' As index FROM (SELECT table_name, table_schema FROM information_schema.tables WHERE table_type = 'BASE TABLE') As t INNER JOIN - (SELECT * FROM information_schema.columns WHERE column_name IN('countyfp','tlid', 'tfidl', 'tfidr', 'tfid', 'zip') ) AS c + (SELECT * FROM information_schema.columns WHERE column_name IN('countyfp', 'tlid', 'tfidl', 'tfidr', 'tfid', 'zip', 'placefp', 'cousubfp') ) AS c ON (t.table_name = c.table_name AND t.table_schema = c.table_schema) LEFT JOIN pg_catalog.pg_indexes i ON (i.tablename = c.table_name AND i.schemaname = c.table_schema @@ -65,7 +74,7 @@ FROM (SELECT table_name, table_schema FROM WHERE i.tablename IS NULL AND c.table_schema IN('tiger','tiger_data') -- Gist spatial indexes -- UNION ALL -SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_' || c.column_name || '_gist ON ' || c.table_schema || '.' || c.table_name || ' USING gist(' || c.column_name || ')' As index +SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_' || c.column_name || '_gist ON ' || c.table_schema || '.' || c.table_name || ' USING gist(' || c.column_name || ');' As index FROM (SELECT table_name, table_schema FROM information_schema.tables WHERE table_type = 'BASE TABLE') As t INNER JOIN (SELECT * FROM information_schema.columns WHERE column_name IN('the_geom', 'geom') ) AS c @@ -76,7 +85,7 @@ FROM (SELECT table_name, table_schema FROM WHERE i.tablename IS NULL AND c.table_schema IN('tiger','tiger_data') -- Soundex indexes -- UNION ALL -SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_snd_' || c.column_name || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(soundex(' || c.column_name || '))' As index +SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_snd_' || c.column_name || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(soundex(' || c.column_name || '));' As index FROM (SELECT table_name, table_schema FROM information_schema.tables WHERE table_type = 'BASE TABLE') As t INNER JOIN (SELECT * FROM information_schema.columns WHERE column_name IN('name', 'place', 'city') ) AS c @@ -86,22 +95,22 @@ FROM (SELECT table_name, table_schema FROM AND indexdef LIKE '%soundex(%' || c.column_name || '%' AND indexdef LIKE '%_snd_' || c.column_name || '%' ) WHERE i.tablename IS NULL AND c.table_schema IN('tiger','tiger_data') AND (c.table_name LIKE '%county%' OR c.table_name LIKE '%featnames' - OR c.table_name LIKE '%place' or c.table_name LIKE '%zip%') + OR c.table_name LIKE '%place' or c.table_name LIKE '%zip%' or c.table_name LIKE '%cousub') -- Lower indexes -- UNION ALL -SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_lower_' || c.column_name || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(lower(' || c.column_name || '))' As index +SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_lower_' || c.column_name || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(lower(' || c.column_name || '));' As index FROM (SELECT table_name, table_schema FROM information_schema.tables WHERE table_type = 'BASE TABLE') As t INNER JOIN (SELECT * FROM information_schema.columns WHERE column_name IN('name', 'place', 'city') ) AS c ON (t.table_name = c.table_name AND t.table_schema = c.table_schema) LEFT JOIN pg_catalog.pg_indexes i ON (i.tablename = c.table_name AND i.schemaname = c.table_schema - AND indexdef LIKE '%lower(%' || c.column_name || '%') + AND indexdef LIKE '%btree%(%lower(%' || c.column_name || '%') WHERE i.tablename IS NULL AND c.table_schema IN('tiger','tiger_data') - AND (c.table_name LIKE '%county%' OR c.table_name LIKE '%featnames' OR c.table_name LIKE '%place' or c.table_name LIKE '%zip%') + AND (c.table_name LIKE '%county%' OR c.table_name LIKE '%featnames' OR c.table_name LIKE '%place' or c.table_name LIKE '%zip%' or c.table_name LIKE '%cousub') -- Least address index btree least_hn(fromhn, tohn) UNION ALL -SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_least_address' || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(least_hn(fromhn, tohn))' As index +SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_least_address' || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(least_hn(fromhn, tohn));' As index FROM (SELECT table_name, table_schema FROM information_schema.tables WHERE table_type = 'BASE TABLE' AND table_name LIKE '%addr' AND table_schema IN('tiger','tiger_data')) As t INNER JOIN (SELECT * FROM information_schema.columns WHERE column_name IN('fromhn') ) AS c @@ -110,28 +119,49 @@ FROM (SELECT table_name, table_schema FROM (i.tablename = c.table_name AND i.schemaname = c.table_schema AND indexdef LIKE '%least_hn(%' || c.column_name || '%') WHERE i.tablename IS NULL --- var_ops fullname -- +-- var_ops lower -- UNION ALL SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_l' || c.column_name || '_var_ops' || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(lower(' || c.column_name || ') varchar_pattern_ops);' As index FROM (SELECT table_name, table_schema FROM - information_schema.tables WHERE table_type = 'BASE TABLE' AND table_name LIKE '%featnames' AND table_schema IN('tiger','tiger_data')) As t INNER JOIN - (SELECT * FROM information_schema.columns WHERE column_name IN('fullname') ) AS c + information_schema.tables WHERE table_type = 'BASE TABLE' AND (table_name LIKE '%featnames' or table_name LIKE '%place' or table_name LIKE '%zip_lookup_base' or table_name LIKE '%zip_state_loc') AND table_schema IN('tiger','tiger_data')) As t INNER JOIN + (SELECT * FROM information_schema.columns WHERE column_name IN('name', 'city', 'place', 'fullname') ) AS c ON (t.table_name = c.table_name AND t.table_schema = c.table_schema) LEFT JOIN pg_catalog.pg_indexes i ON (i.tablename = c.table_name AND i.schemaname = c.table_schema AND indexdef LIKE '%btree%(%lower%' || c.column_name || ')%varchar_pattern_ops%') WHERE i.tablename IS NULL -- var_ops mtfcc -- -UNION ALL +/** UNION ALL SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_' || c.column_name || '_var_ops' || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(' || c.column_name || ' varchar_pattern_ops);' As index FROM (SELECT table_name, table_schema FROM - information_schema.tables WHERE table_type = 'BASE TABLE' AND table_name LIKE '%featnames' or table_name LIKE '%edges' AND table_schema IN('tiger','tiger_data')) As t INNER JOIN + information_schema.tables WHERE table_type = 'BASE TABLE' AND (table_name LIKE '%featnames' or table_name LIKE '%edges') AND table_schema IN('tiger','tiger_data')) As t INNER JOIN (SELECT * FROM information_schema.columns WHERE column_name IN('mtfcc') ) AS c ON (t.table_name = c.table_name AND t.table_schema = c.table_schema) LEFT JOIN pg_catalog.pg_indexes i ON (i.tablename = c.table_name AND i.schemaname = c.table_schema AND indexdef LIKE '%btree%(' || c.column_name || '%varchar_pattern_ops%') +WHERE i.tablename IS NULL **/ +-- zipl zipr on edges -- +UNION ALL +SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_' || c.column_name || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(' || c.column_name || ' );' As index +FROM (SELECT table_name, table_schema FROM + information_schema.tables WHERE table_type = 'BASE TABLE' AND table_name LIKE '%edges' AND table_schema IN('tiger','tiger_data')) As t INNER JOIN + (SELECT * FROM information_schema.columns WHERE column_name IN('zipl', 'zipr') ) AS c + ON (t.table_name = c.table_name AND t.table_schema = c.table_schema) + LEFT JOIN pg_catalog.pg_indexes i ON + (i.tablename = c.table_name AND i.schemaname = c.table_schema + AND indexdef LIKE '%btree%(' || c.column_name || '%)%') WHERE i.tablename IS NULL + +-- unique index on tlid state county -- +/*UNION ALL +SELECT 'CREATE UNIQUE INDEX uidx_' || t.table_schema || '_' || t.table_name || '_tlid_statefp_countyfp ON ' || t.table_schema || '.' || t.table_name || ' USING btree(tlid,statefp,countyfp);' As index +FROM (SELECT table_name, table_schema FROM + information_schema.tables WHERE table_type = 'BASE TABLE' AND table_name LIKE '%edges' AND table_schema IN('tiger','tiger_data')) As t + LEFT JOIN pg_catalog.pg_indexes i ON + (i.tablename = t.table_name AND i.schemaname = t.table_schema + AND indexdef LIKE '%btree%(%tlid,%statefp%countyfp%)%') +WHERE i.tablename IS NULL*/ --full text indexes on name field-- /**UNION ALL SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_fullname_ft_gist' || ' ON ' || c.table_schema || '.' || c.table_name || ' USING gist(to_tsvector(''english'',fullname))' As index @@ -155,7 +185,7 @@ FROM (SELECT table_name, table_schema FROM (i.tablename = c.table_name AND i.schemaname = c.table_schema AND indexdef LIKE '%gist%(' || c.column_name || '%gist_trgm_ops%') WHERE i.tablename IS NULL **/ -ORDER BY 1), E';\r'); +ORDER BY 1), E'\r'); $$ LANGUAGE sql VOLATILE; @@ -169,4 +199,43 @@ BEGIN RETURN true; END $$ -language plpgsql; \ No newline at end of file +language plpgsql; + + +CREATE OR REPLACE FUNCTION drop_dupe_featnames_generate_script() RETURNS text +AS +$$ + +SELECT array_to_string(ARRAY(SELECT 'CREATE TEMPORARY TABLE dup AS +SELECT min(f.gid) As min_gid, f.tlid, lower(f.fullname) As fname + FROM ONLY ' || t.table_schema || '.' || t.table_name || ' As f + GROUP BY f.tlid, lower(f.fullname) + HAVING count(*) > 1; + +DELETE FROM ' || t.table_schema || '.' || t.table_name || ' AS feat +WHERE EXISTS (SELECT tlid FROM dup WHERE feat.tlid = dup.tlid AND lower(feat.fullname) = dup.fname + AND feat.gid > dup.min_gid); +DROP TABLE dup; +CREATE INDEX idx_' || t.table_schema || '_' || t.table_name || '_tlid ' || ' ON ' || t.table_schema || '.' || t.table_name || ' USING btree(tlid); +' As drop_sql_create_index +FROM (SELECT table_name, table_schema FROM + information_schema.tables WHERE table_type = 'BASE TABLE' AND (table_name LIKE '%featnames' ) AND table_schema IN('tiger','tiger_data')) As t + LEFT JOIN pg_catalog.pg_indexes i ON + (i.tablename = t.table_name AND i.schemaname = t.table_schema + AND indexdef LIKE '%btree%(%tlid%') +WHERE i.tablename IS NULL) ,E'\r'); + +$$ +LANGUAGE sql VOLATILE; + +--DROP FUNCTION IF EXISTS zip_range(text,integer,integer); +-- Helper function that useful for catch slight mistakes in zip position given a 5 digit zip code +-- will return a range of zip codes that are between zip - num_before and zip - num_after +-- e.g. usage -> zip_range('02109', -1,+1) -> {'02108', '02109', '02110'} +CREATE OR REPLACE FUNCTION zip_range(zip text, range_start integer, range_end integer) RETURNS varchar[] AS +$$ + SELECT ARRAY( + SELECT lpad((to_number( CASE WHEN trim(substring($1,1,5)) ~ '^[0-9]+$' THEN $1 ELSE '0' END,'99999')::integer + i)::text, 5, '0')::varchar + FROM generate_series($2, $3) As i ); +$$ +LANGUAGE sql IMMUTABLE STRICT; \ No newline at end of file diff --git a/extras/tiger_geocoder/tiger_2010/geocode/reverse_geocode.sql b/extras/tiger_geocoder/tiger_2010/geocode/reverse_geocode.sql index bd393c509..e38f0c6a5 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/reverse_geocode.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/reverse_geocode.sql @@ -23,6 +23,7 @@ DECLARE var_primary_dist numeric(10,2) ; var_pt geometry; var_place varchar; + var_county varchar; var_stmt text; var_debug boolean = false; var_zip varchar := NULL; @@ -58,16 +59,21 @@ BEGIN RAISE NOTICE 'Get matching counties start: %', clock_timestamp(); END IF; -- locate county - SELECT countyfp INTO var_countyfp FROM county WHERE statefp = var_state AND ST_Intersects(the_geom, var_pt) LIMIT 1; + SELECT countyfp, name INTO var_countyfp, var_county FROM county WHERE statefp = var_state AND ST_Intersects(the_geom, var_pt) LIMIT 1; --locate zip SELECT zcta5ce INTO var_zip FROM zcta5 WHERE statefp = var_state AND ST_Intersects(the_geom, var_pt) LIMIT 1; - -- lcoate city + -- locate city IF var_zip > '' THEN var_addy.zip := var_zip ; - SELECT z.place INTO var_place FROM zip_state_loc AS z WHERE z.zip = var_zip and z.statefp = var_state LIMIT 1; - var_addy.location := var_place; + END IF; + --SELECT z.name INTO var_place FROM place As z WHERE z.statefp = var_state AND ST_Intersects(the_geom, var_pt) LIMIT 1; + IF var_place > '' THEN + var_addy.location := var_place; + ELSIF var_zip > '' THEN + SELECT z.city INTO var_place FROM zip_lookup_base As z WHERE z.statefp = var_state AND z.county = var_county AND z.zip = var_zip LIMIT 1; + var_addy.location := var_place; END IF; IF var_debug THEN @@ -90,14 +96,14 @@ BEGIN WITH ref AS ( SELECT ' || quote_literal(var_pt::text) || '::geometry As ref_geom ) , f AS - ( SELECT faces.* FROM faces CROSS JOIN ref - WHERE statefp = ' || quote_literal(var_state) || ' AND countyfp = ' || quote_literal(var_countyfp) || ' + ( SELECT faces.* FROM faces CROSS JOIN ref + WHERE faces.statefp = ' || quote_literal(var_state) || ' AND faces.countyfp = ' || quote_literal(var_countyfp) || ' AND ST_Intersects(faces.the_geom, ref_geom) ), e AS ( SELECT edges.* , CASE WHEN edges.tfidr = f.tfid THEN ''R'' WHEN edges.tfidl = f.tfid THEN ''L'' ELSE NULL END::varchar As eside, CASE WHEN edges.tfidr = f.tfid THEN rfromadd ELSE lfromadd END As fromhn, CASE WHEN edges.tfidr = f.tfid THEN rtoadd ELSE ltoadd END As tohn, - CASE WHEN edges.tfidr = f.tfid THEN zipr ELSE zipl END As zip , + CASE WHEN edges.tfidr = f.tfid THEN zipr ELSE zipl END As zip, ST_ClosestPoint(edges.the_geom,ref_geom) As center_pt, ref_geom FROM edges INNER JOIN f ON (f.statefp = edges.statefp AND (edges.tfidr = f.tfid OR edges.tfidl = f.tfid)) CROSS JOIN ref @@ -168,11 +174,16 @@ BEGIN END IF; var_addy.address := var_nstrnum; END IF; - IF var_redge.zip > '' AND COALESCE(var_addy.zip,'') <> var_redge.zip THEN - var_addy.zip := var_redge.zip; - SELECT z.place INTO var_place FROM zip_state_loc AS z WHERE z.zip = var_redge.zip and z.statefp = var_state LIMIT 1; - var_addy.location := var_place; - END IF; + IF var_redge.zip > '' THEN + var_addy.zip := var_redge.zip; + ELSE + var_addy.zip := var_zip; + END IF; + -- IF var_redge.location > '' THEN + -- var_addy.location := var_redge.location; + -- ELSE + -- var_addy.location := var_place; + -- END IF; -- This is a cross streets - only add if not the primary adress street IF var_redge.fullname > '' AND var_redge.fullname <> var_primary_fullname THEN diff --git a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress index b9d908c39..2e027c749 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress +++ b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress @@ -4,103 +4,51 @@ T3|100 Federal St, Boston, MA 02110|POINT(-71.05631 42.35477)|1 T4|529 Main St, Boston, MA 02129|POINT(-71.07187 42.38351)|0 T5|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|0 T6|100 Federal St, Boston, MA 02110|POINT(-71.05631 42.35477)|1 -T6|100 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|8 -T6|100 Federal St, Blackstone, MA 01504|POINT(-71.5247 42.02885)|8 -T6|100 Federal St, Millers Falls, MA 01349|POINT(-72.49842 42.56976)|9 -T6|100 Federal St, Andover, MA 01810|POINT(-71.20228 42.69118)|10 -T6|100 Federal St, Northampton, MA 01062|POINT(-72.65537 42.32273)|10 -T6|100 Federal St, Lawrence, MA 01810|POINT(-71.20228 42.69118)|10 -T6|100 Federal St, Belchertown, MA 01007|POINT(-72.41221 42.29456)|10 -T6|100 Federal St, Agawam Town, MA 01001|POINT(-72.6234 42.0848)|11 -T6|Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|14 +T6|98 Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|14 T7|529 Main St, Boston, MA 02129|POINT(-71.07187 42.38351)|0 T8|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|0 T9|100 Federal St, Boston, MA 02110|POINT(-71.05631 42.35477)|1 -T9|100 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|8 -T9|100 Federal St, Blackstone, MA 01504|POINT(-71.5247 42.02885)|8 -T9|100 Federal St, Millers Falls, MA 01349|POINT(-72.49842 42.56976)|9 -T9|100 Federal St, Andover, MA 01810|POINT(-71.20228 42.69118)|10 -T9|100 Federal St, Northampton, MA 01062|POINT(-72.65537 42.32273)|10 -T9|100 Federal St, Lawrence, MA 01810|POINT(-71.20228 42.69118)|10 -T9|100 Federal St, Belchertown, MA 01007|POINT(-72.41221 42.29456)|10 -T9|100 Federal St, Agawam Town, MA 01001|POINT(-72.6234 42.0848)|11 -T9|Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|14 -T9|100 Federal St, Greenfield Town, MA 01301|POINT(-72.59899 42.59028)|15 -T9|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|29 +T9|98 Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|14 T10|530 Main St, Boston, MA 02129|POINT(-71.07173 42.38345)|0 T11|76 State St, Boston, MA 02109|POINT(-71.05615 42.359)|0 -T12|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|3 -T12|101 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|8 -T12|101 Federal St, Blackstone, MA 01504|POINT(-71.5247 42.02886)|8 -T12|101 Federal St, Millers Falls, MA 01349|POINT(-72.49939 42.56802)|9 -T12|101 Federal St, Andover, MA 01810|POINT(-71.20125 42.69015)|10 -T12|101 Federal St, Northampton, MA 01062|POINT(-72.65537 42.32273)|10 -T12|101 Federal St, Lawrence, MA 01810|POINT(-71.20125 42.69015)|10 -T12|101 Federal St, Belchertown, MA 01007|POINT(-72.41206 42.29446)|10 -T12|101 Federal St, Agawam Town, MA 01001|POINT(-72.62231 42.08371)|11 -T12|101 Federal St, Greenfield Town, MA 01301|POINT(-72.59909 42.59006)|15 -T12|Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|16 -T12|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|29 +T12|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|1 +T12|99 Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|14 +T12|98 Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|27 #TB1|24 School St, Boston, MA 02108|24 School Street, Boston, MA 02108|POINT(-71.05888 42.35762)|0 #TB1|20 School St, Boston, MA 02108|20 School Street, Boston, MA 02109|POINT(-71.05876 42.35758)|1 -T13|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|3 -T13|101 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|8 -T13|101 Federal St, Blackstone, MA 01504|POINT(-71.5247 42.02886)|8 -T13|101 Federal St, Millers Falls, MA 01349|POINT(-72.49939 42.56802)|9 -T13|101 Federal St, Andover, MA 01810|POINT(-71.20125 42.69015)|10 -T13|101 Federal St, Northampton, MA 01062|POINT(-72.65537 42.32273)|10 -T13|101 Federal St, Lawrence, MA 01810|POINT(-71.20125 42.69015)|10 -T13|101 Federal St, Belchertown, MA 01007|POINT(-72.41206 42.29446)|10 -T13|101 Federal St, Agawam Town, MA 01001|POINT(-72.62231 42.08371)|11 -T13|101 Federal St, Greenfield Town, MA 01301|POINT(-72.59909 42.59006)|15 -T13|Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|16 -T13|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|29 -T14|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|3 -T15|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|13 -T16|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|13 -T16|101 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|18 -T16|101 Federal St, Blackstone, MA 01504|POINT(-71.5247 42.02886)|18 -T16|101 Federal St, Millers Falls, MA 01349|POINT(-72.49939 42.56802)|19 -T16|101 Federal St, Andover, MA 01810|POINT(-71.20125 42.69015)|20 -T16|101 Federal St, Northampton, MA 01062|POINT(-72.65537 42.32273)|20 -T16|101 Federal St, Lawrence, MA 01810|POINT(-71.20125 42.69015)|20 -T16|101 Federal St, Belchertown, MA 01007|POINT(-72.41206 42.29446)|20 -T16|101 Federal St, Agawam Town, MA 01001|POINT(-72.62231 42.08371)|21 -T16|101 Federal St, Greenfield Town, MA 01301|POINT(-72.59909 42.59006)|25 -T16|Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|26 -T16|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|39 +T13|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|1 +T13|99 Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|14 +T13|98 Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|27 +T14|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|1 +T15|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|11 +T16|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|11 +T16|99 Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|24 +T16|98 Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|37 +T17|77 Massachusetts Ave, Cambridge, MA 02139|POINT(-71.09436 42.35981)|0 #1087a|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|1 -#1087a|75 State St, Milton, MA 02186|POINT(-71.04091 42.25635)|5 -#1087b|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|3 -#1087b|75 State St, Milton, MA 02186|POINT(-71.04091 42.25635)|6 +#1087b|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|1 +#1087b|75 State St, Milton, MA 02186|POINT(-71.04091 42.25635)|4 #1087c|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|0 -#1073a|212 3rd Ave N, Minneapolis, MN 55401|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.27181 44.98502)|2 -#1073a|212 3rd Ave S, Minneapolis, MN 55404|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.26355 44.98062)|3 +#1073a|212 3rd Ave N, Minneapolis, MN 55401|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.27181 44.98502)|10 +#1073a|212 3rd Ave S, Minneapolis, MN 55401|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.26334 44.98087)|12 #1073b|212 3rd Ave N, Minneapolis, MN 55401|POINT(-93.27181 44.98502)|0 #1076a|16725 Co Rd 24, Plymouth, MN 55447|16725 Co Rd 24, Plymouth, MN 55447|POINT(-93.49326 45.02186)|25 -#1076a|16725 24th Ave N, Plymouth, MN 55447|16725 Co Rd 24, Plymouth, MN 55447|POINT(-93.4927 45.00457)|47 +#1076a|15898 Co Rd 24, Plymouth, MN 55446|16725 Co Rd 24, Plymouth, MN 55447|POINT(-93.48125 45.02691)|31 #1076b|16725 Co Rd 24, Plymouth, MN 55447|16725 County Road 24, Plymouth, MN 55447|POINT(-93.49326 45.02186)|25 -#1076b|16725 24th Ave N, Plymouth, MN 55447|16725 County Road 24, Plymouth, MN 55447|POINT(-93.4927 45.00457)|47 #1076c|13800 Co Hwy 9, Andover, MN 55304|13800 County Hwy 9, Andover, MN 55304|POINT(-93.35733 45.22052)|30 #1076d|13800 Co Hwy 9, Andover, MN 55304|13800 9, Andover, MN 55304|POINT(-93.35733 45.22052)|0 -#1076e|3900 US Hwy 6, North Eastham, MA 02642|3900 Route 6, Eastham, Massachusetts 02642|POINT(-69.98743 41.85543)|15 +#1076e|3877 US Hwy 6, North Eastham, MA 02642|3900 Route 6, Eastham, Massachusetts 02642|POINT(-69.98698 41.84775)|23 #1076f|1940 Co Rd C W, Roseville, MN 55113|1940 County Road C W, Roseville, MN 55113|POINT(-93.18492 45.02058)|25 #1076f|1940 W Co Rd C, Roseville, MN 55113|1940 County Road C W, Roseville, MN 55113|POINT(-93.18492 45.02058)|29 +#1076f|138 Co Rd C W, Little Canada, MN 55113|1940 County Road C W, Roseville, MN 55113|POINT(-93.10518 45.02074)|47 #1076g|15709 Rockford Rd, Plymouth, MN 55447|15709 Rockford Road, Plymouth, MN 55447|POINT(-93.47898 45.02726)|0 #1076h|300 State Hwy 3, Hingham, MA 02043|300 Rt 3A, Hingham, MA|POINT(-70.91448 42.24915)|18 #1076h|300 State Hwy 3, Burlington, MA 01803|300 Rt 3A, Hingham, MA|POINT(-71.20674 42.51586)|25 #1076h|300 State Hwy 3, Boston, MA 02114|300 Rt 3A, Hingham, MA|POINT(-71.07011 42.36428)|25 #1074a|8525 Cottagewood Ter NE, Blaine, MN 55434|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-93.24462 45.12481)|14 -#1074a|Cottage Wood Dr SW, Lake Shore, MN 56468|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-94.39339 46.43663)|32 -#1074a|Cottage Wood Dr SW, Merrifield, MN 56468|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-94.39339 46.43663)|32 -#1074a|Cottage Wood Dr SW, Nisswa, MN 56468|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-94.39339 46.43663)|32 -#1074a|Cottage Wood Dr SW, Pequot Lakes, MN 56468|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-94.39339 46.43663)|32 -#1074a|Cottage Wood Dr SW, Pillager, MN 56468|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-94.39339 46.43663)|32 -#1074a|Cottagewood Ter NE, Spring Lake Park, MN 55432|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-93.24464 45.1237)|33 -#1074a|Cottage Wood Ln, Fifty Lakes, MN 56448|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-94.07085 46.75406)|34 -#1074a|Cottage Wood, Fifty Lakes, MN 56448|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-94.07085 46.75406)|34 -#1074a|Cottagewood Ave, Deephaven, MN 55331|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-93.53206 44.92783)|43 +#1074a|8499 Cottagewood Ter NE, Spring Lake Park, MN 55432|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-93.24464 45.1237)|34 #1074b|8525 Cottagewood Ter NE, Blaine, MN 55434|8525 COTTAGEWOOD TERR, Blaine, MN 55434|POINT(-93.24462 45.12481)|4 +#1074b|8499 Cottagewood Ter NE, Spring Lake Park, MN 55432|8525 COTTAGEWOOD TERR, Blaine, MN 55434|POINT(-93.24464 45.1237)|24 #1070a|100 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|8 #1070b|100 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|8 #1112a|8401 W 35W Svc Rd NE, Blaine, MN 55449|8401 W 35W Service Dr NE, Blaine, MN 55449|POINT(-93.19084 45.12386)|10 @@ -111,7 +59,15 @@ T16|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|39 #1113a|8040 Old Cedar Ave S, Bloomington, MN 55425|8040 OLD CEDAR AVE S, BLOOMINGTON, MN 55425|POINT(-93.24792 44.85708)|0 #1113b|8040 Old Cedar Ave S, Bloomington, MN 55425|8040 CEDAR AVE S, BLOOMINGTON, MN 55425|POINT(-93.24792 44.85708)|10 #1113c|17405 Old Rockford Rd, Plymouth, MN 55446|17405 Old Rockford Rd, Plymouth, MN 55446|POINT(-93.5012 45.0345)|0 -#1113d|Rockford Rd, Plymouth, MN 55446|17405 Rockford Rd, Plymouth, MN 55446|POINT(-93.47977 45.02701)|5 +#1113d|12898 Rockford Rd, Plymouth, MN 55446|17405 Rockford Rd, Plymouth, MN 55446|POINT(-93.44005 45.03082)|6 +#1113d|15801 Rockford Rd, Plymouth, MN 55447|17405 Rockford Rd, Plymouth, MN 55446|POINT(-93.47977 45.02701)|6 #1113e|198 Old Constance Blvd NW, Andover, MN 55304|198 OLD CONSTANCE BLVD, ANDOVER, MN 55304|POINT(-93.27027 45.26203)|4 #1113f|198 Constance Blvd NW, Andover, MN 55304|198 CONSTANCE BLVD, ANDOVER, MN 55304|POINT(-93.26839 45.26229)|4 #1113f|198 Constance Blvd NE, Ham Lake, MN 55304|198 CONSTANCE BLVD, ANDOVER, MN 55304|POINT(-93.26114 45.2657)|11 +#1145a|4051 27th Ave S, Minneapolis, MN 55406|4051 27th Ave S Minneapolis MN 55405|POINT(-93.23339 44.92959)|1 +#1145b|3625 18th Ave S, Minneapolis, MN 55407|3625 18th Ave S Minneapolis MN 55406|POINT(-93.24863 44.9373)|1 +#1145c|4057 10th Ave S, Minneapolis, MN 55407|4057 10th Ave S Minneapolis MN 55406|POINT(-93.25997 44.92951)|1 +#1145d|8498 141st Ct, Apple Valley, MN 55124|8512 141 St Ct Apple Valley MN 55124|POINT(-93.23694 44.74478)|15 +#1145d|4898 141st St W, Apple Valley, MN 55124|8512 141 St Ct Apple Valley MN 55124|POINT(-93.1625 44.74472)|24 +#1145e|103 E 36th St, Minneapolis, MN 55408|103 36th St W Minneapolis MN 55409|POINT(-93.2766 44.93774)|5 +#1145e|103 W 36th St, Minneapolis, MN 55408|103 36th St W Minneapolis MN 55409|POINT(-93.27979 44.93773)|5 diff --git a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql index ea2713cc5..948cf33f1 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql +++ b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql @@ -30,6 +30,10 @@ SELECT 'T14', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00 SELECT 'T15', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',1); SELECT 'T16', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',50); +-- needs addr these ones have more than 2 sides +-- my alma mater doesn't geocode right without addr check -- +SELECT 'T17', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('77 Massachusetts Avenue, Cambridge, MA 02139',1); + -- Ratings wrong for missing or wrong local zips SELECT '#1087a' As ticket, pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('75 State Street, Boston, MA 02110',3); SELECT '#1087b' As ticket, pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('75 State Street, Boston, MA',3); @@ -42,7 +46,7 @@ SELECT '#1073b' As ticket, pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid -- country roads and highways with spaces in street type SELECT '#1076a' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target) As g, target FROM (SELECT '16725 Co Rd 24, Plymouth, MN 55447'::text As target) As f) As foo; -SELECT '#1076b' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '16725 County Road 24, Plymouth, MN 55447'::text As target) As f) As foo; +SELECT '#1076b' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '16725 County Road 24, Plymouth, MN 55447'::text As target) As f) As foo; SELECT '#1076c' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '13800 County Hwy 9, Andover, MN 55304'::text As target) AS f) As foo; SELECT '#1076d' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '13800 9, Andover, MN 55304'::text As target) AS f) As foo; SELECT '#1076e' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,4) As g, target FROM (SELECT '3900 Route 6, Eastham, Massachusetts 02642'::text As target) AS f) As foo; @@ -67,10 +71,10 @@ SELECT '#1070b' As ticket, pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid -- service roads and interstates SELECT '#1112a' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8401 W 35W Service Dr NE, Blaine, MN 55449'::text As target) As f) As foo; -SELECT '#1112b' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8401 35W, Blaine, MN 55449'::text As target) As f) As foo; -SELECT '#1112c' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8401 35W West, Blaine, MN 55449'::text As target) As f) As foo; -SELECT '#1112d' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8401 West 35W, Blaine, MN 55449'::text As target) As f) As foo; -SELECT '#1112e' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8401 W 35W, Blaine, MN 55449'::text As target) As f) As foo; +SELECT '#1112b' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '8401 35W, Blaine, MN 55449'::text As target) As f) As foo; +SELECT '#1112c' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '8401 35W West, Blaine, MN 55449'::text As target) As f) As foo; +SELECT '#1112d' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '8401 West 35W, Blaine, MN 55449'::text As target) As f) As foo; +SELECT '#1112e' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '8401 W 35W, Blaine, MN 55449'::text As target) As f) As foo; -- working with prequalabrv such as Old .. something or other SELECT '#1113a' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8040 OLD CEDAR AVE S, BLOOMINGTON, MN 55425'::text As target) As f) As foo; @@ -79,4 +83,11 @@ SELECT '#1113c' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(S SELECT '#1113d' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '17405 Rockford Rd, Plymouth, MN 55446'::text As target) As f) As foo; SELECT '#1113e' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '198 OLD CONSTANCE BLVD, ANDOVER, MN 55304'::text As target) As f) As foo; SELECT '#1113f' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '198 CONSTANCE BLVD, ANDOVER, MN 55304'::text As target) As f) As foo; + +-- #1145 addresses used to be slow to geocode took minutes +SELECT '#1145a' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '4051 27th Ave S Minneapolis MN 55405'::text As target) As f) As foo; +SELECT '#1145b' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '3625 18th Ave S Minneapolis MN 55406'::text As target) As f) As foo; +SELECT '#1145c' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '4057 10th Ave S Minneapolis MN 55406'::text As target) As f) As foo; +SELECT '#1145d' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8512 141 St Ct Apple Valley MN 55124'::text As target) As f) As foo; +SELECT '#1145e' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target) As g, target FROM (SELECT '103 36th St W Minneapolis MN 55409'::text As target) As f) As foo; \timing diff --git a/extras/tiger_geocoder/tiger_2010/regress/regress.sql b/extras/tiger_geocoder/tiger_2010/regress/regress.sql index 3b37a907f..b303ddaf4 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/regress.sql +++ b/extras/tiger_geocoder/tiger_2010/regress/regress.sql @@ -1,4 +1,5 @@ \a +--SET seq_page_cost='1000'; \o normalize_address_regress.out \i normalize_address_regress.sql \o geocode_regress.out diff --git a/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql b/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql index cbcab6d2a..0555acbc0 100644 --- a/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql +++ b/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql @@ -60,12 +60,16 @@ ALTER TABLE street_type_lookup ADD COLUMN is_hw boolean NOT NULL DEFAULT false; DROP FUNCTION IF EXISTS rate_attributes(character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying); DROP FUNCTION IF EXISTS rate_attributes(character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying); +--ALTER TABLE tiger.addr ALTER tlid TYPE bigint; +ALTER TABLE featnames ALTER COLUMN tlid SET NOT NULL; +ALTER TABLE edges ALTER COLUMN tlid SET NOT NULL; +ALTER TABLE addr ALTER COLUMN tlid SET NOT NULL; BEGIN; -- Type used to pass around a normalized address between functions -- This is s bit dangerous since it could potentially drop peoples tables -- TODO: put in logic to check if any tables have norm_addy and don't drop if they do -- Remarking this out for now since we aren't changing norm_addy anyway -/*DROP TYPE IF EXISTS norm_addy CASCADE; +/*DROP TYPE IF EXISTS norm_addy CASCADE; CREATE TYPE norm_addy AS ( address INTEGER, preDirAbbrev VARCHAR, @@ -76,7 +80,7 @@ CREATE TYPE norm_addy AS ( location VARCHAR, stateAbbrev VARCHAR, zip VARCHAR, - parsed BOOLEAN);*/ + parsed BOOLEAN); */ -- prefix and suffix street names for numbered highways CREATE TEMPORARY TABLE temp_types AS SELECT name, abbrev @@ -120,7 +124,6 @@ SELECT name, abbrev ('I', 'I-'), ('I-', 'I-'), ('INTERSTATE', 'I-'), - ('LOOP', 'Loop'), ('ROUTE', 'Rte'), ('RTE', 'Rte'), ('RT', 'Rte'), @@ -157,10 +160,12 @@ SELECT name, abbrev, true WHERE t.name NOT IN(SELECT name FROM street_type_lookup); DROP TABLE temp_types; DELETE FROM street_type_lookup WHERE name = 'FOREST'; +UPDATE street_type_lookup SET is_hw = false WHERE abbrev = 'Loop'; CREATE TEMPORARY TABLE temp_types AS SELECT name, abbrev FROM (VALUES + ('LOOP', 'Loop'), ('SERVICE DRIVE', 'Svc Dr'), ('SERVICE DR', 'Svc Dr'), ('SERVICE ROAD', 'Svc Rd'), @@ -212,4 +217,9 @@ COMMIT; -- install missing indexes \echo 'Installing missing indexes - this might take a while so be patient ..' SELECT install_missing_indexes(); +\a +--\o 'drop_dup_feat_create_index.sql' +--\i generate_drop_dupe_featnames.sql +\o +--\i drop_dup_feat_create_index.sql \echo 'Missing index Install completed' \ No newline at end of file diff --git a/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.bat b/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.bat index a2cd816c3..5dc36b1bb 100644 --- a/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.bat +++ b/extras/tiger_geocoder/tiger_2010/upgrade_geocoder.bat @@ -6,7 +6,9 @@ set PGPASSWORD=yourpasswordhere set THEDB=geocoder set PGBIN=C:\Program Files\PostgreSQL\8.4\bin set PGCONTRIB=C:\Program Files\PostgreSQL\8.4\share\contrib -"%PGBIN%\psql" -d "%THEDB%" -f "tiger_loader.sql" +REM "%PGBIN%\psql" -d "%THEDB%" -f "tiger_loader.sql" "%PGBIN%\psql" -d "%THEDB%" -f "upgrade_geocode.sql" +cd regress +"%PGBIN%\psql" -t -f regress.sql pause