From 95528ffa5af82010b0b520772edce247c1fdfdf2 Mon Sep 17 00:00:00 2001 From: Regina Obe Date: Sun, 3 Jul 2011 16:51:09 +0000 Subject: [PATCH] #1087 improve rating algorithm. also start to use prepared statements where appropriate (don't put state in prepared part since that would prevent constraint exclusion from kicking in) git-svn-id: http://svn.osgeo.org/postgis/trunk@7565 b70326c6-7e19-0410-871a-916f4a2858ee --- .../tiger_2010/geocode/geocode.sql | 2 +- .../tiger_2010/geocode/geocode_address.sql | 105 ++++++++++++------ .../geocode/other_helper_functions.sql | 14 ++- .../tiger_2010/regress/geocode_regress | 96 ++++++++++------ .../tiger_2010/regress/geocode_regress.sql | 45 +++++--- .../tiger_2010/upgrade_geocode.sql | 1 + 6 files changed, 169 insertions(+), 94 deletions(-) diff --git a/extras/tiger_geocoder/tiger_2010/geocode/geocode.sql b/extras/tiger_geocoder/tiger_2010/geocode/geocode.sql index 34b0bcf6b..3cb258f7e 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/geocode.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/geocode.sql @@ -72,7 +72,7 @@ BEGIN ) * FROM - geocode_address(IN_ADDY) a + geocode_address(IN_ADDY, max_results) a ORDER BY (a.addy).address, (a.addy).predirabbrev, diff --git a/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql b/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql index 1aecf2939..486568d4d 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/geocode_address.sql @@ -1,5 +1,5 @@ --$Id$ -CREATE OR REPLACE FUNCTION geocode_address(IN parsed norm_addy, OUT addy norm_addy, OUT geomout geometry, OUT rating integer) +CREATE OR REPLACE FUNCTION geocode_address(IN parsed norm_addy, max_results integer DEFAULT 10, OUT addy norm_addy, OUT geomout geometry, OUT rating integer) RETURNS SETOF record AS $$ DECLARE @@ -9,6 +9,8 @@ DECLARE in_statefp VARCHAR; exact_street boolean := false; var_debug boolean := false; + var_sql text := ''; + var_n integer := 0; BEGIN IF parsed.streetName IS NULL THEN -- A street name must be given. Think about it. @@ -23,31 +25,59 @@ BEGIN -- to lower-preference options. -- We start out with zip-code matching, where the zip code could possibly be in more than one -- state. We loop through each state its in. - -- Next, we try to find the location in our side-table, which is based off of the 'place' data + -- Next, we try to find the location in our side-table, which is based off of the 'place' data exact first then sounds like -- Next, we look up the location/city and use the zip code which is returned from that -- Finally, if we didn't get a zip code or a city match, we fall back to just a location/street -- lookup to try and find *something* useful. -- In the end, we *have* to find a statefp, one way or another. - FOR zip_info IN - SELECT statefp,location,zip,column1 as exact,min(pref) FROM - (SELECT zip_state.statefp as statefp,parsed.location as location,ARRAY[zip_state.zip] as zip,1 as pref - FROM zip_state WHERE zip_state.zip = parsed.zip AND (in_statefp IS NULL OR zip_state.statefp = in_statefp) - UNION SELECT zip_state_loc.statefp,parsed.location,array_accum(zip_state_loc.zip),2 + var_sql := + ' SELECT statefp,location,zip,exact,min(pref) FROM + (SELECT zip_state.statefp as statefp,$1 as location, true As exact, ARRAY[zip_state.zip] as zip,1 as pref + FROM zip_state WHERE zip_state.zip = $2 + AND (' || quote_nullable(in_statefp) || ' IS NULL OR zip_state.statefp = ' || quote_nullable(in_statefp) || ') + UNION SELECT zip_state_loc.statefp,zip_state_loc.place As location,false As exact, array_accum(zip_state_loc.zip) AS zip,1 + COALESCE(diff_zip(min(zip), $2),0) As pref + FROM zip_state_loc + WHERE zip_state_loc.statefp = ' || quote_nullable(in_statefp) || ' + AND lower($1) = lower(zip_state_loc.place) + GROUP BY zip_state_loc.statefp,zip_state_loc.place + UNION SELECT zip_state_loc.statefp,zip_state_loc.place As location,false As exact, array_accum(zip_state_loc.zip),3 + FROM zip_state_loc + WHERE zip_state_loc.statefp = ' || quote_nullable(in_statefp) || ' + AND soundex($1) = soundex(zip_state_loc.place) + GROUP BY zip_state_loc.statefp,zip_state_loc.place + UNION SELECT zip_lookup_base.statefp,zip_lookup_base.city As location,false As exact, array_accum(zip_lookup_base.zip),4 + FROM zip_lookup_base + WHERE zip_lookup_base.statefp = ' || quote_nullable(in_statefp) || ' + AND (soundex($1) = soundex(zip_lookup_base.city) OR soundex($1) = soundex(zip_lookup_base.county)) + GROUP BY zip_lookup_base.statefp,zip_lookup_base.city + UNION SELECT ' || quote_nullable(in_statefp) || ' As statefp,$1 As location,false As exact,NULL, 5) as a + WHERE statefp IS NOT NULL + GROUP BY statefp,location,zip,exact, pref ORDER BY exact desc, pref, zip'; + /** FOR zip_info IN SELECT statefp,location,zip,exact,min(pref) FROM + (SELECT zip_state.statefp as statefp,parsed.location as location, true As exact, ARRAY[zip_state.zip] as zip,1 as pref + FROM zip_state WHERE zip_state.zip = parsed.zip + AND (in_statefp IS NULL OR zip_state.statefp = in_statefp) + UNION SELECT zip_state_loc.statefp,parsed.location,false As exact, array_accum(zip_state_loc.zip),2 + diff_zip(zip[1], parsed.zip) + FROM zip_state_loc + WHERE zip_state_loc.statefp = in_statefp + AND lower(parsed.location) = lower(zip_state_loc.place) + GROUP BY zip_state_loc.statefp,parsed.location + UNION SELECT zip_state_loc.statefp,parsed.location,false As exact, array_accum(zip_state_loc.zip),3 FROM zip_state_loc WHERE zip_state_loc.statefp = in_statefp AND soundex(parsed.location) = soundex(zip_state_loc.place) GROUP BY zip_state_loc.statefp,parsed.location - UNION SELECT zip_lookup_base.statefp,parsed.location,array_accum(zip_lookup_base.zip),3 + UNION SELECT zip_lookup_base.statefp,parsed.location,false As exact, array_accum(zip_lookup_base.zip),4 FROM zip_lookup_base WHERE zip_lookup_base.statefp = in_statefp AND (soundex(parsed.location) = soundex(zip_lookup_base.city) OR soundex(parsed.location) = soundex(zip_lookup_base.county)) GROUP BY zip_lookup_base.statefp,parsed.location - UNION SELECT in_statefp,parsed.location,NULL,4) as a - JOIN (VALUES (true),(false)) as b on TRUE + UNION SELECT in_statefp,parsed.location,false As exact,NULL, 5) as a + --JOIN (VALUES (true),(false)) as b(exact) on TRUE WHERE statefp IS NOT NULL - GROUP BY statefp,location,zip,column1 ORDER BY 4 desc, 5, 3 - LOOP - + GROUP BY statefp,location,zip,exact, pref ORDER BY exact desc, pref, zip **/ + FOR zip_info IN EXECUTE var_sql USING parsed.location, parsed.zip LOOP + -- For zip distance metric we consider both the distance of zip based on numeric as well aa levenshtein stmt := 'SELECT DISTINCT ON (sub.predirabrv,sub.name,sub.suftypabrv,sub.sufdirabrv,coalesce(p.name,zip.city,cs.name,co.name),s.stusps,sub.zip)' || ' sub.predirabrv as fedirp,' || ' sub.name as fename,' @@ -56,45 +86,45 @@ BEGIN || ' coalesce(p.name,zip.city,cs.name,co.name)::varchar as place,' || ' s.stusps as state,' || ' sub.zip as zip,' - || ' interpolate_from_address(' || coalesce(parsed.address::text,'NULL') || ', to_number(sub.fromhn,''99999999'')::integer,' + || ' interpolate_from_address($1, to_number(sub.fromhn,''99999999'')::integer,' || ' to_number(sub.tohn,''99999999'')::integer, e.the_geom) as address_geom,' - || coalesce(' sub.sub_rating + coalesce(levenshtein_ignore_case(' || quote_literal(zip_info.zip[1]) || ', sub.zip),0)', - ' sub.sub_rating + coalesce(levenshtein_ignore_case(' || quote_literal(parsed.location) || ', coalesce(p.name,zip.city,cs.name,co.name)),0)', - 'sub.sub_rating') + || ' sub.sub_rating + ' + || CASE WHEN parsed.zip > '' THEN ' least((coalesce(diff_zip(' || quote_nullable(parsed.zip) || ' , sub.zip),0) *1.00/2)::integer, coalesce(levenshtein_ignore_case(' || quote_nullable(zip_info.zip[1]) || ', sub.zip),0) ) ' + ELSE '1' END::text + || ' + coalesce(levenshtein_ignore_case($3, coalesce(p.name,zip.city,cs.name,co.name)),0)' || ' as sub_rating,' || ' sub.exact_address as exact_address' || ' FROM (' - || ' SELECT tlid, predirabrv, name, suftypabrv, sufdirabrv, fromhn, tohn, side, statefp, zip, rate_attributes(' || coalesce(quote_literal(parsed.preDirAbbrev),'NULL') || ', a.predirabrv,' - || ' ' || coalesce(quote_literal(parsed.streetName),'NULL') || ', a.name, ' || coalesce(quote_literal(parsed.streetTypeAbbrev),'NULL') || ',' - || ' a.suftypabrv, ' || coalesce(quote_literal(parsed.postDirAbbrev),'NULL') || ',' + || ' SELECT tlid, predirabrv, name, suftypabrv, sufdirabrv, fromhn, tohn, side, statefp, zip, rate_attributes($5, a.predirabrv,' + || ' $2, a.name, $4,' + || ' a.suftypabrv, $6,' || ' a.sufdirabrv) + ' || ' CASE ' - || ' WHEN ' || coalesce(quote_literal(parsed.address),'NULL') || '::integer IS NULL OR b.fromhn IS NULL THEN 20' - || ' WHEN ' || coalesce(quote_literal(parsed.address),'NULL') || '::integer >= least_hn(b.fromhn, b.tohn) ' - || ' AND ' || coalesce(quote_literal(parsed.address),'NULL') || '::integer <= greatest_hn(b.fromhn,b.tohn)' - || ' AND (' || coalesce(quote_literal(parsed.address),'NULL') || '::integer % 2) = (to_number(b.fromhn,''99999999'') % 2)::integer' + || ' WHEN $1::integer IS NULL OR b.fromhn IS NULL THEN 20' + || ' WHEN $1::integer >= least_hn(b.fromhn, b.tohn) ' + || ' AND $1::integer <= greatest_hn(b.fromhn,b.tohn)' + || ' AND ($1::integer % 2) = (to_number(b.fromhn,''99999999'') % 2)::integer' || ' THEN 0' - || ' WHEN ' || coalesce(quote_literal(parsed.address),'NULL') || '::integer >= least_hn(b.fromhn,b.tohn)' - || ' AND ' || coalesce(quote_literal(parsed.address),'NULL') || '::integer <= greatest_hn(b.fromhn,b.tohn)' + || ' WHEN $1::integer >= least_hn(b.fromhn,b.tohn)' + || ' AND $1::integer <= greatest_hn(b.fromhn,b.tohn)' || ' THEN 2' || ' ELSE' || ' ((1.0 - ' - || '(least_hn(' || coalesce(quote_literal(parsed.address),'NULL') || ',least_hn(b.fromhn,b.tohn)::text)::numeric /' - || ' greatest(1,greatest_hn(' || coalesce(quote_literal(parsed.address),'NULL') || ',greatest_hn(b.fromhn,b.tohn)::text)))' + || '(least_hn($1::text,least_hn(b.fromhn,b.tohn)::text)::numeric /' + || ' greatest(1,greatest_hn($1::text,greatest_hn(b.fromhn,b.tohn)::text)))' || ') * 5)::integer + 5' || ' END' - || ' as sub_rating,' - || coalesce(quote_literal(parsed.address),'NULL') || '::integer >= least_hn(b.fromhn,b.tohn) ' - || ' AND ' || coalesce(quote_literal(parsed.address),'NULL') || '::integer <= greatest_hn(b.fromhn,b.tohn) ' - || ' AND (' || coalesce(quote_literal(parsed.address),'NULL') || ' % 2)::numeric::integer = (to_number(b.fromhn,''99999999'') % 2)' + || ' as sub_rating,$1::integer >= least_hn(b.fromhn,b.tohn) ' + || ' AND $1::integer <= greatest_hn(b.fromhn,b.tohn) ' + || ' AND ($1 % 2)::numeric::integer = (to_number(b.fromhn,''99999999'') % 2)' || ' as exact_address' || ' FROM featnames a join addr b using (tlid,statefp)' || ' WHERE' || ' statefp = ' || quote_literal(zip_info.statefp) || '' || coalesce(' AND b.zip IN (''' || array_to_string(zip_info.zip,''',''') || ''') ','') || CASE WHEN zip_info.exact - THEN ' AND (lower(' || coalesce(quote_literal(parsed.streetName),'NULL') || ') = lower(a.name) OR numeric_streets_equal(' || coalesce(quote_literal(parsed.streetName), 'NULL') || ', a.name) ) ' - ELSE ' AND (soundex(' || coalesce(quote_literal(parsed.streetName),'NULL') || ') = soundex(a.name) OR numeric_streets_equal(' || coalesce(quote_literal(parsed.streetName), 'NULL') || ', a.name) ) ' + THEN ' AND (lower($2) = lower(a.name) OR numeric_streets_equal($2, a.name) ) ' + ELSE ' AND (soundex($2) = soundex(a.name) OR numeric_streets_equal($2, a.name) ) ' END || ' ORDER BY 11' || ' LIMIT 20' @@ -120,7 +150,7 @@ BEGIN RETURN; END IF; - FOR results IN EXECUTE stmt LOOP + FOR results IN EXECUTE stmt USING parsed.address,parsed.streetName, parsed.location, parsed.streetTypeAbbrev, parsed.preDirAbbrev, parsed.postDirAbbrev LOOP -- If we found a match with an exact street, then don't bother -- trying to do non-exact matches @@ -145,6 +175,7 @@ BEGIN GEOMOUT := results.address_geom; RATING := results.sub_rating; + var_n := var_n + 1; RETURN NEXT; @@ -154,7 +185,9 @@ BEGIN END IF; END LOOP; - + IF var_n > max_results THEN --we have exceeded our desired limit + RETURN; + END IF; END LOOP; RETURN; diff --git a/extras/tiger_geocoder/tiger_2010/geocode/other_helper_functions.sql b/extras/tiger_geocoder/tiger_2010/geocode/other_helper_functions.sql index 138138f10..302122003 100644 --- a/extras/tiger_geocoder/tiger_2010/geocode/other_helper_functions.sql +++ b/extras/tiger_geocoder/tiger_2010/geocode/other_helper_functions.sql @@ -11,7 +11,7 @@ CREATE OR REPLACE FUNCTION least_hn(fromhn varchar, tohn varchar) RETURNS integer AS $$ SELECT least(to_number( CASE WHEN trim($1) ~ '^[0-9]+$' THEN $1 ELSE '0' END,'9999999'),to_number(CASE WHEN trim($2) ~ '^[0-9]+$' THEN $2 ELSE '0' END,'9999999') )::integer; $$ LANGUAGE sql IMMUTABLE - COST 5; + COST 200; -- Note we are wrapping this in a function so we can make it immutable (for some reason least and greatest aren't considered immutable) -- and thu useable in an index or cacheable for multiple calls @@ -19,7 +19,17 @@ CREATE OR REPLACE FUNCTION greatest_hn(fromhn varchar, tohn varchar) RETURNS integer AS $$ SELECT greatest(to_number( CASE WHEN trim($1) ~ '^[0-9]+$' THEN $1 ELSE '0' END,'99999999'),to_number(CASE WHEN trim($2) ~ '^[0-9]+$' THEN $2 ELSE '0' END,'99999999') )::integer; $$ LANGUAGE sql IMMUTABLE - COST 5; + COST 200; + +-- Returns an absolute difference between two zips +-- This is generally more efficient than doing levenshtein +-- Since when people get the wrong zip, its usually off by one or 2 numeric distance +-- We only consider the first 5 digits +CREATE OR REPLACE FUNCTION diff_zip(zip1 varchar, zip2 varchar) + RETURNS integer AS +$$ SELECT abs(to_number( CASE WHEN trim(substring($1,1,5)) ~ '^[0-9]+$' THEN $1 ELSE '0' END,'99999')::integer - to_number( CASE WHEN trim(substring($2,1,5)) ~ '^[0-9]+$' THEN $2 ELSE '0' END,'99999')::integer )::integer; $$ + LANGUAGE sql IMMUTABLE + COST 200; -- function return true or false if 2 numeric streets are equal such as 15th St, 23rd st -- it compares just the numeric part of the street for equality diff --git a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress index c1c2d148f..9b186365e 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress +++ b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress @@ -1,37 +1,59 @@ -529 Main St, Boston, MA 02129|POINT(-71.0718721304348 42.3835137826087)|0 -75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0 -100 Federal St, Boston, MA 02110|POINT(-71.056314 42.354769)|0 -529 Main St, Boston, MA 02129|POINT(-71.0718721304348 42.3835137826087)|0 -75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0 -100 Federal St, Boston, MA 02110|POINT(-71.056314 42.354769)|0 -529 Main St, Boston, MA 02129|POINT(-71.0718721304348 42.3835137826087)|0 -75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0 -100 Federal St, Boston, MA 02110|POINT(-71.056314 42.354769)|0 -530 Main St, Boston, MA 02129|POINT(-71.071734 42.38345)|0 -76 State St, Boston, MA 02109|POINT(-71.0561527692308 42.3589996153846)|0 -101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|0 -101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|0 -101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|0 -101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|10 -101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|10 -101 Federal St, Blackstone, MA 01504|POINT(-71.5246987130121 42.0288604697987)|15 -101 Federal St, Lynn, MA 01905|POINT(-70.9678344129807 42.4659009057427)|15 -101 Federal St, Lawrence, MA 01810|POINT(-71.2012528459153 42.6901540310566)|17 -101 Federal St, Andover, MA 01810|POINT(-71.2012528459153 42.6901540310566)|17 -101 Federal St, Northampton, MA 01062|POINT(-72.655372 42.32273)|17 -101 Federal St, Belchertown, MA 01007|POINT(-72.4120567596665 42.2944575040247)|17 -101 Federal St, Agawam Town, MA 01001|POINT(-72.622305017362 42.0837072292948)|18 -101 Federal St, Millers Falls, MA 01349|POINT(-72.4993935958437 42.5680170002944)|22 -101 Federal St, Greenfield Town, MA 01301|POINT(-72.5990944761905 42.5900632380952)|22 -Federal Ct, Boston, MA 02110|POINT(-71.0567205 42.354134)|24 -Federal Ln, Dedham, MA 02026|POINT(-71.183565 42.238295)|33 -#1073|212 3rd Ave N, Minneapolis, MN 55401|POINT(-93.2718114 44.9850188)|1 -#1073|212 3rd Ave S, Minneapolis, MN 55404|POINT(-93.2635498396584 44.9806201397408)|3 -#1073|212 3rd Ave S, Minneapolis, MN 55401|POINT(-93.2633388442551 44.9808723431463)|3 -#1073|212 3rd Ave S, Minneapolis, MN 55415|POINT(-93.2641725306122 44.9798755102041)|4 -#1073|212 3rd Ave NE, Minneapolis, MN 55413|POINT(-93.2592025 44.990786)|4 -#1073|212 3rd Ave N, Minneapolis, MN 55401|POINT(-93.2718114 44.9850188)|1 -#1073|212 3rd Ave S, Minneapolis, MN 55404|POINT(-93.2635498396584 44.9806201397408)|3 -#1073|212 3rd Ave S, Minneapolis, MN 55401|POINT(-93.2633388442551 44.9808723431463)|3 -#1073|212 3rd Ave S, Minneapolis, MN 55415|POINT(-93.2641725306122 44.9798755102041)|4 -#1073|212 3rd Ave NE, Minneapolis, MN 55413|POINT(-93.2592025 44.990786)|4 +T1|529 Main St, Boston, MA 02129|POINT(-71.0718721304348 42.3835137826087)|0 +T2|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0 +T3|100 Federal St, Boston, MA 02110|POINT(-71.056314 42.354769)|1 +T4|529 Main St, Boston, MA 02129|POINT(-71.0718721304348 42.3835137826087)|0 +T5|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0 +T6|100 Federal St, Boston, MA 02110|POINT(-71.056314 42.354769)|0 +T7|529 Main St, Boston, MA 02129|POINT(-71.0718721304348 42.3835137826087)|0 +T8|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0 +T9|100 Federal St, Boston, MA 02110|POINT(-71.056314 42.354769)|0 +T10|530 Main St, Boston, MA 02129|POINT(-71.071734 42.38345)|0 +T11|76 State St, Boston, MA 02109|POINT(-71.0561527692308 42.3589996153846)|0 +T12|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|1 +T12|101 Federal St, Blackstone, MA 01504|POINT(-71.5246987130121 42.0288604697987)|6 +T12|101 Federal St, Lynn, MA 01905|POINT(-70.9678344129807 42.4659009057427)|6 +T12|101 Federal St, Lawrence, MA 01810|POINT(-71.2012528459153 42.6901540310566)|8 +T12|101 Federal St, Andover, MA 01810|POINT(-71.2012528459153 42.6901540310566)|8 +T12|101 Federal St, Northampton, MA 01062|POINT(-72.655372 42.32273)|8 +T12|101 Federal St, Belchertown, MA 01007|POINT(-72.4120567596665 42.2944575040247)|8 +T12|101 Federal St, Agawam Town, MA 01001|POINT(-72.622305017362 42.0837072292948)|9 +T12|101 Federal St, Millers Falls, MA 01349|POINT(-72.4993935958437 42.5680170002944)|13 +T12|101 Federal St, Greenfield Town, MA 01301|POINT(-72.5990944761905 42.5900632380952)|13 +T12|Federal Ct, Boston, MA 02110|POINT(-71.0567205 42.354134)|14 +T12|Federal Ln, Dedham, MA 02026|POINT(-71.183565 42.238295)|27 +#TB1|24 School St, Boston, MA 02108|24 School Street, Boston, MA 02108|POINT(-71.058879 42.357619)|0 +#TB1|20 School St, Boston, MA 02108|20 School Street, Boston, MA 02109|POINT(-71.058759 42.357577)|1 +T13|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|1 +T13|101 Federal St, Blackstone, MA 01504|POINT(-71.5246987130121 42.0288604697987)|6 +T13|101 Federal St, Lynn, MA 01905|POINT(-70.9678344129807 42.4659009057427)|6 +T13|101 Federal St, Lawrence, MA 01810|POINT(-71.2012528459153 42.6901540310566)|8 +T13|101 Federal St, Andover, MA 01810|POINT(-71.2012528459153 42.6901540310566)|8 +T13|101 Federal St, Northampton, MA 01062|POINT(-72.655372 42.32273)|8 +T13|101 Federal St, Belchertown, MA 01007|POINT(-72.4120567596665 42.2944575040247)|8 +T13|101 Federal St, Agawam Town, MA 01001|POINT(-72.622305017362 42.0837072292948)|9 +T13|101 Federal St, Millers Falls, MA 01349|POINT(-72.4993935958437 42.5680170002944)|13 +T13|101 Federal St, Greenfield Town, MA 01301|POINT(-72.5990944761905 42.5900632380952)|13 +T13|Federal Ct, Boston, MA 02110|POINT(-71.0567205 42.354134)|14 +T13|Federal Ln, Dedham, MA 02026|POINT(-71.183565 42.238295)|27 +T14|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|1 +T15|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|11 +T16|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|11 +T16|101 Federal St, Blackstone, MA 01504|POINT(-71.5246987130121 42.0288604697987)|16 +T16|101 Federal St, Lynn, MA 01905|POINT(-70.9678344129807 42.4659009057427)|16 +T16|101 Federal St, Lawrence, MA 01810|POINT(-71.2012528459153 42.6901540310566)|18 +T16|101 Federal St, Andover, MA 01810|POINT(-71.2012528459153 42.6901540310566)|18 +T16|101 Federal St, Northampton, MA 01062|POINT(-72.655372 42.32273)|18 +T16|101 Federal St, Belchertown, MA 01007|POINT(-72.4120567596665 42.2944575040247)|18 +T16|101 Federal St, Agawam Town, MA 01001|POINT(-72.622305017362 42.0837072292948)|19 +T16|101 Federal St, Millers Falls, MA 01349|POINT(-72.4993935958437 42.5680170002944)|23 +T16|101 Federal St, Greenfield Town, MA 01301|POINT(-72.5990944761905 42.5900632380952)|23 +T16|Federal Ct, Boston, MA 02110|POINT(-71.0567205 42.354134)|24 +T16|Federal Ln, Dedham, MA 02026|POINT(-71.183565 42.238295)|37 +#1087a|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|1 +#1087a|75 State St, Milton, MA 02186|POINT(-71.0409054545455 42.2563454545455)|5 +#1087b|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|1 +#1087b|75 State St, Milton, MA 02186|POINT(-71.0409054545455 42.2563454545455)|4 +#1087c|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0 +#1073a|212 3rd Ave N, Minneapolis, MN 55401|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.2718114 44.9850188)|1 +#1073a|212 3rd Ave S, Minneapolis, MN 55401|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.2633388442551 44.9808723431463)|3 +#1073b|212 3rd Ave N, Minneapolis, MN 55401|POINT(-93.2718114 44.9850188)|0 \ No newline at end of file diff --git a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql index 1051e8c7e..3e7a4b9e4 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql +++ b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql @@ -1,33 +1,42 @@ --$Id$ \timing -- Limit 1 -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129',1); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',1); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('100 Federal Street, Boston, MA 02109',1); +SELECT 'T1', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129',1); +SELECT 'T2', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',1); +SELECT 'T3', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('100 Federal Street, Boston, MA 02109',1); -- default -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129'); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109'); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('100 Federal Street, Boston,MA 02109'); +SELECT 'T4', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129'); +SELECT 'T5', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109'); +SELECT 'T6', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('100 Federal Street, Boston,MA 02109'); -- 20 -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129',20); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',20); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('100 Federal Street, Boston, MA 02109',20); +SELECT 'T7', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129',20); +SELECT 'T8', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',20); +SELECT 'T9', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('100 Federal Street, Boston, MA 02109',20); -- Limit 1 - Test caching effects -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('530 Main Street, Boston MA, 02129',1); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('76 State Street, Boston MA, 02109',1); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Federal Street, Boston MA, 02109',20); +SELECT 'T10', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('530 Main Street, Boston MA, 02129',1); +SELECT 'T11', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('76 State Street, Boston MA, 02109',1); +SELECT 'T12', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Federal Street, Boston, MA',20); + +-- Test batch geocoding along a street +SELECT '#TB1' As ticket, pprint_addy((g).addy) As address, target, ST_AsText((g).geomout) As pt, (g).rating FROM (SELECT geocode(target::text,1) As g, target FROM (VALUES ('24 School Street, Boston, MA 02108'), ('20 School Street, Boston, MA 02109')) As f(target) ) As foo; -- Partial address -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Federal Street, Boston MA',20); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Federal Street, Boston MA',1); +SELECT 'T13', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Federal Street, Boston MA',20); +SELECT 'T14', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Federal Street, Boston MA',1); --Test misspellings and missing zip -- -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',1); -SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',50); +SELECT 'T15', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',1); +SELECT 'T16', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',50); + +-- Ratings wrong for missing or wrong local zips +SELECT '#1087a' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02110',3); +SELECT '#1087b' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA',3); +--right zip +SELECT '#1087c' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',1); --Geocoding mangled zipcodes -SELECT '#1073' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('212 3rd Ave N, MINNEAPOLIS, MN 553404',5); -SELECT '#1073' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('212 3rd Ave N, MINNEAPOLIS, MN 55340-',5); +SELECT '#1073a' As ticket, pprint_addy((g).addy) As address, target, ST_AsText((g).geomout) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '212 3rd Ave N, MINNEAPOLIS, MN 553404'::text As target) As f) As foo; +SELECT '#1073b' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('212 3rd Ave N, MINNEAPOLIS, MN 55401-',2); \timing diff --git a/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql b/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql index a680351e2..8a2e808cf 100644 --- a/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql +++ b/extras/tiger_geocoder/tiger_2010/upgrade_geocode.sql @@ -26,6 +26,7 @@ CREATE INDEX tiger_state_the_geom_gist ON faces USING gist(the_geom); DROP FUNCTION IF EXISTS reverse_geocode(geometry); /** changed to use default parameters **/ DROP FUNCTION IF EXISTS geocode(varchar); /** changed to include default parameter for max_results **/ DROP FUNCTION IF EXISTS geocode(norm_addy); /** changed to include default parameter for max_results **/ +DROP FUNCTION IF EXISTS geocode_address(norm_addy); /** changed to include default parameter for max_results **/ -- TODO: Put in logic to update lookup tables as they change. street_type_lookup has changed since initial release -- CREATE TABLE zcta5 -- 2.50.1