From: Regina Obe Date: Fri, 1 Jul 2011 02:22:27 +0000 (+0000) Subject: Fix for #1086 Issue with parsing out location. also added to regression tests X-Git-Tag: 2.0.0alpha1~1311 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2c0286eaba5154fe9db1f2548ac47c0683c72099;p=postgis Fix for #1086 Issue with parsing out location. also added to regression tests git-svn-id: http://svn.osgeo.org/postgis/trunk@7531 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/extras/tiger_geocoder/tiger_2010/normalize/location_extract.sql b/extras/tiger_geocoder/tiger_2010/normalize/location_extract.sql index b855303a8..dc62f6c44 100644 --- a/extras/tiger_geocoder/tiger_2010/normalize/location_extract.sql +++ b/extras/tiger_geocoder/tiger_2010/normalize/location_extract.sql @@ -31,7 +31,7 @@ BEGIN ws := E'[ ,.\n\f\t]'; IF stateAbbrev IS NOT NULL THEN - lstate := statefp FROM state WHERE state.stusps = stateAbbrev; + lstate := statefp FROM state_lookup WHERE abbrev = stateAbbrev; END IF; lstate := COALESCE(lstate,''); diff --git a/extras/tiger_geocoder/tiger_2010/normalize/normalize_address.sql b/extras/tiger_geocoder/tiger_2010/normalize/normalize_address.sql index 8b3185563..f07459fe2 100644 --- a/extras/tiger_geocoder/tiger_2010/normalize/normalize_address.sql +++ b/extras/tiger_geocoder/tiger_2010/normalize/normalize_address.sql @@ -128,6 +128,9 @@ BEGIN IF zipString IS NOT NULL THEN fullStreet := substring(rawInput from '(.*)' || ws || '+' || cull_null(zipString) || '[- ]?([0-9]{4})?$'); + /** strip off any leading or trailing commas (spaces are not stripped) **/ + fullStreet := btrim(fullStreet, ','); + ELSE fullStreet := rawInput; END IF; diff --git a/extras/tiger_geocoder/tiger_2010/normalize/state_extract.sql b/extras/tiger_geocoder/tiger_2010/normalize/state_extract.sql index a599f9f6a..d616a5f55 100644 --- a/extras/tiger_geocoder/tiger_2010/normalize/state_extract.sql +++ b/extras/tiger_geocoder/tiger_2010/normalize/state_extract.sql @@ -26,7 +26,8 @@ BEGIN -- this is to handle case where 
people use , instead of space to separate state and zip -- such as '2450 N COLORADO ST, PHILADELPHIA, PA, 19132' instead of '2450 N COLORADO ST, PHILADELPHIA, PA 19132' - tempString := regexp_replace(rawInput, E'(.*)' || ws || '+', E'\\1'); + --tempString := regexp_replace(rawInput, E'(.*)' || ws || '+', E'\\1'); + tempString := btrim(rawInput, ', '); -- Separate out the last word of the state, and use it to compare to -- the state lookup table to determine the entire name, as well as the -- abbreviation associated with it. The zip code may or may not have diff --git a/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress b/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress index 9b406a8c3..4ab88ffe2 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress +++ b/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress @@ -2,7 +2,7 @@ 529 | | Main | St | | | Boston | MA | 02129 | t - 529 | | Main | St | | | Boston,MA | | 02129 | t + 529 | | Main | St | | | Boston | MA | 02129 | t 529 | | Main | St | | | Boston | MA | | t @@ -12,3 +12,7 @@ 212 | | 3rd | Ave | N | | MINNEAPOLIS | MN | 55401 | t + 949 | N | 3rd | St | | | New Hyde Park | NY | 11040 | t + + 949 | N | 3rd | St | | | New Hyde Park | NY | 11040 | t + diff --git a/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress.sql b/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress.sql index 8381090c2..9c805315f 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress.sql +++ b/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress.sql @@ -12,4 +12,10 @@ SELECT * FROM normalize_address('529 Main Street, Boston, MA 021'); -- Mangled zipcodes SELECT * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 553404'); SELECT * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 55401-'); + +-- comma in wrong position +SELECT * FROM normalize_address('949 N 3rd St, New Hyde Park, NY, 11040'); + 
+-- comma in right position -- +SELECT * FROM normalize_address('949 N 3rd St, New Hyde Park, NY 11040'); \timing \ No newline at end of file