-- Assume that the address begins with a digit, and extract it from
-- the input string.
- addressString := substring(rawInput from '^([0-9].*?)[ ,/.]');
+ addressString := substring(rawInput from E'^([0-9].*?)[ ,/.]');
IF debug_flag THEN
raise notice '% addressString: %', clock_timestamp(), addressString;
END IF;
- -- There are two formats for zip code, the normal 5 digit, and
+ -- There are two formats for zip code, the normal 5 digit, and
-- the nine digit zip-4. It may also not exist.
- zipString := substring(rawInput from ws || '([0-9]{5})$');
+
+ zipString := substring(rawInput from ws || E'([0-9]{5})$');
IF zipString IS NULL THEN
- zipString := substring(rawInput from ws || '([0-9]{5})-[0-9]{4}$');
- -- Check if all we got was a zipcode, of either form
+ -- Check if the zip is 5 digits followed by a dash and up to 4 digits,
+ -- a partial zip of 2 to 5 digits, or a run of more than 5 digits (6 to 14)
+ zipString := COALESCE(substring(rawInput from ws || '([0-9]{5})-[0-9]{0,4}$'),
+ substring(rawInput from ws || '([0-9]{2,5})$'),
+ substring(rawInput from ws || '([0-9]{6,14})$'));
+
+ -- Check if all we got was a zipcode, of either form
IF zipString IS NULL THEN
zipString := substring(rawInput from '^([0-9]{5})$');
IF zipString IS NULL THEN
Federal Ct, Boston, MA 02110 | POINT(-71.0567205 42.354134) | 24
Federal Ln, Dedham, MA 02026 | POINT(-71.183565 42.238295) | 33
+ 212 3rd Ave N, Minneapolis, MN 55401 | POINT(-93.2718114 44.9850188) | 1
+ 212 3rd Ave S, Minneapolis, MN 55404 | POINT(-93.2635498396584 44.9806201397408) | 3
+ 212 3rd Ave S, Minneapolis, MN 55401 | POINT(-93.2633388442551 44.9808723431463) | 3
+ 212 3rd Ave NE, Minneapolis, MN 55413 | POINT(-93.2592025 44.990786) | 4
+ 212 3rd Ave S, Minneapolis, MN 55415 | POINT(-93.2641725306122 44.9798755102041) | 4
+ 212 3rd Ave SE, Minneapolis, MN 55414 | POINT(-93.2526539591837 44.9848287755102) | 5
+ 212 3rd Ave N, Biwabik, MN 55708 | POINT(-92.3450754489796 47.5343256938775) | 8
+ 212 3rd Ave N, Lakefield, MN 56150 | POINT(-95.1733443061224 43.6785488163265) | 9
+ 212 3rd Ave N, Bayport, MN 55003 | POINT(-92.7789324000695 45.019825623949) | 9
+ 212 3rd Ave N, Long Prairie, MN 56347 | POINT(-94.8618907833944 45.977553389256) | 10
+
+ Plymouth, MN 55340 | POINT(-93.5757500937089 45.076212213583) | 100
+
--Test misspellings and missing zip --
SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',1);
SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',50);
+
+--Geocoding mangled zipcodes
+SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('212 3rd Ave N, MINNEAPOLIS, MN 553404');
+SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('212 3rd Ave N, MINNEAPOLIS, MN 55340-');
\timing
\ No newline at end of file
529 | | Main | St | | | Boston | MA | 02129 | t
- 529 | | Main | St | | | Boston | MA | 02129 | t
+ 529 | | Main | St | | | Boston,MA | | 02129 | t
+
+ 529 | | Main | St | | | Boston | MA | | t
+
+ 529 | | Main | St | | | Boston | MA | 021 | t
+
+ 212 | | 3rd | Ave | N | | MINNEAPOLIS | MN | 553404 | t
- 529 | | Main | St | | | Boston MA | | | t
+ 212 | | 3rd | Ave | N | | MINNEAPOLIS | MN | 55401 | t
-- comma in wrong spot
SELECT * FROM normalize_address('529 Main Street, Boston MA, 02129');
-- comma in right spot
-SELECT * FROM normalize_address('529 Main Street, Boston MA, 02129');
+SELECT * FROM normalize_address('529 Main Street, Boston,MA 02129');
+-- partial address (no zipcode)
+SELECT * FROM normalize_address('529 Main Street, Boston, MA');
-- partial address
-SELECT * FROM normalize_address('529 Main Street, Boston MA');
+SELECT * FROM normalize_address('529 Main Street, Boston, MA 021');
+
+-- Mangled zipcodes
+SELECT * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 553404');
+SELECT * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 55401-');
\timing
\ No newline at end of file