ws := E'[ ,.\n\f\t]';
IF stateAbbrev IS NOT NULL THEN
- lstate := statefp FROM state WHERE state.stusps = stateAbbrev;
+ lstate := statefp FROM state_lookup WHERE abbrev = stateAbbrev;
END IF;
lstate := COALESCE(lstate,'');
IF zipString IS NOT NULL THEN
fullStreet := substring(rawInput from '(.*)'
|| ws || '+' || cull_null(zipString) || '[- ]?([0-9]{4})?$');
+ /** strip any leading or trailing commas (btrim with ',' does not remove spaces) **/
+ fullStreet := btrim(fullStreet, ',');
+
ELSE
fullStreet := rawInput;
END IF;
-- this is to handle case where people use , instead of space to separate state and zip
-- such as '2450 N COLORADO ST, PHILADELPHIA, PA, 19132' instead of '2450 N COLORADO ST, PHILADELPHIA, PA 19132'
- tempString := regexp_replace(rawInput, E'(.*)' || ws || '+', E'\\1');
+ --tempString := regexp_replace(rawInput, E'(.*)' || ws || '+', E'\\1');
+ tempString := btrim(rawInput, ', ');
-- Separate out the last word of the state, and use it to compare to
-- the state lookup table to determine the entire name, as well as the
-- abbreviation associated with it. The zip code may or may not have
529 | | Main | St | | | Boston | MA | 02129 | t
- 529 | | Main | St | | | Boston,MA | | 02129 | t
+ 529 | | Main | St | | | Boston | MA | 02129 | t
529 | | Main | St | | | Boston | MA | | t
212 | | 3rd | Ave | N | | MINNEAPOLIS | MN | 55401 | t
+ 949 | N | 3rd | St | | | New Hyde Park | NY | 11040 | t
+
+ 949 | N | 3rd | St | | | New Hyde Park | NY | 11040 | t
+
-- Mangled zipcodes
SELECT * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 553404');
SELECT * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 55401-');
+
+-- comma between state and zip (nonstandard position)
+SELECT * FROM normalize_address('949 N 3rd St, New Hyde Park, NY, 11040');
+
+-- no comma between state and zip (standard position)
+SELECT * FROM normalize_address('949 N 3rd St, New Hyde Park, NY 11040');
\timing
\ No newline at end of file