tempInt := 0;
-- the last matching abbrev in the string is the most likely one
FOR rec IN SELECT * FROM
- (SELECT abbrev, substring(fullStreet, '(?i)' || ws || '?('
+ (SELECT abbrev, name, substring(fullStreet, '(?i)' || ws || '?('
|| name || ')(?:' || ws || '|$)') AS given, is_hw ,
- ROW_NUMBER() OVER( ORDER BY position(name IN trim(fullStreet)) ) As n_type,
- ROW_NUMBER() OVER() As nrecs
+ RANK() OVER( ORDER BY position(name IN upper(trim(fullStreet))) ) As n_start,
+ RANK() OVER( ORDER BY position(name IN upper(trim(fullStreet))) + length(name) ) As n_end,
+ COUNT(*) OVER() As nrecs, position(name IN upper(trim(fullStreet)))
FROM street_type_lookup
WHERE fullStreet ILIKE '%' || name || '%' AND
trim(upper(fullStreet)) != name AND
- texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+ (texticregexeq(fullStreet, '(?i)' || ws || '(' || name
-- we only consider street types that are regular and not at beginning of name or are highways (since those can be at beg or end)
-- we take the one that is the longest e.g Country Road would be more correct than Road
- || ')(?:' || ws || '|$)') AND ((NOT is_hw AND position(name IN trim(fullStreet)) > 2) OR is_hw) ) As foo
- -- If it's a highway, we want the first highway that is street type
- -- if its a regular, we want the last type in string, if tie, just return the longest
- ORDER BY CASE WHEN is_hw THEN (nrecs - n_type) ELSE n_type END DESC, length(given) DESC LIMIT 1 LOOP
+ || ')(?:' || ws || '|$)') OR (is_hw AND fullstreet ILIKE name || ' %') )
+ AND ((NOT is_hw AND position(name IN upper(trim(fullStreet))) > 1 OR is_hw) )
+ ) As foo
+ -- N_start - N_end - ensure we first get the one with the most overlapping sub types
+ -- Then of those get the one that ends last and then starts first
+ ORDER BY n_start - n_end, n_end DESC, n_start LIMIT 1 LOOP
-- If we have found an internal address, make sure the type
-- precedes it.
/** TODO: I don't think we need a loop anymore since we are just returning one and the one in the last position
-- Partial address with apartment
SELECT * FROM normalize_address('529 Main Street, Apt 201, Boston, MA');
+--This one fails so lead out for now
+--SELECT * FROM normalize_address('529 Main Street, Suite 201, Boston, MA 02129');
+
-- Partial and Mangled zipcodes
SELECT '#1073a' As ticket, * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 553404');
SELECT '#1073b' As ticket, * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 55401-');
SELECT '#1109b' As ticket, * from normalize_address('4373 LAKE DR, ROBBINSDALE, MN 55422');
-- another type (Is) that is part of street name but a compound street name
-SELECT '#1074a' As ticket, * fROM normalize_address('3420 RHODE ISLAND AVE S, ST. LOUIS PARK, MN 55426');
+SELECT '#1074a' As ticket, * FROM normalize_address('3420 RHODE ISLAND AVE S, ST. LOUIS PARK, MN 55426');
+
+-- another type that is part of street name --
+SELECT '#1074b' As ticket, * FROM normalize_address('26 Court Street, Boston,MA 02109')
\timing