fullStreet := substring(rawInput from '(.*)'
|| ws || '+' || cull_null(zipString) || '[- ]?([0-9]{4})?$');
/** strip off any trailing spaces or ,**/
- fullStreet := btrim(fullStreet, ',');
+ fullStreet := btrim(fullStreet, ' ,');
ELSE
fullStreet := rawInput;
-- street level, internal address, location
-- street level, internal address location
-- The first three are useful.
+
tempString := substring(fullStreet, '(?i),' || ws || '+(.*?)(,?' || ws ||
'*' || cull_null(state) || '$)');
IF tempString = '' THEN tempString := NULL; END IF;
IF tempString IS NOT NULL THEN
- result.location := tempString;
+ IF tempString LIKE '%,%' THEN -- if it has a comma probably has suite, strip it from location
+ result.location := trim(split_part(tempString,',',2));
+ ELSE
+ result.location := tempString;
+ END IF;
IF addressString IS NOT NULL THEN
fullStreet := substring(fullStreet, '(?i)' || addressString || ws ||
'+(.*),' || ws || '+' || result.location);
-- Pull potential street types from the full street information
-- this count is surprisingly slow by itself but much faster if you add an ILIKE AND clause
-- difference of 98ms vs 16 ms for example
+ -- Put a space in front to make regex easier can always count on it starting with space
+ -- Reject all street types where the fullstreet name is equal to the name
+ fullStreet := ' ' || trim(fullStreet);
tempInt := count(*) FROM street_type_lookup
- WHERE fullStreet ILIKE '%' || name || '%' AND texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+ WHERE fullStreet ILIKE '%' || name || '%' AND
+ trim(upper(fullStreet)) != name AND
+ texticregexeq(fullStreet, '(?i)' || ws || '(' || name
|| ')(?:' || ws || '|$)');
IF tempInt = 1 THEN
SELECT INTO rec abbrev, substring(fullStreet, '(?i)' || ws || '('
|| name || ')(?:' || ws || '|$)') AS given FROM street_type_lookup
- WHERE fullStreet ILIKE '%' || name || '%' AND texticregexeq(fullStreet, '(?i)' || ws || '(' || name
- || ')(?:' || ws || '|$)');
+ WHERE fullStreet ILIKE '%' || name || '%' AND
+ trim(upper(fullStreet)) != name AND
+ texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+ || ')(?:' || ws || '|$)') ;
streetType := rec.given;
result.streetTypeAbbrev := rec.abbrev;
ELSIF tempInt > 1 THEN
tempInt := 0;
- FOR rec IN SELECT abbrev, substring(fullStreet, '(?i)' || ws || '('
+ FOR rec IN SELECT abbrev, substring(fullStreet, '(?i)' || ws || '?('
|| name || ')(?:' || ws || '|$)') AS given FROM street_type_lookup
- WHERE fullStreet ILIKE '%' || name || '%' AND texticregexeq(fullStreet, '(?i)' || ws || '(' || name
- || ')(?:' || ws || '|$)') LOOP
+ WHERE fullStreet ILIKE '%' || name || '%' AND
+ trim(upper(fullStreet)) != name AND
+ texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+ || ')(?:' || ws || '|$)') ORDER BY length(name) DESC LIMIT 1 LOOP
-- If we have found an internal address, make sure the type
-- precedes it.
IF result.internal IS NOT NULL THEN
END IF;
-- There is a little more processing required now. If the word after the
- -- street type begins with a number, the street type should be considered
- -- part of the name, as well as the next word. eg, State Route 225a. If
+ -- street type begins with a number, then its most likely a highway like State Route 225a. If
+ -- In Tiger 2010+ the reduced Street name just has the number
-- the next word starts with a char, then everything after the street type
-- will be considered location. If there is no street type, then I'm sad.
IF streetType IS NOT NULL THEN
- tempString := substring(fullStreet, streetType || ws ||
- E'+([0-9][^ ,.\t\r\n\f]*?)' || ws);
- IF tempString IS NOT NULL THEN
- IF result.location IS NULL THEN
- result.location := substring(fullStreet, streetType || ws || '+'
- || tempString || ws || '+(.*)$');
- END IF;
- reducedStreet := substring(fullStreet, '(.*)' || ws || '+'
- || result.location || '$');
- streetType := NULL;
- result.streetTypeAbbrev := NULL;
+ -- Check if the fullStreet contains the streetType and ends in just numbers
+ -- If it does its a road number like a country road or state route
+ -- Just set the number to be the name of street
+ tempString := substring(fullStreet, streetType || ws || '+' || E'([0-9]+)' || ws || '*$');
+
+ IF tempString > '' AND result.location IS NOT NULL THEN
+ reducedStreet := tempString;
+ result.streetName := trim(reducedStreet);
ELSE
- IF result.location IS NULL THEN
- result.location := substring(fullStreet, streetType || ws || '+(.*)$');
- END IF;
- reducedStreet := substring(fullStreet, '^(.*)' || ws || '+'
- || streetType);
- -- the post direction might be portion of fullStreet after reducedStreet
- tempString := trim(regexp_replace(fullStreet, reducedStreet || ws || streetType,''));
- IF tempString > '' THEN
+ tempString := substring(fullStreet, streetType || ws ||
+ E'+([0-9][^ ,.\t\r\n\f]*?)' || ws);
+ IF tempString IS NOT NULL THEN
+ IF result.location IS NULL THEN
+ result.location := substring(fullStreet, streetType || ws || '+'
+ || tempString || ws || '+(.*)$');
+ END IF;
+ reducedStreet := substring(fullStreet, '(.*)' || ws || '+'
+ || result.location || '$');
+ streetType := NULL;
+ result.streetTypeAbbrev := NULL;
+ ELSE
+ IF result.location IS NULL THEN
+ result.location := substring(fullStreet, streetType || ws || '+(.*)$');
+ END IF;
+ reducedStreet := substring(fullStreet, '^(.*)' || ws || '+'
+ || streetType);
+ END IF;
+ -- the post direction might be portion of fullStreet after reducedStreet
+ -- reducedStreet: Main fullStreet: Main St, N or fullStreet: Main St N
+ tempString := trim(regexp_replace(fullStreet, reducedStreet || ws || '+' || streetType,''));
+ IF tempString > '' THEN
tempString := abbrev FROM direction_lookup WHERE
- tempString ILIKE '%' || name || '%'
+ tempString ILIKE '%' || name || '%' AND texticregexeq(reducedStreet || ws || '+' || streetType, '(?i)(' || name || ')' || ws || '+|$')
ORDER BY length(name) DESC LIMIT 1;
IF tempString IS NOT NULL THEN
result.postDirAbbrev = trim(tempString);
END IF;
- END IF;
- END IF;
+ END IF;
+
- IF debug_flag THEN
- raise notice '% reduced street: %', clock_timestamp(), reducedStreet;
- END IF;
-
- -- The pre direction should be at the beginning of the fullStreet string.
- -- The post direction should be at the beginning of the location string
- -- if there is no internal address
- tempString := substring(reducedStreet, '(?i)(^' || name
- || ')' || ws) FROM direction_lookup WHERE
- reducedStreet ILIKE '%' || name || '%' AND texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
- ORDER BY length(name) DESC LIMIT 1;
- IF tempString IS NOT NULL THEN
- preDir := tempString;
- result.preDirAbbrev := abbrev FROM direction_lookup
- where reducedStreet ILIKE '%' || name '%' AND texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
- ORDER BY length(name) DESC LIMIT 1;
- result.streetName := trim(substring(reducedStreet, '^' || preDir || ws || '(.*)'));
- ELSE
- result.streetName := trim(reducedStreet);
+ IF debug_flag THEN
+ raise notice '% reduced street: %', clock_timestamp(), reducedStreet;
+ END IF;
+
+ -- The pre direction should be at the beginning of the fullStreet string.
+ -- The post direction should be at the beginning of the location string
+ -- if there is no internal address
+ reducedStreet := trim(reducedStreet);
+ tempString := trim(regexp_replace(fullStreet, ws || '+' || reducedStreet || ws || '+',''));
+ IF tempString > '' THEN
+ tempString := substring(reducedStreet, '(?i)(^' || name
+ || ')' || ws) FROM direction_lookup WHERE
+ reducedStreet ILIKE '%' || name || '%' AND texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+ ORDER BY length(name) DESC LIMIT 1;
+ END IF;
+ IF tempString > '' THEN
+ preDir := tempString;
+ result.preDirAbbrev := abbrev FROM direction_lookup
+ where reducedStreet ILIKE '%' || name '%' AND texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+ ORDER BY length(name) DESC LIMIT 1;
+ result.streetName := trim(substring(reducedStreet, '^' || preDir || ws || '(.*)'));
+ ELSE
+ result.streetName := trim(reducedStreet);
+ END IF;
END IF;
-
IF texticregexeq(result.location, '(?i)' || result.internal || '$') THEN
-- If the internal address is at the end of the location, then no
-- location was given. We still need to look for post direction.
$$
LANGUAGE plpgsql STABLE
COST 100;
+
\ No newline at end of file
-T1|529 Main St, Boston, MA 02129|POINT(-71.0718721304348 42.3835137826087)|0
-T2|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0
-T3|100 Federal St, Boston, MA 02110|POINT(-71.056314 42.354769)|1
-T4|529 Main St, Boston, MA 02129|POINT(-71.0718721304348 42.3835137826087)|0
-T5|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0
-T6|100 Federal St, Boston, MA 02110|POINT(-71.056314 42.354769)|0
-T7|529 Main St, Boston, MA 02129|POINT(-71.0718721304348 42.3835137826087)|0
-T8|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0
-T9|100 Federal St, Boston, MA 02110|POINT(-71.056314 42.354769)|0
-T10|530 Main St, Boston, MA 02129|POINT(-71.071734 42.38345)|0
-T11|76 State St, Boston, MA 02109|POINT(-71.0561527692308 42.3589996153846)|0
-T12|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|1
-T12|101 Federal St, Blackstone, MA 01504|POINT(-71.5246987130121 42.0288604697987)|6
-T12|101 Federal St, Lynn, MA 01905|POINT(-70.9678344129807 42.4659009057427)|6
-T12|101 Federal St, Lawrence, MA 01810|POINT(-71.2012528459153 42.6901540310566)|8
-T12|101 Federal St, Andover, MA 01810|POINT(-71.2012528459153 42.6901540310566)|8
-T12|101 Federal St, Northampton, MA 01062|POINT(-72.655372 42.32273)|8
-T12|101 Federal St, Belchertown, MA 01007|POINT(-72.4120567596665 42.2944575040247)|8
-T12|101 Federal St, Agawam Town, MA 01001|POINT(-72.622305017362 42.0837072292948)|9
-T12|101 Federal St, Millers Falls, MA 01349|POINT(-72.4993935958437 42.5680170002944)|13
-T12|101 Federal St, Greenfield Town, MA 01301|POINT(-72.5990944761905 42.5900632380952)|13
-T12|Federal Ct, Boston, MA 02110|POINT(-71.0567205 42.354134)|14
-T12|Federal Ln, Dedham, MA 02026|POINT(-71.183565 42.238295)|27
-#TB1|24 School St, Boston, MA 02108|24 School Street, Boston, MA 02108|POINT(-71.058879 42.357619)|0
-#TB1|20 School St, Boston, MA 02108|20 School Street, Boston, MA 02109|POINT(-71.058759 42.357577)|1
-T13|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|1
-T13|101 Federal St, Blackstone, MA 01504|POINT(-71.5246987130121 42.0288604697987)|6
-T13|101 Federal St, Lynn, MA 01905|POINT(-70.9678344129807 42.4659009057427)|6
-T13|101 Federal St, Lawrence, MA 01810|POINT(-71.2012528459153 42.6901540310566)|8
-T13|101 Federal St, Andover, MA 01810|POINT(-71.2012528459153 42.6901540310566)|8
-T13|101 Federal St, Northampton, MA 01062|POINT(-72.655372 42.32273)|8
-T13|101 Federal St, Belchertown, MA 01007|POINT(-72.4120567596665 42.2944575040247)|8
-T13|101 Federal St, Agawam Town, MA 01001|POINT(-72.622305017362 42.0837072292948)|9
-T13|101 Federal St, Millers Falls, MA 01349|POINT(-72.4993935958437 42.5680170002944)|13
-T13|101 Federal St, Greenfield Town, MA 01301|POINT(-72.5990944761905 42.5900632380952)|13
-T13|Federal Ct, Boston, MA 02110|POINT(-71.0567205 42.354134)|14
-T13|Federal Ln, Dedham, MA 02026|POINT(-71.183565 42.238295)|27
-T14|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|1
-T15|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|11
-T16|101 Federal St, Boston, MA 02110|POINT(-71.0563038 42.3546946)|11
-T16|101 Federal St, Blackstone, MA 01504|POINT(-71.5246987130121 42.0288604697987)|16
-T16|101 Federal St, Lynn, MA 01905|POINT(-70.9678344129807 42.4659009057427)|16
-T16|101 Federal St, Lawrence, MA 01810|POINT(-71.2012528459153 42.6901540310566)|18
-T16|101 Federal St, Andover, MA 01810|POINT(-71.2012528459153 42.6901540310566)|18
-T16|101 Federal St, Northampton, MA 01062|POINT(-72.655372 42.32273)|18
-T16|101 Federal St, Belchertown, MA 01007|POINT(-72.4120567596665 42.2944575040247)|18
-T16|101 Federal St, Agawam Town, MA 01001|POINT(-72.622305017362 42.0837072292948)|19
-T16|101 Federal St, Millers Falls, MA 01349|POINT(-72.4993935958437 42.5680170002944)|23
-T16|101 Federal St, Greenfield Town, MA 01301|POINT(-72.5990944761905 42.5900632380952)|23
-T16|Federal Ct, Boston, MA 02110|POINT(-71.0567205 42.354134)|24
-T16|Federal Ln, Dedham, MA 02026|POINT(-71.183565 42.238295)|37
-#1087a|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|1
-#1087a|75 State St, Milton, MA 02186|POINT(-71.0409054545455 42.2563454545455)|5
-#1087b|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|1
-#1087b|75 State St, Milton, MA 02186|POINT(-71.0409054545455 42.2563454545455)|4
-#1087c|75 State St, Boston, MA 02109|POINT(-71.0556974285714 42.3590795714286)|0
-#1073a|212 3rd Ave N, Minneapolis, MN 55401|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.2718114 44.9850188)|1
-#1073a|212 3rd Ave S, Minneapolis, MN 55401|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.2633388442551 44.9808723431463)|3
-#1073b|212 3rd Ave N, Minneapolis, MN 55401|POINT(-93.2718114 44.9850188)|0
\ No newline at end of file
+T1|529 Main St, Boston, MA 02129|POINT(-71.07187 42.38351)|0
+T2|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|0
+T3|100 Federal St, Boston, MA 02110|POINT(-71.05631 42.35477)|1
+T4|529 Main St, Boston, MA 02129|POINT(-71.07187 42.38351)|0
+T5|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|0
+T6|100 Federal St, Boston, MA 02110|POINT(-71.05631 42.35477)|0
+T7|529 Main St, Boston, MA 02129|POINT(-71.07187 42.38351)|0
+T8|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|0
+T9|100 Federal St, Boston, MA 02110|POINT(-71.05631 42.35477)|0
+T10|530 Main St, Boston, MA 02129|POINT(-71.07173 42.38345)|0
+T11|76 State St, Boston, MA 02109|POINT(-71.05615 42.359)|0
+T12|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|1
+T12|101 Federal St, Blackstone, MA 01504|POINT(-71.5247 42.02886)|6
+T12|101 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|6
+T12|101 Federal St, Lawrence, MA 01810|POINT(-71.20125 42.69015)|8
+T12|101 Federal St, Andover, MA 01810|POINT(-71.20125 42.69015)|8
+T12|101 Federal St, Northampton, MA 01062|POINT(-72.65537 42.32273)|8
+T12|101 Federal St, Belchertown, MA 01007|POINT(-72.41206 42.29446)|8
+T12|101 Federal St, Agawam Town, MA 01001|POINT(-72.62231 42.08371)|9
+T12|101 Federal St, Millers Falls, MA 01349|POINT(-72.49939 42.56802)|13
+T12|101 Federal St, Greenfield Town, MA 01301|POINT(-72.59909 42.59006)|13
+T12|Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|14
+T12|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|27
+#TB1|24 School St, Boston, MA 02108|24 School Street, Boston, MA 02108|POINT(-71.05888 42.35762)|0
+#TB1|20 School St, Boston, MA 02108|20 School Street, Boston, MA 02109|POINT(-71.05876 42.35758)|1
+T13|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|1
+T13|101 Federal St, Blackstone, MA 01504|POINT(-71.5247 42.02886)|6
+T13|101 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|6
+T13|101 Federal St, Lawrence, MA 01810|POINT(-71.20125 42.69015)|8
+T13|101 Federal St, Andover, MA 01810|POINT(-71.20125 42.69015)|8
+T13|101 Federal St, Northampton, MA 01062|POINT(-72.65537 42.32273)|8
+T13|101 Federal St, Belchertown, MA 01007|POINT(-72.41206 42.29446)|8
+T13|101 Federal St, Agawam Town, MA 01001|POINT(-72.62231 42.08371)|9
+T13|101 Federal St, Millers Falls, MA 01349|POINT(-72.49939 42.56802)|13
+T13|101 Federal St, Greenfield Town, MA 01301|POINT(-72.59909 42.59006)|13
+T13|Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|14
+T13|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|27
+T14|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|1
+T15|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|11
+T16|101 Federal St, Boston, MA 02110|POINT(-71.0563 42.35469)|11
+T16|101 Federal St, Blackstone, MA 01504|POINT(-71.5247 42.02886)|16
+T16|101 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|16
+T16|101 Federal St, Lawrence, MA 01810|POINT(-71.20125 42.69015)|18
+T16|101 Federal St, Andover, MA 01810|POINT(-71.20125 42.69015)|18
+T16|101 Federal St, Northampton, MA 01062|POINT(-72.65537 42.32273)|18
+T16|101 Federal St, Belchertown, MA 01007|POINT(-72.41206 42.29446)|18
+T16|101 Federal St, Agawam Town, MA 01001|POINT(-72.62231 42.08371)|19
+T16|101 Federal St, Millers Falls, MA 01349|POINT(-72.49939 42.56802)|23
+T16|101 Federal St, Greenfield Town, MA 01301|POINT(-72.59909 42.59006)|23
+T16|Federal Ct, Boston, MA 02110|POINT(-71.05672 42.35413)|24
+T16|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|37
+#1087a|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|1
+#1087a|75 State St, Milton, MA 02186|POINT(-71.04091 42.25635)|5
+#1087b|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|1
+#1087b|75 State St, Milton, MA 02186|POINT(-71.04091 42.25635)|4
+#1087c|75 State St, Boston, MA 02109|POINT(-71.0557 42.35908)|0
+#1073a|212 3rd Ave N, Minneapolis, MN 55401|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.27181 44.98502)|2
+#1073a|212 3rd Ave S, Minneapolis, MN 55404|212 3rd Ave N, MINNEAPOLIS, MN 553404|POINT(-93.26355 44.98062)|3
+#1073b|212 3rd Ave N, Minneapolis, MN 55401|POINT(-93.27181 44.98502)|0
+#1076a|16725 24, Plymouth, MN 55447|16725 Co Rd 24, Plymouth, MN 55447|POINT(-93.49326 45.02186)|25
+#1076a|16725 24th Ave N, Plymouth, MN 55447|16725 Co Rd 24, Plymouth, MN 55447|POINT(-93.4927 45.00457)|27
+#1076b|16725 24, Plymouth, MN 55447|16725 County Road 24, Plymouth, MN 55447|POINT(-93.49326 45.02186)|25
+#1076b|16725 24th Ave N, Plymouth, MN 55447|16725 County Road 24, Plymouth, MN 55447|POINT(-93.4927 45.00457)|27
+#1076c|13800 9, Andover, MN 55304|13800 County Hwy 9, Andover, MN 55304|POINT(-93.35733 45.22052)|30
+#1076d|13800 9, Andover, MN 55304|13800 9, Andover, MN 55304|POINT(-93.35733 45.22052)|10
+#1076e|3900 6, North Eastham, MA 02642|3900 Route 6, Eastham, Massachusetts 02642|POINT(-69.98743 41.85543)|21
\ No newline at end of file
--$Id$
\timing
-- Limit 1
-SELECT 'T1', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129',1);
-SELECT 'T2', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',1);
-SELECT 'T3', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('100 Federal Street, Boston, MA 02109',1);
+SELECT 'T1', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129',1);
+SELECT 'T2', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',1);
+SELECT 'T3', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('100 Federal Street, Boston, MA 02109',1);
-- default
-SELECT 'T4', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129');
-SELECT 'T5', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109');
-SELECT 'T6', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('100 Federal Street, Boston,MA 02109');
+SELECT 'T4', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129');
+SELECT 'T5', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('75 State Street, Boston, MA 02109');
+SELECT 'T6', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('100 Federal Street, Boston,MA 02109');
-- 20
-SELECT 'T7', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129',20);
-SELECT 'T8', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',20);
-SELECT 'T9', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('100 Federal Street, Boston, MA 02109',20);
+SELECT 'T7', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('529 Main Street, Boston, MA 02129',20);
+SELECT 'T8', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',20);
+SELECT 'T9', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('100 Federal Street, Boston, MA 02109',20);
-- Limit 1 - Test caching effects
-SELECT 'T10', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('530 Main Street, Boston MA, 02129',1);
-SELECT 'T11', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('76 State Street, Boston MA, 02109',1);
-SELECT 'T12', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Federal Street, Boston, MA',20);
+SELECT 'T10', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('530 Main Street, Boston MA, 02129',1);
+SELECT 'T11', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('76 State Street, Boston MA, 02109',1);
+SELECT 'T12', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('101 Federal Street, Boston, MA',20);
-- Test batch geocoding along a street
-SELECT '#TB1' As ticket, pprint_addy((g).addy) As address, target, ST_AsText((g).geomout) As pt, (g).rating FROM (SELECT geocode(target::text,1) As g, target FROM (VALUES ('24 School Street, Boston, MA 02108'), ('20 School Street, Boston, MA 02109')) As f(target) ) As foo;
+SELECT '#TB1' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target::text,1) As g, target FROM (VALUES ('24 School Street, Boston, MA 02108'), ('20 School Street, Boston, MA 02109')) As f(target) ) As foo;
-- Partial address
-SELECT 'T13', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Federal Street, Boston MA',20);
-SELECT 'T14', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Federal Street, Boston MA',1);
+SELECT 'T13', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('101 Federal Street, Boston MA',20);
+SELECT 'T14', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('101 Federal Street, Boston MA',1);
--Test misspellings and missing zip --
-SELECT 'T15', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',1);
-SELECT 'T16', pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',50);
+SELECT 'T15', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',1);
+SELECT 'T16', pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',50);
-- Ratings wrong for missing or wrong local zips
-SELECT '#1087a' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02110',3);
-SELECT '#1087b' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA',3);
+SELECT '#1087a' As ticket, pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('75 State Street, Boston, MA 02110',3);
+SELECT '#1087b' As ticket, pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('75 State Street, Boston, MA',3);
--right zip
-SELECT '#1087c' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',1);
+SELECT '#1087c' As ticket, pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('75 State Street, Boston, MA 02109',1);
--Geocoding mangled zipcodes
-SELECT '#1073a' As ticket, pprint_addy((g).addy) As address, target, ST_AsText((g).geomout) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '212 3rd Ave N, MINNEAPOLIS, MN 553404'::text As target) As f) As foo;
-SELECT '#1073b' As ticket, pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('212 3rd Ave N, MINNEAPOLIS, MN 55401-',2);
+SELECT '#1073a' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '212 3rd Ave N, MINNEAPOLIS, MN 553404'::text As target) AS f) As foo;
+SELECT '#1073b' As ticket, pprint_addy(addy) As address, ST_AsText(ST_SnapToGrid(geomout,0.00001)) As pt, rating FROM geocode('212 3rd Ave N, MINNEAPOLIS, MN 55401-',2);
+
+-- country roads and highways with spaces in street type
+SELECT '#1076a' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target) As g, target FROM (SELECT '16725 Co Rd 24, Plymouth, MN 55447'::text As target) As f) As foo;
+SELECT '#1076b' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '16725 County Road 24, Plymouth, MN 55447'::text As target) As f) As foo;
+SELECT '#1076c' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '13800 County Hwy 9, Andover, MN 55304'::text As target) AS f) As foo;
+SELECT '#1076d' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '13800 9, Andover, MN 55304'::text As target) AS f) As foo;
+SELECT '#1076e' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,1) As g, target FROM (SELECT '3900 Route 6, Eastham, Massachusetts 02642'::text As target) AS f) As foo;
+
\timing
#887|2450|N|COLORADO|St|||PHILADELPHIA|PA|19132|t
-#1051|212||3rd|Ave|N|Suite 560|Minneapolis|MN|55401|t
-#1051|3937||43RD|Ave|S||MINNEAPOLIS|MN|55406|t
-#1051|212|N|3rd|Ave|||Minneapolis|MN|55401|t
-#1051|212||3rd|Ave|N||Minneapolis|MN|55401|t
+#1051a|212||3rd|Ave|N|Suite 560|Minneapolis|MN|55401|t
+#1051b|3937||43RD|Ave|S||MINNEAPOLIS|MN|55406|t
+#1051c|212|N|3rd|Ave|||Minneapolis|MN|55401|t
+#1051d|212||3rd|Ave|N||Minneapolis|MN|55401|t
529||Main|St|||Boston|MA|02129|t
529||Main|St|||Boston|MA|02129|t
529||Main|St|||Boston|MA||t
-#1073|212||3rd|Ave|N||MINNEAPOLIS|MN|553404|t
-#1073|212||3rd|Ave|N||MINNEAPOLIS|MN|55401|t
-#1073|529||Main|St|||Boston|MA|021|t
-#1086|949|N|3rd|St|||New Hyde Park|NY|11040|t
-#1086|949|N|3rd|St|||New Hyde Park|NY|11040|t
+529||Main|St||Apt 201|Boston|MA|02129|t
+529||Main|St||Apt 201|Boston|MA|02129|t
+529||Main|St||Apt 201|Boston|MA||t
+#1073a|212||3rd|Ave|N||MINNEAPOLIS|MN|553404|t
+#1073b|212||3rd|Ave|N||MINNEAPOLIS|MN|55401|t
+#1073c|529||Main|St|||Boston|MA|021|t
+#1086a|949|N|3rd|St|||New Hyde Park|NY|11040|t
+#1086b|949|N|3rd|St|||New Hyde Park|NY|11040|t
+#1076a|16725||24|Co Rd|||Plymouth|MN|55447|t
+#1076b|16725||24|Co Rd|||Plymouth|MN|55447|t
+#1076c|13800||9|Co Hwy|||Andover|MN|55304|t
+#1076d|13800|| 9||||Andover|MN|55304|t
+#1076e|14||Forest|Rd|||Acton|MA||t
--$Id$
\timing
SELECT '#887' As ticket, * FROM normalize_address('2450 N COLORADO ST, PHILADELPHIA, PA, 19132');
-SELECT '#1051' As ticket, * FROM normalize_address('212 3rd Ave N Suite 560, Minneapolis, MN 55401');
-SELECT '#1051' As ticket, * FROM normalize_address('3937 43RD AVE S, MINNEAPOLIS, MN 55406');
-SELECT '#1051' As ticket, * FROM normalize_address('212 N 3rd Ave, Minneapolis, MN 55401');
+SELECT '#1051a' As ticket, * FROM normalize_address('212 3rd Ave N Suite 560, Minneapolis, MN 55401');
+SELECT '#1051b' As ticket, * FROM normalize_address('3937 43RD AVE S, MINNEAPOLIS, MN 55406');
+SELECT '#1051c' As ticket, * FROM normalize_address('212 N 3rd Ave, Minneapolis, MN 55401');
-- City missing , -- NOTE this one won't normalize right if you don't have MN data loaded
-SELECT '#1051' As ticket, * FROM normalize_address('212 3rd Ave N Minneapolis, MN 55401');
+SELECT '#1051d' As ticket, * FROM normalize_address('212 3rd Ave N Minneapolis, MN 55401');
-- comma in wrong spot
SELECT * FROM normalize_address('529 Main Street, Boston MA, 02129');
-- comma in right spot
SELECT * FROM normalize_address('529 Main Street, Boston,MA 02129');
-- partial address
SELECT * FROM normalize_address('529 Main Street, Boston, MA');
+-- Full address with suite using ,
+SELECT * FROM normalize_address('529 Main Street, Apt 201, Boston, MA 02129');
+-- Full address with apart using space
+SELECT * FROM normalize_address('529 Main Street Apt 201, Boston, MA 02129');
+-- Partial address with apartment
+SELECT * FROM normalize_address('529 Main Street, Apt 201, Boston, MA');
-- Partial and Mangled zipcodes
-SELECT '#1073' As ticket, * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 553404');
-SELECT '#1073' As ticket, * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 55401-');
-SELECT '#1073' As ticket, * FROM normalize_address('529 Main Street, Boston, MA 021');
+SELECT '#1073a' As ticket, * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 553404');
+SELECT '#1073b' As ticket, * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 55401-');
+SELECT '#1073c' As ticket, * FROM normalize_address('529 Main Street, Boston, MA 021');
-- comma in wrong position
-SELECT '#1086' As ticket, * FROM normalize_address('949 N 3rd St, New Hyde Park, NY, 11040');
+SELECT '#1086a' As ticket, * FROM normalize_address('949 N 3rd St, New Hyde Park, NY, 11040');
-- comma in right position --
-SELECT '#1086' As ticket, * FROM normalize_address('949 N 3rd St, New Hyde Park, NY 11040');
+SELECT '#1086b' As ticket, * FROM normalize_address('949 N 3rd St, New Hyde Park, NY 11040');
+
+-- country roads and highways with spaces in street type
+SELECT '#1076a' As ticket, * FROM normalize_address('16725 Co Rd 24, Plymouth, MN 55447');
+SELECT '#1076b' As ticket, * FROM normalize_address('16725 County Road 24, Plymouth, MN 55447');
+SELECT '#1076c' As ticket, * FROM normalize_address('13800 County Hwy 9, Andover, MN 55304');
+SELECT '#1076d' As ticket, * FROM normalize_address('13800 9, Andover, MN 55304');
+-- this one is a regular street that happens to have a street type as the name
+SELECT '#1076e' As ticket, * FROM normalize_address('14 Forest Road, Acton, MA');
\timing
\a
-\o normalize_address.out
+\o normalize_address_regress.out
\i normalize_address_regress.sql
\o geocode_regress.out
\i geocode_regress.sql
\ No newline at end of file
-- Create street type lookup table
DROP TABLE IF EXISTS street_type_lookup;
-CREATE TABLE street_type_lookup (name VARCHAR(20) PRIMARY KEY, abbrev VARCHAR(4));
+CREATE TABLE street_type_lookup (name VARCHAR(50) PRIMARY KEY, abbrev VARCHAR(50));
INSERT INTO street_type_lookup (name, abbrev) VALUES ('ALLEE', 'Aly');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('ALLEY', 'Aly');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('ALLY', 'Aly');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('FORD', 'Frd');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('FRD', 'Frd');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('FORDS', 'Frds');
-INSERT INTO street_type_lookup (name, abbrev) VALUES ('FOREST', 'Frst');
-INSERT INTO street_type_lookup (name, abbrev) VALUES ('FORESTS', 'Frst');
-INSERT INTO street_type_lookup (name, abbrev) VALUES ('FRST', 'Frst');
+--INSERT INTO street_type_lookup (name, abbrev) VALUES ('FOREST', 'Frst');
+--INSERT INTO street_type_lookup (name, abbrev) VALUES ('FORESTS', 'Frst');
+--INSERT INTO street_type_lookup (name, abbrev) VALUES ('FRST', 'Frst');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('FORG', 'Frg');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('FORGE', 'Frg');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('FRG', 'Frg');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('UPAS', 'Upas');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('UNS', 'Uns');
INSERT INTO street_type_lookup (name, abbrev) VALUES ('WL', 'Wl');
+
+-- prefix and suffix street names for highways and roads with spaces in them
+INSERT INTO street_type_lookup (name, abbrev)
+SELECT name, abbrev
+ FROM (VALUES
+ ('COUNTY HWY', 'Co Hwy'),
+ ('COUNTY HIGHWAY', 'Co Hwy'),
+ ('COUNTY HIGH WAY', 'Co Hwy'),
+ ('COUNTY ROAD', 'Co Rd'),
+ ('CO RD', 'Co Rd'),
+ ('CORD', 'Co Rd'),
+ ('CO RTE', 'Co Rte'),
+ ('COUNTY ROUTE', 'Co Rte'),
+ ('CO ST AID HWY', 'Co St Aid Hwy'),
+ ('FARM RD', 'Farm Rd'),
+ ('FIRE RD', 'Fire Rd'),
+ ('FOREST RD', 'Forest Rd'),
+ ('FOREST ROAD', 'Forest Rd'),
+ ('FOREST RTE', 'Forest Rte'),
+ ('FOREST ROUTE', 'Forest Rte'),
+ ('STATE HWY', 'State Hwy'),
+ ('STATE HIGHWAY', 'State Hwy'),
+ ('STATE HIGH WAY', 'State Hwy'),
+ ('STATE RD', 'State Rd'),
+ ('STATE ROAD', 'State Rd'),
+ ('STATE ROUTE', 'State Rte'),
+ ('STATE RTE', 'State Rte'),
+ ('US HWY', 'US Hwy'),
+ ('US HIGHWAY', 'US Hwy'),
+ ('US HIGH WAY', 'US Hwy'),
+ ('US RTE', 'US Rte'),
+ ('US ROUTE', 'US Rte'),
+ ('US RT', 'US Rte'),
+ ('USFS HWY', 'USFS Hwy'),
+ ('USFS HIGHWAY', 'USFS Hwy'),
+ ('USFS HIGH WAY', 'USFS Hwy'),
+ ('USFS RD', 'USFS Rd'),
+ ('USFS ROAD', 'USFS Rd')
+ ) t(name, abbrev)
+ WHERE t.name NOT IN(SELECT name FROM street_type_lookup);
CREATE INDEX street_type_lookup_abbrev_idx ON street_type_lookup (abbrev);
-- Create place and countysub lookup tables
stateAbbrev VARCHAR,
zip VARCHAR,
parsed BOOLEAN);*/
-
+-- prefix and suffix street names for highways and roads with spaces in them
+INSERT INTO street_type_lookup (name, abbrev)
+ SELECT name, abbrev
+ FROM (VALUES
+ ('COUNTY HWY', 'Co Hwy'),
+ ('COUNTY HIGHWAY', 'Co Hwy'),
+ ('COUNTY HIGH WAY', 'Co Hwy'),
+ ('COUNTY ROAD', 'Co Rd'),
+ ('CO RD', 'Co Rd'),
+ ('CORD', 'Co Rd'),
+ ('CO RTE', 'Co Rte'),
+ ('COUNTY ROUTE', 'Co Rte'),
+ ('CO ST AID HWY', 'Co St Aid Hwy'),
+ ('FARM RD', 'Farm Rd'),
+ ('FIRE RD', 'Fire Rd'),
+ ('FOREST RTE', 'Forest Rte'),
+ ('FOREST ROUTE', 'Forest Rte'),
+ ('STATE HWY', 'State Hwy'),
+ ('STATE HIGHWAY', 'State Hwy'),
+ ('STATE HIGH WAY', 'State Hwy'),
+ ('STATE RD', 'State Rd'),
+ ('STATE ROAD', 'State Rd'),
+ ('STATE ROUTE', 'State Rte'),
+ ('STATE RTE', 'State Rte'),
+ ('US HWY', 'US Hwy'),
+ ('US HIGHWAY', 'US Hwy'),
+ ('US HIGH WAY', 'US Hwy'),
+ ('US RTE', 'US Rte'),
+ ('US ROUTE', 'US Rte'),
+ ('US RT', 'US Rte'),
+ ('USFS HWY', 'USFS Hwy'),
+ ('USFS HIGHWAY', 'USFS Hwy'),
+ ('USFS HIGH WAY', 'USFS Hwy'),
+ ('USFS RD', 'USFS Rd'),
+ ('USFS ROAD', 'USFS Rd')
+ ) t(name, abbrev)
+ WHERE t.name NOT IN(SELECT name FROM street_type_lookup);
+DELETE FROM street_type_lookup WHERE name = 'FOREST';
-- System/General helper functions
\i utility/utmzone.sql
\i utility/cull_null.sql