diff --git a/data/words.sql b/data/words.sql index f927d000..85578f20 100644 --- a/data/words.sql +++ b/data/words.sql @@ -29787,7 +29787,6 @@ st 5557484 -- prefill word table select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null; -select count(make_keywords(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null; select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; -- copy the word frequencies diff --git a/docs/Migration.md b/docs/Migration.md new file mode 100644 index 00000000..62cbc4d9 --- /dev/null +++ b/docs/Migration.md @@ -0,0 +1,56 @@ +Database Migrations +=================== + +This page describes database migrations necessary to update existing databases +to newer versions of Nominatim. + +SQL statements should be executed from the postgres commandline. Execute +`psql nominiatim` to enter command line mode. + +3.0.0 +----- + +### Postcode Table + +A new separate table for artificially computed postcode centroids was introduced. +Migration to the new format is possible but **not recommended**. + + * create postcode table and indexes, running the following SQL statements: + + CREATE TABLE location_postcode + (place_id BIGINT, parent_place_id BIGINT, rank_search SMALLINT, + rank_address SMALLINT, indexed_status SMALLINT, indexed_date TIMESTAMP, + country_code varchar(2), postcode TEXT, + geometry GEOMETRY(Geometry, 4326)); + CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry); + CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id); + CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode); + GRANT SELECT ON location_postcode TO "www-data"; + + * add postcode column to location_area tables with SQL statement: + + ALTER TABLE location_area ADD COLUMN postcode TEXT; + + * reimport functions + + ./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions + + * create appropriate triggers with SQL: + + CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcode + FOR EACH ROW EXECUTE PROCEDURE postcode_update(); + + * populate postcode table (will take a while): + + ./utils/setup.php --calculate-postcodes --index --index-noanalyse + +This will create a working database. You may also delete the old artificial +postcodes now. Note that this may be expensive and is not absolutely necessary. +The following SQL statement will remove them: + + DELETE FROM place_addressline a USING placex p + WHERE a.address_place_id = p.place_id and p.osm_type = 'P'; + ALTER TABLE placex DISABLE TRIGGER USER; + DELETE FROM placex WHERE osm_type = 'P'; + ALTER TABLE placex ENABLE TRIGGER USER; + diff --git a/lib/Geocode.php b/lib/Geocode.php index 80449cb6..b8933704 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -51,10 +51,22 @@ class Geocode protected $sQuery = false; protected $aStructuredQuery = false; + protected $oNormalizer = null; + public function __construct(&$oDB) { $this->oDB =& $oDB; + $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + } + + private function normTerm($sTerm) + { + if ($this->oNormalizer === null) { + return null; + } + + return $this->oNormalizer->transliterate($sTerm); } public function setReverseInPlan($bReverse) @@ -410,8 +422,15 @@ class Geocode $sPlaceIDs = join(',', array_keys($aPlaceIDs)); $sImportanceSQL = ''; - if ($this->sViewboxSmallSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; - if ($this->sViewboxLargeSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom = ''; + if ($this->sViewboxSmallSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, geometry) THEN 1 ELSE 0.75 END * "; + } + if ($this->sViewboxLargeSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, geometry) THEN 1 ELSE 0.75 END * "; + } $sSQL = "SELECT "; $sSQL .= " osm_type,"; @@ -476,6 +495,35 @@ class Geocode if ($this->bIncludeNameDetails) $sSQL .= "name, "; $sSQL .= " extratags->'place' "; + // postcode table + $sSQL .= "UNION "; + $sSQL .= "SELECT"; + $sSQL .= " 'P' as osm_type,"; + $sSQL .= " (SELECT osm_id from placex p WHERE p.place_id = lp.parent_place_id) as osm_id,"; + $sSQL .= " 'place' as class, 'postcode' as type,"; + $sSQL .= " null as admin_level, rank_search, rank_address,"; + $sSQL .= " place_id, parent_place_id, country_code,"; + $sSQL .= " get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,"; + $sSQL .= " postcode as placename,"; + $sSQL .= " postcode as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null AS names,"; + $sSQL .= " ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,"; + $sSQL .= $sImportanceSQLGeom."(0.75-(rank_search::float/40)) AS importance, "; + $sSQL .= " ("; + $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; + $sSQL .= " FROM "; + $sSQL .= " place_addressline s, "; + $sSQL .= " placex p"; + $sSQL .= " WHERE s.place_id = lp.parent_place_id"; + $sSQL .= " AND p.place_id = s.address_place_id "; + $sSQL .= " AND s.isaddress"; + $sSQL .= " AND p.importance is not null"; + $sSQL .= " ) AS addressimportance, "; + $sSQL .= " null AS extra_place "; + $sSQL .= "FROM location_postcode lp"; + $sSQL .= " WHERE place_id in ($sPlaceIDs) "; + if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { // only Tiger housenumbers and interpolation lines need to be interpolated, because they are saved as lines // with start- and endnumber, the common osm housenumbers are usually saved as points @@ -666,12 +714,12 @@ class Geocode Score how good the search is so they can be ordered */ - foreach ($aPhrases as $iPhrase => $sPhrase) { + foreach ($aPhrases as $iPhrase => $aPhrase) { $aNewPhraseSearches = array(); if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; else $sPhraseType = ''; - foreach ($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) { + foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) { // Too many permutations - too expensive if ($iWordSet > 120) break; @@ -701,42 +749,29 @@ class Geocode } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - } elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) { - if ($aSearch['oNear'] === false) { - $aSearch['oNear'] = new NearPoint( - $aSearchTerm['lat'], - $aSearchTerm['lon'], - $aSearchTerm['radius'] - ); - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } elseif ($sPhraseType == 'postalcode') { + } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - // If we already have a name try putting the postcode first - if (sizeof($aSearch['aName'])) { + if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' && + isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { $aNewSearch = $aSearch; + $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'] = array(); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; } - if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } else { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } else { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. + if ($aSearch['sOperator'] !== 'postcode' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']))) { + $aSearch['sPostcode'] = $aSearchTerm['word']; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') { - if ($aSearch['sHouseNumber'] === '') { + if ($aSearch['sHouseNumber'] === '' && $aSearch['sOperator'] !== 'postcode') { $aSearch['sHouseNumber'] = $sToken; // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty @@ -919,13 +954,7 @@ class Geocode { if (!$this->sQuery && !$this->aStructuredQuery) return array(); - $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); - if ($oNormalizer !== null) { - $sNormQuery = $oNormalizer->transliterate($this->sQuery); - } else { - $sNormQuery = null; - } - + $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; $sCountryCodesSQL = false; if ($this->aCountryCodes) { @@ -985,6 +1014,7 @@ class Geocode 'sClass' => '', 'sType' => '', 'sHouseNumber' => '', + 'sPostcode' => '', 'oNear' => $oNearPoint ) ); @@ -1109,21 +1139,9 @@ class Geocode } if (CONST_Debug) var_Dump($aPhrases, $aValidTokens); - // Try and calculate GB postcodes we might be missing + // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code foreach ($aTokens as $sToken) { - // Source of gb postcodes is now definitive - always use - if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) { - if (substr($aData[1], -2, 1) != ' ') { - $aData[0] = substr($aData[0], 0, strlen($aData[1])-1).' '.substr($aData[0], strlen($aData[1])-1); - $aData[1] = substr($aData[1], 0, -1).' '.substr($aData[1], -1, 1); - } - $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB); - if ($aGBPostcodeLocation) { - $aValidTokens[$sToken] = $aGBPostcodeLocation; - } - } elseif (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { - // US ZIP+4 codes - if there is no token, - // merge in the 5-digit ZIP code + if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { if (isset($aValidTokens[$aData[1]])) { foreach ($aValidTokens[$aData[1]] as $aToken) { if (!$aToken['class']) { @@ -1190,7 +1208,6 @@ class Geocode ksort($aGroupedSearches); } - if (CONST_Debug) var_Dump($aGroupedSearches); if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) { $aCopyGroupedSearches = $aGroupedSearches; foreach ($aCopyGroupedSearches as $iGroup => $aSearches) { @@ -1247,13 +1264,16 @@ class Geocode if (CONST_Debug) echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens); + if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) { + continue; + } + // No location term? if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) { if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) { // Just looking for a country by code - look it up if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4"; - if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if ($bBoundingBoxSearch) $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)"; $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; @@ -1312,6 +1332,24 @@ class Geocode // If a coordinate is given, the search must either // be for a name or a special search. Ignore everythin else. $aPlaceIDs = array(); + } elseif ($aSearch['sOperator'] == 'postcode') { + $sSQL = "SELECT p.place_id FROM location_postcode p "; + if (sizeof($aSearch['aAddress'])) { + $sSQL .= ", search_name s "; + $sSQL .= "WHERE s.place_id = p.parent_place_id "; + $sSQL .= "AND array_cat(s.nameaddress_vector, s.name_vector) @> ARRAY[".join($aSearch['aAddress'], ",")."] AND "; + } else { + $sSQL .= " WHERE "; + } + $sSQL .= "p.postcode = '".pg_escape_string(reset($aSearch['aName']))."'"; + if ($aSearch['sCountryCode']) { + $sSQL .= " AND p.country_code = '".$aSearch['sCountryCode']."'"; + } elseif ($sCountryCodesSQL) { + $sSQL .= " AND p.country_code in ($sCountryCodesSQL)"; + } + $sSQL .= " LIMIT $this->iLimit"; + if (CONST_Debug) var_dump($sSQL); + $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); } else { $aPlaceIDs = array(); @@ -1377,6 +1415,12 @@ class Geocode $aTerms[] = $aSearch['oNear']->withinSQL('centroid'); $aOrder[] = $aSearch['oNear']->distanceSQL('centroid'); + } elseif ($aSearch['sPostcode']) { + if (!sizeof($aSearch['aAddress'])) { + $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))"; + } else { + $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."')"; + } } if (sizeof($this->aExcludePlaceIDs)) { $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")"; @@ -1665,6 +1709,21 @@ class Geocode var_Dump($aPlaceIDs); } + if (sizeof($aPlaceIDs) && $aSearch['sPostcode']) { + $sSQL = 'SELECT place_id FROM placex'; + $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; + $sSQL .= " AND postcode = '".pg_escape_string($aSearch['sPostcode'])."'"; + if (CONST_Debug) var_dump($sSQL); + $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL)); + if ($aFilteredPlaceIDs) { + $aPlaceIDs = $aFilteredPlaceIDs; + if (CONST_Debug) { + echo "
Place IDs after postcode filtering: "; + var_Dump($aPlaceIDs); + } + } + } + foreach ($aPlaceIDs as $iPlaceID) { // array for placeID => -1 | Tiger housenumber $aResultPlaceIDs[$iPlaceID] = $searchedHousenumber; diff --git a/lib/lib.php b/lib/lib.php index cb599ae8..8f353a4d 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -116,25 +116,6 @@ function getTokensFromSets($aSets) } -function gbPostcodeCalculate($sPostcode, $sPostcodeSector, $sPostcodeEnd, &$oDB) -{ - // Try an exact match on the gb_postcode table - $sSQL = 'select \'AA\', ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from gb_postcode where postcode = \''.$sPostcode.'\''; - $aNearPostcodes = chksql($oDB->getAll($sSQL)); - - if (sizeof($aNearPostcodes)) { - $aPostcodes = array(); - foreach ($aNearPostcodes as $aPostcode) { - $aPostcodes[] = array('lat' => $aPostcode['lat'], 'lon' => $aPostcode['lon'], 'radius' => 0.005); - } - - return $aPostcodes; - } - - return false; -} - - function getClassTypes() { return array( @@ -516,7 +497,7 @@ function _debugDumpGroupedSearches($aData, $aTokens) echo ""; echo ""; echo ""; - echo ""; + echo ""; echo ""; foreach ($aData as $iRank => $aRankedSet) { foreach ($aRankedSet as $aRow) { @@ -561,6 +542,7 @@ function _debugDumpGroupedSearches($aData, $aTokens) echo ""; echo ""; + echo ""; echo ""; echo ""; diff --git a/lib/template/details-html.php b/lib/template/details-html.php index 46b6a50c..76818395 100644 --- a/lib/template/details-html.php +++ b/lib/template/details-html.php @@ -124,6 +124,8 @@ kv('Wikipedia Calculated' , wikipediaLink($aPointDetails) ); } + kv('Computed Postcode', $aPointDetails['postcode']); + kv('Address Tags' , hash_to_subtable($aPointDetails['aAddressTags']) ); kv('Extra Tags' , hash_to_subtable($aPointDetails['aExtraTags']) ); ?> diff --git a/sql/functions.sql b/sql/functions.sql index 930fa561..a9bca542 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -83,6 +83,26 @@ END; $$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + lookup_word TEXT; + return_word_id INTEGER; +BEGIN + lookup_word := upper(trim(postcode)); + lookup_token := ' ' || make_standard_name(lookup_word); + SELECT min(word_id) FROM word WHERE word_token = lookup_token and class='place' and type='postcode' into return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word, 'place', 'postcode', null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT, lookup_country_code varchar(2)) RETURNS INTEGER AS $$ @@ -236,6 +256,99 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION get_postcode_rank(country_code VARCHAR(2), postcode TEXT, + OUT rank_search SMALLINT, OUT rank_address SMALLINT) +AS $$ +DECLARE + part TEXT; +BEGIN + rank_search := 30; + rank_address := 30; + postcode := upper(postcode); + + IF country_code = 'gb' THEN + IF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN + rank_search := 25; + rank_address := 5; + ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN + rank_search := 23; + rank_address := 5; + ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN + rank_search := 21; + rank_address := 5; + END IF; + + ELSEIF country_code = 'sg' THEN + IF postcode ~ '^([0-9]{6})$' THEN + rank_search := 25; + rank_address := 11; + END IF; + + ELSEIF country_code = 'de' THEN + IF postcode ~ '^([0-9]{5})$' THEN + rank_search := 21; + rank_address := 11; + END IF; + + ELSE + -- Guess at the postcode format and coverage (!) + IF postcode ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local + rank_search := 21; + rank_address := 11; + ELSE + -- Does it look splitable into and area and local code? + part := substring(postcode from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); + + IF part IS NOT NULL THEN + rank_search := 25; + rank_address := 11; + ELSEIF postcode ~ '^[- :A-Z0-9]{6,}$' THEN + rank_search := 21; + rank_address := 11; + END IF; + END IF; + END IF; + +END; +$$ +LANGUAGE plpgsql IMMUTABLE; + +-- Find the nearest artificial postcode for the given geometry. +-- TODO For areas there should not be more than two inside the geometry. +CREATE OR REPLACE FUNCTION get_nearest_postcode(country VARCHAR(2), geom GEOMETRY) RETURNS TEXT + AS $$ +DECLARE + outcode TEXT; + cnt INTEGER; +BEGIN + -- If the geometry is an area then only one postcode must be within + -- that area, otherwise consider the area as not having a postcode. + IF ST_GeometryType(geom) in ('ST_Polygon','ST_MultiPolygon') THEN + SELECT min(postcode), count(*) FROM + (SELECT postcode FROM location_postcode + WHERE ST_Contains(geom, location_postcode.geometry) LIMIT 2) sub + INTO outcode, cnt; + + IF cnt = 1 THEN + RETURN outcode; + ELSE + RETURN null; + END IF; + END IF; + + SELECT postcode FROM location_postcode + WHERE ST_DWithin(geom, location_postcode.geometry, 0.05) + AND location_postcode.country_code = country + ORDER BY ST_Distance(geom, location_postcode.geometry) LIMIT 1 + INTO outcode; + + RETURN outcode; +END; +$$ +LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION create_country(src HSTORE, lookup_country_code varchar(2)) RETURNS VOID AS $$ DECLARE @@ -515,36 +628,38 @@ CREATE OR REPLACE FUNCTION add_location( keywords INTEGER[], rank_search INTEGER, rank_address INTEGER, + in_postcode TEXT, geometry GEOMETRY ) RETURNS BOOLEAN AS $$ DECLARE locationid INTEGER; - isarea BOOLEAN; centroid GEOMETRY; diameter FLOAT; x BOOLEAN; splitGeom RECORD; secgeo GEOMETRY; + postcode TEXT; BEGIN IF rank_search > 25 THEN RAISE EXCEPTION 'Adding location with rank > 25 (% rank %)', place_id, rank_search; END IF; --- RAISE WARNING 'Adding location with rank > 25 (% rank %)', place_id, rank_search; - x := deleteLocationArea(partition, place_id, rank_search); - isarea := false; - IF (ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(geometry)) THEN + -- add postcode only if it contains a single entry, i.e. ignore postcode lists + postcode := NULL; + IF in_postcode is not null AND in_postcode not similar to '%(,|;)%' THEN + postcode := upper(trim (in_postcode)); + END IF; - isArea := true; + IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') THEN centroid := ST_Centroid(geometry); FOR secgeo IN select split_geometry(geometry) AS geom LOOP - x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, centroid, secgeo); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, postcode, centroid, secgeo); END LOOP; ELSE @@ -569,7 +684,7 @@ BEGIN -- RAISE WARNING 'adding % diameter %', place_id, diameter; secgeo := ST_Buffer(geometry, diameter); - x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, ST_Centroid(geometry), secgeo); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, postcode, ST_Centroid(geometry), secgeo); END IF; @@ -722,54 +837,10 @@ BEGIN RETURN NULL; END IF; - NEW.postcode := NEW.address->'postcode'; - NEW.name := hstore('ref', NEW.postcode); + NEW.name := hstore('ref', NEW.address->'postcode'); - IF NEW.country_code = 'gb' THEN - - IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN - NEW.rank_search := 25; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN - NEW.rank_search := 23; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN - NEW.rank_search := 21; - NEW.rank_address := 5; - END IF; - - ELSEIF NEW.country_code = 'sg' THEN - - IF NEW.postcode ~ '^([0-9]{6})$' THEN - NEW.rank_search := 25; - NEW.rank_address := 11; - END IF; - - ELSEIF NEW.country_code = 'de' THEN - - IF NEW.postcode ~ '^([0-9]{5})$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - - ELSE - -- Guess at the postcode format and coverage (!) - IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local - NEW.rank_search := 21; - NEW.rank_address := 11; - ELSE - -- Does it look splitable into and area and local code? - postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); - - IF postcode IS NOT NULL THEN - NEW.rank_search := 25; - NEW.rank_address := 11; - ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - END IF; - END IF; + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode') + INTO NEW.rank_search, NEW.rank_address; ELSEIF NEW.class = 'place' THEN IF NEW.type in ('continent') THEN @@ -988,8 +1059,8 @@ DECLARE linegeo GEOMETRY; splitline GEOMETRY; sectiongeo GEOMETRY; + interpol_postcode TEXT; postcode TEXT; - seg_postcode TEXT; BEGIN -- deferred delete IF OLD.indexed_status = 100 THEN @@ -1008,9 +1079,11 @@ BEGIN NEW.address->'place', NEW.partition, place_centroid, NEW.linegeo); - - IF NEW.address is not NULL and NEW.address ? 'postcode' THEN - NEW.postcode = NEW.address->'postcode'; + IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN + interpol_postcode := NEW.address->'postcode'; + housenum := getorcreate_postcode_id(NEW.address->'postcode'); + ELSE + interpol_postcode := NULL; END IF; -- if the line was newly inserted, split the line as necessary @@ -1023,7 +1096,6 @@ BEGIN linegeo := NEW.linegeo; startnumber := NULL; - postcode := NEW.postcode; FOR nodeidpos in 1..array_upper(waynodes, 1) LOOP @@ -1056,15 +1128,24 @@ BEGIN sectiongeo := ST_Reverse(sectiongeo); END IF; - seg_postcode := coalesce(postcode, - prevnode.address->'postcode', - nextnode.address->'postcode'); + -- determine postcode + postcode := coalesce(interpol_postcode, + prevnode.address->'postcode', + nextnode.address->'postcode', + postcode); + + IF postcode is NULL THEN + SELECT placex.postcode FROM placex WHERE place_id = NEW.parent_place_id INTO postcode; + END IF; + IF postcode is NULL THEN + postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry); + END IF; IF NEW.startnumber IS NULL THEN NEW.startnumber := startnumber; NEW.endnumber := endnumber; NEW.linegeo := sectiongeo; - NEW.postcode := seg_postcode; + NEW.postcode := upper(trim(postcode)); ELSE insert into location_property_osmline (linegeo, partition, osm_id, parent_place_id, @@ -1073,7 +1154,7 @@ BEGIN geometry_sector, indexed_status) values (sectiongeo, NEW.partition, NEW.osm_id, NEW.parent_place_id, startnumber, endnumber, NEW.interpolationtype, - NEW.address, seg_postcode, + NEW.address, postcode, NEW.country_code, NEW.geometry_sector, 0); END IF; END IF; @@ -1097,7 +1178,42 @@ END; $$ LANGUAGE plpgsql; +-- Trigger for updates of location_postcode +-- +-- Computes the parent object the postcode most likely refers to. +-- This will be the place that determines the address displayed when +-- searching for this postcode. +CREATE OR REPLACE FUNCTION postcode_update() RETURNS +TRIGGER + AS $$ +DECLARE + partition SMALLINT; + location RECORD; +BEGIN + IF NEW.indexed_status != 0 OR OLD.indexed_status = 0 THEN + RETURN NEW; + END IF; + NEW.indexed_date = now(); + + partition := get_partition(NEW.country_code); + + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.postcode) + INTO NEW.rank_search, NEW.rank_address; + + NEW.parent_place_id = 0; + FOR location IN + SELECT place_id + FROM getNearFeatures(partition, NEW.geometry, NEW.rank_search, '{}'::int[]) + WHERE NOT isguess ORDER BY rank_address DESC LIMIT 1 + LOOP + NEW.parent_place_id = location.place_id; + END LOOP; + + RETURN NEW; +END; +$$ +LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION placex_update() RETURNS TRIGGER @@ -1161,11 +1277,6 @@ BEGIN --DEBUG: RAISE WARNING 'placex_update % % (%)',NEW.osm_type,NEW.osm_id,NEW.place_id; - IF NEW.class = 'place' AND NEW.type = 'postcodearea' THEN - -- Silently do nothing - RETURN NEW; - END IF; - NEW.indexed_date = now(); result := deleteSearchName(NEW.partition, NEW.place_id); @@ -1202,13 +1313,16 @@ BEGIN addr_street = NEW.address->'street'; addr_place = NEW.address->'place'; - NEW.postcode = NEW.address->'postcode'; + IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(,|;)%' THEN + i := getorcreate_postcode_id(NEW.address->'postcode'); + END IF; END IF; -- Speed up searches - just use the centroid of the feature -- cheaper but less acurate place_centroid := ST_PointOnSurface(NEW.geometry); NEW.centroid := null; + NEW.postcode := null; --DEBUG: RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(place_centroid); -- recalculate country and partition @@ -1445,25 +1559,16 @@ BEGIN NEW.country_code := location.country_code; --DEBUG: RAISE WARNING 'Got parent details from search name'; - -- Merge the postcode into the parent's address if necessary - IF NEW.postcode IS NOT NULL THEN - --DEBUG: RAISE WARNING 'Merging postcode into parent'; - isin_tokens := '{}'::int[]; - address_street_word_id := getorcreate_word_id(make_standard_name(NEW.postcode)); - IF address_street_word_id is not null - and not ARRAY[address_street_word_id] <@ location.nameaddress_vector THEN - isin_tokens := isin_tokens || address_street_word_id; - END IF; - address_street_word_id := getorcreate_name_id(make_standard_name(NEW.postcode)); - IF address_street_word_id is not null - and not ARRAY[address_street_word_id] <@ location.nameaddress_vector THEN - isin_tokens := isin_tokens || address_street_word_id; - END IF; - IF isin_tokens != '{}'::int[] THEN - UPDATE search_name - SET nameaddress_vector = search_name.nameaddress_vector || isin_tokens - WHERE place_id = NEW.parent_place_id; - END IF; + -- determine postcode + IF NEW.rank_search > 4 THEN + IF NEW.address is not null AND NEW.address ? 'postcode' THEN + NEW.postcode = upper(trim(NEW.address->'postcode')); + ELSE + SELECT postcode FROM placex WHERE place_id = NEW.parent_place_id INTO NEW.postcode; + END IF; + IF NEW.postcode is null THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid); + END IF; END IF; -- If there is no name it isn't searchable, don't bother to create a search record @@ -1481,7 +1586,7 @@ BEGIN -- Just be happy with inheriting from parent road only IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); --DEBUG: RAISE WARNING 'Place added to location table'; END IF; @@ -1668,6 +1773,7 @@ BEGIN isin := avals(NEW.address); IF array_upper(isin, 1) IS NOT NULL THEN FOR i IN 1..array_upper(isin, 1) LOOP + -- TODO further split terms with comma and semicolon address_street_word_id := get_name_id(make_standard_name(isin[i])); IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); @@ -1682,26 +1788,6 @@ BEGIN END LOOP; END IF; END IF; - --DEBUG: RAISE WARNING '"address:* tokens collected'; - IF NEW.postcode IS NOT NULL THEN - isin := regexp_split_to_array(NEW.postcode, E'[;,]'); - IF array_upper(isin, 1) IS NOT NULL THEN - FOR i IN 1..array_upper(isin, 1) LOOP - address_street_word_id := get_name_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); - isin_tokens := isin_tokens || address_street_word_id; - END IF; - - -- merge into address vector - address_street_word_id := get_word_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); - END IF; - END LOOP; - END IF; - END IF; - --DEBUG: RAISE WARNING 'postcode tokens collected'; -- %NOTIGERDATA% IF 0 THEN -- for the USA we have an additional address table. Merge in zip codes from there too @@ -1773,6 +1859,11 @@ BEGIN VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address); IF location_isaddress THEN + -- add postcode if we have one + -- (If multiple postcodes are available, we end up with the highest ranking one.) + IF location.postcode is not null THEN + NEW.postcode = location.postcode; + END IF; address_havelevel[location.rank_address] := true; IF NOT location.isguess THEN @@ -1807,6 +1898,11 @@ BEGIN nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); + IF NEW.postcode is null AND location.postcode is not null + AND NOT address_havelevel[location.rank_address] THEN + NEW.postcode := location.postcode; + END IF; + address_havelevel[location.rank_address] := true; IF location.rank_address > parent_place_id_rank THEN @@ -1849,11 +1945,20 @@ BEGIN END IF; --DEBUG: RAISE WARNING 'search terms for long ways added'; + IF NEW.address is not null AND NEW.address ? 'postcode' + AND NEW.address->'postcode' not similar to '%(,|;)%' THEN + NEW.postcode := upper(trim(NEW.address->'postcode')); + END IF; + + IF NEW.postcode is null AND NEW.rank_search > 8 THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); + END IF; + -- if we have a name add this to the name search table IF NEW.name IS NOT NULL THEN IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); --DEBUG: RAISE WARNING 'added to location (full)'; END IF; @@ -2250,46 +2355,6 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; - -CREATE OR REPLACE FUNCTION get_address_postcode(for_place_id BIGINT) RETURNS TEXT - AS $$ -DECLARE - result TEXT[]; - search TEXT[]; - for_postcode TEXT; - found INTEGER; - location RECORD; -BEGIN - - found := 1000; - search := ARRAY['ref']; - result := '{}'; - - select postcode from placex where place_id = for_place_id limit 1 into for_postcode; - - FOR location IN - select rank_address,name,distance,length(name::text) as namelength - from place_addressline join placex on (address_place_id = placex.place_id) - where place_addressline.place_id = for_place_id and rank_address in (5,11) - order by rank_address desc,rank_search desc,fromarea desc,distance asc,namelength desc - LOOP - IF array_upper(search, 1) IS NOT NULL AND array_upper(location.name, 1) IS NOT NULL THEN - FOR j IN 1..array_upper(search, 1) LOOP - FOR k IN 1..array_upper(location.name, 1) LOOP - IF (found > location.rank_address AND location.name[k].key = search[j] AND location.name[k].value != '') AND NOT result @> ARRAY[trim(location.name[k].value)] AND (for_postcode IS NULL OR location.name[k].value ilike for_postcode||'%') THEN - result[(100 - location.rank_address)] := trim(location.name[k].value); - found := location.rank_address; - END IF; - END LOOP; - END LOOP; - END IF; - END LOOP; - - RETURN array_to_string(result,', '); -END; -$$ -LANGUAGE plpgsql; - --housenumber only needed for tiger data CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT, housenumber INTEGER, languagepref TEXT[]) RETURNS TEXT AS $$ @@ -2373,11 +2438,19 @@ BEGIN -- %NOAUXDATA% IF 0 THEN IF for_place_id IS NULL THEN select parent_place_id,'us', housenumber, 30, postcode, null, 'place', 'house' from location_property_aux - WHERE place_id = in_place_id + WHERE place_id = in_place_id INTO for_place_id,searchcountrycode, searchhousenumber, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; END IF; -- %NOAUXDATA% END IF; + -- postcode table + IF for_place_id IS NULL THEN + select parent_place_id, country_code, rank_address, postcode, 'place', 'postcode' + FROM location_postcode + WHERE place_id = in_place_id + INTO for_place_id, searchcountrycode, searchrankaddress, searchpostcode, searchclass, searchtype; + END IF; + IF for_place_id IS NULL THEN select parent_place_id, country_code, housenumber, rank_search, postcode, name, class, type from placex WHERE place_id = in_place_id and rank_search > 27 @@ -2396,9 +2469,8 @@ BEGIN found := 1000; hadcountry := false; FOR location IN - select placex.place_id, osm_type, osm_id, - CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name, - class, type, admin_level, true as fromarea, true as isaddress, + select placex.place_id, osm_type, osm_id, name, + class, type, admin_level, true as isaddress, CASE WHEN rank_address = 0 THEN 100 WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address, 0 as distance, country_code, postcode from placex @@ -2408,13 +2480,9 @@ BEGIN IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN searchcountrycode := location.country_code; END IF; - IF searchpostcode IS NOT NULL and location.type = 'postcode' THEN + IF location.type in ('postcode', 'postal_code') THEN location.isaddress := FALSE; - END IF; - IF searchpostcode IS NULL and location.postcode IS NOT NULL THEN - searchpostcode := location.postcode; - END IF; - IF location.rank_address = 4 AND location.isaddress THEN + ELSEIF location.rank_address = 4 THEN hadcountry := true; END IF; IF location.rank_address < 4 AND NOT hadcountry THEN @@ -2425,15 +2493,14 @@ BEGIN END IF; END IF; countrylocation := ROW(location.place_id, location.osm_type, location.osm_id, location.name, location.class, - location.type, location.admin_level, location.fromarea, location.isaddress, location.rank_address, + location.type, location.admin_level, true, location.isaddress, location.rank_address, location.distance)::addressline; RETURN NEXT countrylocation; found := location.rank_address; END LOOP; FOR location IN - select placex.place_id, osm_type, osm_id, - CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name, + select placex.place_id, osm_type, osm_id, name, CASE WHEN extratags ? 'place' THEN 'place' ELSE class END as class, CASE WHEN extratags ? 'place' THEN extratags->'place' ELSE type END as type, admin_level, fromarea, isaddress, @@ -2450,12 +2517,9 @@ BEGIN IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN searchcountrycode := location.country_code; END IF; - IF searchpostcode IS NOT NULL and location.type = 'postcode' THEN + IF location.type in ('postcode', 'postal_code') THEN location.isaddress := FALSE; END IF; - IF searchpostcode IS NULL and location.isaddress and location.type != 'postcode' and location.postcode IS NOT NULL and location.postcode not similar to '%(,|;)%' THEN - searchpostcode := location.postcode; - END IF; IF location.rank_address = 4 AND location.isaddress THEN hadcountry := true; END IF; @@ -2489,7 +2553,6 @@ BEGIN IF searchhousename IS NOT NULL THEN location := ROW(in_place_id, null, null, searchhousename, searchclass, searchtype, null, true, true, 29, 0)::addressline; --- location := ROW(in_place_id, null, null, searchhousename, 'place', 'house_name', null, true, true, 29, 0)::addressline; RETURN NEXT location; END IF; @@ -2635,9 +2698,7 @@ BEGIN IF out_postcode IS NULL THEN SELECT postcode from placex where place_id = out_parent_place_id INTO out_postcode; END IF; - IF out_postcode IS NULL THEN - out_postcode := getNearestPostcode(out_partition, place_centroid); - END IF; + -- XXX look into postcode table newpoints := 0; insert into location_property_aux (place_id, partition, parent_place_id, housenumber, postcode, centroid) diff --git a/sql/indices.src.sql b/sql/indices.src.sql index bbd5a76a..cf5c4bc1 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -25,4 +25,5 @@ DROP INDEX IF EXISTS place_id_idx; CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index}; -CREATE INDEX idx_gb_postcode_postcode ON gb_postcode USING BTREE (postcode) {ts:search-index}; +CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index}; +CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index}; diff --git a/sql/loaddata.sql b/sql/loaddata.sql deleted file mode 100644 index a3c8914a..00000000 --- a/sql/loaddata.sql +++ /dev/null @@ -1,16 +0,0 @@ -TRUNCATE placex; -TRUNCATE search_name; -TRUNCATE place_addressline; -TRUNCATE location_area; - -DROP SEQUENCE seq_place; -CREATE SEQUENCE seq_place start 100000; - -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'N'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'W'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'R'; - ---select count(*) from (select create_interpolation(osm_id, housenumber) from placex where indexed=false and class='place' and type='houses') as x; diff --git a/sql/partition-functions.src.sql b/sql/partition-functions.src.sql index 7d4f7cbe..110dd0cd 100644 --- a/sql/partition-functions.src.sql +++ b/sql/partition-functions.src.sql @@ -6,12 +6,12 @@ BEGIN -- start IF in_partition = -partition- THEN FOR r IN - SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, centroid FROM ( + SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid FROM ( SELECT * FROM location_area_large_-partition- WHERE ST_Intersects(geometry, feature) and rank_search < maxrank UNION ALL SELECT * FROM location_area_country WHERE ST_Intersects(geometry, feature) and rank_search < maxrank ) as location_area - GROUP BY place_id, keywords, rank_address, rank_search, isguess, centroid + GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid ORDER BY rank_address, isin_tokens && keywords desc, isguess asc, ST_Distance(feature, centroid) * CASE @@ -55,8 +55,8 @@ $$ LANGUAGE plpgsql; create or replace function insertLocationAreaLarge( - in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_keywords INTEGER[], - in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, + in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_keywords INTEGER[], + in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, postcode TEXT, in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -72,8 +72,8 @@ BEGIN -- start IF in_partition = -partition- THEN - INSERT INTO location_area_large_-partition- (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, centroid, geometry) - values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); + INSERT INTO location_area_large_-partition- (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, postcode, centroid, geometry) + values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, postcode, in_centroid, in_geometry); RETURN TRUE; END IF; -- end @@ -173,29 +173,6 @@ $$ LANGUAGE plpgsql; -create or replace function getNearestPostcode(in_partition INTEGER, point GEOMETRY) - RETURNS TEXT AS $$ -DECLARE - out_postcode TEXT; -BEGIN - --- start - IF in_partition = -partition- THEN - SELECT postcode - FROM location_area_large_-partition- join placex using (place_id) - WHERE st_contains(location_area_large_-partition-.geometry, point) - AND class = 'place' and type = 'postcode' - ORDER BY st_distance(location_area_large_-partition-.centroid, point) ASC limit 1 - INTO out_postcode; - RETURN out_postcode; - END IF; --- end - - RAISE EXCEPTION 'Unknown partition %', in_partition; -END -$$ -LANGUAGE plpgsql; - create or replace function insertSearchName( in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_name_vector INTEGER[], in_nameaddress_vector INTEGER[], diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index 6f799bcf..d8f02e10 100644 --- a/sql/partition-tables.src.sql +++ b/sql/partition-tables.src.sql @@ -21,6 +21,7 @@ create type nearfeaturecentr as ( rank_search smallint, distance float, isguess boolean, + postcode TEXT, centroid GEOMETRY ); @@ -29,9 +30,9 @@ CREATE TABLE search_name_blank ( place_id BIGINT, search_rank smallint, address_rank smallint, - name_vector integer[] + name_vector integer[], + centroid GEOMETRY(Geometry, 4326) ); -SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_country () INHERITS (location_area_large) {ts:address-data}; @@ -54,9 +55,9 @@ CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- CREATE TABLE location_road_-partition- ( place_id BIGINT, partition SMALLINT, - country_code VARCHAR(2) + country_code VARCHAR(2), + geometry GEOMETRY(Geometry, 4326) ) {ts:address-data}; -SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) {ts:address-index}; CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) {ts:address-index}; diff --git a/sql/tables.sql b/sql/tables.sql index f3217d5a..4a22a814 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -61,10 +61,11 @@ CREATE TABLE location_area ( rank_search SMALLINT NOT NULL, rank_address SMALLINT NOT NULL, country_code VARCHAR(2), - isguess BOOL + isguess BOOL, + postcode TEXT, + centroid GEOMETRY(Point, 4326), + geometry GEOMETRY(Geometry, 4326) ); -SELECT AddGeometryColumn('location_area', 'centroid', 4326, 'POINT', 2); -SELECT AddGeometryColumn('location_area', 'geometry', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_large () INHERITS (location_area); @@ -74,9 +75,9 @@ CREATE TABLE location_property ( parent_place_id BIGINT, partition SMALLINT, housenumber TEXT, - postcode TEXT + postcode TEXT, + centroid GEOMETRY(Point, 4326) ); -SELECT AddGeometryColumn('location_property', 'centroid', 4326, 'POINT', 2); CREATE TABLE location_property_aux () INHERITS (location_property); CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); @@ -125,9 +126,9 @@ CREATE TABLE search_name ( address_rank SMALLINT, name_vector integer[], nameaddress_vector integer[], - country_code varchar(2) + country_code varchar(2), + centroid GEOMETRY(Geometry, 4326) ) {ts:search-data}; -SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2); CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) {ts:search-index}; drop table IF EXISTS place_addressline; @@ -157,9 +158,9 @@ CREATE TABLE placex ( wikipedia TEXT, -- calculated wikipedia article name (language:title) country_code varchar(2), housenumber TEXT, - postcode TEXT + postcode TEXT, + centroid GEOMETRY(Geometry, 4326) ) {ts:search-data}; -SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {ts:search-index}; CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {ts:search-index}; CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {ts:address-index} WHERE linked_place_id IS NOT NULL; @@ -197,8 +198,24 @@ CREATE TRIGGER place_before_delete BEFORE DELETE ON place CREATE TRIGGER place_before_insert BEFORE INSERT ON place FOR EACH ROW EXECUTE PROCEDURE place_insert(); -DROP SEQUENCE IF EXISTS seq_postcodes; -CREATE SEQUENCE seq_postcodes start 1; +-- Table for synthetic postcodes. +DROP TABLE IF EXISTS location_postcode; +CREATE TABLE location_postcode ( + place_id BIGINT, + parent_place_id BIGINT, + rank_search SMALLINT, + rank_address SMALLINT, + indexed_status SMALLINT, + indexed_date TIMESTAMP, + country_code varchar(2), + postcode TEXT, + geometry GEOMETRY(Geometry, 4326) + ); +CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry) {ts:address-index}; +GRANT SELECT ON location_postcode TO "{www-user}" ; + +CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcode + FOR EACH ROW EXECUTE PROCEDURE postcode_update(); DROP TABLE IF EXISTS import_polygon_error; CREATE TABLE import_polygon_error ( @@ -209,10 +226,10 @@ CREATE TABLE import_polygon_error ( name HSTORE, country_code varchar(2), updated timestamp, - errormessage text + errormessage text, + prevgeometry GEOMETRY(Geometry, 4326), + newgeometry GEOMETRY(Geometry, 4326) ); -SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); -SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); GRANT SELECT ON import_polygon_error TO "{www-user}"; diff --git a/test/bdd/api/search/params.feature b/test/bdd/api/search/params.feature index d1cff1cc..bfc7cb15 100644 --- a/test/bdd/api/search/params.feature +++ b/test/bdd/api/search/params.feature @@ -66,7 +66,7 @@ Feature: Search queries Then there are duplicates Scenario: Search with bounded viewbox in right area - When sending json search query "restaurant" with address + When sending json search query "bar" with address | bounded | viewbox | | 1 | -56.16786,-34.84061,-56.12525,-34.86526 | Then result addresses contain @@ -74,7 +74,7 @@ Feature: Search queries | Montevideo | Scenario: Search with bounded viewboxlbrt in right area - When sending json search query "restaurant" with address + When sending json search query "bar" with address | bounded | viewboxlbrt | | 1 | -56.16786,-34.86526,-56.12525,-34.84061 | Then result addresses contain diff --git a/test/bdd/db/import/placex.feature b/test/bdd/db/import/placex.feature index f124e482..f3695642 100644 --- a/test/bdd/db/import/placex.feature +++ b/test/bdd/db/import/placex.feature @@ -79,13 +79,11 @@ Feature: Import into placex | osm | class | type | postcode | geometry | | N1 | place | postcode | EA452CD | country:gb | | N2 | place | postcode | E45 23 | country:gb | - | N3 | place | postcode | y45 | country:gb | When importing Then placex contains | object | country_code | rank_search | rank_address | | N1 | gb | 30 | 30 | | N2 | gb | 30 | 30 | - | N3 | gb | 30 | 30 | Scenario: search and address rank for DE postcodes correctly assigned Given the places diff --git a/test/bdd/db/import/postcodes.feature b/test/bdd/db/import/postcodes.feature new file mode 100644 index 00000000..be469fe6 --- /dev/null +++ b/test/bdd/db/import/postcodes.feature @@ -0,0 +1,115 @@ +@DB +Feature: Import of postcodes + Tests for postcode estimation + + Scenario: Postcodes on the object are prefered over those on the address + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | addr+postcode | geometry | + | W93 | highway | residential | 112 DE 344 | :w2N | + | W22 | building | yes | 112 DE 344N | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W22 | 112 DE 344N | + | W93 | 112 DE 344 | + | R4 | 112 DE 34 | + | R34 | 112 DE | + | R1 | 112 | + + Scenario: Postcodes from a road are inherited by an attached building + Given the scene admin-areas + And the named places + | osm | class | type | addr+postcode | geometry | + | W93 | highway | residential | 86034 | :w2N | + And the named places + | osm | class | type | geometry | + | W22 | building | yes | :building:w2N | + When importing + Then placex contains + | object | postcode | parent_place_id | + | W22 | 86034 | W93 | + + Scenario: Postcodes from the lowest admin area are inherited by ways + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + When importing + Then placex contains + | object | postcode | + | W93 | 112 DE 34 | + + Scenario: Postcodes from the lowest admin area with postcode are inherited by ways + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + And the named places + | osm | class | type | admin | geometry | + | R4 | boundary | administrative | 10 | :b2:N | + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + When importing + Then placex contains + | object | postcode | parent_place_id | + | W93 | 112 DE | R4 | + + Scenario: Postcodes from the lowest admin area are inherited by buildings + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | geometry | + | W22 | building | yes | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W22 | 112 DE 34 | + + Scenario: Roads get postcodes from nearby buildings without other info + Given the scene admin-areas + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + And the named places + | osm | class | type | addr+postcode | geometry | + | W22 | building | yes | 445023 | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W93 | 445023 | + + @wip + Scenario: Postcodes from admin boundaries are preferred over estimated postcodes + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + And the named places + | osm | class | type | addr+postcode | geometry | + | W22 | building | yes | 445023 | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W93 | 112 DE 34 | diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index 98def330..86bdea9b 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -23,17 +23,3 @@ Feature: Creation of search terms Then search_name contains | object | name_vector | nameaddress_vector | | N1 | foo | the road | - - Scenario: Roads take over the postcode from attached houses - Given the scene roads-with-pois - And the places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St | :p-S1 | - And the places - | osm | class | type | name | geometry | - | W1 | highway | residential | North St | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - diff --git a/test/bdd/db/query/search_simple.feature b/test/bdd/db/query/search_simple.feature index 417df769..409ed44f 100644 --- a/test/bdd/db/query/search_simple.feature +++ b/test/bdd/db/query/search_simple.feature @@ -25,7 +25,9 @@ Feature: Searching of simple objects | osm | class | type | postcode | geometry | | R1 | boundary | postal_code | 54321 | poly-area:1.0 | And searching for "12345" - Then exactly 0 results are returned + Then results contain + | osm_type | + | P | When searching for "54321" Then results contain | ID | osm_type | osm_id | diff --git a/test/bdd/db/update/poi-inherited-postcode.feature b/test/bdd/db/update/poi-inherited-postcode.feature deleted file mode 100644 index 1b2065e6..00000000 --- a/test/bdd/db/update/poi-inherited-postcode.feature +++ /dev/null @@ -1,57 +0,0 @@ -@DB -Feature: Update of POI-inherited poscode - Test updates of postcodes on street which was inherited from a related POI - - Background: Street and house with postcode - Given the scene roads-with-pois - And the places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St |:p-S1 | - And the places - | osm | class | type | name | geometry | - | W1 | highway | residential | North St | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode remains when way type is changed - When updating places - | osm | class | type | name | geometry | - | W1 | highway | unclassified | North St | :w-north | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode remains when way name is changed - When updating places - | osm | class | type | name | geometry | - | W1 | highway | unclassified | South St | :w-north | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode remains when way geometry is changed - When updating places - | osm | class | type | name | geometry | - | W1 | highway | unclassified | South St | :w-south | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode is added when POI postcode changes - When updating places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 54321 | North St |:p-S1 | - Then search_name contains - | object | nameaddress_vector | - | W1 | 54321 | - - Scenario: POI-inherited postcode remains when POI geometry changes - When updating places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St |:p-S2 | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - diff --git a/test/bdd/db/update/search_terms.feature b/test/bdd/db/update/search_terms.feature deleted file mode 100644 index 07dbd451..00000000 --- a/test/bdd/db/update/search_terms.feature +++ /dev/null @@ -1,21 +0,0 @@ -@DB -Feature: Update of search terms - Tests that search_name table is updated correctly - - Scenario: POI-inherited postcode remains when another POI is deleted - Given the scene roads-with-pois - And the places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St |:p-S1 | - | N2 | place | house | 2 | | North St |:p-S2 | - And the places - | osm | class | type | name | geometry | - | W1 | highway | residential | North St | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - When marking for delete N2 - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/db_ops.py index fa8cd0de..be2211fa 100644 --- a/test/bdd/steps/db_ops.py +++ b/test/bdd/steps/db_ops.py @@ -287,7 +287,7 @@ def import_and_index_data_from_place_table(context): WHERE class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'""") context.db.commit() - context.nominatim.run_setup_script('index', 'index-noanalyse') + context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse') @when("updating places") def update_place_table(context): diff --git a/test/scenes/bin/osm2wkt.cc b/test/scenes/bin/osm2wkt.cc index d13a4632..22e74b4d 100644 --- a/test/scenes/bin/osm2wkt.cc +++ b/test/scenes/bin/osm2wkt.cc @@ -7,19 +7,49 @@ #include #include -#include -#include +#include + #include #include #include #include #include +#include #include +#include typedef osmium::index::map::SparseMemArray index_type; typedef osmium::handler::NodeLocationsForWays location_handler_type; +struct AbsoluteIdHandler : public osmium::handler::Handler { + + enum { BASE = 100000000 }; + + void node(osmium::Node& o) { + if (o.id() < 0) + o.set_id(BASE-o.id()); + } + + void way(osmium::Way& o) { + if (o.id() < 0) + o.set_id(BASE-o.id()); + + for (osmium::NodeRef &n: o.nodes()) + if (n.ref() < 0) + n.set_ref(BASE-n.ref()); + } + + void relation(osmium::Relation& o) { + if (o.id() < 0) + o.set_id(BASE-o.id()); + + for (auto &m : o.members()) + if (m.ref() < 0) + m.set_ref(BASE-m.ref()); + } +}; + class ExportToWKTHandler : public osmium::handler::Handler { @@ -33,7 +63,8 @@ public: } void way(const osmium::Way& way) { - if (!way.is_closed() || !way.tags().get_value_by_key("area")) + if (!way.nodes().empty() + && (!way.is_closed() || !way.tags().get_value_by_key("area"))) print_geometry(way.tags(), m_factory.create_linestring(way)); } @@ -48,7 +79,6 @@ public: } private: - void print_geometry(const osmium::TagList& tags, const std::string& wkt) { const char* scenario = tags.get_value_by_key("test:section"); const char* id = tags.get_value_by_key("test:id"); @@ -68,28 +98,42 @@ int main(int argc, char* argv[]) { exit(1); } - std::string input_filename {argv[1]}; + osmium::io::File input_file{argv[1]}; - osmium::area::ProblemReporterException problem_reporter; - osmium::area::Assembler::config_type assembler_config(&problem_reporter); - osmium::area::MultipolygonCollector collector(assembler_config); + // need to sort the data first and make ids absolute + std::cerr << "Read file...\n"; + osmium::io::Reader reader{input_file}; + std::vector changes; + osmium::ObjectPointerCollection objects; + AbsoluteIdHandler abshandler; + while (osmium::memory::Buffer buffer = reader.read()) { + osmium::apply(buffer, abshandler, objects); + changes.push_back(std::move(buffer)); + } + reader.close(); + + std::cerr << "Sort file...\n"; + objects.sort(osmium::object_order_type_id_version()); + + osmium::area::Assembler::config_type assembler_config; + osmium::area::MultipolygonManager mp_manager{assembler_config}; std::cerr << "Pass 1...\n"; - osmium::io::Reader reader1(input_filename, osmium::osm_entity_bits::relation); - collector.read_relations(reader1); - std::cerr << "Pass 1 done\n"; - index_type index_pos; index_type index_neg; location_handler_type location_handler(index_pos, index_neg); + ExportToWKTHandler export_handler; + osmium::apply(objects.begin(), objects.end(), location_handler, + export_handler, mp_manager); + mp_manager.prepare_for_lookup(); + std::cerr << "Pass 1 done\n"; + std::cerr << "Pass 2...\n"; - ExportToWKTHandler export_handler; - osmium::io::Reader reader2(input_filename); - osmium::apply(reader2, location_handler, export_handler, collector.handler([&export_handler](osmium::memory::Buffer&& buffer) { + osmium::apply(objects.cbegin(), objects.cend(), mp_manager.handler([&export_handler](osmium::memory::Buffer&& buffer) { osmium::apply(buffer, export_handler); })); - reader2.close(); + export_handler.close(); std::cerr << "Pass 2 done\n"; } diff --git a/test/scenes/data/admin-areas.wkt b/test/scenes/data/admin-areas.wkt new file mode 100644 index 00000000..af741d99 --- /dev/null +++ b/test/scenes/data/admin-areas.wkt @@ -0,0 +1,19 @@ +c1:N | POINT(73.8419358 60.0763887) +c1:E | POINT(73.8393798 60.0488584) +c0 | POINT(73.8679209 60.0588527) +c2:N | POINT(73.896249 60.0631047) +c2:S | POINT(73.8932671 60.0434346) +c2:E | POINT(73.9162704 60.0471569) +c1:W | POINT(73.8990179 60.055876) +c2:W | POINT(73.8568453 60.0597032) +w2N | LINESTRING(73.8836825 60.0612977,73.8880489 60.0598094,73.8953972 60.0601283,73.9033844 60.058959) +w1W:2W | LINESTRING(73.8523722 60.0497092,73.85791 60.0520485,73.8617439 60.0573645,73.8706896 60.0554508) +building:w2N | LINESTRING(73.8963618 60.0604955,73.8961463 60.0602249,73.8967091 60.0601132,73.8969246 60.0603838,73.8963618 60.0604955) +b0 | MULTIPOLYGON(((73.8012539 60.0573645,73.8225532 60.0371591,73.8493903 60.035457,73.8843212 60.0356698,73.9049815 60.0358825,73.9192521 60.0356698,73.9260679 60.0514105,73.9216633 60.0591056,73.9141402 60.0722448,73.8804873 60.070332,73.8719676 60.0917916,73.8255351 60.0875433,73.8084956 60.0758576,73.8012539 60.0573645))) +b1:N | MULTIPOLYGON(((73.8012539 60.0573645,73.8447045 60.0611915,73.8692843 60.0674706,73.8804873 60.070332,73.8719676 60.0917916,73.8255351 60.0875433,73.8084956 60.0758576,73.8012539 60.0573645))) +b2:S | MULTIPOLYGON(((73.8694117 60.0507725,73.8843212 60.0356698,73.9049815 60.0358825,73.9075368 60.0523758,73.8830432 60.0517295,73.8694117 60.0507725))) +b1:W | MULTIPOLYGON(((73.8012539 60.0573645,73.8225532 60.0371591,73.8493903 60.035457,73.8843212 60.0356698,73.8694117 60.0507725,73.8447045 60.0611915,73.8012539 60.0573645))) +b1:E | MULTIPOLYGON(((73.8447045 60.0611915,73.8694117 60.0507725,73.8843212 60.0356698,73.9049815 60.0358825,73.9192521 60.0356698,73.9260679 60.0514105,73.9216633 60.0591056,73.9141402 60.0722448,73.8804873 60.070332,73.8692843 60.0674706,73.8447045 60.0611915))) +b2:E | MULTIPOLYGON(((73.9049815 60.0358825,73.9192521 60.0356698,73.9260679 60.0514105,73.9216633 60.0591056,73.9075368 60.0523758,73.9049815 60.0358825))) +b2:N | MULTIPOLYGON(((73.8692843 60.0674706,73.8830432 60.0517295,73.9075368 60.0523758,73.9216633 60.0591056,73.9141402 60.0722448,73.8804873 60.070332,73.8692843 60.0674706))) +b2:W | MULTIPOLYGON(((73.8447045 60.0611915,73.8694117 60.0507725,73.8830432 60.0517295,73.8692843 60.0674706,73.8447045 60.0611915))) diff --git a/test/scenes/data/admin.osm b/test/scenes/data/admin.osm new file mode 100644 index 00000000..bfe4340e --- /dev/null +++ b/test/scenes/data/admin.osm @@ -0,0 +1,250 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/scenes/data/building-on-street-corner.wkt b/test/scenes/data/building-on-street-corner.wkt index 4c007e9e..24da78f1 100644 --- a/test/scenes/data/building-on-street-corner.wkt +++ b/test/scenes/data/building-on-street-corner.wkt @@ -4,4 +4,4 @@ n-outer | POINT(1.0039478 2.0004676) n-edge-WE | POINT(1.0039599 2.0002345) w-WE | LINESTRING(1.0031759 2.0002316,1.0040361 2.0002211,1.0042735 2.0002264) w-NS | LINESTRING(1.0040414 2.0001051,1.0040361 2.0002211,1.0040364 2.0006377) -w-building | MULTIPOLYGON(((1.0040019 2.000324,1.0040016 2.0002344,1.0039599 2.0002345,1.0039037 2.0002347,1.0039043 2.0004389,1.0040023 2.0004386,1.0040019 2.000324))) +w-building | MULTIPOLYGON(((1.0039037 2.0002347,1.0039599 2.0002345,1.0040016 2.0002344,1.0040019 2.000324,1.0040023 2.0004386,1.0039043 2.0004389,1.0039037 2.0002347))) diff --git a/test/scenes/data/poly-area.wkt b/test/scenes/data/poly-area.wkt index a8fb045d..e65f2054 100644 --- a/test/scenes/data/poly-area.wkt +++ b/test/scenes/data/poly-area.wkt @@ -1,11 +1,11 @@ -0.0001 | MULTIPOLYGON(((0.001 0,0 0,0 0.1,0.001 0.1,0.001 0))) -0.0005 | MULTIPOLYGON(((0.005 0,0 0,0 0.1,0.005 0.1,0.005 0))) -0.001 | MULTIPOLYGON(((0.01 0,0 0,0 0.1,0.01 0.1,0.01 0))) -0.005 | MULTIPOLYGON(((0.05 0,0 0,0 0.1,0.05 0.1,0.05 0))) -0.01 | MULTIPOLYGON(((0.1 0,0 0,0 0.1,0.1 0.1,0.1 0))) -0.05 | MULTIPOLYGON(((0.5 0,0 0,0 0.1,0.5 0.1,0.5 0))) -0.1 | MULTIPOLYGON(((0.1 0,0 0,0 1,0.1 1,0.1 0))) -0.5 | MULTIPOLYGON(((0.5 0,0 0,0 1,0.5 1,0.5 0))) -1.0 | MULTIPOLYGON(((1 0,0 0,0 1,1 1,1 0))) -2.0 | MULTIPOLYGON(((2 0,0 0,0 1,2 1,2 0))) -5.0 | MULTIPOLYGON(((5 0,0 0,0 1,5 1,5 0))) +0.0001 | MULTIPOLYGON(((0 0,0.001 0,0.001 0.1,0 0.1,0 0))) +0.0005 | MULTIPOLYGON(((0 0,0.005 0,0.005 0.1,0 0.1,0 0))) +0.001 | MULTIPOLYGON(((0 0,0.01 0,0.01 0.1,0 0.1,0 0))) +0.005 | MULTIPOLYGON(((0 0,0.05 0,0.05 0.1,0 0.1,0 0))) +0.01 | MULTIPOLYGON(((0 0,0.1 0,0.1 0.1,0 0.1,0 0))) +0.05 | MULTIPOLYGON(((0 0,0.5 0,0.5 0.1,0 0.1,0 0))) +0.1 | MULTIPOLYGON(((0 0,0.1 0,0.1 1,0 1,0 0))) +0.5 | MULTIPOLYGON(((0 0,0.5 0,0.5 1,0 1,0 0))) +1.0 | MULTIPOLYGON(((0 0,1 0,1 1,0 1,0 0))) +2.0 | MULTIPOLYGON(((0 0,2 0,2 1,0 1,0 0))) +5.0 | MULTIPOLYGON(((0 0,5 0,5 1,0 1,0 0))) diff --git a/test/scenes/data/way-area-with-center.wkt b/test/scenes/data/way-area-with-center.wkt index 8a773633..ed34cd3b 100644 --- a/test/scenes/data/way-area-with-center.wkt +++ b/test/scenes/data/way-area-with-center.wkt @@ -1,5 +1,5 @@ -inner-C | POINT(0.0035625 -0.0066188) -outer-C | POINT(0.0041244 -0.0060007) -inner-N | POINT(0.0018846 -0.0023652) inner-S | POINT(0.0048516 -0.0095176) -area | MULTIPOLYGON(((0.0077125 -0.0066566,0.0065469 -0.0099414,0.0038979 -0.0109481,0.0026794 -0.0105772,0.0022025 -0.0099944,0.0026264 -0.0091997,0.0026264 -0.0080341,0.0019376 -0.0065507,0.0010369 -0.0072924,0.0005071 -0.0060738,0.0017787 -0.00565,0.0005071 -0.0042195,0.0005601 -0.0025771,0.0013019 -0.0015175,0.0050105 -0.0021533,0.006441 -0.0025771,0.0075006 -0.0040076,0.0033681 -0.0059149,0.0051694 -0.0076633,0.0061231 -0.0064977,0.0068648 -0.0049612,0.0077125 -0.0066566))) +inner-N | POINT(0.0018846 -0.0023652) +outer-C | POINT(0.0041244 -0.0060007) +inner-C | POINT(0.0035625 -0.0066188) +area | MULTIPOLYGON(((0.0005071 -0.0060738,0.0010369 -0.0072924,0.0019376 -0.0065507,0.0026264 -0.0080341,0.0026264 -0.0091997,0.0022025 -0.0099944,0.0026794 -0.0105772,0.0038979 -0.0109481,0.0065469 -0.0099414,0.0077125 -0.0066566,0.0068648 -0.0049612,0.0061231 -0.0064977,0.0051694 -0.0076633,0.0033681 -0.0059149,0.0075006 -0.0040076,0.006441 -0.0025771,0.0050105 -0.0021533,0.0013019 -0.0015175,0.0005601 -0.0025771,0.0005071 -0.0042195,0.0017787 -0.00565,0.0005071 -0.0060738))) diff --git a/utils/setup.php b/utils/setup.php index 7490d820..67c7fd7f 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -493,24 +493,59 @@ if ($aCMDResult['import-tiger-data']) { if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { $bDidSomething = true; $oDB =& getDB(); - if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection)); - $sSQL = "insert into placex (osm_type,osm_id,class,type,address,country_code,geometry) "; - $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',"; - $sSQL .= "hstore('postcode', pc),country_code,"; - $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,"; - $sSQL .= "address->'postcode' as pc,"; - $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "; - $sSQL .= "from placex where address ? 'postcode' group by country_code,pc) as x "; - $sSQL .= "where ST_Point(x,y) is not null"; - if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); + if (!pg_query($oDB->connection, 'TRUNCATE location_postcode')) { + fail(pg_last_error($oDB->connection)); + } + + $sSQL = "INSERT INTO location_postcode"; + $sSQL .= " (place_id, indexed_status, country_code, postcode, geometry) "; + $sSQL .= "SELECT nextval('seq_place'), 1, country_code,"; + $sSQL .= " upper(trim (both ' ' from address->'postcode')) as pc,"; + $sSQL .= " ST_Centroid(ST_Collect(ST_Centroid(geometry)))"; + $sSQL .= " FROM placex"; + $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'"; + $sSQL .= " AND geometry IS NOT null"; + $sSQL .= " GROUP BY country_code, pc"; + + if (!pg_query($oDB->connection, $sSQL)) { + fail(pg_last_error($oDB->connection)); + } if (CONST_Use_Extra_US_Postcodes) { - $sSQL = "insert into placex (osm_type,osm_id,class,type,address,country_code,geometry) "; - $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',"; - $sSQL .= "hstore('postcode', postcode),'us',"; - $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode"; + // only add postcodes that are not yet available in OSM + $sSQL = "INSERT INTO location_postcode"; + $sSQL .= " (place_id, indexed_status, country_code, postcode, geometry) "; + $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,"; + $sSQL .= " ST_SetSRID(ST_Point(x,y),4326)"; + $sSQL .= " FROM us_postcode WHERE postcode NOT IN"; + $sSQL .= " (SELECT postcode FROM location_postcode"; + $sSQL .= " WHERE country_code = 'us')"; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } + + // add missing postcodes for GB (if available) + $sSQL = "INSERT INTO location_postcode"; + $sSQL .= " (place_id, indexed_status, country_code, postcode, geometry) "; + $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry"; + $sSQL .= " FROM gb_postcode WHERE postcode NOT IN"; + $sSQL .= " (SELECT postcode FROM location_postcode"; + $sSQL .= " WHERE country_code = 'gb')"; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); + + if (!$aCMDResult['all']) { + $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'"; + $sSQL .= "and word NOT IN (SELECT postcode FROM location_postcode)"; + if (!pg_query($oDB->connection, $sSQL)) { + fail(pg_last_error($oDB->connection)); + } + } + $sSQL = "SELECT count(getorcreate_postcode_id(v)) FROM "; + $sSQL .= "(SELECT distinct(postcode) as v FROM location_postcode) p"; + + if (!pg_query($oDB->connection, $sSQL)) { + fail(pg_last_error($oDB->connection)); + } } if ($aCMDResult['osmosis-init']) { @@ -527,6 +562,11 @@ if ($aCMDResult['index'] || $aCMDResult['all']) { passthruCheckReturn($sBaseCmd.' -r 5 -R 25'); if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE'); passthruCheckReturn($sBaseCmd.' -r 26'); + + echo "Indexing postcodes....\n"; + $oDB =& getDB(); + $sSQL = 'UPDATE location_postcode SET indexed_status = 0'; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) { diff --git a/website/details.php b/website/details.php index 7f0a951f..4d7c1efb 100755 --- a/website/details.php +++ b/website/details.php @@ -73,6 +73,9 @@ if (CONST_Use_US_Tiger_Data) { $iParentPlaceID = chksql($oDB->getOne('SELECT parent_place_id FROM location_property_osmline WHERE place_id = '.$iPlaceID)); if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; +// artificial postcodes +$iParentPlaceID = chksql($oDB->getOne('SELECT parent_place_id FROM location_postcode WHERE place_id = '.$iPlaceID)); +if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; if (CONST_Use_Aux_Location_data) { $iParentPlaceID = chksql($oDB->getOne('SELECT parent_place_id FROM location_property_aux WHERE place_id = '.$iPlaceID)); @@ -130,6 +133,13 @@ if (PEAR::isError($aPointDetails['aNames'])) { // possible timeout $aPointDetails['aNames'] = []; } +// Address tags +$sSQL = "SELECT (each(address)).key as key,(each(address)).value FROM placex WHERE place_id = $iPlaceID ORDER BY key"; +$aPointDetails['aAddressTags'] = $oDB->getAssoc($sSQL); +if (PEAR::isError($aPointDetails['aAddressTags'])) { // possible timeout + $aPointDetails['aAddressTags'] = []; +} + // Extra tags $sSQL = "SELECT (each(extratags)).key,(each(extratags)).value FROM placex WHERE place_id = $iPlaceID ORDER BY (each(extratags)).key"; $aPointDetails['aExtraTags'] = $oDB->getAssoc($sSQL);
rankName TokensName NotAddress TokensAddress Notcountryoperatorclasstypehouse#operatorclasstypepostcodehouse#LatLonRadius
".$aRow['sClass']."".$aRow['sType']."".$aRow['sPostcode']."".$aRow['sHouseNumber']."".$aRow['fLat']."