diff --git a/data/words.sql b/data/words.sql index f927d000..85578f20 100644 --- a/data/words.sql +++ b/data/words.sql @@ -29787,7 +29787,6 @@ st 5557484 -- prefill word table select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null; -select count(make_keywords(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null; select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; -- copy the word frequencies diff --git a/docs/Migration.md b/docs/Migration.md new file mode 100644 index 00000000..62cbc4d9 --- /dev/null +++ b/docs/Migration.md @@ -0,0 +1,56 @@ +Database Migrations +=================== + +This page describes database migrations necessary to update existing databases +to newer versions of Nominatim. + +SQL statements should be executed from the postgres commandline. Execute +`psql nominiatim` to enter command line mode. + +3.0.0 +----- + +### Postcode Table + +A new separate table for artificially computed postcode centroids was introduced. +Migration to the new format is possible but **not recommended**. + + * create postcode table and indexes, running the following SQL statements: + + CREATE TABLE location_postcode + (place_id BIGINT, parent_place_id BIGINT, rank_search SMALLINT, + rank_address SMALLINT, indexed_status SMALLINT, indexed_date TIMESTAMP, + country_code varchar(2), postcode TEXT, + geometry GEOMETRY(Geometry, 4326)); + CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry); + CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id); + CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode); + GRANT SELECT ON location_postcode TO "www-data"; + + * add postcode column to location_area tables with SQL statement: + + ALTER TABLE location_area ADD COLUMN postcode TEXT; + + * reimport functions + + ./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions + + * create appropriate triggers with SQL: + + CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcode + FOR EACH ROW EXECUTE PROCEDURE postcode_update(); + + * populate postcode table (will take a while): + + ./utils/setup.php --calculate-postcodes --index --index-noanalyse + +This will create a working database. You may also delete the old artificial +postcodes now. Note that this may be expensive and is not absolutely necessary. +The following SQL statement will remove them: + + DELETE FROM place_addressline a USING placex p + WHERE a.address_place_id = p.place_id and p.osm_type = 'P'; + ALTER TABLE placex DISABLE TRIGGER USER; + DELETE FROM placex WHERE osm_type = 'P'; + ALTER TABLE placex ENABLE TRIGGER USER; + diff --git a/lib/Geocode.php b/lib/Geocode.php index 80449cb6..b8933704 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -51,10 +51,22 @@ class Geocode protected $sQuery = false; protected $aStructuredQuery = false; + protected $oNormalizer = null; + public function __construct(&$oDB) { $this->oDB =& $oDB; + $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + } + + private function normTerm($sTerm) + { + if ($this->oNormalizer === null) { + return null; + } + + return $this->oNormalizer->transliterate($sTerm); } public function setReverseInPlan($bReverse) @@ -410,8 +422,15 @@ class Geocode $sPlaceIDs = join(',', array_keys($aPlaceIDs)); $sImportanceSQL = ''; - if ($this->sViewboxSmallSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; - if ($this->sViewboxLargeSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom = ''; + if ($this->sViewboxSmallSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, geometry) THEN 1 ELSE 0.75 END * "; + } + if ($this->sViewboxLargeSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, geometry) THEN 1 ELSE 0.75 END * "; + } $sSQL = "SELECT "; $sSQL .= " osm_type,"; @@ -476,6 +495,35 @@ class Geocode if ($this->bIncludeNameDetails) $sSQL .= "name, "; $sSQL .= " extratags->'place' "; + // postcode table + $sSQL .= "UNION "; + $sSQL .= "SELECT"; + $sSQL .= " 'P' as osm_type,"; + $sSQL .= " (SELECT osm_id from placex p WHERE p.place_id = lp.parent_place_id) as osm_id,"; + $sSQL .= " 'place' as class, 'postcode' as type,"; + $sSQL .= " null as admin_level, rank_search, rank_address,"; + $sSQL .= " place_id, parent_place_id, country_code,"; + $sSQL .= " get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,"; + $sSQL .= " postcode as placename,"; + $sSQL .= " postcode as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null AS names,"; + $sSQL .= " ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,"; + $sSQL .= $sImportanceSQLGeom."(0.75-(rank_search::float/40)) AS importance, "; + $sSQL .= " ("; + $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; + $sSQL .= " FROM "; + $sSQL .= " place_addressline s, "; + $sSQL .= " placex p"; + $sSQL .= " WHERE s.place_id = lp.parent_place_id"; + $sSQL .= " AND p.place_id = s.address_place_id "; + $sSQL .= " AND s.isaddress"; + $sSQL .= " AND p.importance is not null"; + $sSQL .= " ) AS addressimportance, "; + $sSQL .= " null AS extra_place "; + $sSQL .= "FROM location_postcode lp"; + $sSQL .= " WHERE place_id in ($sPlaceIDs) "; + if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { // only Tiger housenumbers and interpolation lines need to be interpolated, because they are saved as lines // with start- and endnumber, the common osm housenumbers are usually saved as points @@ -666,12 +714,12 @@ class Geocode Score how good the search is so they can be ordered */ - foreach ($aPhrases as $iPhrase => $sPhrase) { + foreach ($aPhrases as $iPhrase => $aPhrase) { $aNewPhraseSearches = array(); if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; else $sPhraseType = ''; - foreach ($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) { + foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) { // Too many permutations - too expensive if ($iWordSet > 120) break; @@ -701,42 +749,29 @@ class Geocode } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - } elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) { - if ($aSearch['oNear'] === false) { - $aSearch['oNear'] = new NearPoint( - $aSearchTerm['lat'], - $aSearchTerm['lon'], - $aSearchTerm['radius'] - ); - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } elseif ($sPhraseType == 'postalcode') { + } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - // If we already have a name try putting the postcode first - if (sizeof($aSearch['aName'])) { + if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' && + isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { $aNewSearch = $aSearch; + $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'] = array(); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; } - if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } else { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } else { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. + if ($aSearch['sOperator'] !== 'postcode' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']))) { + $aSearch['sPostcode'] = $aSearchTerm['word']; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') { - if ($aSearch['sHouseNumber'] === '') { + if ($aSearch['sHouseNumber'] === '' && $aSearch['sOperator'] !== 'postcode') { $aSearch['sHouseNumber'] = $sToken; // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty @@ -919,13 +954,7 @@ class Geocode { if (!$this->sQuery && !$this->aStructuredQuery) return array(); - $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); - if ($oNormalizer !== null) { - $sNormQuery = $oNormalizer->transliterate($this->sQuery); - } else { - $sNormQuery = null; - } - + $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; $sCountryCodesSQL = false; if ($this->aCountryCodes) { @@ -985,6 +1014,7 @@ class Geocode 'sClass' => '', 'sType' => '', 'sHouseNumber' => '', + 'sPostcode' => '', 'oNear' => $oNearPoint ) ); @@ -1109,21 +1139,9 @@ class Geocode } if (CONST_Debug) var_Dump($aPhrases, $aValidTokens); - // Try and calculate GB postcodes we might be missing + // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code foreach ($aTokens as $sToken) { - // Source of gb postcodes is now definitive - always use - if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) { - if (substr($aData[1], -2, 1) != ' ') { - $aData[0] = substr($aData[0], 0, strlen($aData[1])-1).' '.substr($aData[0], strlen($aData[1])-1); - $aData[1] = substr($aData[1], 0, -1).' '.substr($aData[1], -1, 1); - } - $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB); - if ($aGBPostcodeLocation) { - $aValidTokens[$sToken] = $aGBPostcodeLocation; - } - } elseif (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { - // US ZIP+4 codes - if there is no token, - // merge in the 5-digit ZIP code + if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { if (isset($aValidTokens[$aData[1]])) { foreach ($aValidTokens[$aData[1]] as $aToken) { if (!$aToken['class']) { @@ -1190,7 +1208,6 @@ class Geocode ksort($aGroupedSearches); } - if (CONST_Debug) var_Dump($aGroupedSearches); if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) { $aCopyGroupedSearches = $aGroupedSearches; foreach ($aCopyGroupedSearches as $iGroup => $aSearches) { @@ -1247,13 +1264,16 @@ class Geocode if (CONST_Debug) echo "
| rank | Name Tokens | Name Not | "; echo "Address Tokens | Address Not | country | "; - echo "operator | class | type | house# | "; + echo "operator | class | type | postcode | house# | "; echo "Lat | Lon | Radius | ".$aRow['sClass']." | "; echo "".$aRow['sType']." | "; + echo "".$aRow['sPostcode']." | "; echo "".$aRow['sHouseNumber']." | "; echo "".$aRow['fLat']." | "; diff --git a/lib/template/details-html.php b/lib/template/details-html.php index 46b6a50c..76818395 100644 --- a/lib/template/details-html.php +++ b/lib/template/details-html.php @@ -124,6 +124,8 @@ kv('Wikipedia Calculated' , wikipediaLink($aPointDetails) ); } + kv('Computed Postcode', $aPointDetails['postcode']); + kv('Address Tags' , hash_to_subtable($aPointDetails['aAddressTags']) ); kv('Extra Tags' , hash_to_subtable($aPointDetails['aExtraTags']) ); ?> diff --git a/sql/functions.sql b/sql/functions.sql index 930fa561..a9bca542 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -83,6 +83,26 @@ END; $$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + lookup_word TEXT; + return_word_id INTEGER; +BEGIN + lookup_word := upper(trim(postcode)); + lookup_token := ' ' || make_standard_name(lookup_word); + SELECT min(word_id) FROM word WHERE word_token = lookup_token and class='place' and type='postcode' into return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word, 'place', 'postcode', null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT, lookup_country_code varchar(2)) RETURNS INTEGER AS $$ @@ -236,6 +256,99 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION get_postcode_rank(country_code VARCHAR(2), postcode TEXT, + OUT rank_search SMALLINT, OUT rank_address SMALLINT) +AS $$ +DECLARE + part TEXT; +BEGIN + rank_search := 30; + rank_address := 30; + postcode := upper(postcode); + + IF country_code = 'gb' THEN + IF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN + rank_search := 25; + rank_address := 5; + ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN + rank_search := 23; + rank_address := 5; + ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN + rank_search := 21; + rank_address := 5; + END IF; + + ELSEIF country_code = 'sg' THEN + IF postcode ~ '^([0-9]{6})$' THEN + rank_search := 25; + rank_address := 11; + END IF; + + ELSEIF country_code = 'de' THEN + IF postcode ~ '^([0-9]{5})$' THEN + rank_search := 21; + rank_address := 11; + END IF; + + ELSE + -- Guess at the postcode format and coverage (!) + IF postcode ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local + rank_search := 21; + rank_address := 11; + ELSE + -- Does it look splitable into and area and local code? + part := substring(postcode from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); + + IF part IS NOT NULL THEN + rank_search := 25; + rank_address := 11; + ELSEIF postcode ~ '^[- :A-Z0-9]{6,}$' THEN + rank_search := 21; + rank_address := 11; + END IF; + END IF; + END IF; + +END; +$$ +LANGUAGE plpgsql IMMUTABLE; + +-- Find the nearest artificial postcode for the given geometry. +-- TODO For areas there should not be more than two inside the geometry. +CREATE OR REPLACE FUNCTION get_nearest_postcode(country VARCHAR(2), geom GEOMETRY) RETURNS TEXT + AS $$ +DECLARE + outcode TEXT; + cnt INTEGER; +BEGIN + -- If the geometry is an area then only one postcode must be within + -- that area, otherwise consider the area as not having a postcode. + IF ST_GeometryType(geom) in ('ST_Polygon','ST_MultiPolygon') THEN + SELECT min(postcode), count(*) FROM + (SELECT postcode FROM location_postcode + WHERE ST_Contains(geom, location_postcode.geometry) LIMIT 2) sub + INTO outcode, cnt; + + IF cnt = 1 THEN + RETURN outcode; + ELSE + RETURN null; + END IF; + END IF; + + SELECT postcode FROM location_postcode + WHERE ST_DWithin(geom, location_postcode.geometry, 0.05) + AND location_postcode.country_code = country + ORDER BY ST_Distance(geom, location_postcode.geometry) LIMIT 1 + INTO outcode; + + RETURN outcode; +END; +$$ +LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION create_country(src HSTORE, lookup_country_code varchar(2)) RETURNS VOID AS $$ DECLARE @@ -515,36 +628,38 @@ CREATE OR REPLACE FUNCTION add_location( keywords INTEGER[], rank_search INTEGER, rank_address INTEGER, + in_postcode TEXT, geometry GEOMETRY ) RETURNS BOOLEAN AS $$ DECLARE locationid INTEGER; - isarea BOOLEAN; centroid GEOMETRY; diameter FLOAT; x BOOLEAN; splitGeom RECORD; secgeo GEOMETRY; + postcode TEXT; BEGIN IF rank_search > 25 THEN RAISE EXCEPTION 'Adding location with rank > 25 (% rank %)', place_id, rank_search; END IF; --- RAISE WARNING 'Adding location with rank > 25 (% rank %)', place_id, rank_search; - x := deleteLocationArea(partition, place_id, rank_search); - isarea := false; - IF (ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(geometry)) THEN + -- add postcode only if it contains a single entry, i.e. ignore postcode lists + postcode := NULL; + IF in_postcode is not null AND in_postcode not similar to '%(,|;)%' THEN + postcode := upper(trim (in_postcode)); + END IF; - isArea := true; + IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') THEN centroid := ST_Centroid(geometry); FOR secgeo IN select split_geometry(geometry) AS geom LOOP - x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, centroid, secgeo); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, postcode, centroid, secgeo); END LOOP; ELSE @@ -569,7 +684,7 @@ BEGIN -- RAISE WARNING 'adding % diameter %', place_id, diameter; secgeo := ST_Buffer(geometry, diameter); - x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, ST_Centroid(geometry), secgeo); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, postcode, ST_Centroid(geometry), secgeo); END IF; @@ -722,54 +837,10 @@ BEGIN RETURN NULL; END IF; - NEW.postcode := NEW.address->'postcode'; - NEW.name := hstore('ref', NEW.postcode); + NEW.name := hstore('ref', NEW.address->'postcode'); - IF NEW.country_code = 'gb' THEN - - IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN - NEW.rank_search := 25; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN - NEW.rank_search := 23; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN - NEW.rank_search := 21; - NEW.rank_address := 5; - END IF; - - ELSEIF NEW.country_code = 'sg' THEN - - IF NEW.postcode ~ '^([0-9]{6})$' THEN - NEW.rank_search := 25; - NEW.rank_address := 11; - END IF; - - ELSEIF NEW.country_code = 'de' THEN - - IF NEW.postcode ~ '^([0-9]{5})$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - - ELSE - -- Guess at the postcode format and coverage (!) - IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local - NEW.rank_search := 21; - NEW.rank_address := 11; - ELSE - -- Does it look splitable into and area and local code? - postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); - - IF postcode IS NOT NULL THEN - NEW.rank_search := 25; - NEW.rank_address := 11; - ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - END IF; - END IF; + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode') + INTO NEW.rank_search, NEW.rank_address; ELSEIF NEW.class = 'place' THEN IF NEW.type in ('continent') THEN @@ -988,8 +1059,8 @@ DECLARE linegeo GEOMETRY; splitline GEOMETRY; sectiongeo GEOMETRY; + interpol_postcode TEXT; postcode TEXT; - seg_postcode TEXT; BEGIN -- deferred delete IF OLD.indexed_status = 100 THEN @@ -1008,9 +1079,11 @@ BEGIN NEW.address->'place', NEW.partition, place_centroid, NEW.linegeo); - - IF NEW.address is not NULL and NEW.address ? 'postcode' THEN - NEW.postcode = NEW.address->'postcode'; + IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN + interpol_postcode := NEW.address->'postcode'; + housenum := getorcreate_postcode_id(NEW.address->'postcode'); + ELSE + interpol_postcode := NULL; END IF; -- if the line was newly inserted, split the line as necessary @@ -1023,7 +1096,6 @@ BEGIN linegeo := NEW.linegeo; startnumber := NULL; - postcode := NEW.postcode; FOR nodeidpos in 1..array_upper(waynodes, 1) LOOP @@ -1056,15 +1128,24 @@ BEGIN sectiongeo := ST_Reverse(sectiongeo); END IF; - seg_postcode := coalesce(postcode, - prevnode.address->'postcode', - nextnode.address->'postcode'); + -- determine postcode + postcode := coalesce(interpol_postcode, + prevnode.address->'postcode', + nextnode.address->'postcode', + postcode); + + IF postcode is NULL THEN + SELECT placex.postcode FROM placex WHERE place_id = NEW.parent_place_id INTO postcode; + END IF; + IF postcode is NULL THEN + postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry); + END IF; IF NEW.startnumber IS NULL THEN NEW.startnumber := startnumber; NEW.endnumber := endnumber; NEW.linegeo := sectiongeo; - NEW.postcode := seg_postcode; + NEW.postcode := upper(trim(postcode)); ELSE insert into location_property_osmline (linegeo, partition, osm_id, parent_place_id, @@ -1073,7 +1154,7 @@ BEGIN geometry_sector, indexed_status) values (sectiongeo, NEW.partition, NEW.osm_id, NEW.parent_place_id, startnumber, endnumber, NEW.interpolationtype, - NEW.address, seg_postcode, + NEW.address, postcode, NEW.country_code, NEW.geometry_sector, 0); END IF; END IF; @@ -1097,7 +1178,42 @@ END; $$ LANGUAGE plpgsql; +-- Trigger for updates of location_postcode +-- +-- Computes the parent object the postcode most likely refers to. +-- This will be the place that determines the address displayed when +-- searching for this postcode. +CREATE OR REPLACE FUNCTION postcode_update() RETURNS +TRIGGER + AS $$ +DECLARE + partition SMALLINT; + location RECORD; +BEGIN + IF NEW.indexed_status != 0 OR OLD.indexed_status = 0 THEN + RETURN NEW; + END IF; + NEW.indexed_date = now(); + + partition := get_partition(NEW.country_code); + + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.postcode) + INTO NEW.rank_search, NEW.rank_address; + + NEW.parent_place_id = 0; + FOR location IN + SELECT place_id + FROM getNearFeatures(partition, NEW.geometry, NEW.rank_search, '{}'::int[]) + WHERE NOT isguess ORDER BY rank_address DESC LIMIT 1 + LOOP + NEW.parent_place_id = location.place_id; + END LOOP; + + RETURN NEW; +END; +$$ +LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION placex_update() RETURNS TRIGGER @@ -1161,11 +1277,6 @@ BEGIN --DEBUG: RAISE WARNING 'placex_update % % (%)',NEW.osm_type,NEW.osm_id,NEW.place_id; - IF NEW.class = 'place' AND NEW.type = 'postcodearea' THEN - -- Silently do nothing - RETURN NEW; - END IF; - NEW.indexed_date = now(); result := deleteSearchName(NEW.partition, NEW.place_id); @@ -1202,13 +1313,16 @@ BEGIN addr_street = NEW.address->'street'; addr_place = NEW.address->'place'; - NEW.postcode = NEW.address->'postcode'; + IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(,|;)%' THEN + i := getorcreate_postcode_id(NEW.address->'postcode'); + END IF; END IF; -- Speed up searches - just use the centroid of the feature -- cheaper but less acurate place_centroid := ST_PointOnSurface(NEW.geometry); NEW.centroid := null; + NEW.postcode := null; --DEBUG: RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(place_centroid); -- recalculate country and partition @@ -1445,25 +1559,16 @@ BEGIN NEW.country_code := location.country_code; --DEBUG: RAISE WARNING 'Got parent details from search name'; - -- Merge the postcode into the parent's address if necessary - IF NEW.postcode IS NOT NULL THEN - --DEBUG: RAISE WARNING 'Merging postcode into parent'; - isin_tokens := '{}'::int[]; - address_street_word_id := getorcreate_word_id(make_standard_name(NEW.postcode)); - IF address_street_word_id is not null - and not ARRAY[address_street_word_id] <@ location.nameaddress_vector THEN - isin_tokens := isin_tokens || address_street_word_id; - END IF; - address_street_word_id := getorcreate_name_id(make_standard_name(NEW.postcode)); - IF address_street_word_id is not null - and not ARRAY[address_street_word_id] <@ location.nameaddress_vector THEN - isin_tokens := isin_tokens || address_street_word_id; - END IF; - IF isin_tokens != '{}'::int[] THEN - UPDATE search_name - SET nameaddress_vector = search_name.nameaddress_vector || isin_tokens - WHERE place_id = NEW.parent_place_id; - END IF; + -- determine postcode + IF NEW.rank_search > 4 THEN + IF NEW.address is not null AND NEW.address ? 'postcode' THEN + NEW.postcode = upper(trim(NEW.address->'postcode')); + ELSE + SELECT postcode FROM placex WHERE place_id = NEW.parent_place_id INTO NEW.postcode; + END IF; + IF NEW.postcode is null THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid); + END IF; END IF; -- If there is no name it isn't searchable, don't bother to create a search record @@ -1481,7 +1586,7 @@ BEGIN -- Just be happy with inheriting from parent road only IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); --DEBUG: RAISE WARNING 'Place added to location table'; END IF; @@ -1668,6 +1773,7 @@ BEGIN isin := avals(NEW.address); IF array_upper(isin, 1) IS NOT NULL THEN FOR i IN 1..array_upper(isin, 1) LOOP + -- TODO further split terms with comma and semicolon address_street_word_id := get_name_id(make_standard_name(isin[i])); IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); @@ -1682,26 +1788,6 @@ BEGIN END LOOP; END IF; END IF; - --DEBUG: RAISE WARNING '"address:* tokens collected'; - IF NEW.postcode IS NOT NULL THEN - isin := regexp_split_to_array(NEW.postcode, E'[;,]'); - IF array_upper(isin, 1) IS NOT NULL THEN - FOR i IN 1..array_upper(isin, 1) LOOP - address_street_word_id := get_name_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); - isin_tokens := isin_tokens || address_street_word_id; - END IF; - - -- merge into address vector - address_street_word_id := get_word_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); - END IF; - END LOOP; - END IF; - END IF; - --DEBUG: RAISE WARNING 'postcode tokens collected'; -- %NOTIGERDATA% IF 0 THEN -- for the USA we have an additional address table. Merge in zip codes from there too @@ -1773,6 +1859,11 @@ BEGIN VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address); IF location_isaddress THEN + -- add postcode if we have one + -- (If multiple postcodes are available, we end up with the highest ranking one.) + IF location.postcode is not null THEN + NEW.postcode = location.postcode; + END IF; address_havelevel[location.rank_address] := true; IF NOT location.isguess THEN @@ -1807,6 +1898,11 @@ BEGIN nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); + IF NEW.postcode is null AND location.postcode is not null + AND NOT address_havelevel[location.rank_address] THEN + NEW.postcode := location.postcode; + END IF; + address_havelevel[location.rank_address] := true; IF location.rank_address > parent_place_id_rank THEN @@ -1849,11 +1945,20 @@ BEGIN END IF; --DEBUG: RAISE WARNING 'search terms for long ways added'; + IF NEW.address is not null AND NEW.address ? 'postcode' + AND NEW.address->'postcode' not similar to '%(,|;)%' THEN + NEW.postcode := upper(trim(NEW.address->'postcode')); + END IF; + + IF NEW.postcode is null AND NEW.rank_search > 8 THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); + END IF; + -- if we have a name add this to the name search table IF NEW.name IS NOT NULL THEN IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); --DEBUG: RAISE WARNING 'added to location (full)'; END IF; @@ -2250,46 +2355,6 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; - -CREATE OR REPLACE FUNCTION get_address_postcode(for_place_id BIGINT) RETURNS TEXT - AS $$ -DECLARE - result TEXT[]; - search TEXT[]; - for_postcode TEXT; - found INTEGER; - location RECORD; -BEGIN - - found := 1000; - search := ARRAY['ref']; - result := '{}'; - - select postcode from placex where place_id = for_place_id limit 1 into for_postcode; - - FOR location IN - select rank_address,name,distance,length(name::text) as namelength - from place_addressline join placex on (address_place_id = placex.place_id) - where place_addressline.place_id = for_place_id and rank_address in (5,11) - order by rank_address desc,rank_search desc,fromarea desc,distance asc,namelength desc - LOOP - IF array_upper(search, 1) IS NOT NULL AND array_upper(location.name, 1) IS NOT NULL THEN - FOR j IN 1..array_upper(search, 1) LOOP - FOR k IN 1..array_upper(location.name, 1) LOOP - IF (found > location.rank_address AND location.name[k].key = search[j] AND location.name[k].value != '') AND NOT result @> ARRAY[trim(location.name[k].value)] AND (for_postcode IS NULL OR location.name[k].value ilike for_postcode||'%') THEN - result[(100 - location.rank_address)] := trim(location.name[k].value); - found := location.rank_address; - END IF; - END LOOP; - END LOOP; - END IF; - END LOOP; - - RETURN array_to_string(result,', '); -END; -$$ -LANGUAGE plpgsql; - --housenumber only needed for tiger data CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT, housenumber INTEGER, languagepref TEXT[]) RETURNS TEXT AS $$ @@ -2373,11 +2438,19 @@ BEGIN -- %NOAUXDATA% IF 0 THEN IF for_place_id IS NULL THEN select parent_place_id,'us', housenumber, 30, postcode, null, 'place', 'house' from location_property_aux - WHERE place_id = in_place_id + WHERE place_id = in_place_id INTO for_place_id,searchcountrycode, searchhousenumber, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; END IF; -- %NOAUXDATA% END IF; + -- postcode table + IF for_place_id IS NULL THEN + select parent_place_id, country_code, rank_address, postcode, 'place', 'postcode' + FROM location_postcode + WHERE place_id = in_place_id + INTO for_place_id, searchcountrycode, searchrankaddress, searchpostcode, searchclass, searchtype; + END IF; + IF for_place_id IS NULL THEN select parent_place_id, country_code, housenumber, rank_search, postcode, name, class, type from placex WHERE place_id = in_place_id and rank_search > 27 @@ -2396,9 +2469,8 @@ BEGIN found := 1000; hadcountry := false; FOR location IN - select placex.place_id, osm_type, osm_id, - CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name, - class, type, admin_level, true as fromarea, true as isaddress, + select placex.place_id, osm_type, osm_id, name, + class, type, admin_level, true as isaddress, CASE WHEN rank_address = 0 THEN 100 WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address, 0 as distance, country_code, postcode from placex @@ -2408,13 +2480,9 @@ BEGIN IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN searchcountrycode := location.country_code; END IF; - IF searchpostcode IS NOT NULL and location.type = 'postcode' THEN + IF location.type in ('postcode', 'postal_code') THEN location.isaddress := FALSE; - END IF; - IF searchpostcode IS NULL and location.postcode IS NOT NULL THEN - searchpostcode := location.postcode; - END IF; - IF location.rank_address = 4 AND location.isaddress THEN + ELSEIF location.rank_address = 4 THEN hadcountry := true; END IF; IF location.rank_address < 4 AND NOT hadcountry THEN @@ -2425,15 +2493,14 @@ BEGIN END IF; END IF; countrylocation := ROW(location.place_id, location.osm_type, location.osm_id, location.name, location.class, - location.type, location.admin_level, location.fromarea, location.isaddress, location.rank_address, + location.type, location.admin_level, true, location.isaddress, location.rank_address, location.distance)::addressline; RETURN NEXT countrylocation; found := location.rank_address; END LOOP; FOR location IN - select placex.place_id, osm_type, osm_id, - CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name, + select placex.place_id, osm_type, osm_id, name, CASE WHEN extratags ? 'place' THEN 'place' ELSE class END as class, CASE WHEN extratags ? 'place' THEN extratags->'place' ELSE type END as type, admin_level, fromarea, isaddress, @@ -2450,12 +2517,9 @@ BEGIN IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN searchcountrycode := location.country_code; END IF; - IF searchpostcode IS NOT NULL and location.type = 'postcode' THEN + IF location.type in ('postcode', 'postal_code') THEN location.isaddress := FALSE; END IF; - IF searchpostcode IS NULL and location.isaddress and location.type != 'postcode' and location.postcode IS NOT NULL and location.postcode not similar to '%(,|;)%' THEN - searchpostcode := location.postcode; - END IF; IF location.rank_address = 4 AND location.isaddress THEN hadcountry := true; END IF; @@ -2489,7 +2553,6 @@ BEGIN IF searchhousename IS NOT NULL THEN location := ROW(in_place_id, null, null, searchhousename, searchclass, searchtype, null, true, true, 29, 0)::addressline; --- location := ROW(in_place_id, null, null, searchhousename, 'place', 'house_name', null, true, true, 29, 0)::addressline; RETURN NEXT location; END IF; @@ -2635,9 +2698,7 @@ BEGIN IF out_postcode IS NULL THEN SELECT postcode from placex where place_id = out_parent_place_id INTO out_postcode; END IF; - IF out_postcode IS NULL THEN - out_postcode := getNearestPostcode(out_partition, place_centroid); - END IF; + -- XXX look into postcode table newpoints := 0; insert into location_property_aux (place_id, partition, parent_place_id, housenumber, postcode, centroid) diff --git a/sql/indices.src.sql b/sql/indices.src.sql index bbd5a76a..cf5c4bc1 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -25,4 +25,5 @@ DROP INDEX IF EXISTS place_id_idx; CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index}; -CREATE INDEX idx_gb_postcode_postcode ON gb_postcode USING BTREE (postcode) {ts:search-index}; +CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index}; +CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index}; diff --git a/sql/loaddata.sql b/sql/loaddata.sql deleted file mode 100644 index a3c8914a..00000000 --- a/sql/loaddata.sql +++ /dev/null @@ -1,16 +0,0 @@ -TRUNCATE placex; -TRUNCATE search_name; -TRUNCATE place_addressline; -TRUNCATE location_area; - -DROP SEQUENCE seq_place; -CREATE SEQUENCE seq_place start 100000; - -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'N'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'W'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) - select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'R'; - ---select count(*) from (select create_interpolation(osm_id, housenumber) from placex where indexed=false and class='place' and type='houses') as x; diff --git a/sql/partition-functions.src.sql b/sql/partition-functions.src.sql index 7d4f7cbe..110dd0cd 100644 --- a/sql/partition-functions.src.sql +++ b/sql/partition-functions.src.sql @@ -6,12 +6,12 @@ BEGIN -- start IF in_partition = -partition- THEN FOR r IN - SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, centroid FROM ( + SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid FROM ( SELECT * FROM location_area_large_-partition- WHERE ST_Intersects(geometry, feature) and rank_search < maxrank UNION ALL SELECT * FROM location_area_country WHERE ST_Intersects(geometry, feature) and rank_search < maxrank ) as location_area - GROUP BY place_id, keywords, rank_address, rank_search, isguess, centroid + GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid ORDER BY rank_address, isin_tokens && keywords desc, isguess asc, ST_Distance(feature, centroid) * CASE @@ -55,8 +55,8 @@ $$ LANGUAGE plpgsql; create or replace function insertLocationAreaLarge( - in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_keywords INTEGER[], - in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, + in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_keywords INTEGER[], + in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, postcode TEXT, in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -72,8 +72,8 @@ BEGIN -- start IF in_partition = -partition- THEN - INSERT INTO location_area_large_-partition- (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, centroid, geometry) - values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); + INSERT INTO location_area_large_-partition- (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, postcode, centroid, geometry) + values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, postcode, in_centroid, in_geometry); RETURN TRUE; END IF; -- end @@ -173,29 +173,6 @@ $$ LANGUAGE plpgsql; -create or replace function getNearestPostcode(in_partition INTEGER, point GEOMETRY) - RETURNS TEXT AS $$ -DECLARE - out_postcode TEXT; -BEGIN - --- start - IF in_partition = -partition- THEN - SELECT postcode - FROM location_area_large_-partition- join placex using (place_id) - WHERE st_contains(location_area_large_-partition-.geometry, point) - AND class = 'place' and type = 'postcode' - ORDER BY st_distance(location_area_large_-partition-.centroid, point) ASC limit 1 - INTO out_postcode; - RETURN out_postcode; - END IF; --- end - - RAISE EXCEPTION 'Unknown partition %', in_partition; -END -$$ -LANGUAGE plpgsql; - create or replace function insertSearchName( in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_name_vector INTEGER[], in_nameaddress_vector INTEGER[], diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index 6f799bcf..d8f02e10 100644 --- a/sql/partition-tables.src.sql +++ b/sql/partition-tables.src.sql @@ -21,6 +21,7 @@ create type nearfeaturecentr as ( rank_search smallint, distance float, isguess boolean, + postcode TEXT, centroid GEOMETRY ); @@ -29,9 +30,9 @@ CREATE TABLE search_name_blank ( place_id BIGINT, search_rank smallint, address_rank smallint, - name_vector integer[] + name_vector integer[], + centroid GEOMETRY(Geometry, 4326) ); -SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_country () INHERITS (location_area_large) {ts:address-data}; @@ -54,9 +55,9 @@ CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- CREATE TABLE location_road_-partition- ( place_id BIGINT, partition SMALLINT, - country_code VARCHAR(2) + country_code VARCHAR(2), + geometry GEOMETRY(Geometry, 4326) ) {ts:address-data}; -SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) {ts:address-index}; CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) {ts:address-index}; diff --git a/sql/tables.sql b/sql/tables.sql index f3217d5a..4a22a814 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -61,10 +61,11 @@ CREATE TABLE location_area ( rank_search SMALLINT NOT NULL, rank_address SMALLINT NOT NULL, country_code VARCHAR(2), - isguess BOOL + isguess BOOL, + postcode TEXT, + centroid GEOMETRY(Point, 4326), + geometry GEOMETRY(Geometry, 4326) ); -SELECT AddGeometryColumn('location_area', 'centroid', 4326, 'POINT', 2); -SELECT AddGeometryColumn('location_area', 'geometry', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_large () INHERITS (location_area); @@ -74,9 +75,9 @@ CREATE TABLE location_property ( parent_place_id BIGINT, partition SMALLINT, housenumber TEXT, - postcode TEXT + postcode TEXT, + centroid GEOMETRY(Point, 4326) ); -SELECT AddGeometryColumn('location_property', 'centroid', 4326, 'POINT', 2); CREATE TABLE location_property_aux () INHERITS (location_property); CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); @@ -125,9 +126,9 @@ CREATE TABLE search_name ( address_rank SMALLINT, name_vector integer[], nameaddress_vector integer[], - country_code varchar(2) + country_code varchar(2), + centroid GEOMETRY(Geometry, 4326) ) {ts:search-data}; -SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2); CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) {ts:search-index}; drop table IF EXISTS place_addressline; @@ -157,9 +158,9 @@ CREATE TABLE placex ( wikipedia TEXT, -- calculated wikipedia article name (language:title) country_code varchar(2), housenumber TEXT, - postcode TEXT + postcode TEXT, + centroid GEOMETRY(Geometry, 4326) ) {ts:search-data}; -SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {ts:search-index}; CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {ts:search-index}; CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {ts:address-index} WHERE linked_place_id IS NOT NULL; @@ -197,8 +198,24 @@ CREATE TRIGGER place_before_delete BEFORE DELETE ON place CREATE TRIGGER place_before_insert BEFORE INSERT ON place FOR EACH ROW EXECUTE PROCEDURE place_insert(); -DROP SEQUENCE IF EXISTS seq_postcodes; -CREATE SEQUENCE seq_postcodes start 1; +-- Table for synthetic postcodes. +DROP TABLE IF EXISTS location_postcode; +CREATE TABLE location_postcode ( + place_id BIGINT, + parent_place_id BIGINT, + rank_search SMALLINT, + rank_address SMALLINT, + indexed_status SMALLINT, + indexed_date TIMESTAMP, + country_code varchar(2), + postcode TEXT, + geometry GEOMETRY(Geometry, 4326) + ); +CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry) {ts:address-index}; +GRANT SELECT ON location_postcode TO "{www-user}" ; + +CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcode + FOR EACH ROW EXECUTE PROCEDURE postcode_update(); DROP TABLE IF EXISTS import_polygon_error; CREATE TABLE import_polygon_error ( @@ -209,10 +226,10 @@ CREATE TABLE import_polygon_error ( name HSTORE, country_code varchar(2), updated timestamp, - errormessage text + errormessage text, + prevgeometry GEOMETRY(Geometry, 4326), + newgeometry GEOMETRY(Geometry, 4326) ); -SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); -SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); GRANT SELECT ON import_polygon_error TO "{www-user}"; diff --git a/test/bdd/api/search/params.feature b/test/bdd/api/search/params.feature index d1cff1cc..bfc7cb15 100644 --- a/test/bdd/api/search/params.feature +++ b/test/bdd/api/search/params.feature @@ -66,7 +66,7 @@ Feature: Search queries Then there are duplicates Scenario: Search with bounded viewbox in right area - When sending json search query "restaurant" with address + When sending json search query "bar" with address | bounded | viewbox | | 1 | -56.16786,-34.84061,-56.12525,-34.86526 | Then result addresses contain @@ -74,7 +74,7 @@ Feature: Search queries | Montevideo | Scenario: Search with bounded viewboxlbrt in right area - When sending json search query "restaurant" with address + When sending json search query "bar" with address | bounded | viewboxlbrt | | 1 | -56.16786,-34.86526,-56.12525,-34.84061 | Then result addresses contain diff --git a/test/bdd/db/import/placex.feature b/test/bdd/db/import/placex.feature index f124e482..f3695642 100644 --- a/test/bdd/db/import/placex.feature +++ b/test/bdd/db/import/placex.feature @@ -79,13 +79,11 @@ Feature: Import into placex | osm | class | type | postcode | geometry | | N1 | place | postcode | EA452CD | country:gb | | N2 | place | postcode | E45 23 | country:gb | - | N3 | place | postcode | y45 | country:gb | When importing Then placex contains | object | country_code | rank_search | rank_address | | N1 | gb | 30 | 30 | | N2 | gb | 30 | 30 | - | N3 | gb | 30 | 30 | Scenario: search and address rank for DE postcodes correctly assigned Given the places diff --git a/test/bdd/db/import/postcodes.feature b/test/bdd/db/import/postcodes.feature new file mode 100644 index 00000000..be469fe6 --- /dev/null +++ b/test/bdd/db/import/postcodes.feature @@ -0,0 +1,115 @@ +@DB +Feature: Import of postcodes + Tests for postcode estimation + + Scenario: Postcodes on the object are prefered over those on the address + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | addr+postcode | geometry | + | W93 | highway | residential | 112 DE 344 | :w2N | + | W22 | building | yes | 112 DE 344N | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W22 | 112 DE 344N | + | W93 | 112 DE 344 | + | R4 | 112 DE 34 | + | R34 | 112 DE | + | R1 | 112 | + + Scenario: Postcodes from a road are inherited by an attached building + Given the scene admin-areas + And the named places + | osm | class | type | addr+postcode | geometry | + | W93 | highway | residential | 86034 | :w2N | + And the named places + | osm | class | type | geometry | + | W22 | building | yes | :building:w2N | + When importing + Then placex contains + | object | postcode | parent_place_id | + | W22 | 86034 | W93 | + + Scenario: Postcodes from the lowest admin area are inherited by ways + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + When importing + Then placex contains + | object | postcode | + | W93 | 112 DE 34 | + + Scenario: Postcodes from the lowest admin area with postcode are inherited by ways + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + And the named places + | osm | class | type | admin | geometry | + | R4 | boundary | administrative | 10 | :b2:N | + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + When importing + Then placex contains + | object | postcode | parent_place_id | + | W93 | 112 DE | R4 | + + Scenario: Postcodes from the lowest admin area are inherited by buildings + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | geometry | + | W22 | building | yes | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W22 | 112 DE 34 | + + Scenario: Roads get postcodes from nearby buildings without other info + Given the scene admin-areas + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + And the named places + | osm | class | type | addr+postcode | geometry | + | W22 | building | yes | 445023 | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W93 | 445023 | + + @wip + Scenario: Postcodes from admin boundaries are preferred over estimated postcodes + Given the scene admin-areas + And the named places + | osm | class | type | admin | addr+postcode | geometry | + | R1 | boundary | administrative | 6 | 112 | :b0 | + | R34 | boundary | administrative | 8 | 112 DE | :b1:E | + | R4 | boundary | administrative | 10 | 112 DE 34 | :b2:N | + And the named places + | osm | class | type | geometry | + | W93 | highway | residential | :w2N | + And the named places + | osm | class | type | addr+postcode | geometry | + | W22 | building | yes | 445023 | :building:w2N | + When importing + Then placex contains + | object | postcode | + | W93 | 112 DE 34 | diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index 98def330..86bdea9b 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -23,17 +23,3 @@ Feature: Creation of search terms Then search_name contains | object | name_vector | nameaddress_vector | | N1 | foo | the road | - - Scenario: Roads take over the postcode from attached houses - Given the scene roads-with-pois - And the places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St | :p-S1 | - And the places - | osm | class | type | name | geometry | - | W1 | highway | residential | North St | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - diff --git a/test/bdd/db/query/search_simple.feature b/test/bdd/db/query/search_simple.feature index 417df769..409ed44f 100644 --- a/test/bdd/db/query/search_simple.feature +++ b/test/bdd/db/query/search_simple.feature @@ -25,7 +25,9 @@ Feature: Searching of simple objects | osm | class | type | postcode | geometry | | R1 | boundary | postal_code | 54321 | poly-area:1.0 | And searching for "12345" - Then exactly 0 results are returned + Then results contain + | osm_type | + | P | When searching for "54321" Then results contain | ID | osm_type | osm_id | diff --git a/test/bdd/db/update/poi-inherited-postcode.feature b/test/bdd/db/update/poi-inherited-postcode.feature deleted file mode 100644 index 1b2065e6..00000000 --- a/test/bdd/db/update/poi-inherited-postcode.feature +++ /dev/null @@ -1,57 +0,0 @@ -@DB -Feature: Update of POI-inherited poscode - Test updates of postcodes on street which was inherited from a related POI - - Background: Street and house with postcode - Given the scene roads-with-pois - And the places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St |:p-S1 | - And the places - | osm | class | type | name | geometry | - | W1 | highway | residential | North St | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode remains when way type is changed - When updating places - | osm | class | type | name | geometry | - | W1 | highway | unclassified | North St | :w-north | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode remains when way name is changed - When updating places - | osm | class | type | name | geometry | - | W1 | highway | unclassified | South St | :w-north | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode remains when way geometry is changed - When updating places - | osm | class | type | name | geometry | - | W1 | highway | unclassified | South St | :w-south | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - - Scenario: POI-inherited postcode is added when POI postcode changes - When updating places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 54321 | North St |:p-S1 | - Then search_name contains - | object | nameaddress_vector | - | W1 | 54321 | - - Scenario: POI-inherited postcode remains when POI geometry changes - When updating places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St |:p-S2 | - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - diff --git a/test/bdd/db/update/search_terms.feature b/test/bdd/db/update/search_terms.feature deleted file mode 100644 index 07dbd451..00000000 --- a/test/bdd/db/update/search_terms.feature +++ /dev/null @@ -1,21 +0,0 @@ -@DB -Feature: Update of search terms - Tests that search_name table is updated correctly - - Scenario: POI-inherited postcode remains when another POI is deleted - Given the scene roads-with-pois - And the places - | osm | class | type | housenr | postcode | street | geometry | - | N1 | place | house | 1 | 12345 | North St |:p-S1 | - | N2 | place | house | 2 | | North St |:p-S2 | - And the places - | osm | class | type | name | geometry | - | W1 | highway | residential | North St | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | - When marking for delete N2 - Then search_name contains - | object | nameaddress_vector | - | W1 | 12345 | diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/db_ops.py index fa8cd0de..be2211fa 100644 --- a/test/bdd/steps/db_ops.py +++ b/test/bdd/steps/db_ops.py @@ -287,7 +287,7 @@ def import_and_index_data_from_place_table(context): WHERE class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'""") context.db.commit() - context.nominatim.run_setup_script('index', 'index-noanalyse') + context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse') @when("updating places") def update_place_table(context): diff --git a/test/scenes/bin/osm2wkt.cc b/test/scenes/bin/osm2wkt.cc index d13a4632..22e74b4d 100644 --- a/test/scenes/bin/osm2wkt.cc +++ b/test/scenes/bin/osm2wkt.cc @@ -7,19 +7,49 @@ #include
|---|